I am in the process of training my first CNN to solve a multi-class classification problem. I am feeding in images of animals, each corresponding to one of 182 classes; however, I have run into some issues. Firstly, my code appears to get stuck on optimiser.step(); it has been calculating this for roughly 30 minutes. Secondly, my training loss is increasing:
EPOCH: 0 BATCH: 1999 LOSS: 1.5790680234357715
EPOCH: 0 BATCH: 3999 LOSS: 2.9340945997834207
If anyone would be able to provide some guidance, that would be greatly appreciated. Below is my code:
# Loading data.
# Every image is resized to 448x448 and converted to a tensor; ConvNet's first
# fully connected layer is sized for exactly this resolution.
# NOTE(review): the split name ("train") and the `transform=` keyword were
# missing from the truncated call and are reconstructed here -- confirm against
# the dataset API in use.
train_data = dataset.get_subset(
    "train",
    transform=transforms.Compose(
        [transforms.Resize((448, 448)), transforms.ToTensor()]
    ),
)
train_loader = get_train_loader("standard", train_data, batch_size=16)
# defining model
class ConvNet(nn.Module):
    """Small two-convolution CNN that outputs per-class log-probabilities.

    The first fully connected layer is sized for 3x448x448 inputs:
    448 -> conv1 (k=3, s=1) -> 446 -> pool/2 -> 223 -> conv2 (k=3, s=3) -> 74
    -> pool/2 -> 37, giving 37 * 37 * 16 flattened features.

    Args:
        num_classes: Number of output classes. Defaults to 182.
    """

    def __init__(self, num_classes=182):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 3)
        self.fc1 = nn.Linear(37 * 37 * 16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, X):
        """Return log-probabilities of shape (batch, num_classes).

        The output is already log-softmaxed, which is what ``nn.NLLLoss``
        expects. Passing it to ``nn.CrossEntropyLoss`` is redundant but
        harmless: log-softmax of log-probabilities returns them unchanged.
        """
        X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)
        X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)
        X = torch.flatten(X, 1)  # keep the batch dimension
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modell.parameters(), lr=0.001)
import time
start_time = time.time()
epochs = 2
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    running_loss = 0.0
    for b, (X_train, Y_train, meta) in enumerate(train_loader):
        b += 1  # batch starts at 1
        # Zero the parameter gradients; otherwise they accumulate across batches.
        optimizer.zero_grad()
        # Pass training data to the model as float (loss is computed below).
        output = modell(X_train.float())
        # Calculate the loss of outputs with respect to ground truth values.
        loss = criterion(output, Y_train)
        # Backpropagate the loss through the network.
        loss.backward()
        # Perform the parameter update based on the current gradient.
        optimizer.step()
        predicted = torch.max(output.data, 1)[1]
        batch_corr = (predicted == Y_train).sum()  # True (1) or False (0)
        trn_corr += batch_corr
        running_loss += loss.item()
        # NOTE: because b is 1-based, the first report covers 1999 batches but
        # is still divided by 2000, so it reads slightly low.
        if b % 2000 == 1999:
            print(f"EPOCH: {i} BATCH: {b} LOSS: {running_loss/2000}")
            running_loss = 0.0
As for the loss, it may be due to the model. The model has some room for improvement. Only 2 convolution layers are not sufficient for your data, nor is expanding to only 16 channels. Use more convolution layers with more channels. For example, 5 conv layers with channels of 16, 32, 32, 64, 64. Experiment with different numbers of layers and channels to see which one is best. Also, a good learning rate for Adam is 3e-4. To track the model's progress more easily, I'd recommend decreasing the interval at which it prints the loss.
About the data, are there enough instances of each class? Is it normalized between 0 and 1?
I'm trying to train an NN to develop a relationship between a certain range of inputs and some mathematical coefficients but I'm struggling to figure out why the loss is not decreasing over the course of training. I've made a simple custom MLP and dataset to interface with the PyTorch module. The dataset will take in a 6 x m matrix for the inputs(m for however many different combinations I have) and a r x m matrix for the coefficients. However, when running the training script the training loss doesn't decrease over the course of 100 epochs. All the shapes match when entering and exiting the NN so I'm lost as to what might be going wrong. These are the shapes of the input and coefficients matrices for the training and test phase respectively.
inputs_train = (6, 581)
coefficients_train = (259, 581)
inputs_test = (6, 145)
coefficients_test = (259, 145)
All the relevant code is shown below.
class MLP(nn.Module):
    """Fully connected network mapping 6 input parameters to ``r`` outputs.

    Args:
        r: Number of output values per sample.
    """

    def __init__(self, r):
        super().__init__()
        self.flatten = nn.Flatten()
        # A ReLU follows every hidden Linear layer. Without non-linearities
        # the whole stack collapses into a single affine map, however many
        # layers it has.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(6, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, r),
        )

    def forward(self, x):
        """Flatten each sample and return the raw outputs of the stack."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
class ExpansionCoefficientsDataset(Dataset):
    """Dataset over column-oriented input/target matrices.

    Both matrices store one sample per column; sample ``idx`` is column
    ``idx`` of each.

    Args:
        inputs: 2-D array/tensor of inputs, one column per sample.
        alphas: 2-D array/tensor of targets, one column per sample.

    Raises:
        ValueError: If the two matrices have different numbers of columns.
    """

    def __init__(self, inputs, alphas):
        if inputs.shape[1] != alphas.shape[1]:
            raise ValueError(
                "inputs and alphas must have the same number of columns "
                f"(got {inputs.shape[1]} and {alphas.shape[1]})"
            )
        self.geom_params = inputs
        self.expansion_coefficients = alphas

    def __len__(self):
        # Samples are stored along axis 1.
        return self.expansion_coefficients.shape[1]

    def __getitem__(self, idx):
        # Samplers may pass tensor indices; convert them to plain Python values.
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return {
            'inputs': self.geom_params[:, idx],
            'alphas': self.expansion_coefficients[:, idx],
        }
train_dataset = ExpansionCoefficientsDataset(inputs_train, coefficients_train)
test_dataset = ExpansionCoefficientsDataset(inputs_test, coefficients_test)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)
# .double() makes the model parameters float64.
model = MLP(r_ideal).double()
criterion = torch.nn.L1Loss()
num_epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# NOTE: despite its name this is the number of batches per epoch, not the
# batch size; it is not used below.
batch_size = len(train_loader)
size = len(train_loader.dataset)
# Training loop starts
for epoch in range(num_epochs):
    print(f"-------------------------------\nEpoch {epoch+1}\n-------------------------------")
    # Iterating over the training dataset
    for i_batch, sample_batched in enumerate(train_loader):
        pred = model(sample_batched['inputs'])
        # Calculating loss
        loss = criterion(pred, sample_batched['alphas'])
        # Updating weights according to the calculated loss. Without these
        # three calls the parameters never change and the loss stays flat.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i_batch % 8 == 0:
            loss, current = loss.item(), i_batch * sample_batched['inputs'].size()[0]
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
Looking for tips on building a simple image classifier for CAPTCHA images of text where there are only two possible fonts per letter. Here's an example image:
Approach thus far has been to try to break up the image into 6 equal-size images to try to get individual character images, and build a classifier for these (example below).
Is there a simpler way to go about this? Any tips on how to design the actual model? (A relatively simple CNN should suffice here perhaps?)
Edit: questions on building a suitable model below.
I've tried to build a cursory model on top of resnet50 to subpar effect... this seems like the kind of image classification task that should be relatively trivial.
Any tips on model design greatly appreciated.
Code below:
# Transfer learning: freeze a pretrained ResNet-50 backbone and train only a
# new classification head with 26 outputs.
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
# nn.NLLLoss expects log-probabilities, so the head must end in LogSoftmax.
# The ReLU keeps the two Linear layers from collapsing into one affine map.
model.fc = nn.Sequential(nn.Linear(2048, 512),
                         nn.ReLU(),
                         nn.Linear(512, 26),
                         nn.LogSoftmax(dim=1))
# Inputs are moved to `device` below, so the model must live there too.
model.to(device)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
epochs = 10
steps = 0
running_loss = 0
print_every = 10
train_losses, test_losses = [], []
for epoch in range(epochs):
    for inputs, labels in train_loader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            # Evaluation mode: batch-norm layers use their running statistics.
            model.eval()
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    logps = model(inputs)
                    batch_loss = criterion(logps, labels)
                    test_loss += batch_loss.item()
                    # Convert log-probabilities to probabilities and score
                    # the top-1 prediction against the labels.
                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(val_loader):.3f}.. "
                  f"Test accuracy: {accuracy/len(val_loader):.3f}")
            running_loss = 0
            model.train()
Results from the above look as follows, hitting <50% accuracy:
The following code (MNIST MLP in PyTorch) delivers approximately the same training loss regardless of whether the last layer in the forward pass returns (1) `F.log_softmax(x)` or (2) the raw output `x`:
Option 1 is incorrect because I use criterion = nn.CrossEntropyLoss() and yet the results are almost identical. Am I missing anything?
import torch
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# convert data to torch.FloatTensor
transform = transforms.ToTensor()
# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                            download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                           download=True, transform=transform)
# prepare data loaders
# NOTE(review): the DataLoader calls were truncated after `batch_size=...,`.
# `num_workers` is the only setting defined above that was not yet used, so it
# is assumed to be the missing argument -- confirm.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """MLP for 28x28 images: 784 -> 512 -> 256 -> 10, dropout after each hidden layer."""

    def __init__(self):
        super().__init__()
        # linear layer (784 -> 512 hidden units)
        self.fc1 = nn.Linear(28 * 28, 512)
        self.dropout1 = nn.Dropout(p=0.2, inplace=False)
        # linear layer (512 -> 256 hidden units)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(p=0.2, inplace=False)
        # Not used by forward(); kept so the attribute still exists.
        self.dropout = nn.Dropout(p=0.2, inplace=False)
        # output layer (256 -> 10 class scores)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10)."""
        # flatten image input
        x = x.view(-1, 28 * 28)
        # hidden layers with relu activation, each followed by dropout
        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout2(F.relu(self.fc2(x)))
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # so `return F.log_softmax(x)` here would be redundant.
        return self.fc3(x)
# Use the GPU when one is available; model and data must be on the same device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# initialize the NN
model = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 10
model.train()  # prep model for training
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    start = time.time()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # clear gradients left over from the previous batch
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        # backward pass, then a single optimisation step
        loss.backward()
        optimizer.step()
        # loss.item() is a per-batch mean; weight it by the batch size so the
        # epoch figure below is a per-sample average
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    # NOTE(review): the format() arguments were truncated; epoch number,
    # average loss and elapsed seconds are assumed from the format string.
    print('Epoch: {} \tTraining Loss: {:.6f} \ttime: {:.6f}'.format(
        epoch + 1, train_loss, time.time() - start))
For numerical stability, the nn.CrossEntropyLoss() is implemented with the softmax layer inside it. So you should NOT use the softmax layer in your forward pass.
From the docs (https://pytorch.org/docs/stable/nn.html#crossentropyloss):
This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
Using the softmax layer in the forward pass will lead to worse metrics because the gradient magnitudes are lowered (thus, the weight updates are also lowered). I learned it the hard way!
I guess your problem is that the losses are similar at the beginning of training, but by the end of training they should not be. It is usually a good sanity check to overfit your model on one batch of data. The model should reach 100% accuracy if the batch is small enough. If the model is taking too long to train, then you probably have a bug somewhere.
Hope that helps =)
I am trying to train a model in pytorch.
input: 686-array
first layer: 64-array
second layer: 2-array
output: prediction, either 1 or 0
this is what I have so far:
class autoencoder(nn.Module):
    """Binary classifier head: 686 input features -> 256 hidden -> 2 logits."""

    def __init__(self):
        super().__init__()
        # Despite the attribute name there is no nn.Softmax at the end:
        # nn.CrossEntropyLoss applies log-softmax itself, so the model must
        # emit raw logits.
        self.encoder_softmax = nn.Sequential(
            nn.Linear(686, 256),
            # Non-linearity so the two Linear layers do not collapse into one
            # affine map.
            nn.ReLU(),
            nn.Linear(256, 2),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 2)."""
        return self.encoder_softmax(x)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = net.to(device)
iterations = 10
# Was 0.98, which is far too high for Adam and makes the loss oscillate
# instead of decreasing; values around 1e-2 to 1e-3 are appropriate.
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    net.parameters(), lr=learning_rate, weight_decay=1e-5)
for epoch in range(iterations):
    loss = 0.0
    print("train_dl len: ", len(train_dl))
    net.train()
    for i, data in enumerate(train_dl, 0):
        inputs, labels, vectorize = data
        # CrossEntropyLoss needs integer class indices and float inputs.
        labels = labels.long().to(device)
        inputs = inputs.float().to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        train_loss = criterion(outputs, labels)
        # Without the backward pass and optimiser step the weights never
        # change, so the loss cannot go down.
        train_loss.backward()
        optimizer.step()
        loss += train_loss.item()
    # mean per-batch loss for this epoch
    loss = loss / len(train_dl)
but when I train the model, the loss is not going down. What am I doing wrong?
You're using nn.CrossEntropyLoss as the loss function, which applies log-softmax, but you also apply softmax in the model:
self.encoder_softmax = nn.Sequential(
nn.Linear(686, 256),
nn.Linear(256, 2),
nn.Softmax() # <- needs to be removed
The output of your model should be the raw logits, without the nn.Softmax.
You should also lower the learning rate, because a learning rate of 0.98 is very high, which makes the training much less stable, and you'll likely see the loss oscillate. A more appropriate learning rate would be on the order of 0.01 or 0.001.
I am a beginner with TensorFlow and I really need some help with this task. I am doing some image pixels classification, and my problem is set this way:
My input is an array X that contains 20 values. These values represent 4 pixels (there are 5 values per pixel). My output is an array of 4 values, where each value can be either 1 or 0, meaning that the specific pixel may or may not have a specific characteristic. As you can imagine, it is possible that my y is of the form y=[1, 0, 0, 1], so I can have multiple classes for each instance.
To perform this classification task, I organized a neural network with an input layer of 20 neurons, a hidden layer of 15, followed by another hidden layer of 10 and eventually there is an output layer of 4. The activation functions for the hidden layers are ReLU, and I applied 50% dropout to them. As a loss function to minimize, I use tensorflow's sigmoid_cross_entropy_with_logits, because it computes independent probabilities for each class, allowing to perform multilabel classification.
When I first attempted to train the network, I was getting all NaN results, because of (I think) an exploding gradient problem. This was apparently solved after I lowered the learning rate.
My problem now is that this network is not converging at all, and I believe this is because something is wrong with the cost and activation functions that I am using.
Note: the input has been scaled with sklearn.preprocessing.StandardScaler
Here is the code:
import tensorflow as tf
# Graph definition (TensorFlow 1.x): 20 -> 15 -> 10 -> 4 multilabel classifier.
n_inputs = 20
n_hidden1 = 15
n_hidden2 = 10
n_outputs = 4
dropout_rate = 0.5
# NOTE(review): with plain gradient descent a step size of 1e-9 leaves the
# weights practically unchanged, which would look like "not converging".
# Consider a much larger value once the source of the NaNs is understood.
learning_rate = 0.000000001
# This is a boolean value that indicates when to apply dropout
training = tf.placeholder_with_default(True, shape=(), name="training")
X = tf.placeholder(tf.float64, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None, 4), name="y")
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)
    # Feed the dropped-out activations forward. The original passed `hidden1`
    # here, which silently bypassed the first dropout layer.
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, name="hidden2", activation=tf.nn.relu)
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    logits = tf.layers.dense(hidden2_drop, n_outputs, name="outputs")
with tf.name_scope("loss"):
    # Independent sigmoid cross-entropy per output, averaged over all outputs
    # and samples (multilabel setting).
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(y, tf.float64), logits=tf.cast(logits, tf.float64))
    loss = tf.reduce_mean(xentropy, name='loss')
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 50
batch_size = 50
n_batches = 1000000
with tf.Session() as sess:
    # Variables must be initialised before the first training step.
    sess.run(init)
    for epoch in range(n_epochs):
        for i in range(n_batches):
            X_batch = np.asarray(X_batches[i]).reshape(-1, 20)
            y_batch = np.asarray(y_batches[i]).reshape(-1, 4)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if (i % 10000) == 0:
                # Progress report on the current training batch, with dropout
                # disabled for the evaluation pass.
                raws = logits.eval(feed_dict={X: X_batch, training: False})
                print("epoca = "+str(epoch))
                print("iterazione = "+str(i))
                print("accuratezza = "+str(get_global_accuracy_rate(raws, y_batch)))
                print("X = "+str(X_batch[0])+ " y = "+str(y_batch[0]))
                print("raws = "+str(raws[0])+" pred = " + str(get_prediction(raws[0])))
    save_path = saver.save(sess, "./my_model_final_1.ckpt")
Thank you so much in advance for any help!!