I copied the CIFAR10 sample network from the PyTorch tutorial and added more layers, including batch normalization (BN). Even after 45 epochs, the network plateaus at 68% classification accuracy on the test set.
The network consists of:
2 convolutional layers with 3x3 kernels (input size reduces from 32px to 28px)
one max pooling layer (input size reduces from 28px to 14px)
3 convolutional layers with 3x3 kernels (input size reduces from 14px to 8px)
a fully connected network with 3 layers of 256 -> 256 -> 10 neurons
batch normalization applied to all layers, including the convolutional layers, except for the last FC layer
ReLU applied to all the convolutional layers and all the hidden FC layers
Did I build/use anything improperly?
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 16, 3)  # 32 -> 30
        self.bn1_1 = nn.BatchNorm2d(16)
        self.conv1_2 = nn.Conv2d(16, 16, 3)  # 30 -> 28
        self.bn1_2 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)  # 28 -> 14
        self.conv2_1 = nn.Conv2d(16, 16, 3)  # 14 -> 12
        self.bn2_1 = nn.BatchNorm2d(16)
        self.conv2_2 = nn.Conv2d(16, 16, 3)  # 12 -> 10
        self.bn2_2 = nn.BatchNorm2d(16)
        self.conv2_3 = nn.Conv2d(16, 16, 3)  # 10 -> 8
        self.bn2_3 = nn.BatchNorm2d(16)
        self.fc1 = nn.Linear(16 * 8 * 8, 256)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 256)
        self.bn5 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.bn1_1(self.conv1_1(x)))
        x = self.pool(F.relu(self.bn1_2(self.conv1_2(x))))
        x = F.relu(self.bn2_1(self.conv2_1(x)))
        x = F.relu(self.bn2_2(self.conv2_2(x)))
        x = F.relu(self.bn2_3(self.conv2_3(x)))
        x = x.view(-1, 16 * 8 * 8)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        return x
net = Net()
device = 'cuda:0'
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8,
shuffle=True, num_workers=2)
for epoch in range(128):  # loop over the dataset multiple times
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
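The test-set accuracy is measured with a standard evaluation loop along these lines (a minimal sketch; testset and testloader are assumed names, built the same way as the training ones, and net.eval() matters here because of the BN layers):

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=8,
                                         shuffle=False, num_workers=2)
net.eval()  # use BN running statistics instead of batch statistics
correct, total = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        predicted = net(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Test accuracy: {100 * correct / total:.1f}%')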
Since training uses nn.CrossEntropyLoss, the last layer should output raw logits; do not add a sigmoid or softmax there, because the loss applies log-softmax internally.
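A quick standalone check of that behavior (my own sketch, independent of the model above):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)            # raw network outputs, no activation
targets = torch.randint(0, 10, (4,))   # class indices

# cross_entropy == log_softmax followed by nll_loss,
# so the softmax is already built into the loss
a = F.cross_entropy(logits, targets)
b = F.nll_loss(F.log_softmax(logits, dim=1), targets)
assert torch.allclose(a, b)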
I'm trying to train an NN to learn a relationship between a certain range of inputs and some mathematical coefficients, but I'm struggling to figure out why the loss is not decreasing over the course of training. I've made a simple custom MLP and dataset to interface with PyTorch. The dataset takes a 6 x m matrix for the inputs (m is however many different combinations I have) and an r x m matrix for the coefficients. However, when running the training script, the training loss doesn't decrease over the course of 100 epochs. All the shapes match when entering and exiting the NN, so I'm lost as to what might be going wrong. These are the shapes of the input and coefficient matrices for the training and test phases, respectively.
inputs_train = (6, 581)
coefficients_train = (259, 581)
inputs_test = (6, 145)
coefficients_test = (259, 145)
All the relevant code is shown below.
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

class MLP(nn.Module):
    def __init__(self, r):
        super(MLP, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(6, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, r),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
class ExpansionCoefficientsDataset(Dataset):
    def __init__(self, inputs, alphas):
        self.geom_params = inputs
        self.expansion_coefficients = alphas

    def __len__(self):
        return self.expansion_coefficients.shape[1]

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = {'inputs': self.geom_params[:, idx], 'alphas': self.expansion_coefficients[:, idx]}
        return sample
train_dataset = ExpansionCoefficientsDataset(inputs_train, coefficients_train)
test_dataset = ExpansionCoefficientsDataset(inputs_test, coefficients_test)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)
#%%
model = MLP(r_ideal).double()
criterion = torch.nn.L1Loss()
num_epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
batch_size = len(train_loader)
# Training loop starts
for epoch in range(num_epochs):
    print(f"-------------------------------\nEpoch {epoch+1}\n-------------------------------")
    size = len(train_loader.dataset)
    # Iterating over the training dataset
    for i_batch, sample_batched in enumerate(train_loader):
        pred = model(sample_batched['inputs'])
        # Calculating loss
        loss = criterion(pred, sample_batched['alphas'])
        # Updating weights according to the calculated loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i_batch % 8 == 0:
            loss, current = loss.item(), i_batch * sample_batched['inputs'].size()[0]
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
I'm trying to use PyTorch on the IMDB dataset to predict positive and negative reviews. When I get to the training stage, the criterion function raises the following error:
ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([1136, 64, 1]))
After some research, I saw that the error occurs because the model output is a tensor of size [1136, 64, 1], while the criterion expects only batch-level results.
However, I don't know how to solve this error.
My code:
import torch
import spacy
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.legacy import data
from torchtext.legacy.data import Field
from torchtext.legacy import datasets
from torchtext import vocab
import sys
import csv
import re
import nltk
from nltk.corpus import stopwords
import pandas as pd
import random
SEED = 1234
torch.manual_seed(SEED) # For reproducibility
torch.backends.cudnn.deterministic = True
class MLP(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # text = [sent len, batch size]
        embedded = self.embedding(text)
        h_1 = F.relu(self.hidden_fc(embedded))
        # [batch size, output dim]
        return self.fc(h_1.squeeze(0))
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    # round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))  # 0.75 --> 1, 0.4 --> 0
    correct = (rounded_preds == y).float()  # convert into float for division
    acc = correct.sum() / len(correct)
    return acc
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()  # Train mode is on
    for batch in iterator:
        optimizer.zero_grad()  # Reset the gradients
        predictions = model(batch.text)  # forward propagation
        print(predictions.shape)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()  # backward propagation / calculate gradients
        optimizer.step()  # update parameters
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()  # Evaluation mode is on
    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
TEXT = data.Field(tokenize = 'spacy',
tokenizer_language = 'en_core_web_sm',
lower = True)
LABEL = data.LabelField(dtype = torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL) ## IMDB reviews dataset
train_data, valid_data = train_data.split(random_state = random.seed(SEED))
MAX_VOCAB_SIZE = 25_000
TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE) #Build the vocabulary using the top frequent 25K words
LABEL.build_vocab(train_data)
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
(train_data, valid_data, test_data),
batch_size = BATCH_SIZE)
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = MLP(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
To summarize your problem: you have reviews you want to classify as positive or negative. To do so you train an embedding space to map each word to a vector, then output a probability for each sentence, supervised with the corresponding label via a binary cross-entropy loss, nn.BCEWithLogitsLoss.
Your current model is composed of:
nn.Embedding: embeds each word in the sequence independently, converting the input tensor shape from (seq_len, batch_size) to (seq_len, batch_size, embedding_dim), where seq_len is the number of tokens in your input sequence.
An nn.Linear layer reduces the dimensionality by projecting the features; the tensor shape is converted from (seq_len, batch_size, embedding_dim) to (seq_len, batch_size, hidden_dim).
A non-linearity is applied to the sequence of word vectors. Note how the structure of the sentence is retained. Finally, a second linear layer maps from (seq_len, batch_size, hidden_dim) to (seq_len, batch_size, output_dim), still with the sentence structure intact (cf. dim=0 with seq_len).
This is the reason why you are getting (1136, 64, 1) as the predictions shape: 1136 must be your sequence length, 64 is BATCH_SIZE, while 1 is OUTPUT_DIM.
Yet you are trying to classify each sequence as a whole; what you need instead is a single tensor or scalar value per sentence, i.e. a shape of (1, 64, 1). This means reducing the first dimension, which corresponds to the sequence dimension, to a single value.
A straightforward way to reduce this dimension so that the whole sentence is represented by a single vector is to apply an average pool over the sentence. The average of the word vectors in each sentence should capture the overall positive/negative sentiment. You can apply this operator before the final projection in order to stay in a relatively high dimension, either with nn.AdaptiveAvgPool1d with an output size of 1, or simply with torch.Tensor.mean.
Here is a possible implementation with nn.AdaptiveAvgPool1d:
class MLP(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
        self.avg = nn.AdaptiveAvgPool1d(1)  # reduces (N, C, L_in) to (N, C, 1)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # (seq_len, batch_size) = (1136, 64)
        embedded = self.embedding(text)
        # (seq_len, batch_size, embedding_dim) = (1136, 64, 100)
        h_1 = F.relu(self.hidden_fc(embedded))
        # (seq_len, batch_size, hidden_dim) = (1136, 64, 256)
        avg = self.avg(h_1.permute(1, 2, 0))
        # (batch_size, hidden_dim, 1) = (64, 256, 1)
        out = self.fc(avg.squeeze(-1))
        # (batch_size, output_dim) = (64, 1)
        return out
Or with torch.Tensor.mean:
class MLP(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.hidden_fc = nn.Linear(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # (seq_len, batch_size) = (1136, 64)
        embedded = self.embedding(text)
        # (seq_len, batch_size, embedding_dim) = (1136, 64, 100)
        h_1 = F.relu(self.hidden_fc(embedded))
        # (seq_len, batch_size, hidden_dim) = (1136, 64, 256)
        avg = h_1.mean(0)
        # (batch_size, hidden_dim) = (64, 256)
        out = self.fc(avg)
        # (batch_size, output_dim) = (64, 1)
        return out
Alternative methods involve more sophisticated neural network layers, such as recurrent blocks (nn.RNN, nn.LSTM, nn.GRU), as sketched below.
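For instance, a minimal LSTM-based sketch (my own illustration, not part of the original answer): the last hidden state summarizes the whole sequence, playing the role the average pool played above.

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # (seq_len, batch_size) -> (seq_len, batch_size, embedding_dim)
        embedded = self.embedding(text)
        # hidden: (num_layers, batch_size, hidden_dim), last time step only
        _, (hidden, _) = self.lstm(embedded)
        # (batch_size, output_dim): one logit per sentence
        return self.fc(hidden[-1])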
I am a newbie with neural networks. I have trained my model and now I want to test it. I wrote the code with Google's help, but it does not work.
The problem is that I do not understand where the 4th dimension is supposed to come from.
Code is the following:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
transform = transforms.Compose([
transforms.Resize(32),
transforms.CenterCrop(32),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
main_path = 'D:/RTU/dataset/ready_dataset_2classes'
train_data_path = main_path + '/train'
#test_data_path = main_path + '/test'
weigths_path = 'D:/RTU/dataset/weights_done/weights_noise_original037-97%.pt'
train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=transform)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # convolutional layer (sees 32x32x3 image tensor)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        # convolutional layer (sees 16x16x16 tensor)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # convolutional layer (sees 8x8x32 tensor)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # linear layer (64 * 4 * 4 -> 500)
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        # linear layer (500 -> 250)
        self.fc2 = nn.Linear(500, 250)
        # linear layer (250 -> 2)
        self.fc3 = nn.Linear(250, 2)
        # dropout layer (p=0.25)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        # add sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # flatten image input
        x = x.view(-1, 64 * 4 * 4)
        # add dropout layer
        x = self.dropout(x)
        # add 1st hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # add dropout layer
        x = self.dropout(x)
        # add 2nd hidden layer, with relu activation function
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x
# Disable grad
with torch.no_grad():
    # Retrieve item
    index = 1
    item = train_data[index]
    image = item[0]
    true_target = item[1]

    # Loading the saved model
    mlp = Net()
    optimizer = optim.SGD(mlp.parameters(), lr=0.01)
    epoch = 5
    valid_loss_min = np.Inf
    checkpoint = torch.load(weigths_path, map_location=torch.device('cpu'))
    mlp.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    valid_loss_min = checkpoint['valid_loss_min']
    mlp.eval()

    # Generate prediction
    prediction = mlp(image)

    # Predicted class value using argmax
    predicted_class = np.argmax(prediction)

    # Reshape image
    #image = image.reshape(28, 28, 1)

    # Show result
    plt.imshow(image, cmap='gray')
    plt.title(f'Prediction: {predicted_class} - Actual target: {true_target}')
    plt.show()
The code seems to work until mlp.eval(), and then I get the error: Expected 4-dimensional input for 4-dimensional weight [16, 3, 3, 3], but got 3-dimensional input of size [3, 32, 32] instead.
What am I doing wrong?
When you are training neural nets, you feed small batches of input data to your model. Even though it isn't always spelled out when writing layers in PyTorch, if you look at the documentation you can see that layers such as nn.Conv2d receive 4D tensors of shape (N, C, H, W),
with N corresponding to the batch size and C to the number of channels, here 3 because you are using RGB images.
So when testing your model once trained, the test data should be built the same way to be fed into the network.
Thus if you want to feed 1 image to your network, you must reshape it properly:
myimage = myimage.reshape(-1, 3, 32, 32)
print(myimage.shape)
# (1, 3, 32, 32)
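Equivalently (an alternative sketch; image is the 3x32x32 tensor retrieved from train_data in the question), you can add the missing batch dimension with unsqueeze:

input = image.unsqueeze(0)   # (3, 32, 32) -> (1, 3, 32, 32)
prediction = mlp(input)      # now matches the 4D weight [16, 3, 3, 3]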
I am trying to implement binary classification. I have a 100K-image dataset (3 channels, pre-resized to 224 x 224 px) and am trying to train a model to predict whether a picture is safe for work or not. I am a data engineer with a statistics background, and I have been working on the model for the last 5-10 days. I have tried to implement solutions based on suggestions, but unfortunately the loss didn't decrease.
Here is the class implemented by using PyTorch Lightning,
from .dataset import CloudDataset
from .split import DatasetSplit
from pytorch_lightning import LightningModule
from pytorch_lightning.metrics import Accuracy
from torch import stack
from torch.nn import BCEWithLogitsLoss, Conv2d, Dropout, Linear, MaxPool2d, ReLU
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from torchvision.transforms import ToTensor
from util import logger
from util.config import config
class ClassifyModel(LightningModule):
    def __init__(self):
        super(ClassifyModel, self).__init__()
        # custom dataset split class
        ds = DatasetSplit(config.s3.bucket, config.train.ratio)
        # split records for train, validation and test
        self._train_itr, self._valid_itr, self._test_itr = ds.split()

        self.conv1 = Conv2d(3, 32, 3, padding=1)
        self.conv2 = Conv2d(32, 64, 3, padding=1)
        self.conv3 = Conv2d(64, 64, 3, padding=1)
        self.pool = MaxPool2d(2, 2)
        self.fc1 = Linear(7 * 28 * 64, 512)
        self.fc2 = Linear(512, 16)
        self.fc3 = Linear(16, 4)
        self.fc4 = Linear(4, 1)
        self.dropout = Dropout(0.25)
        self.relu = ReLU(inplace=True)
        self.accuracy = Accuracy()

    def forward(self, x):
        # comments are shape before execution
        # [32, 3, 224, 224]
        x = self.pool(self.relu(self.conv1(x)))
        # [32, 32, 112, 112]
        x = self.pool(self.relu(self.conv2(x)))
        # [32, 64, 56, 56]
        x = self.pool(self.relu(self.conv3(x)))
        # [32, 64, 28, 28]
        x = self.pool(self.relu(self.conv3(x)))
        # [32, 64, 14, 14]
        x = self.dropout(x)
        # [32, 64, 14, 14]
        x = x.view(-1, 7 * 28 * 64)
        # [32, 12544]
        x = self.relu(self.fc1(x))
        # [32, 512]
        x = self.relu(self.fc2(x))
        # [32, 16]
        x = self.relu(self.fc3(x))
        # [32, 4]
        x = self.dropout(self.fc4(x))
        # [32, 1]
        x = x.squeeze(1)
        # [32]
        return x

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)

    def training_step(self, batch, batch_idx):
        image, target = batch
        target = target.float()
        output = self.forward(image)
        loss = BCEWithLogitsLoss()
        output = loss(output, target)
        logits = self(image)
        self.accuracy(logits, target)
        return {'loss': output}

    def validation_step(self, batch, batch_idx):
        image, target = batch
        target = target.float()
        output = self.forward(image)
        loss = BCEWithLogitsLoss()
        output = loss(output, target)
        return {'val_loss': output}

    def collate_fn(self, batch):
        batch = list(filter(lambda x: x is not None, batch))
        return default_collate(batch)

    def train_dataloader(self):
        transform = ToTensor()
        workers = 0 if config.train.test else config.train.workers
        # custom dataset class that reads files from s3
        cds = CloudDataset(config.s3.bucket, self._train_itr, transform)
        return DataLoader(
            dataset=cds,
            batch_size=32,
            shuffle=True,
            num_workers=workers,
            collate_fn=self.collate_fn,
        )

    def val_dataloader(self):
        transform = ToTensor()
        workers = 0 if config.train.test else config.train.workers
        # custom dataset class that reads files from s3
        cds = CloudDataset(config.s3.bucket, self._valid_itr, transform)
        return DataLoader(
            dataset=cds,
            batch_size=32,
            num_workers=workers,
            collate_fn=self.collate_fn,
        )

    def test_dataloader(self):
        transform = ToTensor()
        workers = 0 if config.train.test else config.train.workers
        # custom dataset class that reads files from s3
        cds = CloudDataset(config.s3.bucket, self._test_itr, transform)
        return DataLoader(
            dataset=cds,
            batch_size=32,
            shuffle=True,
            num_workers=workers,
            collate_fn=self.collate_fn,
        )

    def validation_epoch_end(self, outputs):
        avg_loss = stack([x['val_loss'] for x in outputs]).mean()
        logger.info(f'Validation loss is {avg_loss}')

    def training_epoch_end(self, outs):
        accuracy = self.accuracy.compute()
        logger.info(f'Training accuracy is {accuracy}')
Here is the custom log output,
epoch 0
Validation loss is 0.5988735556602478
Training accuracy is 0.4441356360912323
epoch 1
Validation loss is 0.6406065225601196
Training accuracy is 0.4441356360912323
epoch 2
Validation loss is 0.621654748916626
Training accuracy is 0.443579763174057
epoch 3
Validation loss is 0.5089989304542542
Training accuracy is 0.4580322504043579
epoch 4
Validation loss is 0.5484663248062134
Training accuracy is 0.4886047840118408
epoch 5
Validation loss is 0.5552918314933777
Training accuracy is 0.6142301559448242
epoch 6
Validation loss is 0.661466121673584
Training accuracy is 0.625903308391571
The problem may be related to the optimizer or the loss function, but I couldn't figure it out.
Assume that I have 77 samples to train my CNN, and my batch size is 10. Then the last batch has a batch size of 7 instead of 10. Somehow when I pass it to the loss function such as nn.MSELoss(), it gives me the error:
RuntimeError: The size of tensor a (10) must match the size of tensor b (7) at non-singleton dimension 1
So does PyTorch not support batches of different sizes?
My code in doubt:
import numpy as np
import torch
from torch import nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, (5, 4))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(64, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
model = Net()
batch_size = 10
# Generating Artifical data
x_train = torch.randn((77,1,20,20))
y_train = torch.randint(0,10,size=(77,),dtype=torch.float)
trainset = torch.utils.data.TensorDataset(x_train,y_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 10 == 0:
            print('epoch{}, step{}, loss: {}'.format(epoch + 1, i + 1, running_loss))
            # print("frac post = {}".format(frac_post))
            running_loss = 0.0
The problem is not due to the batch size, but to a failure to broadcast properly between the 10 outputs of your CNN and the single label provided in each example.
If you look at the model output and label tensor shapes during the batch where the error is thrown,
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7])
you'll see that the labels are stored in a one-dimensional tensor. According to PyTorch broadcasting rules, two tensors are broadcastable only if they are compatible in all trailing dimensions. In this case, the trailing dimension of the model output (10) is incompatible with that of the labels (7).
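A quick standalone illustration of that rule (my own sketch):

import torch

out = torch.randn(7, 10)        # model output for the last batch
labels = torch.randn(7)         # 1-D labels

# trailing dims 10 vs 7: not broadcastable
# out - labels                  # would raise the RuntimeError above

# trailing dims 10 vs 1: broadcastable, each label repeats across dim 1
diff = out - labels.unsqueeze(1)
print(diff.shape)               # torch.Size([7, 10])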
To fix, either add a dummy dimension to the label (assuming you actually want to broadcast the labels to match your ten network outputs), or define a network with scalar outputs. For example:
y_train = torch.randint(0,10,size=(77,1),dtype=torch.float)
results in
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7,1])
# these are broadcastable
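For the second option, here is a minimal sketch (my own variant of the question's Net, under a hypothetical name ScalarNet): the final layer emits one value per sample, so the output already matches the (batch,) label shape and no dummy dimension is needed.

class ScalarNet(nn.Module):
    def __init__(self):
        super(ScalarNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, (5, 4))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(64, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)  # one output per sample instead of 10

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x).squeeze(1)  # (batch, 1) -> (batch,), matches y_train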