How to use view in 4-dimensional neural network - python

I'm new to Neural Networks, so I hope u will forgive me if this is really basic stuff.
So far i managed to use view once in my first very simple project and now i tried to understand something more complex.
I found this code on github and I'm trying to understand it.
This is how network is built.
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)
x = x.view(-1, 64 * 12 * 12)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
I'm trying to get the image of number network failed to recognize, yet I don't really know how to use view here.
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
mistakes = 0
images, labels = next(iter(test_loader))
img = images[0].view(1, 784)
with torch.no_grad():
logps = model(img)
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
I couldn't find usefull examples so I would be gratefull for any kind of hint.
Every time I modify it I get one of these two errors.
builtins.RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 2-dimensional input of size [1, 784] instead
builtins.RuntimeError: shape '[1, 784, 3, 3]' is invalid for input of size 784
Thanks in advance for any kind of help.

Go with this
Since your dataloader already has data in 4D . No need to change it using .view()
And your CNN expects data in 4D
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
mistakes = 0
""" NO NEED OF THIS
images, labels = next(iter(test_loader))
img = images[0].view(1, 784) # This makes your image to 1D
"""
with torch.no_grad():
#logps = model(img)
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))

Related

Pytorch unsure of NN behaviour during training

I'm trying to train an NN to develop a relationship between a certain range of inputs and some mathematical coefficients but I'm struggling to figure out why the loss is not decreasing over the course of training. I've made a simple custom MLP and dataset to interface with the PyTorch module. The dataset will take in a 6 x m matrix for the inputs(m for however many different combinations I have) and a r x m matrix for the coefficients. However, when running the training script the training loss doesn't decrease over the course of 100 epochs. All the shapes match when entering and exiting the NN so I'm lost as to what might be going wrong. These are the shapes of the input and coefficients matrices for the training and test phase respectively.
inputs_train = (6, 581)
coefficients_train = (259, 581)
inputs_test = (6, 145)
coefficients_test = (259, 145)
All the relevant code is shown below.
class MLP(nn.Module):
def __init__(self, r):
super(MLP, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(6, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, r),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
class ExpansionCoefficientsDataset(Dataset):
def __init__(self, inputs, alphas):
self.geom_params = inputs
self.expansion_coefficients = alphas
def __len__(self):
return self.expansion_coefficients.shape[1]
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
sample = {'inputs': self.geom_params[:,idx], 'alphas': self.expansion_coefficients[:,idx]}
return sample
train_dataset = ExpansionCoefficientsDataset(inputs_train, coefficients_train)
test_dataset = ExpansionCoefficientsDataset(inputs_test, coefficients_test)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)
#%%
model = MLP(r_ideal).double()
criterion = torch.nn.L1Loss()
num_epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
batch_size = len(train_loader)
# Training loop starts
for epoch in range(num_epochs):
print(f"-------------------------------\nEpoch {epoch+1}\n-------------------------------")
size = len(train_loader.dataset)
# Iterating over the training dataset
for i_batch, sample_batched in enumerate(train_loader):
pred = model(sample_batched['inputs'])
# Calculating loss
loss = criterion(pred, sample_batched['alphas'])
# Updating weights according
# to the calculated loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
if i_batch % 8 == 0:
loss, current = loss.item(), i_batch * sample_batched['inputs'].size()[0]
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

How to use TensorFlow RelativePositionEmbedding layers with batches?

I'm trying to incorporate a RelativePositionEmbedding layer into a transformer example. The embedding layer can be found in the build_model method below:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from official.nlp.modeling.layers import position_embedding
def readucr(filename):
data = np.loadtxt(filename, delimiter="\t")
y = data[:, 0]
x = data[:, 1:]
return x, y.astype(int)
root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"
x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv")
x_test, y_test = readucr(root_url + "FordA_TEST.tsv")
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
n_classes = len(np.unique(y_train))
idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]
y_train[y_train == -1] = 0
y_test[y_test == -1] = 0
# Build model
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
# Attention and Normalization
x = layers.MultiHeadAttention(
key_dim=head_size, num_heads=num_heads, dropout=dropout
)(inputs, inputs)
x = layers.Dropout(dropout)(x)
x = layers.LayerNormalization(epsilon=1e-6)(x)
res = x + inputs
# Feed Forward Part
x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
x = layers.Dropout(dropout)(x)
x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
x = layers.LayerNormalization(epsilon=1e-6)(x)
return x + res
def build_model(
input_shape,
head_size,
num_heads,
ff_dim,
num_transformer_blocks,
mlp_units,
dropout=0,
mlp_dropout=0
):
inputs = keras.Input(shape=input_shape)
x = inputs # => shape is (None, 500, 1)
x = position_embedding.RelativePositionEmbedding(hidden_size=500)(x) # Now (500, 500)
# Add batch dimension back. But how to accept batch size greater than 1?
x = layers.Lambda(lambda x: tf.expand_dims(x, axis=0))(x) # Now (1, 500, 500)
for _ in range(num_transformer_blocks):
x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
for dim in mlp_units:
x = layers.Dense(dim, activation="relu")(x)
x = layers.Dropout(mlp_dropout)(x)
outputs = layers.Dense(n_classes, activation="softmax")(x)
return keras.Model(inputs, outputs)
input_shape = x_train.shape[1:]
model = build_model(
input_shape,
head_size=256,
num_heads=4,
ff_dim=4,
num_transformer_blocks=4,
mlp_units=[128],
mlp_dropout=0.4,
dropout=0.25
)
model.compile(
loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.Adam(learning_rate=1e-4),
metrics=["sparse_categorical_accuracy"]
)
callbacks = [
keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
keras.callbacks.TensorBoard(log_dir="./logs")
]
model.fit(
x_train,
y_train,
validation_split=0.2,
epochs=5,
batch_size=64,
callbacks=callbacks
)
model.evaluate(x_test, y_test, verbose=1)
The following blows up because I've specified batch_size of 64. However everything works fine when setting batch_size to 1 because the expand_dims operation only adds a size 1 batch dimension, as opposed to an Input layer that adds None for arbitrary batch sizes.
So how can I add "back in" a batch dimension greater than 1? Is there another way I should be using the RelativePositionEncoding layer to not interfere with batch sizes?
I've tried looking into the Reshape method as well without success.
I thought this question would solve my issue, but this only adds a leading 1 dimension like the Lambda layer I incorporated, rather than None, which I think would resolve the issue.
I do not think you can pass the output of the RelativePositionEmbedding directly to another layer. If you take a look here, the authors are adding the output of this layer to the original input. Your code will work if you change your model like this:
# ....
# Your code
def build_model(
input_shape,
head_size,
num_heads,
ff_dim,
num_transformer_blocks,
mlp_units,
dropout=0,
mlp_dropout=0
):
inputs = keras.Input(shape=input_shape)
x = inputs # => shape is (None, 500, 1)
pos_encoding = position_embedding.RelativePositionEmbedding(hidden_size=500)(x) # Now (500, 500)
x = inputs + pos_encoding
for _ in range(num_transformer_blocks):
x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
for dim in mlp_units:
x = layers.Dense(dim, activation="relu")(x)
x = layers.Dropout(mlp_dropout)(x)
outputs = layers.Dense(n_classes, activation="softmax")(x)
return keras.Model(inputs, outputs)
# ....
# Your code
45/45 [==============================] - 54s 1s/step - loss: 1.0281 - sparse_categorical_accuracy: 0.5111 - val_loss: 0.7387 - val_sparse_categorical_accuracy: 0.5645
42/42 [==============================] - 8s 187ms/step - loss: 0.7440 - sparse_categorical_accuracy: 0.5424
[0.7440475225448608, 0.5424242615699768]

Training loss is increasing in CNN?

I am in the process of training my first CNN to solve a multi-class classification problem. I am feeding in images of animals corresponding to one of 182 classes, however I have ran into some issues. Firstly my code appears to get stuck on optimiser.step(), it has been calculating this for roughly 30 minutes. Secondly my training loss is increasing:
EPOCH: 0 BATCH: 1999 LOSS: 1.5790680234357715
EPOCH: 0 BATCH: 3999 LOSS: 2.9340945997834207
If any one would be able to provide some guidance that would be greatly appreciated. Below is my code
#loading data
train_data = dataset.get_subset(
"train",
transform=transforms.Compose(
[transforms.Resize((448, 448)), transforms.ToTensor()]
),
)
train_loader = get_train_loader("standard", train_data, batch_size=16)
#definind model
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 3, 1)
self.conv2 = nn.Conv2d(6, 16, 3, 3)
self.fc1 = nn.Linear(37*37*16, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 182)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2)
X = F.relu(self.conv2(X))
X = F.max_pool2d(X, 2, 2)
X = torch.flatten(X, 1)
X = F.relu(self.fc1((X)))
X = F.relu(self.fc2((X)))
X = self.fc3(X)
return F.log_softmax(X, dim=1)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modell.parameters(), lr=0.001)
import time
start_time = time.time()
#VARIABLES (TRACKER)
epochs = 2
train_losses = []
test_losses = []
train_correct = []
test_correct = []
# FOR LOOP EPOCH
for i in range(epochs):
trn_corr = 0
tst_corr = 0
running_loss = 0.0
#TRAIN
for b, (X_train, Y_train, meta) in enumerate(train_loader):
b+=1 #batch starts at 1
#zero parameter gradients
optimizer.zero_grad()
# pass training to model as float (later compute loss)
output = modell(X_train.float())
#Calculate the loss of outputs with respect to ground truth values
loss = criterion(output, Y_train)
#Backpropagate the loss through the network
loss.backward()
#perform parameter update based on the current gradient
optimizer.step()
predicted = torch.max(output.data, 1)[1]
batch_corr = (predicted == Y_train).sum() # True (1) or False (0)
trn_corr += batch_corr
running_loss += loss.item()
if b%2000 == 1999:
print(f"EPOCH: {i} BATCH: {b} LOSS: {running_loss/2000}")
running_loss = 0.0
train_losses.append(loss)
train_correct.append(trn_corr)
As for the loss, it may be due to the model. The model has some rooms for improvement. Only 2 convolution layers is not sufficient for your data, as well as only expanding to 16 channels. Use more convolution layers with more channels. For example, 5 conv layers with channels of 16, 32, 32, 64, 64. Experiment with different numbers of layers and channels to see which one is best. Also, a good learning rate for Adam is 3e-4.To more easily track the models progress, Iā€™d recommend decreasing the interval at which it prints the loss so you can more easily track progress.
About the data, are there enough instances of each class? Is it normalized between 0 and 1?

RuntimeError: The size of tensor a (133) must match the size of tensor b (10) at non-singleton dimension 1

I am training a CNN model. I am facing issue while doing the training iteration for my model. The code is as below:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
#convo layers
self.conv1 = nn.Conv2d(3,32,3)
self.conv2 = nn.Conv2d(32,64,3)
self.conv3 = nn.Conv2d(64,128,3)
self.conv4 = nn.Conv2d(128,256,3)
self.conv5 = nn.Conv2d(256,512,3)
#pooling layer
self.pool = nn.MaxPool2d(2,2)
#linear layers
self.fc1 = nn.Linear(512*5*5,2048)
self.fc2 = nn.Linear(2048,1024)
self.fc3 = nn.Linear(1024,133)
#dropout layer
self.dropout = nn.Dropout(0.3)
def forward(self, x):
#first layer
x = self.conv1(x)
x = F.relu(x)
x = self.pool(x)
#x = self.dropout(x)
#second layer
x = self.conv2(x)
x = F.relu(x)
x = self.pool(x)
#x = self.dropout(x)
#third layer
x = self.conv3(x)
x = F.relu(x)
x = self.pool(x)
#x = self.dropout(x)
#fourth layer
x = self.conv4(x)
x = F.relu(x)
x = self.pool(x)
#fifth layer
x = self.conv5(x)
x = F.relu(x)
x = self.pool(x)
#x = self.dropout(x)
#reshape tensor
x = x.view(-1,512*5*5)
#last layer
x = self.dropout(x)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout(x)
x = self.fc2(x)
x = F.relu(x)
x = self.fc3(x)
return x
#loss func
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0001)
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr = 0.05)
def train(n_epochs,model,loader,optimizer,criterion,save_path):
for epoch in range(n_epochs):
train_loss = 0
valid_loss = 0
#training
net.train()
for batch, (data,target) in enumerate(loaders['train']):
optimizer.zero_grad()
outputs = net(data)
#print(outputs.shape)
loss = criterion(outputs,target)
loss.backward()
optimizer.step()
When I use the CrossEntropy Loss function and SGD optimizer, I able able to train the model with no error.
When I use MSE loss function and Adam optimizer, I am facing the following error:
RuntimeError Traceback (most recent call last) <ipython-input-20-2223dd9058dd> in <module>
1 #train the model
2 n_epochs = 2
----> 3 train(n_epochs,net,loaders,optimizer,criterion,'saved_model/dog_model.pt')
<ipython-input-19-a93d145ef9f7> in train(n_epochs, model, loader, optimizer, criterion, save_path)
22
23 #calculate loss
---> 24 loss = criterion(outputs,target)
25
26 #backward prop
RuntimeError: The size of tensor a (133) must match the size of tensor b (10) at non-singleton dimension 1.
Does the selected loss function and optimizer effect the training of the model? Can anyone please help on this?
Well, the error is because the nn.MSELoss() and nn.CrossEntropyLoss() expect different input/target combinations. You cannot simply change the criterion function without changing the inputs and targets appropriately. From the docs:
nn.CrossEntropyLoss:
Input:
(N, C) where C = number of classes, or
(N, C, d_1, d_2, ..., d_K) with K >= 1 in the case of K-dimensional loss.
Target:
(N) where each value is in range [0, C-1] or
(N, d_1, d_2, ..., d_K) with K >= 1 in the case of K-dimensional loss.
nn.MSELoss:
Input:
(N,āˆ—) where āˆ— means, any number of additional dimensions.
Target:
(N,āˆ—), same shape as the input
As you can see, in the MSELoss, Target is expect to have the same shape as input, while in the CrossEntropyLoss, the C dimension is dropped. You cannot use MSELoss as a drop-in replacement for CrossEntropyLoss.
The error message clearly suggests that the error occurred at the line
loss = criterion(outputs,target)
where you are trying to compute the mean-squared error between the input and the target.
See this line: criterion = nn.MSELoss().
I think you should modify your code where you are estimating loss between (output, target) pair of inputs,i.e., loss = criterion(outputs,target) to something like below:
loss = criterion(outputs,target.view(1, -1))
Here, you are making target shape same as outputs from model on line
outputs = net(data)
One more think to notice here is the output of the net model, i.e., outputs will be of shape batch_size X output_channels, where batch size if the first dimension of input images as during the training you will get batches of images, so your shape in the forward method will get an additional batch dimension at dim0: [batch_size, channels, height, width], and ouput_channels is number of output features/channels from the last linear layer in the net model.
And, the the target labels will be of shape batch_size, which is 10 in your case, check batch_size you passed in torch.utils.data.DataLoader(). Therefore, on reshaping it using view(1, -1), it will be of converted into a shape 1 X batch_size, i.e., 1 X 10.
That's why, you are getting the error:
RuntimeError: input and target shapes do not match: input [10 x 133],
target [1 x 10]
So, a way around is to replace loss = criterion(outputs,target.view(1, -1)) with loss = criterion(outputs,target.view(-1, 1)) and change the output_channels of last linear layer to 1 instead of 133. In this way, both of outputs and target shape will be equal and we can compute MSE value then.
Learn more about pytorch MSE loss function from here.

Pytorch loss function error in the last batch

Assume that I have 77 samples to train my CNN, and my batch size is 10. Then the last batch has a batch size of 7 instead of 10. Somehow when I pass it to the loss function such as nn.MSELoss(), it gives me the error:
RuntimeError: The size of tensor a (10) must match the size of tensor
b (7) at non-singleton dimension 1
So pytorch doesn't support batches with different sizes?
My code in doubt:
import numpy as np
import torch
from torch import nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, (5,4))
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(64, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
model = Net()
batch_size = 10
# Generating Artifical data
x_train = torch.randn((77,1,20,20))
y_train = torch.randint(0,10,size=(77,),dtype=torch.float)
trainset = torch.utils.data.TensorDataset(x_train,y_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
for epoch in range(20): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get the inputs
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i%10==0:
print('epoch{}, step{}, loss: {}'.format(epoch + 1, i + 1, running_loss))
# print("frac post = {}".format(frac_post))
running_loss = 0.0
The problem is not due to the batch size, but to a failure to broadcast properly between the 10 outputs of your CNN and the single label provided in each example.
If you look at the model output and label tensor shapes during the batch where the error is thrown,
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7])
you'll see that the labels are stored in a singleton tensor. According to pytorch broadcasting rules, to be broadcastable two tensors have to be compatible in all trailing dimensions. In this case, the trailing dimension of the model output (10) is incompatible with that of the label (7).
To fix, either add a dummy dimension to the label (assuming you actually want to broadcast the labels to match your ten network outputs), or define a network with scalar outputs. For example:
y_train = torch.randint(0,10,size=(77,1),dtype=torch.float)
results in
print(outputs.shape, labels.shape)
#out: torch.Size([7, 10]) torch.Size([7,1])
# these are broadcastable

Categories