Error in Calculating neural network Test Accuracy - python

I tried to train my neural network, and then evaluate it's testing accuracy. I am using the code at the bottom of this post to train. The fact is that for other neural networks, I can evaluate the testing accuracy with my code without issue. However, for this neural network (which I constructed correctly according to the description of the neural network paper), I can't evaluate the testing accuracy properly and its giving me the traceback below. So maybe something's wrong in my forward pass?
Here is the training and testing code:
//imports including import deepnet.py
cudnn.benchmark = True
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32')
X_train = np.transpose(X_train, axes=(0, 3, 1, 2))
X_test = X_test.astype('float32')
X_test = np.transpose(X_test, axes=(0, 3, 1, 2))
X_train /= 255
X_test /= 255
device = torch.device('cuda:0')
# This is where you can load any model of your choice.
# I stole PyTorch Vision's VGG network and modified it to work on CIFAR-10.
# You can take this line out and add any other network and the code
# should run just fine.
model = deepnet.cifar10_deep()
#model.to(device)
# Forward pass
opfun = lambda X: model.forward(Variable(torch.from_numpy(X)))
# Forward pass through the network given the input
predsfun = lambda op: np.argmax(op.data.numpy(), 1)
# Do the forward pass, then compute the accuracy
accfun = lambda op, y: np.mean(np.equal(predsfun(op), y.squeeze()))*100
# Initial point
x0 = deepcopy(model.state_dict())
# Number of epochs to train for
# Choose a large value since LB training needs higher values
# Changed from 150 to 30
nb_epochs = 30
batch_range = [25, 40, 50, 64, 80, 128, 256, 512, 625, 1024, 1250, 1750, 2048, 2500, 3125, 4096, 4500, 5000]
# parametric plot (i.e., don't train the network if set to True)
hotstart = False
if not hotstart:
for batch_size in batch_range:
optimizer = torch.optim.Adam(model.parameters())
model.load_state_dict(x0)
#model.to(device)
average_loss_over_epoch = '-'
print('Optimizing the network with batch size %d' % batch_size)
np.random.seed(1337) #So that both networks see same sequence of batches
for e in range(nb_epochs):
model.eval()
print('Epoch:', e, ' of ', nb_epochs, 'Average loss:', average_loss_over_epoch)
average_loss_over_epoch = 0
# Checkpoint the model every epoch
torch.save(model.state_dict(), "./models/DeepNetC2BatchSize" + str(batch_size) + ".pth")
array = np.random.permutation(range(X_train.shape[0]))
slices = X_train.shape[0] // batch_size
beginning = 0
end = 1
# Training loop!
for _ in range(slices):
start_index = batch_size * beginning
end_index = batch_size * end
smpl = array[start_index:end_index]
model.train()
optimizer.zero_grad()
ops = opfun(X_train[smpl])
tgts = Variable(torch.from_numpy(y_train[smpl]).long().squeeze())
loss_fn = F.nll_loss(ops, tgts)
average_loss_over_epoch += loss_fn.data.numpy() / (X_train.shape[0] // batch_size)
loss_fn.backward()
optimizer.step()
beginning += 1
end += 1
grid_size = 18 #How many points of interpolation between [0, 5000]
data_for_plotting = np.zeros((grid_size, 3)) #Uncomment this line if running entire code from scratch
sharpnesses1eNeg3 = []
sharpnesses5eNeg4 = []
#data_for_plotting = np.load("DeepNetCIFAR10-intermediate-values.npy") #Uncomment this line to use an existing NumPy array
print(data_for_plotting)
i = 0
# Fill in test accuracy values for `grid_size' points in the interpolation
for batch_size in batch_range:
mydict = {}
batchmodel = torch.load("./models/DeepNetC2BatchSize" + str(batch_size) + ".pth")
for key, value in batchmodel.items():
mydict[key] = value
model.load_state_dict(mydict)
j = 0
for datatype in [(X_train, y_train), (X_test, y_test)]:
dataX = datatype[0]
datay = datatype[1]
for smpl in np.split(np.random.permutation(range(dataX.shape[0])), 10):
ops = opfun(dataX[smpl])
tgts = Variable(torch.from_numpy(datay[smpl]).long().squeeze())
var = F.nll_loss(ops, tgts).data.numpy() / 10
if j == 1:
data_for_plotting[i, j-1] += accfun(ops, datay[smpl]) / 10.
j += 1
print(data_for_plotting[i])
np.save('DeepNetCIFAR10-intermediate-values', data_for_plotting)
i += 1
And the model code is here and includes the forward pass
import torch
import torch.nn as nn
F = nn.functional
__all__ = ['cifar10_deepnet', 'cifar100_deepnet']
class VGG(nn.Module):
def __init__(self, num_classes=10):
super(VGG, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.Dropout(0.3),
nn.Conv2d(64, 64, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(64, 128, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(128, 128, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(128, 256, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(256, 256, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(256, 256, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(256, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(512, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(512, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(512, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(512, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.Dropout(0.4),
nn.Conv2d(512, 512, kernel_size=3, padding = 1, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.classifier = nn.Sequential(
nn.Linear(512, 512, bias=False),
nn.Dropout(0.5),
nn.BatchNorm1d(512),
nn.ReLU(inplace=True),
nn.Dropout(0.5),
nn.Linear(512, num_classes)
)
def forward(self, x):
x = self.features(x)
x = x.view(-1, 512)
x = self.classifier(x)
return F.log_softmax(x)
def cifar10_deep(**kwargs):
num_classes = getattr(kwargs, 'num_classes', 10)
return VGG(num_classes)
def cifar100_deep(**kwargs):
num_classes = getattr(kwargs, 'num_classes', 100)
return VGG(num_classes)

You are trying to load a state dict that belongs to another model.
The error shows that your model is the class AlexNet.
RunTimeError: Error(s) in loading state_dict for AlexNet:
But the state dict you are trying to load is from the VGG you posted, which doesn't have the same modules as AlexNet.
You need to use the same model whose state dict you saved before.

Related

How can I check the image sizes while CNN?

I'm trying to classify cat and dog in CNN with PyTorch.
While I made few layers and processing images, I found that final processed feature map size doesn't match with calculated size.
So I tried to check feature map size step by step in CNN process with print shape but it doesn't work.
I heard tensorflow enables check tensor size in steps but how can I do that?
What I want is :
def __init__(self):
super(CNN, self).__init__()
conv1 = nn.Conv2d(1, 16, 3, 1, 1)
conv1_1 = nn.Conv2d(16, 16, 3, 1, 1)
pool1 = nn.MaxPool2d(2)
conv2 = nn.Conv2d(16, 32, 3, 1, 1)
conv2_1 = nn.Conv2d(32, 32, 3, 1, 1)
pool2 = nn.MaxPool2d(2)
conv3 = nn.Conv2d(32, 64, 3, 1, 1)
conv3_1 = nn.Conv2d(64, 64, 3, 1, 1)
conv3_2 = nn.Conv2d(64, 64, 3, 1, 1)
pool3 = nn.MaxPool2d(2)
self.conv_module = nn.Sequential(
conv1,
nn.ReLU(),
conv1_1,
nn.ReLU(),
pool1,
# check first result size
conv2,
nn.ReLU(),
conv2_1,
nn.ReLU(),
pool2,
# check second result size
conv3,
nn.ReLU(),
conv3_1,
nn.ReLU(),
conv3_2,
nn.ReLU(),
pool3,
# check third result size
pool4,
# check fourth result size
pool5
# check fifth result size
)
If there's any other way to check feature size at every step, please give some advice.
Thanks in advance.
To do that you shouldn't use nn.Sequential. Just initialize your layers in __init__() and call them in the forward function. In the forward function you can print the shapes out. For example like this:
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(...)
self.maxpool1 = nn.MaxPool2d()
self.conv2 = nn.Conv2d(...)
self.maxpool2 = nn.MaxPool2d()
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.maxpool1(x)
print(x.size())
x = self.conv2(x)
x = F.relu(x)
x = self.maxpool2(x)
print(x.size())
Hope thats what you looking for!

Kernel size can't be greater than actual input size

I have a data with depth = 3 and I want to pass it through 3 convolution layers with 3x3x3 kernels each.
My current code is below. The first input is
[batch_size=10, in_channels=1, depth=3, height=128, width=256]
and I notice after the first conv3d layer the output is [10,8,1,126,254]. Obviously it has now depth 1 and doesn't accept it for another 3x3x3 layer. How can I achieve this?
class CNet(nn.Module):
def __init__(self, **kwargs):
super().__init__()
self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=0)
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=0)
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=0)
self.fc1 = nn.Linear(value, 2)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.relu(self.conv2(X))
X = F.max_pool2d(X,2)
X = self.conv3(X)
X = F.max_pool2d(X,2)
X = self.fc1(X)
return F.softmax(X,dim =1)
You need to use padding. If you only want to pad the input for the convolutions after the first one and only in the depth dimensions to get the minimum dimension of 3, you would use padding=(1, 0, 0) (it's 1 because the same padding is applied to both sides, i.e. (padding, input, padding) along that dimension).
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=(1, 0, 0))
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=(1, 0, 0))
However, it is common to use padding=1 for all dimensions when using kernel_size=3, because that keeps the dimensions unchanged, which makes it much easier to build deeper network, as you don't need to worry about the sizes suddenly getting too small, as it happened already for your depth dimension. Also when no padding is used, the corners are only included in a single calculation, whereas all other elements contribute to multiple calculations. It is recommended to use kernel_size=3 and padding=1 for all your convolutions.
self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)

can we pass images for which height!=width through our CNN for training in pytorch?

can we pass images for which height!=width through our CNN in Pytorch?
In CNN, I have convolution, batch-norm, max-pool, relu, and fully connected layers.
My network
self.conv_seqn = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=4, stride=4),
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=4, stride=4),
)
self.fc_seqn = nn.Sequential(
nn.Linear(1843200, 256),
nn.ReLU(inplace=True),
nn.Linear(256, total_configs)
)
my forward function
forward()
{
x = self.conv_seqn(x)
x = x.view(x.size(0), -1)
x = self.fc_seqn(x)
return x
}
If input image of size 3840*1920*3 after applying conv_seqn() it should be of size [1, 128, 120, 60] but I getting the size of [1,128,120,120] (batch size =1 here)
any suggestion will be highly helpful.

How to get rid of checkerboard artifacts

I am using a fully convolutional autoencoder to color black and white images, however, the output has a checkerboard pattern and I want to get rid of it. The checkerboard artifacts I have seen so far allways have been far smaller than mine and the usual way to get rid of them is replacing all unpooling operations with bilinear upsampling (I have been told that).
But I can not simply replace the unpooling operation because I work with different sized images, thus the unpooling operation is needed, else the output tensor could have a different size than the original.
TLDR:
How can I get rid of these checkerboard-artifacts without replacing the unpooling operations?
class AE(nn.Module):
def __init__(self):
super(AE, self).__init__()
self.leaky_reLU = nn.LeakyReLU(0.2)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
self.softmax = nn.Softmax2d()
self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1)
self.conv6 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1)
self.conv7 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3, stride=1, padding=1)
self.conv8 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
self.conv9 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv10 = nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=3, stride=1, padding=1)
def forward(self, x):
# encoder
x = self.conv1(x)
x = self.leaky_reLU(x)
size1 = x.size()
x, indices1 = self.pool(x)
x = self.conv2(x)
x = self.leaky_reLU(x)
size2 = x.size()
x, indices2 = self.pool(x)
x = self.conv3(x)
x = self.leaky_reLU(x)
size3 = x.size()
x, indices3 = self.pool(x)
x = self.conv4(x)
x = self.leaky_reLU(x)
size4 = x.size()
x, indices4 = self.pool(x)
######################
x = self.conv5(x)
x = self.leaky_reLU(x)
x = self.conv6(x)
x = self.leaky_reLU(x)
######################
# decoder
x = self.unpool(x, indices4, output_size=size4)
x = self.conv7(x)
x = self.leaky_reLU(x)
x = self.unpool(x, indices3, output_size=size3)
x = self.conv8(x)
x = self.leaky_reLU(x)
x = self.unpool(x, indices2, output_size=size2)
x = self.conv9(x)
x = self.leaky_reLU(x)
x = self.unpool(x, indices1, output_size=size1)
x = self.conv10(x)
x = self.softmax(x)
return x
Instead of using an upconv layer such as nn.ConvTranspose2d, you can use interpolation in the decoder part to go back to your initial format, such as torch.nn.functional.interpolate. It will prevent you from having checkerboards artifacts.
If you want learnable weights in the decoder, you should also use a conv layer such as nn.Conv2d after each interpolation.
Skip connection is commonly used in Encoder-Decoder architecture and it helps to produce accurate result by passing appearance information from shallow layer of encoder (discriminator) to corresponding deeper layer of decoder (generator). Unet is the widely used Encoder-Decoder type architecture. Linknet is also very popular and it differs with Unet in the way of fusing appearance information of encoder layer with the decoder layer. In case of Unet, incoming features (from encoder) are concatenated in the corresponding decoder layer. On the other hand, Linknet performs addition and that why Linknet requires fewer number of operations in a single forward pass and significantly faster than the Unet.
Your each convolution block in Decoder might looks like following:
Additionally, i'm attaching a figure bellow depicting architecture of Unet and LinkNet. Hope using skip connection will help.
This pattern you have because of deconvolution (nn.ConvTranspose2d). The article explains it in detail.
You may try Upsample as alternative. This will not provide the checkerboard pattern.
Works like this:
import torch
input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
m = torch.nn.Upsample(scale_factor=2, mode='nearest')
m(input)
However you will not be able to learn anything with Upsample. It is just a transform.
So it is a trade. There are many papers online how to deal with the checkerboard pattern for different problems.
The idea is to train your network so the checkerboard pattern is gone.
As Kaushik Roy has specified, Skip-Connections are the way to go!
class AE(nn.Module):
def __init__(self):
super(AE, self).__init__()
self.leaky_reLU = nn.LeakyReLU(0.2)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
self.softmax = nn.Softmax2d()
self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1)
self.conv6 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1)
self.conv7 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=3, stride=1, padding=1)
self.conv8 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1)
self.conv9 = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv10 = nn.Conv2d(in_channels=128, out_channels=2, kernel_size=3, stride=1, padding=1)
def forward(self, x):
# encoder
x = self.conv1(x)
out1 = self.leaky_reLU(x)
x = out1
size1 = x.size()
x, indices1 = self.pool(x)
x = self.conv2(x)
out2 = self.leaky_reLU(x)
x = out2
size2 = x.size()
x, indices2 = self.pool(x)
x = self.conv3(x)
out3 = self.leaky_reLU(x)
x = out3
size3 = x.size()
x, indices3 = self.pool(x)
x = self.conv4(x)
out4 = self.leaky_reLU(x)
x = out4
size4 = x.size()
x, indices4 = self.pool(x)
######################
x = self.conv5(x)
x = self.leaky_reLU(x)
x = self.conv6(x)
x = self.leaky_reLU(x)
######################
# decoder
x = self.unpool(x, indices4, output_size=size4)
x = self.conv7(torch.cat((x, out4), 1))
x = self.leaky_reLU(x)
x = self.unpool(x, indices3, output_size=size3)
x = self.conv8(torch.cat((x, out3), 1))
x = self.leaky_reLU(x)
x = self.unpool(x, indices2, output_size=size2)
x = self.conv9(torch.cat((x, out2), 1))
x = self.leaky_reLU(x)
x = self.unpool(x, indices1, output_size=size1)
x = self.conv10(torch.cat((x, out1), 1))
x = self.softmax(x)
return x
This answer was posted as an edit to the question How to get rid of checkerboard artifacts by the OP Stefan under CC BY-SA 4.0.

Given groups=1, weight of size 16 16 3 3, expected input[16, 64, 222, 222] to have 16 channels, but got 64 channels instead?

I try to run the following program for image classification problems in Pytorch.
I am new to PyTorch and I am not getting what's wrong with the code. I tried reshaping the images but no help.
I am running this code with Cuda. I have around 750 classes and 10-20 images in one class. My dataset is a benchmark dataset and every image has a size of 60*160.
class Net(nn.Module):
def __init__(self):
super().__init__()
self.ConvLayer1 = nn.Sequential(
nn.Conv2d(3, 64, 3), # inp (3, 512, 512) changes doing here original (3, 64, 3)
nn.Conv2d(8, 16, 3), # original (8,16,3)
nn.MaxPool2d(2),
nn.ReLU() # op (16, 256, 256)
)
self.ConvLayer2 = nn.Sequential(
nn.Conv2d(16, 32, 5), # inp (16, 256, 256)
nn.Conv2d(32, 32, 3),
nn.MaxPool2d(4),
nn.ReLU() # op (32, 64, 64)
)
self.ConvLayer3 = nn.Sequential(
nn.Conv2d(32, 64, 3), # inp (32, 64, 64) original (32,64,3)
nn.Conv2d(64, 64, 5),
nn.MaxPool2d(2),
nn.ReLU() # op (64, 32, 32)
)
self.ConvLayer4 = nn.Sequential(
nn.Conv2d(64, 128, 5), # inp (64, 32, 32)
nn.Conv2d(128, 128, 3),
nn.MaxPool2d(2),
nn.ReLU() # op (128, 16, 16)
)
self.Lin1 = nn.Linear(15488, 15)
self.Lin2 = nn.Linear(1500, 150)
self.Lin3 = nn.Linear(150, 15)
def forward(self, x):
x = self.ConvLayer1(x)
x = self.ConvLayer2(x)
x = self.ConvLayer3(x)
x = self.ConvLayer4(x)
x = x.view(x.size(0), -1)
x = self.Lin1(x)
return F.log_softmax(x, dim = 1)
'''
'''
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.5)
for epoch in tqdm(range(2)): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(dataloaders['train']):
# get the inputs; data is a list of [inputs, labels]
inputs, class_names = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, class_names)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
# if i % 10 == 0: # print every 10 mini-batches
# print('[%d, %5d] loss: %.3f' %
# (epoch + 1, i + 1, running_loss / 2000))
# running_loss = 0.0
break
print('Finished Training')
Getting this error and I don't know where to make changes.
Given groups=1, the weight of size 16 16 3 3, expected input[16, 64, 222, 222] to have 16 channels, but got 64 channels instead.
The number of output channels in a conv layer need to match the number of input channels in the next conv layer. Say you have nn.Conv(3, 64, 3) then the next conv layer needs to begin nn.Conv(64, .... Right now the issue is that you're trying to pass a 64 channel result into a conv layer which you've defined to expect an 8 channel input.

Categories