Autoencoder outputs B&W images from color - python

We are in the process of training an AE on CIFAR10 images. We used the following architecture:
class OurAE(nn.Module):
    def __init__(self, in_channels, z_channels):
        super(OurAE, self).__init__()
        self.tot_diff = None
        self.in_channels = in_channels
        curr_channels = in_channels

        encoder = []
        channels = [3, 16, 64] + [z_channels]
        for out_channels in channels:
            encoder += [
                nn.Conv2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        self.encoder = nn.Sequential(*encoder)

        curr_channels = z_channels
        decoder = []
        channels = [64, 16, 3] + [in_channels]
        for out_channels in channels:
            decoder += [
                nn.ConvTranspose2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=4, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        decoder = decoder[:-1]  # removing the final ReLU layer
        decoder.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*decoder)

    def forward(self, x):
        return self.decoder(self.encoder(x))
We are not sure why, but we always get black and white images.
We tried replacing the Sigmoid with ReLU in the last layer, but to no avail.
These are the loss function and the optimizer that we used:
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
Here is an example of an input and output of the AE after training:

I had the same problem. I changed the loss function to MAE (mean absolute error, i.e. L1 loss). That cleared the issue for me.
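In PyTorch terms, that means swapping the classification loss for a reconstruction loss such as nn.L1Loss (MAE) or nn.MSELoss: CrossEntropyLoss expects class logits, not images, so it cannot train a pixel-wise reconstruction properly. A minimal sketch of the training step, assuming the OurAE model from the question and a CIFAR-10 DataLoader named train_loader (the z_channels value is an arbitrary example):

import torch

model = OurAE(in_channels=3, z_channels=8)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.L1Loss()  # MAE; torch.nn.MSELoss() is also a common choice

for x, _ in train_loader:      # labels are unused by an autoencoder
    optimizer.zero_grad()
    x_hat = model(x)           # reconstruction in [0, 1] thanks to the Sigmoid
    loss = criterion(x_hat, x) # compare the reconstruction to the input itself
    loss.backward()
    optimizer.step()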

Trying to understand this CNN Model - PyTorch

I am trying to understand this model by building it from scratch.
I am trying to start with just one layer, but I am getting an error stating that the output of layer1 is not the expected input type for fc1:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x197136 and 2704x2704)
I am not 100% sure I can do this with one layer, but I am trying to learn by playing around with it.
Here is the code I am trying to play around with:
# Create a neural network with PyTorch
# https://www.kaggle.com/code/reukki/pytorch-cnn-tutorial-with-cats-and-dogs
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.fc1 = nn.Linear(3*3*16, 10)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(10, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out
I am also unsure what out.view(out.size(0),-1) does.
Would be great if anyone can help me understand this better. Looking forward to some answers.
[Update]
Kaggle Notebook link: https://www.kaggle.com/austonpramodh/cats-dogs-cnn-learning
out.view(out.size(0), -1) keeps the batch dimension (out.size(0)) and flattens all the remaining dimensions into one. For example, if out has shape (64, 16, 55, 55), this command turns it into shape (64, 48400), since 16*55*55 = 48400.
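A quick way to see this (my own illustration; the shapes are chosen to match the model below):

import torch

out = torch.randn(64, 16, 55, 55)  # (batch, channels, height, width)
flat = out.view(out.size(0), -1)   # keep the batch dim, flatten the rest
print(flat.shape)                  # torch.Size([64, 48400])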
I was able to get the answer from one of my friends. Thanks to Vihari.
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.fc1 = nn.Linear(55 * 55 * 16, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        # print("****Layer Size***")
        # # 55*55*16
        # print(out.size(1))
        # print("****Layer Size***")
        out = self.relu(out)
        out = out.view(out.size(0), out.size(1) * out.size(2) * out.size(3))
        # print("****View Size***")
        # print(out.size()[-1], out.size(1))
        # print("****View Size***")
        out = self.fc1(out)
        return out
Also, thanks for the out.view explanation, @Pranav.
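If you would rather not hand-compute sizes like 55 * 55 * 16, one option (a sketch of my own, not from the thread) is to probe the flattened size with a dummy forward pass, or to let PyTorch infer it with nn.LazyLinear. The 55 * 55 * 16 above implies 224x224 greyscale inputs, which is what this sketch assumes:

import torch
import torch.nn as nn

layer1 = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, padding=0, stride=2),
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
)

# Probe with a dummy batch to find the flattened feature count
with torch.no_grad():
    n_features = layer1(torch.zeros(1, 1, 224, 224)).numel()  # 48400 == 16 * 55 * 55
fc1 = nn.Linear(n_features, 2)
# Alternatively: fc1 = nn.LazyLinear(2), which infers in_features on first use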

How to change the out_features of the densenet121 model?

I am using the code below to train the model:
from torch.nn.modules.dropout import Dropout

class Densnet121(nn.Module):
    def __init__(self):
        super(Densnet121, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1)
        self.Densenet_121 = models.densenet121(pretrained=True)
        self.gap = AvgPool2d(kernel_size=2, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(1024)
        self.do1 = nn.Dropout(0.25)
        self.linear = nn.Linear(256, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.do2 = nn.Dropout(0.25)
        self.output = nn.Linear(64 * 64 * 64, 2)
        self.act = nn.ReLU()

    def densenet(self):
        for param in self.Densenet_121.parameters():
            param.requires_grad = False
        self.Densenet_121.classifier = nn.Linear(1024, 1024)
        return self.Densenet_121

    def forward(self, x):
        img = self.act(self.cnn1(x))
        img = self.densenet(img)
        img = self.gap(img)
        img = self.bn1(img)
        img = self.do1(img)
        img = self.linear(img)
        img = self.bn2(img)
        img = self.do2(img)
        img = torch.flatten(img, 1)
        img = self.output(img)
        return img
When training this model, I face the following error:
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 64, 62, 62] to have 3 channels, but got 64 channels instead
Your first conv layer outputs a tensor of shape (b, 64, h, w), while the following layer, the densenet model, expects 3 channels. Hence the error that was raised:
"expected input [...] to have 3 channels, but got 64 channels instead"
Unfortunately, this value is hardcoded in the source of the Densenet class, see reference.
One workaround, however, is to overwrite the first convolutional layer after the densenet has been initialized. Something like this should work:
# First gather the conv layer specs
conv = self.Densenet_121.features.conv0
kwargs = {k: getattr(conv, k) for k in
          ('out_channels', 'stride', 'kernel_size', 'padding')}
# Overwrite with identical specs but a new in_channels; note the constructor's
# bias argument must be a bool, not the old bias tensor
self.Densenet_121.features.conv0 = nn.Conv2d(in_channels=64, bias=conv.bias is not None, **kwargs)
Alternatively, you can replace the weight tensor directly:
w = self.Densenet_121.features.conv0.weight
w.data = torch.rand(w.shape[0], 64, *w.shape[2:])  # (out_channels, 64, kh, kw)
This replaces the underlying convolutional layer's weight without affecting its metadata (e.g. conv.in_channels remains equal to 3), which could have side effects. So I would recommend following the first approach.
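As a quick sanity check of the first approach (a sketch of my own; the 64-channel, 62x62 input shape is taken from the error message):

import torch
import torch.nn as nn
from torchvision import models

model = models.densenet121(pretrained=True)
conv = model.features.conv0
kwargs = {k: getattr(conv, k) for k in ('out_channels', 'stride', 'kernel_size', 'padding')}
model.features.conv0 = nn.Conv2d(in_channels=64, bias=conv.bias is not None, **kwargs)

x = torch.randn(2, 64, 62, 62)
print(model.features(x).shape)  # runs without the channel-mismatch error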

How can I match a Decoder's input to a Pretrained Resnet18 Encoder?

I am trying to build a custom Decoder with skip connections to run with a pretrained ResNet-18 Encoder for an image segmentation task. The total number of classes is 150.
The ResNet-18 Encoder has an fc output of 512. In order to match the Encoder's output to the Decoder's input, I am trying to set the Conv layers of the Decoder so that they match the Encoder's output, i.e. [151, 512, 1, 1]. However, no matter what combination of layers I make, I am not able to match the input and output tensors.
Here is the relevant part of the Decoder code:
class ResNet18Transpose(nn.Module):
    def __init__(self, transblock, layers, num_classes=150):
        self.inplanes = 512
        super(ResNet18Transpose, self).__init__()
        self.deconv1 = self._make_transpose(transblock, 256 * transblock.expansion, layers[0], stride=2)
        self.deconv2 = self._make_transpose(transblock, 128 * transblock.expansion, layers[1], stride=2)
        self.deconv3 = self._make_transpose(transblock, 64 * transblock.expansion, layers[2], stride=2)
        self.deconv4 = self._make_transpose(transblock, 32 * transblock.expansion, layers[3], stride=2)
        self.skip0 = self._make_skip_layer(64, 64 * transblock.expansion)
        self.skip1 = self._make_skip_layer(128, 64 * transblock.expansion)
        self.skip2 = self._make_skip_layer(256, 64 * transblock.expansion)
        self.skip3 = self._make_skip_layer(512, 128 * transblock.expansion)
        self.inplanes = 64
        self.final_conv = self._make_transpose(transblock, 64 * transblock.expansion, 3)
        self.final_deconv = nn.ConvTranspose2d(self.inplanes * transblock.expansion, num_classes, kernel_size=2,
                                               stride=2, padding=0, bias=True)
        self.out6_conv = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)
        self.out5_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.ConvTranspose2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
The forward block of the Decoder is as follows:
def forward(self, x, labels=None, sparse_mode=False, use_skip=True):
    [in0, in1, in2, in3, in4] = x
    if labels:
        [lab0, lab1, lab2, lab3, lab4] = labels
    out6 = self.out6_conv(in4)
    if sparse_mode:
        if labels:
            mask4 = (lab4 == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        else:
            mask4 = (torch.argmax(out6, dim=1) == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        in4 = in4 * mask4
    # upsample 1
    x = self.deconv1(in4)
    out5 = self.out5_conv(x)
    if sparse_mode:
        if labels:
            mask3 = (lab3 == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        else:
            mask3 = (torch.argmax(out5, dim=1) == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        in3 = in3 * mask3
    if use_skip:
        x = x + self.skip3(in3)
    # upsample 2
    x = self.deconv2(x)
    out4 = self.out4_conv(x)
    if sparse_mode:
        if labels:
            mask2 = (lab2 == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        else:
            mask2 = (torch.argmax(out4, dim=1) == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        in2 = in2 * mask2
    if use_skip:
        x = x + self.skip2(in2)
    # upsample 3
    x = self.deconv3(x)
    out3 = self.out3_conv(x)
    if sparse_mode:
        if labels:
            mask1 = (lab1 == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        else:
            mask1 = (torch.argmax(out3, dim=1) == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        in1 = in1 * mask1
    if use_skip:
        x = x + self.skip1(in1)
    # upsample 4
    x = self.deconv4(x)
    out2 = self.out2_conv(x)
    if sparse_mode:
        if labels:
            mask0 = (lab0 == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        else:
            mask0 = (torch.argmax(out2, dim=1) == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        in0 = in0 * mask0
    if use_skip:
        x = x + self.skip0(in0)
    # final
    x = self.final_conv(x)
    out1 = self.final_deconv(x)
    return [out6, out5, out4, out3, out2, out1]
I get the following error:
File "/project/xfu/aamir/Golden-QGN/models/resnet.py", line 447, in forward
out6 = self.out6_conv(in4)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 419, in forward
return self._conv_forward(input, self.weight)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [151, 1024, 1, 1], expected input[8, 512, 8, 8] to have 1024 channels, but got 512 channels instead
Exception in thread Thread-1:
If I change the Decoder Layers as follows:
self.out6_conv = nn.Conv2d(512, num_classes, kernel_size=1, stride=1, bias=True)
self.out5_conv = nn.Conv2d(256 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
I get the following error. Note that the input tensor has also changed.
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 64, 3, 3], expected input[8, 32, 128, 128] to have 64 channels, but got 32 channels instead
I also tried altering the Resnet18 Encoder's fc output to 1024 instead of 512 as follows:
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnet18']))
    model.fc = torch.nn.Linear(1024, 150)
    return model
But nothing seems to work. I also could not find a skip-connection-based decoder for ResNet-18 on GitHub or elsewhere on the internet. Any help will be appreciated.
NOTE: I only want to work with ResNet-18. The images being fed to the network through the dataloader work perfectly fine with a ResNet-50 Encoder + custom Decoder. I also tried changing the crop size from 0 to 128 to 256 to 512, but in vain.
Here is the code that is run in models.py for image segmentation. I tried using set_trace() to debug; the code stops just before the (pred, pred_quad) = self.decoder(fmap, labels_scaled) line in the following code:
class SegmentationModule(SegmentationModuleBase):
    def __init__(self, net_enc, net_dec, crit, deep_sup_scale=None, quad_sup=False, running_avg_param=0.99):
        super(SegmentationModule, self).__init__()
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit = crit
        if deep_sup_scale:
            if deep_sup_scale < 0:
                self.adapt_weights = True
                self.running_avg_param = running_avg_param
                deep_sup_scale = 1
            else:
                self.adapt_weights = False
            self.loss_weights = [(deep_sup_scale**(i+1)) for i in range(5)]
        self.quad_sup = quad_sup

    def forward(self, feed_dict, *, segSize=None):
        inputs = feed_dict['img_data'].cuda()
        if segSize is None:  # training
            labels_orig_scale = feed_dict['seg_label_0'].cuda()
            labels_scaled = []
            fmap = self.encoder(inputs, return_feature_maps=True)
            if self.quad_sup:
                labels_scaled.append(feed_dict['seg_label_1'].cuda())
                labels_scaled.append(feed_dict['seg_label_2'].cuda())
                labels_scaled.append(feed_dict['seg_label_3'].cuda())
                labels_scaled.append(feed_dict['seg_label_4'].cuda())
                labels_scaled.append(feed_dict['seg_label_5'].cuda())
                (pred, pred_quad) = self.decoder(fmap, labels_scaled)
            else:
                pred = self.decoder(fmap)
            loss = self.crit(pred, labels_orig_scale)
            if self.quad_sup:
                loss_orig = loss
                for i in range(len(pred_quad)):
                    loss_quad = self.crit(pred_quad[i], labels_scaled[i])
                    loss = loss + loss_quad * self.loss_weights[i]
                    if self.adapt_weights:
                        self.loss_weights[i] = self.running_avg_param * self.loss_weights[i] + \
                            (1 - self.running_avg_param) * (loss_quad/loss_orig).data.cpu().numpy()
            acc = self.pixel_acc(pred, labels_orig_scale, self.quad_sup)
            return loss, acc
        else:  # inference
            if 'qtree' in feed_dict:
                labels_scaled = [feed_dict['qtree'][l].cuda() for l in range(1, 6)]
            else:
                labels_scaled = None
            pred = self.decoder(self.encoder(inputs, return_feature_maps=True), labels_scaled, segSize=segSize)
            return pred
So here is how I resolved the error:
1. I disabled the skip connections. I am not sure why the decoder doesn't work with skip connections.
2. I had given an incorrect layers argument, [6, 3, 4, 3], to the Decoder; for ResNet-18 the layers should be [2, 2, 2, 2].
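For context, a sketch (my own illustration, not from the thread) of why [2, 2, 2, 2] is the right layers argument: torchvision's resnet18 is built as ResNet(BasicBlock, [2, 2, 2, 2]), and its four stages emit 64, 128, 256 and 512 channels, which is exactly what the skip layers above are sized for. The 256x256 input size is just an example:

import torch
from torchvision import models

encoder = models.resnet18(pretrained=False)
x = torch.randn(1, 3, 256, 256)
x = encoder.maxpool(encoder.relu(encoder.bn1(encoder.conv1(x))))
for stage in (encoder.layer1, encoder.layer2, encoder.layer3, encoder.layer4):
    x = stage(x)
    print(x.shape)  # channels go 64 -> 128 -> 256 -> 512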

CNN in pytorch "Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 3-dimensional input of size [16, 64, 64] instead"

I am new to PyTorch. I am trying to use the Chinese MNIST dataset to train the neural network shown in the code below. Is the problem with the neural network's input, or does something else go wrong in my code? I have tried many ways to fix it, but they only produce other errors.
train_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(700, random_state=SEED)).reset_index(drop=True)
x_train, y_train = train_df.iloc[:, :-2], train_df.iloc[:, -2]
valid_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(200, random_state=SEED)).reset_index(drop=True)
x_valid, y_valid = valid_df.iloc[:, :-2], valid_df.iloc[:, -2]
test_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(100, random_state=SEED)).reset_index(drop=True)
x_test, y_test = test_df.iloc[:, :-2], test_df.iloc[:, -2]

train_ds = Dataset(x_train, y_train)
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True)
valid_ds = Dataset(x_valid, y_valid)
valid_dataloader = torch.utils.data.DataLoader(valid_ds, batch_size=16, shuffle=True)
test_ds = Dataset(x_test, y_test)
test_dataloader = torch.utils.data.DataLoader(test_ds, batch_size=16, shuffle=True)

# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 15)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

model = ConvNet()
klisi = []
apoklisi = []
apoklisi2 = []
klisi2 = []
olatalr = []
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_dataloader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    for i, data in enumerate(train_dataloader):  # (images, labels)
        batch_inputs, batch_labels = data[0][:].to(device).type(torch.float), data[1][:].to(device)
        # Run the forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)
Your training images are greyscale images. That is, they only have one channel (as opposed to the three RGB color channels in color images).
It seems like your Dataset (implicitly) "squeezes" this singleton dimension, and instead of having a batch of shape BxCxHxW = 16x1x64x64, you end up with a batch of shape 16x64x64.
Try:
# ...
batch_inputs, batch_labels = data[0][:].to(device).type(torch.float), data[1][:].to(device)
batch_inputs = batch_inputs[:, None, ...] # explicitly add the singleton channel dimension
# Run the forward pass
# ...
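An equivalent way to write that line (my own note, not from the original answer) is batch_inputs = batch_inputs.unsqueeze(1), which turns the (16, 64, 64) batch into (16, 1, 64, 64). Alternatively, you can add the channel dimension once inside your Dataset's __getitem__, so every consumer of the dataset receives the expected BxCxHxW layout.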

How to add a L2 regularization term in my loss function

I'm going to compare the difference between training with and without regularization, so I want to write two custom loss functions.
My loss function with the L2 norm:
### NET
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(4))
        self.fc = nn.Linear(32*32*32, 11)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

net = CNN()

### OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
1. How can I add an L2 norm to my loss function?
2. If I want to write the loss function myself (without using optim.SGD) and do the gradient descent via autograd, how can I do that?
Thanks for your help!
You can explicitly compute the norm of the weights yourself and add it to the loss.
reg = 0
for param in net.parameters():  # iterate over the instance net, not the class CNN
    reg += 0.5 * (param ** 2).sum()  # replace with param.abs().sum() to get L1 regularization
loss = criterion(net(x), y) + reg_lambda * reg  # reg_lambda sets the regularization strength
loss.backward()  # continue as usual
See this thread for more info.
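Two additional sketches of my own that may help. First, for question 1, PyTorch's built-in optimizers already implement L2 regularization via the weight_decay argument, which adds the gradient of reg_lambda * 0.5 * ||w||^2 to each parameter update:

optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=reg_lambda)

Second, for question 2, a minimal manual gradient-descent step using autograd instead of optim.SGD (with reg computed as above; LR, x and y are assumed from the question):

loss = criterion(net(x), y) + reg_lambda * reg
net.zero_grad()
loss.backward()
with torch.no_grad():  # update parameters without tracking the update in autograd
    for param in net.parameters():
        param -= LR * param.grad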
