BatchNorm and Activation in Conv2DTranspose - python

I am trying to stack 3 Conv2DTranspose as below
# __init__: three stride-2 transposed convolutions. With kernel_size=3,
# padding=1 and output_padding=1 each layer exactly doubles height and width:
# out = (in - 1) * 2 - 2 * 1 + (3 - 1) + 1 + 1 = 2 * in.
self.up1 = nn.ConvTranspose2d(in_channels=out3, out_channels=out2, kernel_size=3, padding=1,
                              stride=2, output_padding=1)
self.up2 = nn.ConvTranspose2d(in_channels=out2, out_channels=out1, kernel_size=3, padding=1,
                              stride=2, output_padding=1)
self.up3 = nn.ConvTranspose2d(in_channels=out1, out_channels=n_out_channels, kernel_size=3, padding=1,
                              stride=2, output_padding=1)
# forward: call the layers by the names they were registered under above
# (the original called self.up_tran1/2/3, which are never defined).
x = self.up1(x)
x = self.up2(x)
x = self.up3(x)
Do I need to add batchnorm and activation like regular Conv2d? Or batchnorm and activation are not needed for Conv2dTranspose?

Related

Pytorch: Lower the parameters in U-net model

Can anyone give me some tips on how to lower the number of parameters in the following U-net implementation? I'm having trouble with over-fitting on my training data, and I would like to reduce the parameter count to see whether it improves accuracy on the validation data.
Layers:
First2D
# First2D: double-convolution block, i.e. two (3x3 conv -> batch norm -> ReLU) stages.
# kernel_size=3 with padding=1 (and the default stride of 1) leaves height and
# width unchanged; only the channel count changes:
# in_channels -> middle_channels -> out_channels.
layers = [
nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
]
Encoder2D
# Encoder2D: max-pool by `downsample_kernel` (stride defaults to the kernel size),
# then the same double (3x3 conv -> batch norm -> ReLU) block, which keeps the
# pooled spatial size and maps in_channels -> middle_channels -> out_channels.
layers = [
nn.MaxPool2d(kernel_size=downsample_kernel),
nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
]
Center2D
# Center2D (bottleneck): 2x2 max-pool halves height/width, the double
# (3x3 conv -> batch norm -> ReLU) block keeps that size, and the final
# ConvTranspose2d (kernel_size=2, stride=2) doubles it again while mapping
# out_channels -> deconv_channels.
layers = [
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.ConvTranspose2d(out_channels, deconv_channels, kernel_size=2, stride=2)
]
Decoder2D
# Decoder2D: double (3x3 conv -> batch norm -> ReLU) block at constant spatial
# size, followed by a ConvTranspose2d (kernel_size=2, stride=2) that doubles
# height/width and maps out_channels -> deconv_channels.
layers = [
nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.ConvTranspose2d(out_channels, deconv_channels, kernel_size=2, stride=2)
]
Last2D
# Last2D (output head): two (3x3 conv -> batch norm -> ReLU) stages that both
# stay at middle_channels, a 1x1 convolution projecting to out_channels, and a
# softmax over dim 1.
# NOTE(review): if the training loss applies (log-)softmax itself, e.g.
# nn.CrossEntropyLoss, this trailing Softmax would normalise twice — confirm
# against the training code, which is not shown here.
layers = [
nn.Conv2d(in_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, middle_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels, out_channels, kernel_size=1),
nn.Softmax(dim=1)
]
One way to decrease the number of parameters is to decrease the number of channels in the convolution. You wouldn't be able to change the number of model input and output channels, because they depend on the data, but you can change the number of intermediate channels.
Remember that the output of one layer is the input to the next layer, so keep the number of output channels in the first layer the same as the number of input channels in the second layer, for every pair of layers. Example would be
# Same double-convolution block with the intermediate width halved
# (middle_channels // 2). Both places that refer to the intermediate width
# (output of the first conv / its batch norm, input of the second conv) must
# change together; in_channels and out_channels are left as they were.
layers = [
nn.Conv2d(in_channels, middle_channels//2, kernel_size=3, padding=1),
nn.BatchNorm2d(middle_channels//2),
nn.ReLU(inplace=True),
nn.Conv2d(middle_channels//2, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
]
Now, coming to the original problem of overfitting: before reducing the model size, you might want to try other remedies first, such as data augmentation and dropout.

Error in Calculating neural network Test Accuracy

I tried to train my neural network, and then evaluate its testing accuracy. I am using the code at the bottom of this post to train. The fact is that for other neural networks, I can evaluate the testing accuracy with my code without issue. However, for this neural network (which I constructed correctly according to the description of the neural network paper), I can't evaluate the testing accuracy properly and it's giving me the traceback below. So maybe something's wrong in my forward pass?
Here is the training and testing code:
# imports (including `import deepnet`)
# Script setup: load CIFAR-10, build the model and define the helper lambdas
# used by the training and evaluation loops below.
# NOTE(review): model.to(device) is commented out further down, so the model
# appears to stay on the CPU and this cuDNN setting presumably has no effect — confirm.
cudnn.benchmark = True
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Cast to float32 and move the last axis to position 1 (presumably
# NHWC -> NCHW, the layout nn.Conv2d expects — confirm load_data's output shape).
X_train = X_train.astype('float32')
X_train = np.transpose(X_train, axes=(0, 3, 1, 2))
X_test = X_test.astype('float32')
X_test = np.transpose(X_test, axes=(0, 3, 1, 2))
# Rescale in place (assumes raw pixel values in [0, 255] — TODO confirm).
X_train /= 255
X_test /= 255
device = torch.device('cuda:0')
# This is where you can load any model of your choice.
# I stole PyTorch Vision's VGG network and modified it to work on CIFAR-10.
# You can take this line out and add any other network and the code
# should run just fine.
model = deepnet.cifar10_deep()
#model.to(device)
# Forward pass: wrap a NumPy batch as a tensor and run it through the model
opfun = lambda X: model.forward(Variable(torch.from_numpy(X)))
# Predicted class per sample: argmax over axis 1 of the model output
# (.numpy() requires the output to live on the CPU)
predsfun = lambda op: np.argmax(op.data.numpy(), 1)
# Accuracy in percent, computed from model outputs `op` and labels `y`
accfun = lambda op, y: np.mean(np.equal(predsfun(op), y.squeeze()))*100
# Initial point: snapshot of the untrained weights, reloaded before each batch-size run
x0 = deepcopy(model.state_dict())
# Number of epochs to train for
# Choose a large value since LB training needs higher values
# Changed from 150 to 30
nb_epochs = 30
# One full training run is performed (and one checkpoint written) per entry.
batch_range = [25, 40, 50, 64, 80, 128, 256, 512, 625, 1024, 1250, 1750, 2048, 2500, 3125, 4096, 4500, 5000]
# Training phase: for every batch size, reset the model to the initial weights
# x0 and train it for nb_epochs epochs with a fresh Adam optimizer.
# NOTE(review): indentation was lost in this listing; the nesting described in
# these comments is inferred from the statement order — confirm.
# parametric plot (i.e., don't train the network if set to True)
hotstart = False
if not hotstart:
for batch_size in batch_range:
optimizer = torch.optim.Adam(model.parameters())
model.load_state_dict(x0)
#model.to(device)
# Printed as '-' for the first epoch, before any loss has been accumulated.
average_loss_over_epoch = '-'
print('Optimizing the network with batch size %d' % batch_size)
np.random.seed(1337) #So that both networks see same sequence of batches
for e in range(nb_epochs):
# NOTE(review): model.train() is called again before every batch below, so
# this eval() presumably has no effect on the training steps — confirm.
model.eval()
print('Epoch:', e, ' of ', nb_epochs, 'Average loss:', average_loss_over_epoch)
average_loss_over_epoch = 0
# Checkpoint the model every epoch
# NOTE(review): the save happens before this epoch's updates, so the file left
# on disk after the last epoch would not include that epoch's training — confirm.
torch.save(model.state_dict(), "./models/DeepNetC2BatchSize" + str(batch_size) + ".pth")
# Fresh random ordering of the training indices for this epoch.
array = np.random.permutation(range(X_train.shape[0]))
# Number of full batches; floor division drops any trailing partial batch.
slices = X_train.shape[0] // batch_size
# `beginning` / `end` are batch counters: batch k covers array[k*batch_size:(k+1)*batch_size].
beginning = 0
end = 1
# Training loop!
for _ in range(slices):
start_index = batch_size * beginning
end_index = batch_size * end
smpl = array[start_index:end_index]
model.train()
optimizer.zero_grad()
ops = opfun(X_train[smpl])
tgts = Variable(torch.from_numpy(y_train[smpl]).long().squeeze())
# Despite its name, loss_fn holds the loss value for this batch, not a function.
# nll_loss expects log-probabilities, which the model's forward returns via log_softmax.
loss_fn = F.nll_loss(ops, tgts)
# Running mean of the batch losses over the epoch.
average_loss_over_epoch += loss_fn.data.numpy() / (X_train.shape[0] // batch_size)
loss_fn.backward()
optimizer.step()
beginning += 1
end += 1
# Evaluation phase: reload the checkpoint written for each batch size and
# accumulate accuracy into data_for_plotting (one row per batch size).
# NOTE(review): indentation was lost in this listing; the nesting described in
# these comments is inferred from the statement order — confirm.
grid_size = 18 #How many points of interpolation between [0, 5000]
data_for_plotting = np.zeros((grid_size, 3)) #Uncomment this line if running entire code from scratch
# Not used anywhere in the code shown here.
sharpnesses1eNeg3 = []
sharpnesses5eNeg4 = []
#data_for_plotting = np.load("DeepNetCIFAR10-intermediate-values.npy") #Uncomment this line to use an existing NumPy array
print(data_for_plotting)
# Row index into data_for_plotting, advanced once per batch size.
i = 0
# Fill in test accuracy values for `grid_size' points in the interpolation
for batch_size in batch_range:
mydict = {}
# The checkpoint is a state dict saved by the training loop; it only loads into
# a model with the same parameter names and shapes.
batchmodel = torch.load("./models/DeepNetC2BatchSize" + str(batch_size) + ".pth")
for key, value in batchmodel.items():
mydict[key] = value
model.load_state_dict(mydict)
# j presumably indexes the dataset: 0 = train, 1 = test.
j = 0
for datatype in [(X_train, y_train), (X_test, y_test)]:
dataX = datatype[0]
datay = datatype[1]
# Evaluate in 10 chunks; np.split with an integer requires the sample count to be divisible by 10.
for smpl in np.split(np.random.permutation(range(dataX.shape[0])), 10):
ops = opfun(dataX[smpl])
tgts = Variable(torch.from_numpy(datay[smpl]).long().squeeze())
# NOTE(review): `var` (the chunk's share of the loss) is computed but never used or stored.
var = F.nll_loss(ops, tgts).data.numpy() / 10
# NOTE(review): accuracy is only accumulated when j == 1 and goes into column
# j-1 = 0; columns 1 and 2 of data_for_plotting are never written here — confirm intent.
if j == 1:
data_for_plotting[i, j-1] += accfun(ops, datay[smpl]) / 10.
j += 1
print(data_for_plotting[i])
# Save intermediate results so a partial run can be resumed via the np.load line above.
np.save('DeepNetCIFAR10-intermediate-values', data_for_plotting)
i += 1
And the model code is here and includes the forward pass
import torch
import torch.nn as nn
F = nn.functional
__all__ = ['cifar10_deepnet', 'cifar100_deepnet']
class VGG(nn.Module):
    """VGG-style CIFAR classifier: 13 convolution stages plus a small MLP head.

    Every stage is Conv2d(3x3, padding=1, no bias) -> BatchNorm2d -> ReLU,
    followed by either Dropout or a 2x2/stride-2 max-pool. The five max-pools
    reduce a 32x32 input to 1x1, so the classifier receives exactly 512
    features per sample.

    Args:
        num_classes: size of the final linear layer (number of classes).
    """

    def __init__(self, num_classes=10):
        super(VGG, self).__init__()
        # (out_channels, tail) for each stage; `tail` is a dropout probability
        # or 'M' for a max-pool. The module order (and therefore every
        # state_dict key, features.0 ... features.51) matches the original
        # hand-written listing.
        stages = [
            (64, 0.3), (64, 'M'),
            (128, 0.4), (128, 'M'),
            (256, 0.4), (256, 0.4), (256, 'M'),
            (512, 0.4), (512, 0.4), (512, 'M'),
            (512, 0.4), (512, 0.4), (512, 'M'),
        ]
        layers = []
        in_channels = 3
        for out_channels, tail in stages:
            layers += [
                # padding=1 on every conv, including the first: without it a
                # 32x32 image becomes 30x30 and the fifth max-pool would be
                # asked to pool a 1x1 map, which has output size 0.
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2) if tail == 'M' else nn.Dropout(tail),
            ]
            in_channels = out_channels
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Linear(512, 512, bias=False),
            nn.Dropout(0.5),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        x = self.features(x)
        # Flatten per sample. Keeping the batch dimension fixed means an
        # unexpected spatial size fails loudly in the first Linear layer
        # instead of being silently folded into the batch by view(-1, 512).
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        # Explicit dim: normalise over the class axis (implicit dim is deprecated).
        return F.log_softmax(x, dim=1)
def cifar10_deep(**kwargs):
    """Build the VGG network for CIFAR-10.

    Keyword Args:
        num_classes: number of output classes; defaults to 10.

    Returns:
        A freshly initialised VGG instance.
    """
    # kwargs is a dict, so the value has to be read with .get(); getattr() on
    # a dict looks for an attribute and therefore always returned the default.
    num_classes = kwargs.get('num_classes', 10)
    return VGG(num_classes)
def cifar100_deep(**kwargs):
    """Build the VGG network for CIFAR-100.

    Keyword Args:
        num_classes: number of output classes; defaults to 100.

    Returns:
        A freshly initialised VGG instance.
    """
    # kwargs is a dict, so the value has to be read with .get(); getattr() on
    # a dict looks for an attribute and therefore always returned the default.
    num_classes = kwargs.get('num_classes', 100)
    return VGG(num_classes)
You are trying to load a state dict that belongs to another model.
The error shows that your model is the class AlexNet.
RuntimeError: Error(s) in loading state_dict for AlexNet:
But the state dict you are trying to load is from the VGG you posted, which doesn't have the same modules as AlexNet.
You need to use the same model whose state dict you saved before.

Kernel size can't be greater than actual input size

I have a data with depth = 3 and I want to pass it through 3 convolution layers with 3x3x3 kernels each.
My current code is below. The first input is
[batch_size=10, in_channels=1, depth=3, height=128, width=256]
and I notice after the first conv3d layer the output is [10,8,1,126,254]. Obviously it has now depth 1 and doesn't accept it for another 3x3x3 layer. How can I achieve this?
# Small 3D CNN as posted in the question. It is shown to illustrate the
# problem and does not run end-to-end as written; see the NOTE(review) comments.
class CNet(nn.Module):
def __init__(self, **kwargs):
super().__init__()
# Three 3x3x3 convolutions with padding=0 and stride=1: each one shrinks
# depth, height and width by 2.
self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=0)
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=0)
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=0)
# NOTE(review): `value` is not defined anywhere in the visible code; it would
# have to be the flattened feature count reaching fc1.
self.fc1 = nn.Linear(value, 2)
def forward(self, X):
X = F.relu(self.conv1(X))
# With the depth-3 input described in the question, conv1 leaves depth 1, so
# this unpadded 3x3x3 kernel no longer fits along the depth axis.
X = F.relu(self.conv2(X))
# NOTE(review): max_pool2d is applied to a Conv3d output (5-D for batched
# input) — confirm whether max_pool3d was intended.
X = F.max_pool2d(X,2)
X = self.conv3(X)
X = F.max_pool2d(X,2)
# NOTE(review): X is not flattened before the linear layer — confirm.
X = self.fc1(X)
return F.softmax(X,dim =1)
You need to use padding. If you only want to pad the input for the convolutions after the first one and only in the depth dimensions to get the minimum dimension of 3, you would use padding=(1, 0, 0) (it's 1 because the same padding is applied to both sides, i.e. (padding, input, padding) along that dimension).
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=(1, 0, 0))
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=(1, 0, 0))
However, it is common to use padding=1 for all dimensions when using kernel_size=3, because that keeps the dimensions unchanged, which makes it much easier to build deeper network, as you don't need to worry about the sizes suddenly getting too small, as it happened already for your depth dimension. Also when no padding is used, the corners are only included in a single calculation, whereas all other elements contribute to multiple calculations. It is recommended to use kernel_size=3 and padding=1 for all your convolutions.
self.conv1 = nn.Conv3d(1, 8, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv3d(16, 32, kernel_size=3, stride=1, padding=1)

can we pass images for which height!=width through our CNN for training in pytorch?

can we pass images for which height!=width through our CNN in Pytorch?
In CNN, I have convolution, batch-norm, max-pool, relu, and fully connected layers.
My network
# Convolutional feature extractor: three 3x3 convolutions (padding=1, so they
# keep height/width) with max-pools of 2, 4 and 4 in between. Overall the
# spatial size shrinks by 2 * 4 * 4 = 32 in each dimension and the channel
# count goes 3 -> 32 -> 64 -> 128. Only the first stage has a BatchNorm2d.
self.conv_seqn = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=4, stride=4),
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=4, stride=4),
)
# Fully connected head applied to the flattened conv features.
# NOTE(review): 1843200 = 128 * 120 * 120, i.e. it matches a 3840x3840 input.
# A 3840x1920 input would flatten to 128 * 120 * 60 = 921600 features — confirm
# which input size is actually being fed to the network.
self.fc_seqn = nn.Sequential(
nn.Linear(1843200, 256),
nn.ReLU(inplace=True),
nn.Linear(256, total_configs)
)
my forward function
def forward(self, x):
    """Run the conv stack, flatten each sample, then apply the FC head.

    Args:
        x: input batch; the first dimension is treated as the batch size.

    Returns:
        The output of ``self.fc_seqn`` for the flattened conv features.
    """
    x = self.conv_seqn(x)
    # Keep the batch dimension and collapse everything else into one axis.
    x = x.view(x.size(0), -1)
    x = self.fc_seqn(x)
    return x
If the input image is of size 3840*1920*3, then after applying conv_seqn() the output should be of size [1, 128, 120, 60], but I am getting a size of [1, 128, 120, 120] (batch size = 1 here).
any suggestion will be highly helpful.

How to get rid of checkerboard artifacts

I am using a fully convolutional autoencoder to color black and white images; however, the output has a checkerboard pattern and I want to get rid of it. The checkerboard artifacts I have seen so far have always been far smaller than mine, and the usual way to get rid of them is replacing all unpooling operations with bilinear upsampling (I have been told that).
But I can not simply replace the unpooling operation because I work with different sized images, thus the unpooling operation is needed, else the output tensor could have a different size than the original.
TLDR:
How can I get rid of these checkerboard-artifacts without replacing the unpooling operations?
class AE(nn.Module):
    """Fully convolutional autoencoder (question version, no skip connections).

    Encoder: conv1..conv4, each followed by LeakyReLU(0.2) and a max-pool that
    records its argmax indices. Bottleneck: conv5 and conv6. Decoder: four
    max-unpool steps (replaying the recorded indices in reverse order), each
    followed by a stride-1 transposed convolution; the last one maps to 2
    channels and is followed by Softmax2d.
    """

    def __init__(self):
        super().__init__()
        self.leaky_reLU = nn.LeakyReLU(0.2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
        self.softmax = nn.Softmax2d()

        # All ten layers share the same 3x3 / stride 1 / padding 1 geometry,
        # so they preserve height and width; only the channel widths differ.
        geometry = dict(kernel_size=3, stride=1, padding=1)
        down_widths = (3, 64, 128, 256, 512, 1024)
        up_widths = (1024, 512, 256, 128, 64, 2)
        # conv1..conv5: regular convolutions, registered in order.
        for n, (c_in, c_out) in enumerate(zip(down_widths, down_widths[1:]), start=1):
            setattr(self, 'conv%d' % n,
                    nn.Conv2d(in_channels=c_in, out_channels=c_out, **geometry))
        # conv6..conv10: transposed convolutions, registered in order.
        for n, (c_in, c_out) in enumerate(zip(up_widths, up_widths[1:]), start=6):
            setattr(self, 'conv%d' % n,
                    nn.ConvTranspose2d(in_channels=c_in, out_channels=c_out, **geometry))

    def forward(self, x):
        # encoder: remember the pre-pool size and the pooling indices per stage
        recorded = []
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.leaky_reLU(conv(x))
            pre_pool_size = x.size()
            x, indices = self.pool(x)
            recorded.append((pre_pool_size, indices))

        # bottleneck
        for conv in (self.conv5, self.conv6):
            x = self.leaky_reLU(conv(x))

        # decoder: undo the pools last-to-first; output_size restores the exact
        # pre-pool size, which is what lets differently sized images round-trip
        decoder_stages = (
            (self.conv7, self.leaky_reLU),
            (self.conv8, self.leaky_reLU),
            (self.conv9, self.leaky_reLU),
            (self.conv10, self.softmax),
        )
        for conv, activation in decoder_stages:
            pre_pool_size, indices = recorded.pop()
            x = self.unpool(x, indices, output_size=pre_pool_size)
            x = activation(conv(x))
        return x
Instead of using an upconv layer such as nn.ConvTranspose2d, you can use interpolation in the decoder part to go back to your initial format, such as torch.nn.functional.interpolate. It will prevent you from having checkerboards artifacts.
If you want learnable weights in the decoder, you should also use a conv layer such as nn.Conv2d after each interpolation.
Skip connections are commonly used in Encoder-Decoder architectures, and they help to produce accurate results by passing appearance information from a shallow layer of the encoder (discriminator) to the corresponding deeper layer of the decoder (generator). Unet is the most widely used Encoder-Decoder type architecture. Linknet is also very popular, and it differs from Unet in the way it fuses the appearance information of an encoder layer with the decoder layer. In the case of Unet, incoming features (from the encoder) are concatenated in the corresponding decoder layer. Linknet, on the other hand, performs addition, which is why Linknet requires fewer operations in a single forward pass and is significantly faster than Unet.
Each convolution block in your decoder might look like the following:
Additionally, I'm attaching a figure below depicting the architectures of Unet and LinkNet. I hope using skip connections will help.
You get this pattern because of deconvolution (nn.ConvTranspose2d). The article explains it in detail.
You may try Upsample as alternative. This will not provide the checkerboard pattern.
Works like this:
import torch
# 1x1x2x2 tensor holding [[1, 2], [3, 4]]; named `x` rather than `input` so
# the builtin input() is not shadowed.
x = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
# Nearest-neighbour upsampling by 2 has no learnable parameters: every value
# is simply repeated in a 2x2 patch.
m = torch.nn.Upsample(scale_factor=2, mode='nearest')
# Result (shape 1x1x4x4):
# [[1, 1, 2, 2],
#  [1, 1, 2, 2],
#  [3, 3, 4, 4],
#  [3, 3, 4, 4]]
m(x)
However you will not be able to learn anything with Upsample. It is just a transform.
So it is a trade. There are many papers online how to deal with the checkerboard pattern for different problems.
The idea is to train your network so the checkerboard pattern is gone.
As Kaushik Roy has specified, Skip-Connections are the way to go!
class AE(nn.Module):
    """Fully convolutional autoencoder with U-Net style skip connections.

    Encoder: conv1..conv4, each followed by LeakyReLU(0.2) and a max-pool that
    records its argmax indices. Bottleneck: conv5 and conv6. Decoder: each
    step unpools with the recorded indices, concatenates the matching encoder
    activation along the channel axis and applies a regular 3x3 convolution;
    the last step maps to 2 channels and is followed by Softmax2d.
    """

    def __init__(self):
        super().__init__()
        self.leaky_reLU = nn.LeakyReLU(0.2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
        self.softmax = nn.Softmax2d()

        # (in_channels, out_channels) for conv1..conv10, registered in order.
        # conv7..conv10 take twice the width of the previous output because
        # the skip tensor is concatenated onto their input.
        channel_plan = (
            (3, 64), (64, 128), (128, 256), (256, 512),  # encoder
            (512, 1024), (1024, 512),                    # bottleneck
            (1024, 256), (512, 128), (256, 64), (128, 2),  # decoder
        )
        for n, (c_in, c_out) in enumerate(channel_plan, start=1):
            layer = nn.Conv2d(in_channels=c_in, out_channels=c_out,
                              kernel_size=3, stride=1, padding=1)
            setattr(self, 'conv%d' % n, layer)

    def forward(self, x):
        # encoder: keep each pre-pool activation (it is the skip tensor and
        # also supplies the unpool target size) together with its pool indices
        skips = []
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            activated = self.leaky_reLU(conv(x))
            x, indices = self.pool(activated)
            skips.append((activated, indices))

        # bottleneck
        for conv in (self.conv5, self.conv6):
            x = self.leaky_reLU(conv(x))

        # decoder: consume the skips last-to-first
        decoder_stages = (
            (self.conv7, self.leaky_reLU),
            (self.conv8, self.leaky_reLU),
            (self.conv9, self.leaky_reLU),
            (self.conv10, self.softmax),
        )
        for conv, activation in decoder_stages:
            skip, indices = skips.pop()
            x = self.unpool(x, indices, output_size=skip.size())
            x = activation(conv(torch.cat((x, skip), 1)))
        return x
This answer was posted as an edit to the question How to get rid of checkerboard artifacts by the OP Stefan under CC BY-SA 4.0.

Categories