I’d like to reconstruct a 3D object from 2D images.
For that, I am trying to use a convolutional autoencoder. However, in which layer should I lift the dimensionality from 2D to 3D?
I wrote the code below, but it raises an error:
“RuntimeError: invalid argument 2: size ‘[1 x 1156 x 1156]’ is invalid for input of with 2312 elements at pytorch-src/torch/lib/TH/THStorage.c:41”
class dim_lifting(nn.Module):
    """Auto-encoder sketch: 2D conv encoder, linear bottleneck, 3D transposed-conv decoder."""

    def __init__(self):
        super(dim_lifting, self).__init__()
        # Encoder: three Conv2d + MaxPool2d(2) stages, ending with 8 feature channels.
        self.encode = nn.Sequential(
            nn.Conv2d(1, 34, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(34, 16, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 8, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.LeakyReLU()
        )
        # Bottleneck over the flattened encoder output; it expects exactly
        # 2312 features per sample.
        self.fc1 = nn.Linear(2312, 2312)
        # Decoder made of 3D transposed convolutions.
        # NOTE(review): the last layer is a MaxPool2d although the preceding
        # layers are 3D -- confirm a 2D pool is really intended here.
        self.decode = nn.Sequential(
            nn.ConvTranspose3d(1, 16, kernel_size=5, padding=2),
            nn.LeakyReLU(),
            nn.ConvTranspose3d(16, 32, kernel_size=5, padding=2),
            nn.LeakyReLU(),
            nn.MaxPool2d(2))

    def forward(self, x):
        out = self.encode(x)
        # Keep the batch dimension and flatten everything else.
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        # NOTE(review): this line raises the reported RuntimeError. fc1 yields
        # 2312 values per sample, but a (1, 1156, 1156) view needs
        # 1156 * 1156 elements, so the reshape cannot succeed.
        out = out.view(1, 1156, 1156)
        out = self.decode(out)
        return out
Error happens here
out = out.view(1, 1156, 1156)
I cannot test my suggestion because your example is not complete.
I think your line should look like
out = out.view(x.size(0), -1)
This way you are flattening everything except the batch dimension.
Related
I am trying to understand this model, by building it from scratch.
I am trying to start with just one layer, but I am getting error stating output of layer1 is not the expected input type for fc1.
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x197136 and 2704x2704)
I am not 100% sure if I can do this with 1 layer, but I am trying to learn by playing around with it.
code(Which I am trying to play around with):
# Create a neural network from pytorch
# https://www.kaggle.com/code/reukki/pytorch-cnn-tutorial-with-cats-and-dogs
class Cnn(nn.Module):
    """One conv block followed by two fully connected layers producing 2 scores."""

    def __init__(self):
        super(Cnn,self).__init__()
        # Single block: strided 3x3 convolution, batch norm, ReLU, 2x2 max pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        # NOTE(review): fc1 assumes the flattened layer1 output has
        # 3*3*16 = 144 features per sample. The reported "mat1 and mat2 shapes
        # cannot be multiplied" error means the real feature count differs --
        # verify it against the actual input image size.
        self.fc1 = nn.Linear(3*3*16,10)
        # Defined here but never applied in forward().
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(10,2)
        self.relu = nn.ReLU()

    def forward(self,x):
        out = self.layer1(x)
        # Keep the batch dimension, flatten channels/height/width into one axis.
        out = out.view(out.size(0),-1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out
I am also unsure what out.view(out.size(0),-1) does.
Would be great if anyone can help me understand better. Looking forward to some
[Update]
Kaggle Notebook link:- https://www.kaggle.com/austonpramodh/cats-dogs-cnn-learning
out.view(out.size(0),-1) reshapes the tensor to two dimensions: it keeps the first (batch) dimension, out.size(0), and flattens all remaining dimensions into a single one. The -1 tells PyTorch to infer that size so that the total number of elements stays the same. For example, if out has shape 64x16x55x55, this command turns it into 64x48400; a single 1x512x512 sample would become 1x262144.
I was able to get the answer from one of my friend, Thanks to Vihari.
class Cnn(nn.Module):
    """Minimal CNN: one convolutional block and a single linear classifier.

    The classifier is sized for a 16 x 55 x 55 feature map, which ``layer1``
    produces for single-channel inputs of e.g. 224 x 224 pixels. Other input
    sizes need a different ``in_features`` value for ``fc1``.
    """

    def __init__(self):
        super(Cnn, self).__init__()
        # Strided 3x3 convolution -> batch norm -> ReLU -> 2x2 max pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=0, stride=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # 55 * 55 * 16 is the per-sample size of the flattened layer1 output.
        self.fc1 = nn.Linear(55 * 55 * 16, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the two class scores for a batch ``x`` of shape (N, 1, H, W)."""
        out = self.layer1(x)
        out = self.relu(out)
        # Keep the batch axis and merge channels, height and width into one.
        out = out.flatten(start_dim=1)
        out = self.fc1(out)
        return out
Also, Thanks for the out.view explanation #Pranav.
I'm trying to train a LeNet model in PyTorch. The idea is to be able to feed it images of any size, so I started using nn.AdaptiveAvgPool2d, but I get the error
mat1 dim 1 must match mat2 dim 0
Here is my code
class LeNet5(nn.Module):
    """LeNet-style classifier with adaptive average pooling before the linear layers."""

    def __init__(self, num_classes=10):
        super(LeNet5, self).__init__()
        self.conv_1 = nn.Conv2d(
            in_channels=1, out_channels=32, kernel_size=5, bias=False
        )
        self.relu_1 = nn.ReLU(inplace=True)
        self.maxpool_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv_2 = nn.Conv2d(
            in_channels=32, out_channels=256, kernel_size=5, bias=False
        )
        self.relu_2 = nn.ReLU(inplace=True)
        self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Pools every feature map down to 1x1, whatever the input size.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.flatten = nn.Flatten()
        # NOTE(review): after avgpool + flatten each sample has 256 features
        # (the out_channels of conv_2), but fc_1 expects 4096. This mismatch
        # is the reported "mat1 dim 1 must match mat2 dim 0" error.
        self.fc_1 = nn.Linear(in_features=4096, out_features=120, bias=False)
        self.fc_2 = nn.Linear(in_features=120, out_features=84)
        self.fc_3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, input):
        conv_1_output = self.conv_1(input)
        relu_1_output = self.relu_1(conv_1_output)
        maxpool_1_output = self.maxpool_1(relu_1_output)
        conv_2_output = self.conv_2(maxpool_1_output)
        relu_2_output = self.relu_2(conv_2_output)
        maxpool_2_output = self.maxpool_2(relu_2_output)
        # The .view() already reshapes to (batch, features); self.flatten is
        # then applied to a tensor that is already 2-D.
        flatten_output = self.flatten((self.avgpool(maxpool_2_output).view(maxpool_2_output.shape[0], -1)))
        # The three linear layers are applied back to back with no activation in between.
        fc_1_output = self.fc_1(flatten_output)
        fc_2_output = self.fc_2(fc_1_output)
        fc_3_output = self.fc_3(fc_2_output)
        return fc_3_output
if you read the theory on AdaptiveAvgPool2d, this is what it says " we specify the output size And the stride and kernel-size are automatically selected to adapt to the needs"
More info available here
Hence AdaptiveAvgPool2d reduces the spatial dimensions of your feature maps, not their depth (number of channels).
So the spatial dimensions will be 1x1 and the depth will still be 256, which means the first fully connected layer should be
self.fc_1 = nn.Linear(in_features=256, out_features=120, bias=False) and not self.fc_1 = nn.Linear(in_features=4096, out_features=120, bias=False)
I´ve got a problem when passing an image to my unet. I get the following error :
Traceback (most recent call last):
File "path\Main.py", line 101, in <module>
outputs = model(inputs[None,...].float())
File "C:\Users\...\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "path\UNets.py", line 53, in forward
upconv2 = self.upconv2(torch.cat([upconv1,conv4]),1)
RuntimeError: Sizes of tensors must match except in dimension 2. Got 15 and 10 (The offending index is 0)
The images i use are of different sizes, i format them to 256x256. They are grayscale images
my dataloading:
def getImageAndTransform(self,item):
    """Load the image/label pair at index ``item`` and resize both to 256x256.

    Args:
        item: index into ``self.datalist`` and ``self.labellist``, which hold
            the paths of the input image and of its label image.

    Returns:
        A ``(data, label)`` tuple of tensors created from the resized images.
    """
    # ``Resize`` replaces ``Scale``, which was deprecated and later removed
    # from torchvision; the resulting images are the same.
    # NOTE(review): the default interpolation is also applied to the label
    # image, as in the original code -- confirm this is acceptable for masks.
    resize = torchvision.transforms.Resize((256, 256))
    # Open both files in a ``with`` block so the file handles are closed.
    with Image.open(self.datalist[item]) as image, Image.open(self.labellist[item]) as mask:
        data = torch.from_numpy(np.array(resize(image)))
        label = torch.from_numpy(np.array(resize(mask)))
    return data, label
my Unet:
class Unet(SegmentationNetwork):
    """U-Net style network: five contracting blocks followed by five expanding blocks."""

    def __init__(self,config):
        # NOTE(review): the class is named Unet but super() is given
        # StandardUnet -- confirm that name exists, otherwise this raises NameError.
        super(StandardUnet,self).__init__(config = config)
        # in_channels, out_channels, channels, kernel and padding are read from
        # self; presumably they are set by the base class from config -- verify.
        #down
        self.downconv1 =self.contract_block(self.in_channels,self.channels[0],self.kernel[0],self.padding[0])
        self.downconv2 =self.contract_block(self.channels[0],self.channels[1],self.kernel[1],self.padding[1])
        self.downconv3 =self.contract_block(self.channels[1],self.channels[2],self.kernel[2],self.padding[2])
        self.downconv4 =self.contract_block(self.channels[2],self.channels[3],self.kernel[3],self.padding[3])
        self.downconv5 =self.contract_block(self.channels[3],self.channels[4],self.kernel[4],self.padding[4])
        #up
        self.upconv1 = self.expand_block(self.channels[4],self.channels[3],self.kernel[4],self.padding[4])
        self.upconv2 = self.expand_block(self.channels[3],self.channels[2],self.kernel[3],self.padding[3])
        self.upconv3 = self.expand_block(self.channels[2], self.channels[1], self.kernel[2], self.padding[2])
        self.upconv4 = self.expand_block(self.channels[1], self.channels[0], self.kernel[1], self.padding[1])
        self.upconv5 = self.expand_block(self.channels[0], self.out_channels, self.kernel[0], self.padding[0])

    def forward(self,x):
        """Run the network on x; the result is stored in self.out and nothing is returned."""
        #down
        conv1 = self.downconv1(x)
        conv2 = self.downconv2(conv1)
        conv3 = self.downconv3(conv2)
        conv4 = self.downconv4(conv3)
        conv5 = self.downconv5(conv4)
        #up
        upconv1 = self.upconv1(conv5)
        # NOTE(review): in the four calls below the closing parenthesis of
        # torch.cat comes before the 1, so 1 is passed to self.upconvN as a
        # second argument and torch.cat uses its default dim=0. This is the
        # source of the reported "Sizes of tensors must match" error.
        upconv2 = self.upconv2(torch.cat([upconv1,conv4]),1)
        upconv3 = self.upconv3(torch.cat([upconv2,conv3]),1)
        upconv4 = self.upconv4(torch.cat([upconv3,conv2]),1)
        upconv5 = self.upconv5(torch.cat([upconv4,conv1]),1)
        # The result is only stored on the instance; forward() has no return statement.
        self.out = upconv5

    def contract_block(self,in_channels,out_channels,kernel_size, padding):
        """Build a down-sampling block: two Conv-BatchNorm-ReLU stages, then a stride-2 max pool."""
        contract = nn.Sequential(
            nn.Conv2d(in_channels,out_channels,kernel_size=kernel_size,stride=1,padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels,out_channels,kernel_size=kernel_size,stride=1,padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=1))
        return contract

    def expand_block(self,in_channels,out_channels,kernel_size,padding):
        """Build an up-sampling block: two Conv-BatchNorm-ReLU stages, then a stride-2 transposed conv."""
        expand = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
        )
        return expand
my implementation:
# One pass over the training data: forward, loss, backward, optimizer step.
for i, data in enumerate(dataloader_train, 0): # inputdata as list of [inputs,labels]
    # The two size() results are discarded, so these lines have no effect
    # (presumably leftover debugging).
    data[0].size()
    data[1].size()
    inputs, labels = data[0].to(device), data[1].to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    # inputs[None,...] adds a new leading dimension before the tensor is cast to float.
    outputs = model(inputs[None,...].float())
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()
Can anyone tell me what I should do to fix this?
Maybe the question is stupid, but since I'm new to torch and deep learning in general, I would appreciate any help.
Thanks
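I found my error: a parenthesis was in the wrong place in the up-convolution steps, so the dimension argument 1 was passed to self.upconv2 instead of to torch.cat.
The correct line is upconv2 = self.upconv2(torch.cat([upconv1,conv4],1)), and likewise for upconv3 to upconv5.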
So I am trying to use torch.nn.utils.prune.global_unstructured.
I did it on a simple model, accessing model.conv2 or other layers directly, and that worked. Now I am trying to do it on a model whose layers are nested, and I get errors such as:
AttributeError: 'CNN' object has no attribute 'conv1'
and other errors. I tried everything to access this nested conv1, but I couldn't.
You can find the model code below:
class CNN(nn.Module):
    """Three-block convolutional classifier with 100 output classes.

    The first linear layer takes 4096 features, i.e. the flattened
    256 x 4 x 4 output that ``conv_layer`` produces for 3-channel 32 x 32
    inputs (three 2x2 max pools reduce 32 -> 4).

    Layers live inside two ``nn.Sequential`` containers, so they are reached
    by index (for example ``model.conv_layer[0]``), not by attribute names
    such as ``conv1``.
    """

    def __init__(self):
        """CNN Builder."""
        super(CNN, self).__init__()

        self.conv_layer = nn.Sequential(
            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, 100),
        )

    def forward(self, x):
        """Perform forward."""
        # conv layers
        x = self.conv_layer(x)
        # flatten: keep the batch axis, merge channels/height/width
        x = x.view(x.size(0), -1)
        # fc layer
        x = self.fc_layer(x)
        return x
How can I apply pruning on this model?
Your modules are not named 'conv1' or 'conv2'; you can see the actual names using the named_modules generator. From the code above, you have a 'conv_layer' Sequential, whose first convolution can be accessed by index as model.conv_layer[0]. You can iterate over the modules to create a tuple like:
import torch.nn as nn

# Collect the weight of every Conv2d / Linear layer, however deeply it is
# nested inside nn.Sequential containers, as (module, parameter_name) pairs.
# This is the format prune.global_unstructured expects, and it avoids
# attribute names such as model.conv1 that the model does not define.
parameters_to_prune = tuple(
    (module, 'weight')
    for module in model.modules()
    if isinstance(module, (nn.Conv2d, nn.Linear))
)
and pass this in. See for more: https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/f40ae04715cdb214ecba048c12f8dddf/pruning_tutorial.ipynb#scrollTo=UVFjM079F0Oi
Use this method to see the names of layers
# named_parameters() yields (qualified parameter name, tensor) pairs, so each
# printed name identifies a parameter (e.g. "conv_layer.0.weight" for a layer
# inside an nn.Sequential called conv_layer), not a module.
for layer_name, param in model.named_parameters():
    print(f"layer name: {layer_name} has {param.shape}")
and pass the corresponding module (the module object itself, not its name) to the prune method,
e.g. prune.random_unstructured(model.conv_layer[0], name="weight", amount=0.3)
When showing the inner layers' names using the print method, you can see that when nn.Sequential is used, the inner layers cannot be accessed as attributes, since their names are like xxx.0, xxx.1 etc. (and xxx.0.weight, xxx.0.bias as well), which is not valid attribute syntax in Python. They can still be reached by indexing, e.g. model.xxx[0]. Alternatively, rewriting the code and separating the layers out of nn.Sequential into named attributes may be the right choice, although it is more verbose.
I am initializing a Convolutional DQN with the following code:
class ConvDQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by a three-layer MLP head."""

    def __init__(self, input_dim, output_dim):
        super(ConvDQN, self).__init__()
        # NOTE(review): input_dim is used in two incompatible ways. Below it is
        # passed to Conv2d as the number of input channels, while
        # feature_size() unpacks it with * as the shape of a dummy input.
        # This inconsistency is behind the reported "Expected 4-dimensional
        # input" error -- decide whether input_dim is a channel count or a
        # full (channels, height, width) shape.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.conv = nn.Sequential(
            nn.Conv2d(self.input_dim, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )
        # Size of the flattened conv output, measured by running a dummy input.
        self.fc_input_dim = self.feature_size()
        self.fc = nn.Sequential(
            nn.Linear(self.fc_input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, self.output_dim)
        )

    def forward(self, state):
        """Return the output of the fully connected head for a batch of states."""
        features = self.conv(state)
        # Keep the batch dimension and flatten the conv features.
        features = features.view(features.size(0), -1)
        qvals = self.fc(features)
        return qvals

    def feature_size(self):
        """Return the number of features self.conv produces for one all-zero input."""
        # Builds a zero tensor of shape (1, *input_dim), runs it through the
        # conv stack and reads the length of the flattened result.
        return self.conv(autograd.Variable(torch.zeros(1, *self.input_dim))).view(1, -1).size(1)
And it gives me the error:
File "dqn.py", line 86, in __init__
self.fc_input_dim = self.feature_size()
File "dqn.py", line 105, in feature_size
return self.conv(autograd.Variable(torch.zeros(32, *self.input_dim))).view(1, -1).size(1)
File "C:\Users\ariji\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "C:\Users\ariji\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\container.py", line 92, in forward
input = module(input)
File "C:\Users\ariji\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "C:\Users\ariji\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\conv.py", line 320, in forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 4, 8, 8], but got 2-dimensional input of size [1, 4] instead
So I get the fact that the input that I am passing to the convolutional network is of incorrect dimensions. What I do not understand is how I am supposed to add the required dimensions to my input? Or should I change something in my convolutional network?
You pass the conv layer torch.zeros(1, *self.input_dim) which is torch.Size([1, 4]), but you initialize the conv layer as,
# The first Conv2d receives self.input_dim as its number of input channels.
nn.Sequential(
    nn.Conv2d(self.input_dim, 32, kernel_size=8, stride=4),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=4, stride=2),
    nn.ReLU(),
    nn.Conv2d(64, 64, kernel_size=3, stride=1),
    nn.ReLU()
)
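So self.conv expects a 4-dimensional input of shape (batch, channels, height, width), with the number of channels given by self.input_dim, but you pass it a 2-dimensional tensor of torch.Size([1, 4]).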