I try to run the following program for image classification in PyTorch and it returns:
Given groups=1, weight of size [6, 3, 5, 5], expected input[192, 1, 256, 256] to have 3 channels, but got 1 channels instead
num_classes = 2

class CNN(nn.Module):
    def __init__(self, input_size, n_features, output_size):
        super(CNN, self).__init__()
        self.n_features = n_features
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=n_features, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=n_features, out_channels=3*n_features, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=3*n_features, out_channels=n_features, kernel_size=5)
        self.conv1_bn = nn.BatchNorm2d(n_features)
        self.conv2_bn = nn.BatchNorm2d(3*n_features)
        self.fc1 = nn.Linear(n_features*4*4, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.conv1_bn(x))
        x = F.max_pool2d(x, kernel_size=2)
        x = self.conv2(x)
        x = F.relu(self.conv1_bn(x))
        x = F.max_pool2d(x, kernel_size=2)
        x = x.view(-1, self.n_features*4*4)
        x = self.fc1(x)
        x = F.log_softmax(x, dim=1)
        return x
To get rid of the problem you should change the number of in_channels in self.conv1 from 3 to 1, because the error message indicates you are training on images of size (1, 256, 256), i.e. single-channel (grayscale) images. Also consider setting the stride option of the convolutional layers to something larger than the default 1, or increasing the number of layers.
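A minimal sketch of that change (assuming grayscale 256x256 inputs, as the error message implies; the stride value is only an illustration, and self.fc1 would need to be resized to match the new feature-map size):
import torch
import torch.nn as nn

n_features = 6  # matches the weight size [6, 3, 5, 5] in the error message
conv1 = nn.Conv2d(in_channels=1, out_channels=n_features, kernel_size=5, stride=2)
x = torch.randn(192, 1, 256, 256)  # the input shape reported in the error message
print(conv1(x).shape)              # torch.Size([192, 6, 126, 126])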
I made a DCGAN using PyTorch and I want to modify it so the output of the last ConvTranspose2d layer can be passed into an LSTM layer.
For this, I have an array of image data with shape (2092, 64, 64, 3), which is also going to be the input for the neural network. I've extracted the RGB arrays so I can pass each color array into the LSTM layer.
The following is the test code, used outside the neural network:
Red = data[:, :, :, 0]
outputR = []
for i in range(8, 64):
    outputR.append(Red[i-8:i, 0])
outputR = np.array(outputR)
Everything goes fine and I get outputR with shape (56,8,64) which can be used as input for the LSTM.
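For reference, a minimal standalone sketch (with random data standing in for the real image array, which is an assumption) reproduces that shape:
import numpy as np

data = np.random.rand(2092, 64, 64, 3)           # stand-in for the real image array
Red = data[:, :, :, 0]                           # shape (2092, 64, 64)
outputR = [Red[i-8:i, 0] for i in range(8, 64)]  # 56 windows, each of shape (8, 64)
outputR = np.array(outputR)
print(outputR.shape)                             # (56, 8, 64)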
However, when I try to apply this code inside my neural network:
R = RGB[:, 0, :, :]  # PyTorch tensor with shape [n_samples, RGB, height, width]
G = RGB[:, 1, :, :]
B = RGB[:, 2, :, :]

R = R.detach().cpu().numpy()
G = G.detach().cpu().numpy()
B = B.detach().cpu().numpy()

outputR = []
outputG = []
outputB = []
for i in range(8, 64):
    outputR.append(R[i-8:i, 0])
    outputG.append(G[i-8:i, 0])
    outputB.append(B[i-8:i, 0])
outputR = np.array(outputR, dtype='float32')
outputG = np.array(outputG, dtype='float32')
outputB = np.array(outputB, dtype='float32')
I get the following error: ValueError: setting an array element with a sequence.
Why this difference? In both cases I'm operating on sequences, arrays with a batch dimension. Yet when I test the operation outside the neural network, everything goes smoothly. The error only happens inside the neural network.
Here's the complete code for the neural network:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        # input is Z (batch_size, 100, 1, 1), going into a convolution
        self.transconv1 = nn.ConvTranspose2d(100, 64 * 8, 4, 1, 0, bias=False)
        self.batchnorm1 = nn.BatchNorm2d(64 * 8)
        self.ReLU = nn.ReLU(True)
        # state size. (ngf*8) x 4 x 4
        self.transconv2 = nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False)
        self.batchnorm2 = nn.BatchNorm2d(64 * 4)
        # state size. (ngf*4) x 8 x 8
        self.transconv3 = nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False)
        self.batchnorm3 = nn.BatchNorm2d(64 * 2)
        # state size. (ngf*2) x 16 x 16
        self.transconv4 = nn.ConvTranspose2d(64 * 2, 64, 4, 2, 1, bias=False)
        self.batchnorm4 = nn.BatchNorm2d(64)
        # state size. (ngf) x 32 x 32
        self.transconv5 = nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False)
        self.lstm5 = nn.LSTM(input_size=64, hidden_size=64, num_layers=10, bias=False, batch_first=True)
        # state size. (nc) x 64 x 64

    def forward(self, input):
        x = self.transconv1(input)
        x = self.batchnorm1(x)
        x = self.ReLU(x)
        x = self.transconv2(x)
        x = self.batchnorm2(x)
        x = self.ReLU(x)
        x = self.transconv3(x)
        x = self.batchnorm3(x)
        x = self.transconv4(x)
        x = self.batchnorm4(x)
        x = self.transconv5(x)
        R, G, B = self._preprocess(x)
        outR = self.lstm5(R)
        outG = self.lstm5(G)
        outB = self.lstm5(B)
        output = np.stack((outR, outG, outB), axis=1)
        return output

    def _preprocess(self, RGB):
        R = RGB[:, 0, :, :]
        G = RGB[:, 1, :, :]
        B = RGB[:, 2, :, :]
        R = R.detach().cpu().numpy()
        G = G.detach().cpu().numpy()
        B = B.detach().cpu().numpy()
        print(R.shape)
        outputR = []
        outputG = []
        outputB = []
        for i in range(8, 64):
            outputR.append(R[i-8:i, 0])
            outputG.append(G[i-8:i, 0])
            outputB.append(B[i-8:i, 0])
        outputR = np.array(outputR, dtype='float32')
        outputG = np.array(outputG, dtype='float32')
        outputB = np.array(outputB, dtype='float32')
        # (n_samples, timesteps, features)
        return torch.from_numpy(outputR), torch.from_numpy(outputG), torch.from_numpy(outputB)
I wonder how to use torchviz to generate a network architecture diagram when the output is a list type?
The demo code is as follows:
import torch
import torch.nn as nn

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, 3, 1, 1),
            nn.ReLU(),
            nn.AvgPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        output = []
        output.append(x)
        output.append(self.out(x))
        return output

MyConvNet = ConvNet()
and I use torchviz to view this network's architecture like this:
from torchviz import make_dot
x = torch.randn(1, 1, 28, 28).requires_grad_(True)
y = MyConvNet(x)
MyConvNetVis = make_dot(y, params=dict(list(MyConvNet.named_parameters()) + [('x', x)]))
MyConvNetVis.format = "png"
MyConvNetVis.directory = "data"
MyConvNetVis.view()
Then I was blocked by this problem:
AttributeError Traceback (most recent call last)
<ipython-input-23-c8e3cd3a8b4e> in <module>
2 x = torch.randn(1, 1, 28, 28).requires_grad_(True)
3 y = MyConvNet(x)
----> 4 MyConvNetVis = make_dot(y, params=dict(list(MyConvNet.named_parameters()) + [('x', x)]))
5 MyConvNetVis.format = "png"
6 MyConvNetVis.directory = "data"
~/anaconda3/envs/torch1.3/lib/python3.6/site-packages/torchviz/dot.py in make_dot(var, params)
35 return '(' + (', ').join(['%d' % v for v in size]) + ')'
36
---> 37 output_nodes = (var.grad_fn,) if not isinstance(var, tuple) else tuple(v.grad_fn for v in var)
38
39 def add_nodes(var):
AttributeError: 'list' object has no attribute 'grad_fn'
Any advice will be appreciated.
The error indicates torchviz is trying to navigate through the network using grad_fn in order to build its own graph. However, a list is not a tensor and doesn't have the grad_fn attribute.
I'm not quite sure you can have multiple outputs (i.e. a list as output) when working with torchviz. As a workaround, if you just want to visualize the network, you could replace the list with a concatenation of the two tensors using torch.cat:
def forward(self, x):
    x = self.conv1(x)
    x = self.conv2(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    out = self.out(x)
    output = torch.cat([x, out], dim=1)
    return output
The result is:
Notice how the last node is a CatBackward with two incoming branches, one from AddmmBackward (out) and the other from ReluBackward0 (x). This last node is fictitious and is not present in your actual model, so you could remove it from the graph by hand.
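If you would rather keep the original list-returning forward, another possible workaround (a sketch, not verified against every torchviz version) is to pass make_dot a single tensor from the list, since each element does have a grad_fn:
y = MyConvNet(x)  # y is a list of two tensors
MyConvNetVis = make_dot(y[1], params=dict(list(MyConvNet.named_parameters()) + [('x', x)]))
MyConvNetVis.format = "png"
MyConvNetVis.directory = "data"
MyConvNetVis.view()
Since y[0] (the intermediate x) lies on the path to y[1], this still shows the whole graph.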
An earlier answer lists packages that can build an architecture diagram/graph for a PyTorch model:
torchviz/pytorchviz
TensorBoard
Netron
HiddenLayer
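For example, a minimal TensorBoard sketch (assuming the tensorboard package is installed and using the torch.cat-based forward above, so the traced model returns a single tensor; the log directory name is only an example):
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/convnet")    # example log directory
dummy_input = torch.randn(1, 1, 28, 28)
writer.add_graph(MyConvNet, dummy_input)  # traces the model and writes its graph
writer.close()
# then inspect with: tensorboard --logdir runs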
I understand that the pole-balancing example requires 2 outputs (Reinforcement Learning (DQN) Tutorial).
Here is the output for self.head:
print ('x',self.head)
x = Linear(in_features=512, out_features=2, bias=True)
When I run the epochs, below are the outputs:
print (self.head(x.view(x.size(0), -1)))
return self.head(x.view(x.size(0), -1))
tensor([[-0.6945, -0.1930]])
tensor([[-0.0195, -0.1452]])
tensor([[-0.0906, -0.1816]])
tensor([[ 0.0631, -0.9051]])
tensor([[-0.0982, -0.5109]])
...
The size of x is:
x = torch.Size([121, 32, 2, 8])
So I am trying to understand what x.view(x.size(0), -1) is doing.
I understand from the comment in the code that it's returning:
Returns tensor([[left0exp,right0exp]...]).
But how is x, which is torch.Size([121, 32, 2, 8]), reduced to a tensor of size 2?
Is there an alternative way of writing it that makes more sense? What if I had 4 outputs? How would I represent that? Why x.size(0)? Why -1?
So it appears to take x, which has 4 dimensions, down to the 2 outputs of self.head. Is that correct?
At the bottom is the class I am referring to:
class DQN(nn.Module):
    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1

        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
x.view(x.size(0), -1) flattens the tensor; this is because the Linear layer only accepts a vector (a 1D array) per sample. To break it down, x.view() reshapes the tensor to the specified shape (more info). x.size(0) returns the first dimension of the tensor (the batch size, which should remain constant). The -1 in x.view() is a placeholder for a dimension we don't know, so PyTorch calculates it automatically. For example, if x = torch.tensor([1,2,3,4]), to reshape the tensor to 2x2 you could do x.view(2,2), x.view(2,-1), or x.view(-1,2).
The output is not a tensor of shape 2, but of shape (121, 2): 121 is the batch size, and the 2 comes from the Linear layer's output. So to change the output size from 2 to 4, you would have to change the outputs argument in the __init__ function to 4.
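A minimal sketch of the shapes involved, using random data of the size quoted in the question:
import torch
import torch.nn as nn

x = torch.randn(121, 32, 2, 8)  # batch of 121, as in the question
flat = x.view(x.size(0), -1)    # keep the batch dimension, flatten the rest
print(flat.shape)               # torch.Size([121, 512]) since 32*2*8 = 512

head = nn.Linear(512, 2)        # matches Linear(in_features=512, out_features=2)
print(head(flat).shape)         # torch.Size([121, 2])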
Let me explain the objective first. Let's say I have 1000 images, each with an associated quality score in the range 0-10. Now, I am trying to perform image quality assessment using a CNN with regression (in PyTorch). I have divided the images into equal-size patches. Now, I have created a CNN network in order to perform the linear regression.
Following is the code:
class MultiLabelNN(nn.Module):
    def __init__(self):
        super(MultiLabelNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(3200, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = x.view(-1, 3200)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
While running this network I am getting the following error:
input and target shapes do not match: input [400 x 1], target [200 x 1]
The target shape is [200 x 1] because I have taken a batch size of 200. I found that if I change "self.fc1 = nn.Linear(3200,1024)" and "x = x.view(-1, 3200)" from 3200 to 6400, my code runs without any error.
Similarly, it will throw the error input and target shapes do not match: input [100 x 1], target [200 x 1] if I put 12800 instead of 6400.
Now my doubt is that I am not able to understand the reason behind this. If I am giving 200 images as input to my network, why is the input shape affected when I change these parameters at the transition from the convolutional layers to the fully connected layer? I hope I have stated my doubt clearly; if anything is unclear, please ask. It would be a great help. Thanks in advance.
class MultiLabelNN(nn.Module):
    def __init__(self):
        super(MultiLabelNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(6400, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)

    def forward(self, x):
        # shape of x is (b_s, 1, 32, 32)
        x = self.conv1(x)  # shape of x is (b_s, 32, 28, 28)
        x = F.relu(x)
        x = self.pool(x)   # shape of x now becomes (b_s, 32, 14, 14)
        x = self.conv2(x)  # shape is (b_s, 64, 10, 10)
        x = F.relu(x)      # size is still (b_s, 64, 10, 10)
        # Each sample now has 64*10*10 = 6400 features.
        # With x.view(-1, 3200) the batch dimension would double to (b_s*2, 3200) -- this is the problem.
        # Flattening to 6400 (and making fc1 nn.Linear(6400, 1024)) keeps the batch size intact.
        x = x.view(-1, 6400)  # shape of x is now (b_s, 6400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
I think this should work. Let me know if any errors remain.
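As a quick check, a minimal sketch (assuming the usual torch, torch.nn and torch.nn.functional imports, 32x32 grayscale patches, and a batch of 200, as discussed above) confirms the shapes line up:
import torch

net = MultiLabelNN()
patches = torch.randn(200, 1, 32, 32)  # batch of 200 single-channel 32x32 patches
scores = net(patches)
print(scores.shape)                    # torch.Size([200, 1]), matching the [200 x 1] target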
I have an image input of 340px * 340px and I want to classify it into 2 classes.
I want to create a convolutional neural network (PyTorch framework). I have a problem with the input and output sizes of the layers.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 3 channels (RGB), kernel=5, but I don't understand why 6.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        # why 16?
        self.conv2 = nn.Conv2d(6, 16, 5)
        # why 107584 = 328*328?
        self.fc1 = nn.Linear(107584, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # I don't understand this line
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
Is the network correct?
# 3 channels (RGB), kernel=5, but I don't understand why 6.
The second parameter of Conv2d is out_channels. In a convolutional layer you can arbitrarily define the number of output channels. So it's set to 6 because someone set it to 6.
# why 16?
Same as above.
# why 107584 = 328*328?
and
# I don't understand this line
Tensor.view() returns a new tensor with the same data as the self tensor but of a different size.
x = x.view(x.size(0), -1): -1 means "infer from other dimensions", so you are flattening each sample while keeping the batch dimension. After conv1 (kernel 5) and pooling, the 340x340 input becomes 6 maps of 168x168; after conv2 and pooling it becomes 16 maps of 82x82. So the tensor is reshaped to [1, 16*82*82] => [1, 107584].
107584 (which happens to equal 328*328) is therefore the correct value for self.fc1 = nn.Linear(107584, 120).
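A minimal sketch (assuming the usual torch, torch.nn and torch.nn.functional imports and one random RGB image) confirms the flattened size:
import torch

net = Net()
img = torch.randn(1, 3, 340, 340)         # one 340x340 RGB image
x = net.pool(torch.relu(net.conv1(img)))  # (1, 6, 168, 168)
x = net.pool(torch.relu(net.conv2(x)))    # (1, 16, 82, 82)
print(x.view(x.size(0), -1).shape)        # torch.Size([1, 107584]) = 16*82*82
print(net(img).shape)                     # torch.Size([1, 2])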