Setting CNN input dimensions correctly (PyTorch) - python

I keep getting this error:
Traceback (most recent call last):
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 201, in <module>
logps = model(images)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 95, in forward
x = self.pool(F.relu(self.conv1(x)))
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 396, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [10, 307200] instead
This is the model I am trying to use:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        # we use the maxpool multiple times, but define it once
        self.pool = nn.MaxPool2d(2, 2)
        # in_channels = 6 because self.conv1 outputs 6 channels
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 5*5 comes from the dimension of the last convnet layer
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], output_size)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no activation on final layer
        return x
Input images are 640x480.
Here is my main function as well:
if __name__ == '__main__':
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    model = Net().to(device)
    criterion = nn.NLLLoss()

    images, labels = next(iter(train_loader))
    # images = images.view(images.shape[0], -1)
    print(images.shape)
    images = images.view(images.shape[0], -1)
    logps = model(images)
    loss = criterion(logps, labels)  # calculate the NLL loss

    # training process
    optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
    loss_fn = nn.NLLLoss()

    # train the model
    train_model(num_epochs, model)
    # validate the model
    validate_model(model)

    print("Execution has finished")
How can I adjust either the expected input dimensions or the weights? Or can I change the dimensions of my input to properly reflect the structure of my model? If so, how can I do this?

Looking at the convolutional part, I did notice the problem: the flattened size after your final pooling layer is not calculated correctly.
The following is how the code should work, based on the 640x480x1 input size you mentioned:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        # we use the maxpool multiple times, but define it once
        self.pool = nn.MaxPool2d(2, 2)
        # in_channels = 6 because self.conv1 outputs 6 channels
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 * 157 * 117 is the flattened size of the final pooling layer's output
        self.fc1 = nn.Linear(16 * 157 * 117, 256)  # you didn't provide the sizes here, but I calculated the in_features from the previous layer
        self.fc2 = nn.Linear(256, 10)
        self.fc3 = nn.Linear(10, 2)

    def forward(self, x):
        x = self.conv1(x)
        print('Conv1 Shape: {}'.format(x.shape))
        x = self.pool(F.relu(x))
        print('Pool1 Shape: {}'.format(x.shape))
        x = self.conv2(x)
        print('Conv2 Shape: {}'.format(x.shape))
        x = self.pool(F.relu(x))
        print('Pool2 Shape: {}'.format(x.shape))
        x = x.view(-1, 16 * 157 * 117)
        print('Flatten Shape: {}'.format(x.shape))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no activation on final layer
        return x


model = Model()
model(torch.randn(1, 1, 640, 480))  # 1 channel, since your first conv layer expects in_channels=1
Output shapes for each layer:
model(torch.randn(1, 1, 640, 480))
Conv1 Shape: torch.Size([1, 6, 636, 476])
Pool1 Shape: torch.Size([1, 6, 318, 238])
Conv2 Shape: torch.Size([1, 16, 314, 234])
Pool2 Shape: torch.Size([1, 16, 157, 117])
Flatten Shape: torch.Size([1, 293904])
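
One more thing worth pointing out: the 2-dimensional [10, 307200] input in your traceback comes from images = images.view(images.shape[0], -1) in your main function. That line flattens each 640x480 image (640 * 480 = 307200) before it ever reaches conv1, but Conv2d needs a 4-dimensional [batch, channels, height, width] tensor. A minimal sketch of the corrected call, assuming train_loader yields [batch, 1, 640, 480] tensors:

images, labels = next(iter(train_loader))
print(images.shape)  # should print torch.Size([10, 1, 640, 480])
# no view()/flatten here -- the model flattens internally after the conv layers
logps = model(images)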

Related

How can I match a Decoder's input to a Pretrained Resnet18 Encoder?

I am trying to build a custom decoder with skip connections to run with a pretrained ResNet18 encoder for an image segmentation task. The total number of classes is 150.
The ResNet18 encoder has an fc output of 512. To match the encoder's output to the decoder's input, I am trying to set the conv layers of the decoder so that they match the output from the encoder, i.e. [151, 512, 1, 1]. However, no matter what combination of layers I try, I am not able to match the input and output tensors.
Here is the relevant part of the decoder code:
class ResNet18Transpose(nn.Module):
    def __init__(self, transblock, layers, num_classes=150):
        self.inplanes = 512
        super(ResNet18Transpose, self).__init__()
        self.deconv1 = self._make_transpose(transblock, 256 * transblock.expansion, layers[0], stride=2)
        self.deconv2 = self._make_transpose(transblock, 128 * transblock.expansion, layers[1], stride=2)
        self.deconv3 = self._make_transpose(transblock, 64 * transblock.expansion, layers[2], stride=2)
        self.deconv4 = self._make_transpose(transblock, 32 * transblock.expansion, layers[3], stride=2)
        self.skip0 = self._make_skip_layer(64, 64 * transblock.expansion)
        self.skip1 = self._make_skip_layer(128, 64 * transblock.expansion)
        self.skip2 = self._make_skip_layer(256, 64 * transblock.expansion)
        self.skip3 = self._make_skip_layer(512, 128 * transblock.expansion)
        self.inplanes = 64
        self.final_conv = self._make_transpose(transblock, 64 * transblock.expansion, 3)
        self.final_deconv = nn.ConvTranspose2d(self.inplanes * transblock.expansion, num_classes, kernel_size=2,
                                               stride=2, padding=0, bias=True)
        self.out6_conv = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)
        self.out5_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.ConvTranspose2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
The forward block for Decoder is as follows:
def forward(self, x, labels=None, sparse_mode=False, use_skip=True):
    [in0, in1, in2, in3, in4] = x
    if labels:
        [lab0, lab1, lab2, lab3, lab4] = labels
    out6 = self.out6_conv(in4)
    if sparse_mode:
        if labels:
            mask4 = (lab4 == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        else:
            mask4 = (torch.argmax(out6, dim=1) == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        in4 = in4 * mask4
    # upsample 1
    x = self.deconv1(in4)
    out5 = self.out5_conv(x)
    if sparse_mode:
        if labels:
            mask3 = (lab3 == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        else:
            mask3 = (torch.argmax(out5, dim=1) == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        in3 = in3 * mask3
    if use_skip:
        x = x + self.skip3(in3)
    # upsample 2
    x = self.deconv2(x)
    out4 = self.out4_conv(x)
    if sparse_mode:
        if labels:
            mask2 = (lab2 == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        else:
            mask2 = (torch.argmax(out4, dim=1) == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        in2 = in2 * mask2
    if use_skip:
        x = x + self.skip2(in2)
    # upsample 3
    x = self.deconv3(x)
    out3 = self.out3_conv(x)
    if sparse_mode:
        if labels:
            mask1 = (lab1 == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        else:
            mask1 = (torch.argmax(out3, dim=1) == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        in1 = in1 * mask1
    if use_skip:
        x = x + self.skip1(in1)
    # upsample 4
    x = self.deconv4(x)
    out2 = self.out2_conv(x)
    if sparse_mode:
        if labels:
            mask0 = (lab0 == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        else:
            mask0 = (torch.argmax(out2, dim=1) == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        in0 = in0 * mask0
    if use_skip:
        x = x + self.skip0(in0)
    # final
    x = self.final_conv(x)
    out1 = self.final_deconv(x)
    return [out6, out5, out4, out3, out2, out1]
I get the following error:
File "/project/xfu/aamir/Golden-QGN/models/resnet.py", line 447, in forward
out6 = self.out6_conv(in4)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 419, in forward
return self._conv_forward(input, self.weight)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [151, 1024, 1, 1], expected input[8, 512, 8, 8] to have 1024 channels, but got 512 channels instead
Exception in thread Thread-1:
If I change the Decoder Layers as follows:
self.out6_conv = nn.Conv2d(512, num_classes, kernel_size=1, stride=1, bias=True)
self.out5_conv = nn.Conv2d(256 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
I get the following error. Note that the input tensor has also changed.
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 64, 3, 3], expected input[8, 32, 128, 128] to have 64 channels, but got 32 channels instead
I also tried altering the Resnet18 Encoder's fc output to 1024 instead of 512 as follows:
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnet18']))
    model.fc = torch.nn.Linear(1024, 150)
    return model
But nothing seems to work. I also could not find a skip-connection-based decoder for ResNet18 on GitHub or elsewhere. Any help will be appreciated.
NOTE: I only want to work with ResNet18. The images fed to the network through the dataloader work perfectly fine with a ResNet50 encoder + custom decoder. I also tried changing the crop size from 0 to 128 to 256 to 512, but in vain.
Here is the code that is run in models.py for image segmentation. I tried using set_trace() to debug it; the code stops just before the (pred, pred_quad) = self.decoder(fmap, labels_scaled) line in the following code:
class SegmentationModule(SegmentationModuleBase):
    def __init__(self, net_enc, net_dec, crit, deep_sup_scale=None, quad_sup=False, running_avg_param=0.99):
        super(SegmentationModule, self).__init__()
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit = crit
        if deep_sup_scale:
            if deep_sup_scale < 0:
                self.adapt_weights = True
                self.running_avg_param = running_avg_param
                deep_sup_scale = 1
            else:
                self.adapt_weights = False
            self.loss_weights = [(deep_sup_scale**(i+1)) for i in range(5)]
        self.quad_sup = quad_sup

    def forward(self, feed_dict, *, segSize=None):
        inputs = feed_dict['img_data'].cuda()
        if segSize is None:  # training
            labels_orig_scale = feed_dict['seg_label_0'].cuda()
            labels_scaled = []
            fmap = self.encoder(inputs, return_feature_maps=True)
            if self.quad_sup:
                labels_scaled.append(feed_dict['seg_label_1'].cuda())
                labels_scaled.append(feed_dict['seg_label_2'].cuda())
                labels_scaled.append(feed_dict['seg_label_3'].cuda())
                labels_scaled.append(feed_dict['seg_label_4'].cuda())
                labels_scaled.append(feed_dict['seg_label_5'].cuda())
                (pred, pred_quad) = self.decoder(fmap, labels_scaled)
            else:
                pred = self.decoder(fmap)
            loss = self.crit(pred, labels_orig_scale)
            if self.quad_sup:
                loss_orig = loss
                for i in range(len(pred_quad)):
                    loss_quad = self.crit(pred_quad[i], labels_scaled[i])
                    loss = loss + loss_quad * self.loss_weights[i]
                    if self.adapt_weights:
                        self.loss_weights[i] = self.running_avg_param * self.loss_weights[i] + \
                            (1 - self.running_avg_param) * (loss_quad/loss_orig).data.cpu().numpy()
            acc = self.pixel_acc(pred, labels_orig_scale, self.quad_sup)
            return loss, acc
        else:  # inference
            if 'qtree' in feed_dict:
                labels_scaled = [feed_dict['qtree'][l].cuda() for l in range(1, 6)]
            else:
                labels_scaled = None
            pred = self.decoder(self.encoder(inputs, return_feature_maps=True), labels_scaled, segSize=segSize)
            return pred
Here is how I resolved the error:
I disabled the skip connections (I am not sure why the decoder doesn't work with skip connections).
I had passed an incorrect layers argument, [6, 3, 4, 3], to the decoder; for ResNet18 the layers should be [2, 2, 2, 2].
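
For reference, a minimal sketch of the corrected instantiation (TransBasicBlock is a placeholder name here; use whatever transpose block class your codebase defines):

# layers=[2, 2, 2, 2] mirrors the block counts of the ResNet18 encoder
decoder = ResNet18Transpose(TransBasicBlock, layers=[2, 2, 2, 2], num_classes=150)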

I am sure that I have already moved both the data and the model to the GPU, but I still get this error

This is the error stack:
Traceback (most recent call last):
File "my_train.py", line 55, in <module>
test()
File "my_train.py", line 51, in test
train.train()
File "my_train.py", line 37, in train
outputs = self.model(inputs)
File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/rs/andy/andy/MyFCN/model/FCN/FCN32.py", line 12, in forward
y = self.up_sampling(feature, None, 512)
File "/home/rs/andy/andy/MyFCN/model/FCN/FCN.py", line 47, in up_sampling
y = self.bn(batch_norm)(y)
File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 168, in forward
return F.batch_norm(
File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/functional.py", line 2421, in batch_norm
return torch.batch_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument weight in method wrapper__cudnn_batch_norm)
This is all my code:
I think the error happens in this file, since this is the section where I move the data to the GPU.
train:
import torch.cuda
from torch import nn, optim
from torch.utils.data import DataLoader

import dataset
from model.FCN import FCN32


class Train:
    def __init__(self, dataset_path, model, batch_size, shuffle):
        self.dataset = dataset.ObtTrainDataset(dataset_path)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using {self.device}")
        self.model = model

    def train(self):
        self.model = self.model.to(self.device)
        epoch = 10
        criterion = nn.CrossEntropyLoss().to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=0.0001)
        dl = DataLoader(dataset=self.dataset, batch_size=self.batch_size, shuffle=False)
        for i in range(epoch):
            print("------------{} begin--------------".format(i))
            self.model.train()
            running_loss = 0.0
            j = 0
            for data in dl:
                j += 1
                inputs, target = data
                inputs = inputs.to(self.device)
                target = target.to(self.device)
                target = torch.squeeze(target, 1).long().to(self.device)
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()
                running_loss += loss.cpu().item()
            print(running_loss)
            torch.save(self.model.state_dict(), f"models/obt_10_{i}.pth")


def test():
    dataset = "data/obt/image"
    model = FCN32(256, 5)
    train = Train(dataset, model, 8, True)
    train.train()


if __name__ == '__main__':
    test()
model code:
This is my implementation of FCN32s. I think there is nothing wrong here, but the error stack indicates that line 47 of the FCN.py file is at fault.
FCN:
from torch import nn

from model.FCN.vgg import VGG16


class FCN(nn.Module):
    def __init__(self, input_size, num_classes, backbone="vgg16"):
        super().__init__()
        all_backbones = ["vgg16"]
        if backbone not in all_backbones:
            raise ValueError(f"backbone must be one of the items in {all_backbones}")
        if backbone == "vgg16":
            self.features = VGG16(input_size)
        self.num_classes = num_classes
        self.deconv1 = nn.ConvTranspose2d(512, 512, 3, 2, padding=1, output_padding=1)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, padding=1, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, padding=1, output_padding=1)
        self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, padding=1, output_padding=1)
        self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, padding=1, output_padding=1)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1, padding="same")
        self.bn = nn.BatchNorm2d
        self.relu = nn.ReLU()

    def forward(self, x):
        raise NotImplementedError("please implement it")

    def up_sampling(self, x1, x2=None, batch_norm=None):
        deconv = None
        assert batch_norm is not None
        if batch_norm == 512:
            deconv = self.deconv1
        elif batch_norm == 256:
            deconv = self.deconv2
        elif batch_norm == 128:
            deconv = self.deconv3
        elif batch_norm == 64:
            deconv = self.deconv4
        elif batch_norm == 32:
            deconv = self.deconv5
        y = deconv(x1)
        y = self.relu(y)
        if x2 is None:
            y = self.bn(batch_norm)(y)
        else:
            y = self.bn(batch_norm)(y + x2)
        return y


if __name__ == '__main__':
    pass
FCN32s
import torch
from torch import nn

from model.FCN import FCN


class FCN32(FCN):
    def forward(self, x):
        feature = self.features(x)["pool32"]
        y = self.up_sampling(feature, None, 512)
        y = self.up_sampling(y, None, 256)
        y = self.up_sampling(y, None, 128)
        y = self.up_sampling(y, None, 64)
        y = self.up_sampling(y, None, 32)
        y = self.classifier(y)
        return y
dataset:
import os
from glob import glob

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T


class ObtTrainDataset(Dataset):
    def __init__(self, image_path=r"data/obt/image", mode="train"):
        assert mode in ("train", "val", "test")
        self.image_path = image_path
        self.image_list = glob(os.path.join(self.image_path, "*.npy"))
        self.mode = mode
        if mode in ("train", "val"):
            self.mask_path = self.image_path
        self.transform_x = T.Compose([T.ToTensor(), T.Resize((256, 256))])
        self.transform_mask = T.Compose([T.ToTensor(), T.Resize((256, 256))])

    def __getitem__(self, index):
        if self.mode in ("train", "val"):
            image_name = os.path.basename(self.image_list[index])
            X = np.load(os.path.join(self.image_path, image_name))
            masks = np.load(os.path.join(self.image_path + "Masks", image_name))
            X = X / 1.0
            masks = masks / 1.0
            X = self.transform_x(X)
            masks = self.transform_mask(masks)
            X = X.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)
            return X, masks
        else:
            X = Image.open(self.image_list[index])
            X = self.transform_x(X)
            path = self.image_list[index]
            return X, path

    def __len__(self):
        return len(self.image_list)
I have already been debugging this for 3 hours. Help!
Your use of BatchNorm2d is very odd. In your base class, FCN, you do not define a batch-norm layer, but only a "placeholder": self.bn is not a layer, but a function that returns a layer, given the parameter num_features. Then, on each forward pass, you create a brand-new BatchNorm2d layer from scratch and forward your features through it.
Needless to say, the new layer created during the forward pass is never moved to the GPU: .to(self.device) is called before the forward pass, i.e. before the batch-norm layer even exists.
But this is not the worst thing here: the fact that you create a new batch-norm layer for each forward pass means that the layer can never learn the statistics of your data and thus is unable to function properly.
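
A minimal sketch of the fix, assuming the rest of the class stays as-is: create one BatchNorm2d per channel count in __init__, so the layers are registered as submodules (and therefore moved by .to(self.device)) and can accumulate running statistics across batches:

# in FCN.__init__, replace `self.bn = nn.BatchNorm2d` with real layers:
self.bn512 = nn.BatchNorm2d(512)
self.bn256 = nn.BatchNorm2d(256)
self.bn128 = nn.BatchNorm2d(128)
self.bn64 = nn.BatchNorm2d(64)
self.bn32 = nn.BatchNorm2d(32)
# a plain dict is fine for lookup; the attributes above already register the modules
self.bns = {512: self.bn512, 256: self.bn256, 128: self.bn128, 64: self.bn64, 32: self.bn32}

# in up_sampling, look the layer up instead of constructing a new one each call:
y = self.bns[batch_norm](y if x2 is None else y + x2)

An nn.ModuleDict with string keys would work just as well.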

How to combine RNN with CNN

I am working on a program that performs subtraction using two MNIST images. For this, we are using an architecture that combines a CNN and an RNN.
class CRNN_Model(tf.keras.Model):
    def __init__(self):
        super(CRNN_Model, self).__init__()
        self.hidden_size = 256
        self.batch_size = 128
        self.sequence_size = 2
        self.output_size = 19
        self.img_size = 28

        # cnn
        self.reshape = tf.keras.layers.Reshape((10000, 28, 28, 1))
        self.conv1 = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', input_shape=(28, 28, 1))
        self.maxpooling1 = tf.keras.layers.MaxPooling2D()
        self.conv2 = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu')
        self.maxpooling2 = tf.keras.layers.MaxPooling2D()
        self.flatten1 = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(self.hidden_size, activation='relu')

        # rnn
        self.rnn_cell = tf.keras.layers.SimpleRNNCell(self.hidden_size, activation=None)

    def call(self, x):
        # cnn
        hidden_list = []
        for i in range(2):
            x = x[0][i]
            x = self.conv1(x)
            x = self.maxpooling1(x)
            x = self.conv2(x)
            x = self.maxpooling2(x)
            x = self.flatten1(x)
            x = self.hidden(x)
            hidden_list.append(x)
        self.hidden_list = tf.transpose(hidden_list, perm=[1, 0, 2])

        # rnn
        self.initial_state = self.rnn_cell.zero_state(self.batch_size, tf_float32)
        state = self.initial_state
        outputs = []
        for t in range(self.sequence_size):
            (output, state) = self.rnn_cell(self.hidden_list[:, t, :], state)
            outputs.append(output)
        self.outputs = outputs
        self.prediction = tf.keras.layers.Dense(self.outputs[-1], self.output_size)
        self.pred_output = tf.keras.activations.softmax(self.prediction)
        return self.pred_output
However, an error occurs at the input to the first conv1 layer.
ValueError: Input 0 of layer conv2d_111 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (28, 28, 1)
The shape of the data we are giving is as follows.
print(np.shape(train_x))
print(np.shape(train_y))
print(np.shape(test_x))
print(np.shape(test_y))
(10000, 2, 28, 28, 1)
→ (number of samples × images per sample × width × height × channels)
(10000, 1)
(2000, 2, 28, 28, 1)
(2000, 1)
How should I feed the data?
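
The (28, 28, 1) shape in the error message hints at the problem: x = x[0][i] picks the i-th image of the first sample only, dropping the batch dimension that Conv2D expects (it wants (batch, 28, 28, 1)), and it also overwrites x, so the second iteration indexes the conv output instead of the original input. A minimal sketch of a per-image slice that keeps the batch dimension (a suggestion based on the shapes above, not a confirmed fix):

# inside call(), slice each of the two images across the whole batch
for i in range(self.sequence_size):
    xi = x[:, i]                 # shape (batch, 28, 28, 1) -- batch axis preserved
    xi = self.conv1(xi)
    xi = self.maxpooling1(xi)
    xi = self.conv2(xi)
    xi = self.maxpooling2(xi)
    xi = self.flatten1(xi)
    xi = self.hidden(xi)
    hidden_list.append(xi)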

ValueError: Python inputs incompatible with input_signature:

System information:
OS Platform and Distribution: CentOS Linux release 7.7.1908
TensorFlow version: 2.3.0
I am following this example: https://www.tensorflow.org/tutorials/text/image_captioning?hl=en
It is working as it should, saving checkpoints, and I now want to convert the model to TF Lite.
Here is the link to the full convert code: https://colab.research.google.com/drive/1GJkGcwWvDAWMooTsECzuSRUSPbirADhb?usp=sharing
Here is the link to the full train code:
https://colab.research.google.com/drive/1X2d9WW1EMEzN8Rgva3rtjevP0T_jFccj?usp=sharing
I am also following issue #32999.
Here is what I am running to save and then convert the inference graph:
@tf.function
def evaluate(image):
    hidden = decoder.reset_states(batch_size=1)
    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    features = encoder(img_tensor_val)
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []
    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
        predicted_id = tf.random.categorical(predictions, 1)[0][0]
        # print(tokenizer.index_word)
        print(predicted_id, predicted_id.dtype)
        # for key, value in tokenizer.index_word.items():
        #     key = tf.convert_to_tensor(key)
        #     tf.dtypes.cast(key, tf.int64)
        #     print(key)
        # print(tokenizer.index_word)
        result.append(predicted_id)
        # if tokenizer.index_word[predicted_id] == '<end>':
        #     return result
        dec_input = tf.expand_dims([predicted_id], 0)
    return result


export_dir = "./"
tflite_enc_input = ''
ckpt.f = evaluate
to_save = evaluate.get_concrete_function('')

converter = tf.lite.TFLiteConverter.from_concrete_functions([to_save])
tflite_model = converter.convert()
but I get this error
ValueError: in user code:
convert2savedmodel.py:310 evaluate *
predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__ **
outputs = call_fn(inputs, *args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:780 __call__
result = self._call(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:840 _call
return self._stateless_fn(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2828 __call__
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3171 _maybe_define_function
*args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2622 canonicalize_function_inputs
self._flat_input_signature)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2713 _convert_inputs_to_signature
format_error_message(inputs, input_signature))
ValueError: Python inputs incompatible with input_signature:
inputs: (
Tensor("ExpandDims_1:0", shape=(1, 1), dtype=int32),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("zeros:0", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int64, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Encoder Model:
class CNN_Encoder(tf.keras.Model):
    def __init__(self, embedding):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.dtypes.float32)])
    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x
Decoder model:
class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int64),
                                  tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32),
                                  tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])
    def call(self, x, features, hidden):
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size, max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        return tf.zeros((batch_size, self.units))
I then changed the tf.function signature to int32 as below:
@tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32), tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32), tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])
but another error came:
ValueError: Python inputs incompatible with input_signature:
Tensor("ExpandDims_2:0", shape=(1, 1), dtype=int64),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("rnn__decoder/StatefulPartitionedCall:1", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Why do the dtypes of the inputs change from int64 to int32?
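
One plausible explanation, reading the evaluate code above (an observation, not a confirmed fix): the first decoder call is traced with dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0), and TensorFlow converts plain Python ints to int32; on later iterations dec_input is rebuilt from predicted_id, which tf.random.categorical returns as int64. So the two call sites disagree with each other no matter which dtype the input_signature declares. A minimal sketch that pins both call sites to the declared int64:

# first token: build it explicitly as int64 to match the input_signature
dec_input = tf.expand_dims(tf.constant([tokenizer.word_index['<start>']], dtype=tf.int64), 0)

# inside the loop: predicted_id from tf.random.categorical is already int64
dec_input = tf.expand_dims([predicted_id], 0)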

ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index) throws IndexError: Target 42 is out of bounds

After loading CIFAR-100, I try to train my neural network, but I don't know why I get the out-of-bounds error shown below:
Optimizing the network with batch size 25
Epoch: 0 of 30 Average loss: -
/mnt_home/klee/LBSBGenGapSharpnessResearch/vgg.py:43: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.
return F.log_softmax(x)
Traceback (most recent call last):
File "plot_parametric_pytorch_cifar100.py", line 130, in <module>
loss_fn = F.nll_loss(ops, tgts)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/functional.py", line 2115, in nll_loss
ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
IndexError: Target 42 is out of bounds.
Here is the script I'm running:
cudnn.benchmark = True

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32')
X_train = np.transpose(X_train, axes=(0, 3, 1, 2))
X_test = X_test.astype('float32')
X_test = np.transpose(X_test, axes=(0, 3, 1, 2))
X_train /= 255
X_test /= 255

device = torch.device('cuda:0')

# This is where you can load any model of your choice.
# I stole PyTorch Vision's VGG network and modified it to work on CIFAR-10.
# You can take this line out and add any other network and the code
# should run just fine.
model = vgg.vgg11_bn()
#model.to(device)

# Forward pass
opfun = lambda X: model.forward(Variable(torch.from_numpy(X)))

# Forward pass through the network given the input
predsfun = lambda op: np.argmax(op.data.numpy(), 1)

# Do the forward pass, then compute the accuracy
accfun = lambda op, y: np.mean(np.equal(predsfun(op), y.squeeze())) * 100

# Initial point
x0 = deepcopy(model.state_dict())

# Number of epochs to train for
# Choose a large value since LB training needs higher values
# Changed from 150 to 30
nb_epochs = 30
batch_range = [25, 40, 50, 64, 80, 128, 256, 512, 625, 1024, 1250, 1750, 2048, 2500, 3125, 4096, 4500, 5000]

# parametric plot (i.e., don't train the network)
hotstart = False

if not hotstart:
    for batch_size in batch_range:
        optimizer = torch.optim.Adam(model.parameters())
        model.load_state_dict(x0)
        #model.to(device)
        average_loss_over_epoch = '-'
        print('Optimizing the network with batch size %d' % batch_size)
        np.random.seed(1337)  # So that both networks see same sequence of batches
        for e in range(nb_epochs):
            model.eval()
            print('Epoch:', e, ' of ', nb_epochs, 'Average loss:', average_loss_over_epoch)
            average_loss_over_epoch = 0

            # Checkpoint the model every epoch
            torch.save(model.state_dict(), "./models/30EpochCIFAR100ExperimentBatchSize" + str(batch_size) + ".pth")

            array = np.random.permutation(range(X_train.shape[0]))
            slices = X_train.shape[0] // batch_size
            beginning = 0
            end = 1

            # Training loop!
            for _ in range(slices):
                start_index = batch_size * beginning
                end_index = batch_size * end
                smpl = array[start_index:end_index]
                model.train()
                optimizer.zero_grad()
                ops = opfun(X_train[smpl])
                tgts = Variable(torch.from_numpy(y_train[smpl]).long().squeeze())
                loss_fn = F.nll_loss(ops, tgts)  # <--- the erroring line
                average_loss_over_epoch += loss_fn.data.numpy() / (X_train.shape[0] // batch_size)
                loss_fn.backward()
                optimizer.step()
                beginning += 1
                end += 1
and here's the VGG model:
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

model_urls = {
    'vgg11': 'https://s3.amazonaws.com/pytorch/models/vgg11-fb7e83b2.pth',
    'vgg13': 'https://s3.amazonaws.com/pytorch/models/vgg13-58758d87.pth',
    'vgg16': 'https://s3.amazonaws.com/pytorch/models/vgg16-82412952.pth',
    'vgg19': 'https://s3.amazonaws.com/pytorch/models/vgg19-341d7465.pth',
}


class VGG(nn.Module):
    def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Linear(4096, 10),
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return F.log_softmax(x)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['A']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return model


def vgg11_bn(**kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization"""
    return VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
I'm not sure how to fix the index error. I think it has to do with the number of classes, but I'm not sure where to fix that in the above code. I read https://discuss.pytorch.org/t/indexerror-target-2-is-out-of-bounds/69614, but I am not sure where to go from here.
You are using CIFAR-100, which has 100 classes (hence the name), but your model only predicts 10 classes. Naturally, any class index of 10 or above will result in an error.
The output size of the last linear layer in the model's classifier needs to be changed to 100:
self.classifier = nn.Sequential(
    nn.Dropout(),
    nn.Linear(512, 4096),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Linear(4096, 100),
)
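
If you want the same VGG code to work for both datasets, here is a small sketch of one way to do it (the num_classes parameter is an addition for illustration, not part of the original torchvision code above):

class VGG(nn.Module):
    def __init__(self, features, num_classes=100):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Linear(4096, num_classes),  # 100 for CIFAR-100, 10 for CIFAR-10
        )
        self._initialize_weights()

# vgg11_bn(**kwargs) already forwards keyword arguments to VGG:
model = vgg.vgg11_bn(num_classes=100)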
