How to predict a single image with Skorch? - python

I've just created a neural network with Skorch to detect aircraft in a picture, and I trained it on a training dataset of shape (40000, 64, 64, 3).
Then I tested it on a test dataset of shape (15000, 64, 64, 3).
module = nn.Sequential(
    nn.Conv2d(3, 64, 3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 64, 3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 64, 3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(6 * 6 * 64, 256),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Linear(256, 2),
    nn.Softmax(),
)

early_stopping = EarlyStopping(monitor='valid_loss', lower_is_better=True)

net = NeuralNetClassifier(
    module,
    max_epochs=20,
    lr=1e-4,
    callbacks=[early_stopping],
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
    device="cuda" if torch.cuda.is_available() else "cpu",
    optimizer=optim.Adam
)

net.fit(
    train_images_balanced.transpose((0, 3, 1, 2)).astype(np.float32),
    train_labels_balanced
)
Now I need to test it on 512*512 pictures, so I have a new dataset of shape (30, 512, 512, 3).
So I took some sliding-window code that lets me divide each picture into 64*64 patches:
def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])
Now I want to be able to predict whether every single 64*64 patch contains an aircraft, but I don't know how to do it, as net.predict() takes a dataset as an argument (a 4-dimensional array).

net.predict() takes a dataset as an argument (a 4-dimensional array)
net.predict accepts a number of data formats, among other things datasets. However, it seems that for your case it would be best if it accepted torch tensors or numpy arrays - and it does! Just pass your 64x64 chunks to net.predict, in the same channels-first, float32 format you used for training, something like this:
# one (512, 512, 3) picture out of your (30, 512, 512, 3) array
image = my_data[0]
# sliding_window yields (x, y, window) tuples, so keep just the windows
X = np.stack([w for _, _, w in sliding_window(image, 64, (64, 64))])
# channels first and float32, exactly as during training
X = X.transpose((0, 3, 1, 2)).astype(np.float32)
y = net.predict(X)  # one predicted class per 64x64 patch
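If you also want to keep track of where each patch came from, here is a minimal sketch along the same lines, assuming the sliding_window and net defined above, a big_images array of shape (30, 512, 512, 3), and that class 1 means "aircraft":
import numpy as np

detections = []  # per image: list of (x, y) offsets whose patch was classified as aircraft
for image in big_images:
    coords, patches = [], []
    for x, y, window in sliding_window(image, 64, (64, 64)):
        coords.append((x, y))
        patches.append(window)
    X = np.stack(patches).transpose((0, 3, 1, 2)).astype(np.float32)
    preds = net.predict(X)  # one class label per patch
    detections.append([(x, y) for (x, y), p in zip(coords, preds) if p == 1])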

Related

Pytorch 3D cnn input shapes incompatible

I am attempting to classify 3D blocks of data with H, D, W of 64, 1024, 64 respectively, in batches of 2. However, the input does not seem to be loading in correctly and I get the error: Expected 4D (unbatched) or 5D (batched) input to conv3d, but got input of size: [2, 1]. However, the input shape is [2, 1, 64, 1024, 64].
Please see the code below:
print('Images shape ', images.shape)
# forward + backward + optimize
outputs = Net(images)
Which calls the CNN3D class:
class CNN3D(nn.Module):
    def __init__(self, input_shape, conv_layers, kernel_size, out_channel_ratio, FC_layers):
        super(CNN3D, self).__init__()
        self.input = input_shape
        model = [
            self._conv_layer_set(1, 8),
            # self._conv_layer_set(8, 16),
            # self._conv_layer_set(16, 32),
            nn.Flatten(),
            # nn.Linear((6*14*6), 256),
            # nn.LeakyReLU(),
            # nn.Linear(256, 128),
            # nn.LeakyReLU(),
            # nn.Linear(1952752, 1)
            nn.LazyLinear(1)
        ]
        self.model = nn.Sequential(*model)

    def _conv_layer_set(self, in_c, out_c):
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 7, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 4, 2)),
        )
        return conv_layer

    def forward(self, x):
        print('Input shape ', self.input)
        for layer in self.model:
            x = layer(x)
            print(x.size())
        return self.model(x)
This gives the following output:
Images shape torch.Size([2, 1, 64, 1024, 64])
Images shape torch.Size([2, 1, 64, 1024, 64])
Input shape (1, 64, 1024, 64)
torch.Size([2, 8, 31, 254, 31])
torch.Size([2, 1952752])
torch.Size([2, 1])
return F.conv3d(
RuntimeError: Expected 4D (unbatched) or 5D (batched) input to conv3d, but got input of size: [2, 1]
The error seems inconsistent with the input shape so I am not sure what is going on.
Thanks
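A likely cause, judging only from the code and traceback above: forward() already runs x through every layer inside the for loop (which is why the last printed shape is torch.Size([2, 1])) and then calls self.model(x) a second time on that flattened output, so the first Conv3d receives a [2, 1] tensor. A minimal sketch of a forward that prints the shapes without that second pass:
def forward(self, x):
    # pass through each layer exactly once, printing the intermediate shape
    for layer in self.model:
        x = layer(x)
        print(x.size())
    return x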

PyTorch mat1 and mat2 shapes cannot be multiplied (4x460800 and 80000x16)

I'm trying to find road lanes using PyTorch. I created a dataset and my model, but when I try to train the model I get a "mat1 and mat2 shapes cannot be multiplied (4x460800 and 80000x16)" error. I've tried solutions from other topics, but they didn't help me very much.
My dataset is a bunch of road images with their validation images. I have a .csv file that contains the names of the images (such as 'image1.jpg, image2.jpg'). The original size of the images and validation images is 1280x720. I convert them to 200x200 in my dataset code.
Road image:
Validation image:
Here's my dataset:
import os
import pandas as pd
import random
import torch
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image


class Dataset(Dataset):
    def __init__(self, csv_file, root_dir, val_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.val_dir = val_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path).convert('RGB')
        mask_path = os.path.join(self.val_dir, self.annotations.iloc[index, 0])
        mask = Image.open(mask_path).convert('RGB')
        transform = transforms.Compose([
            transforms.Resize((200, 200)),
            transforms.ToTensor()
        ])
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        return image, mask
My model:
import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn_layers = nn.Sequential(
            # Conv2d, 3 inputs, 128 outputs
            # 200x200 image size
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv2d, 128 inputs, 64 outputs
            # 100x100 image size
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv2d, 64 inputs, 32 outputs
            # 50x50 image size
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.linear_layers = nn.Sequential(
            # Linear, 32*50*50 inputs, 16 outputs
            nn.Linear(32 * 50 * 50, 16),
            # Linear, 16 inputs, 3 outputs
            nn.Linear(16, 3)
        )

    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
How can I avoid this error and train my model on these images and their validation images?
The answer: In your case, the NN input has shape (3, 1280, 720), not (3, 200, 200) as you want. Probably you have forgotten to pass the transform argument to RNetDataset; it stays None, so the transforms are not applied and the image is not resized. Another possibility is that it happens because of these lines:
transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor()
])
if self.transform:
    image = self.transform(image)
    mask = self.transform(mask)
You have two variables named transform, but only one with self. - maybe you mixed them up. Verify it and the problem should go away.
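A sketch of one possible fix, assuming the hard-coded Resize/ToTensor pipeline is meant to be the fallback when no transform is passed in (names follow the Dataset class above):
def __getitem__(self, index):
    img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
    image = Image.open(img_path).convert('RGB')
    mask_path = os.path.join(self.val_dir, self.annotations.iloc[index, 0])
    mask = Image.open(mask_path).convert('RGB')

    # use the transform passed to __init__, or fall back to the default resize
    transform = self.transform or transforms.Compose([
        transforms.Resize((200, 200)),
        transforms.ToTensor()
    ])
    return transform(image), transform(mask)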
How I came up with it: 460800 is clearly the tensor size after reshaping, just before the linear layers. According to the architecture, the tensor produced by self.cnn_layers should have 32 channels, so its height multiplied by its width should give 460800 / 32 = 14400. Suppose that its height = H and width = W, so H x W = 14400. Let's work out what the original input size was in this case. The nn.MaxPool2d(kernel_size=2, stride=2) layer divides height and width by 2, and it happens three times. So the original input size was 8H x 8W = 64 x 14400 = 921600 pixels. Finally, notice that 921600 = 1280 * 720. This can't be a magical coincidence. Case closed!
Another suggestion: even if you apply the transforms correctly, your code might still not work. Suppose that you have an input of size (4, 3, 200, 200), where 4 is the batch size. The layers in your architecture will process this input as follows:
nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1) # -> (4, 128, 200, 200)
nn.MaxPool2d(kernel_size=2, stride=2) # -> (4, 128, 100, 100)
nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1) # -> (4, 64, 100, 100)
nn.MaxPool2d(kernel_size=2, stride=2) # -> (4, 64, 50, 50)
nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1) # -> (4, 32, 50, 50)
nn.MaxPool2d(kernel_size=2, stride=2) # -> (4, 32, 25, 25)
So, your first layer in self.linear_layers should not be nn.Linear(32 * 50 * 50, 16), but nn.Linear(32 * 25 * 25, 16). With this change, everything should be fine.
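For reference, a minimal sketch of the corrected classifier head under that assumption (200x200 inputs, three 2x2 max-pools):
self.linear_layers = nn.Sequential(
    # a 200x200 input ends up as 32 channels of 25x25 after three MaxPool2d(2) layers
    nn.Linear(32 * 25 * 25, 16),
    nn.Linear(16, 3)
)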

How can I check the image sizes while CNN?

I'm trying to classify cats and dogs with a CNN in PyTorch.
While building the layers and processing the images, I found that the final processed feature map size doesn't match the size I calculated.
So I tried to check the feature map size step by step in the CNN by printing its shape, but it doesn't work.
I heard TensorFlow lets you check tensor sizes at each step, but how can I do that here?
What I want is:
def __init__(self):
    super(CNN, self).__init__()
    conv1 = nn.Conv2d(1, 16, 3, 1, 1)
    conv1_1 = nn.Conv2d(16, 16, 3, 1, 1)
    pool1 = nn.MaxPool2d(2)
    conv2 = nn.Conv2d(16, 32, 3, 1, 1)
    conv2_1 = nn.Conv2d(32, 32, 3, 1, 1)
    pool2 = nn.MaxPool2d(2)
    conv3 = nn.Conv2d(32, 64, 3, 1, 1)
    conv3_1 = nn.Conv2d(64, 64, 3, 1, 1)
    conv3_2 = nn.Conv2d(64, 64, 3, 1, 1)
    pool3 = nn.MaxPool2d(2)
    self.conv_module = nn.Sequential(
        conv1,
        nn.ReLU(),
        conv1_1,
        nn.ReLU(),
        pool1,
        # check first result size
        conv2,
        nn.ReLU(),
        conv2_1,
        nn.ReLU(),
        pool2,
        # check second result size
        conv3,
        nn.ReLU(),
        conv3_1,
        nn.ReLU(),
        conv3_2,
        nn.ReLU(),
        pool3,
        # check third result size
        pool4,
        # check fourth result size
        pool5
        # check fifth result size
    )
If there's any other way to check feature size at every step, please give some advice.
Thanks in advance.
To do that you shouldn't use nn.Sequential. Just initialize your layers in __init__() and call them in the forward function. In the forward function you can print the shapes out. For example like this:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(...)
        self.maxpool1 = nn.MaxPool2d()
        self.conv2 = nn.Conv2d(...)
        self.maxpool2 = nn.MaxPool2d()

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        print(x.size())
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool2(x)
        print(x.size())
        return x
Hope that's what you're looking for!
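If you would rather keep nn.Sequential, another option (not mentioned in the answer above) is to register forward hooks that print each layer's output shape; a minimal sketch with made-up layer sizes:
import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(1, 16, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(16, 32, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2),
)

# print the output shape of every submodule as data flows through it
for name, layer in model.named_modules():
    if name:  # skip the top-level Sequential itself
        layer.register_forward_hook(
            lambda module, inp, out, name=name: print(name, tuple(out.shape))
        )

_ = model(torch.randn(1, 1, 64, 64))  # dummy input just to trigger the hooks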

ValueError: Negative dimension size caused by subtracting 2 from 1 for 'max_pooling3d_3/MaxPool3D'(op: 'MaxPool3D')with input shapes[?,1,60,60,128]

I am building a Keras U-Net model for 3D image segmentation.
Image shape: 240, 240, 150
The input shape is 240, 240, 150, 4, 335 >> training data
The output shape should be 240, 240, 150, 335 >> training labels
I am using Conv3D, MaxPooling3D, Conv3DTranspose, and concatenate layers to build the model.
I am facing this error during model building, where I am doing the upsampling:
ValueError: Negative dimension size caused by subtracting 2 from 1 for 'max_pooling3d_3/MaxPool3D' (op: 'MaxPool3D') with input shapes: [?,1,60,60,128].
I searched for solutions and found suggestions to use padding='same' in the layers and k.set_image_data_format('channels_last').
With this I faced a new error when doing the concatenation after the upsampling:
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 30, 30, 18, 256), (None, 30, 30, 19, 256)]
I am currently looping between those two errors and can't find what the exact issue is, nor how to solve it.
Here is the code where I am building the model:
def build_unet_model(input_shape):
    inputs = Input(input_shape)
    conv1 = create_shared_convolution(inputs, 32, config.KERNEL_SIZE)
    block1 = down_convolution(conv1, config.POOL_SIZE)
    conv2 = create_shared_convolution(block1, 64, config.KERNEL_SIZE)
    block2 = down_convolution(conv2, config.POOL_SIZE)
    conv3 = create_shared_convolution(block2, 128, config.KERNEL_SIZE)
    block3 = down_convolution(conv3, config.POOL_SIZE)
    conv4 = create_shared_convolution(block3, 256, config.KERNEL_SIZE)
    block4 = down_convolution(conv4, config.POOL_SIZE)
    conv5 = create_shared_convolution(block4, 512, config.KERNEL_SIZE)  # mid_con

    up1 = concatenate_layers(create_up_convolution(conv5, 256, config.STRIDE_SIZE), conv4)
    conv6 = create_shared_convolution(up1, 256, config.KERNEL_SIZE)
    up2 = concatenate_layers(create_up_convolution(conv6, 128, config.STRIDE_SIZE), conv3)
    conv7 = create_shared_convolution(up2, 128, config.KERNEL_SIZE)
    up3 = concatenate_layers(create_up_convolution(conv7, 64, config.STRIDE_SIZE), conv2)
    conv8 = create_shared_convolution(up3, 64, config.KERNEL_SIZE)
    up4 = concatenate_layers(create_up_convolution(conv8, 32, config.STRIDE_SIZE), conv1)
    conv9 = create_shared_convolution(up4, 32, config.KERNEL_SIZE)
    outputs = create_output_layer(conv9, 4, (1, 1, 1))

    model = Model(inputs=[inputs], outputs=[outputs])
    print(model.summary())
    return model.compile(optimizer=AdaBound(lr=1e-5, final_lr=1), loss=utils.ce_dl_loss, metrics=['accuracy'])
And these are the 5 functions used to build the model:
def create_shared_convolution(input_layer, number_of_nets, kernel_size,
                              activation='relu', padding='same',
                              kernel_initializer=initializers.random_normal(stddev=0.01)):
    conv = Conv3D(number_of_nets, kernel_size, activation=activation, padding=padding,
                  kernel_initializer=kernel_initializer)(input_layer)
    conv = Conv3D(number_of_nets, kernel_size, activation=activation, padding=padding,
                  kernel_initializer=kernel_initializer)(conv)
    return conv


def down_convolution(input_layer, pool_size):
    return MaxPooling3D(pool_size=pool_size)(input_layer)


def create_up_convolution(input_layer, number_of_nets, stride_size, padding='same',
                          kernel_initializer=initializers.random_normal(stddev=0.01)):
    return Conv3DTranspose(number_of_nets, stride_size, strides=stride_size, padding=padding,
                           kernel_initializer=kernel_initializer)(input_layer)


def concatenate_layers(layer1, layer2):
    return merge.concatenate([layer1, layer2])


def create_output_layer(input_layer, number_of_nets, kernel_size, activation='relu',
                        kernel_initializer=initializers.random_normal(stddev=0.01)):
    conv = Conv3D(number_of_nets, kernel_size, activation=activation,
                  kernel_initializer=kernel_initializer)(input_layer)
    return Activation('softmax')(conv)
Here are some explanations on both errors.
The first one is due to your feature maps being too small in your network. I don't have the detail of your network architecture, but if you apply a lot of maxpooling layers to your input (of shape 240, 240, 150), it might end up with only one value left on a dimension (probably something like (N, N, 1)). Adding another maxpooling on top of this is impossible because you don't have enough values on that dimension to perform it. That's why it raises the negative dimension error.
The second one is probably due to the maxpooling layers as well. When you apply your first maxpooling, there isn't any issue: the output shape is (120, 120, 75), so upsampling it would give you back (240, 240, 150). But the next maxpooling (applied to (120, 120, 75)) produces an output of shape (60, 60, 37) because the last dimension is odd, and upsampling that would give (120, 120, 74). Hence the mismatch. A solution is to add ZeroPadding layers to make the dimensions match before concatenating them when a dimension is odd.
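A sketch of that suggestion, assuming TF2-style Keras imports and reusing the concatenate_layers helper from the question; it pads the upsampled tensor on the mismatched axis so the shapes line up (e.g. 18 -> 19 on axis 3):
from tensorflow.keras.layers import ZeroPadding3D, concatenate

def concatenate_layers(upsampled, skip):
    # pad the third spatial axis of the upsampled tensor when the skip
    # connection is larger, e.g. (..., 18, 256) vs (..., 19, 256)
    diff = skip.shape[3] - upsampled.shape[3]
    if diff > 0:
        upsampled = ZeroPadding3D(padding=((0, 0), (0, 0), (0, diff)))(upsampled)
    return concatenate([upsampled, skip])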

Given groups=1, weight of size 16 16 3 3, expected input[16, 64, 222, 222] to have 16 channels, but got 64 channels instead?

I am trying to run the following program for an image classification problem in PyTorch.
I am new to PyTorch and I am not getting what's wrong with the code. I tried reshaping the images but that didn't help.
I am running this code with CUDA. I have around 750 classes and 10-20 images per class. My dataset is a benchmark dataset and every image has a size of 60*160.
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.ConvLayer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3),    # inp (3, 512, 512) changes doing here original (3, 64, 3)
            nn.Conv2d(8, 16, 3),    # original (8, 16, 3)
            nn.MaxPool2d(2),
            nn.ReLU()               # op (16, 256, 256)
        )
        self.ConvLayer2 = nn.Sequential(
            nn.Conv2d(16, 32, 5),   # inp (16, 256, 256)
            nn.Conv2d(32, 32, 3),
            nn.MaxPool2d(4),
            nn.ReLU()               # op (32, 64, 64)
        )
        self.ConvLayer3 = nn.Sequential(
            nn.Conv2d(32, 64, 3),   # inp (32, 64, 64) original (32, 64, 3)
            nn.Conv2d(64, 64, 5),
            nn.MaxPool2d(2),
            nn.ReLU()               # op (64, 32, 32)
        )
        self.ConvLayer4 = nn.Sequential(
            nn.Conv2d(64, 128, 5),  # inp (64, 32, 32)
            nn.Conv2d(128, 128, 3),
            nn.MaxPool2d(2),
            nn.ReLU()               # op (128, 16, 16)
        )
        self.Lin1 = nn.Linear(15488, 15)
        self.Lin2 = nn.Linear(1500, 150)
        self.Lin3 = nn.Linear(150, 15)

    def forward(self, x):
        x = self.ConvLayer1(x)
        x = self.ConvLayer2(x)
        x = self.ConvLayer3(x)
        x = self.ConvLayer4(x)
        x = x.view(x.size(0), -1)
        x = self.Lin1(x)
        return F.log_softmax(x, dim=1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.5)

for epoch in tqdm(range(2)):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(dataloaders['train']):
        # get the inputs; data is a list of [inputs, labels]
        inputs, class_names = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, class_names)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        # if i % 10 == 0:    # print every 10 mini-batches
        #     print('[%d, %5d] loss: %.3f' %
        #           (epoch + 1, i + 1, running_loss / 2000))
        #     running_loss = 0.0
        break

print('Finished Training')
Getting this error and I don't know where to make changes.
Given groups=1, the weight of size 16 16 3 3, expected input[16, 64, 222, 222] to have 16 channels, but got 64 channels instead.
The number of output channels in a conv layer needs to match the number of input channels in the next conv layer. Say you have nn.Conv2d(3, 64, 3); then the next conv layer needs to begin nn.Conv2d(64, ...). Right now the issue is that you're trying to pass a 64-channel result into a conv layer which you've defined to expect an 8-channel input.
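Applied to the model in the question, a minimal sketch of what the first block could look like with the channels chained (so that ConvLayer2, which starts with 16 input channels, still matches):
self.ConvLayer1 = nn.Sequential(
    nn.Conv2d(3, 64, 3),   # 3-channel RGB input -> 64 feature maps
    nn.Conv2d(64, 16, 3),  # must accept the 64 channels produced above
    nn.MaxPool2d(2),
    nn.ReLU()
)
Note that changing the conv stack also changes the flattened size that feeds self.Lin1, so that in_features value would need to be recomputed for the actual input resolution.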
