import imageio
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
trainImages = []
for imagePath in glob.glob('C:/Users/razva/*.png'):
    image = imageio.imread(imagePath)
    trainImages.append(image)
trainImages = np.array(trainImages)
f = open('C:/Users/razva/train.txt')
trainLabels = f.readlines()
for i in range(len(trainLabels)):
    trainLabels[i] = int(trainLabels[i][11])
trainLabels = np.array(trainLabels)
validationImages = []
for imagePath in glob.glob('C:/Users/razva/*.png'):
    image = imageio.imread(imagePath)
    validationImages.append(image)
validationImages = np.array(validationImages)
f = open('C:/Users/razva/validation.txt')
validationLabels = f.readlines()
for i in range(len(validationLabels)):
    validationLabels[i] = int(validationLabels[i][11])
validationLabels = np.array(validationLabels)
mean_image = np.mean(trainImages, axis = 0)
sd = np.std(trainImages)
trainImages = (trainImages - mean_image) / sd
mean_image1 = np.mean(validationImages, axis = 0)
sd1 = np.std(validationImages)
validationImages = (validationImages - mean_image1) / sd1
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
history = model.fit(trainImages, trainLabels, epochs=10, validation_data=(validationImages, validationLabels))
I have this CNN for image classification: trainImages and trainLabels (labels from 0 to 8) are the training data, and validationImages and validationLabels are used for validation. The images are 32 x 32. I can't make this algorithm work; please tell me if you observe any more errors.
I can't tell exactly where the problem is since I have no access to the loaded images, but the issue is that you are providing samples without the "channel" axis which in the specified input_shape=(32, 32, 3) is indicated as having size 3. Each sample (image) must have 3 dimensions (width, height, channels) but, on the contrary, you are passing samples with just 2 dimensions (width and height).
This is most likely because you are loading grayscale images with a single channel, which NumPy does not give an explicit axis. If this is the case, make sure each sample in trainImages and validationImages has shape (32, 32, 1); otherwise, expand the last dimension with np.expand_dims(trainImages, axis=-1) (and the same for the validation set) before feeding them to the model. Accordingly, adjust the input_shape of the first Conv2D layer to (32, 32, 1).
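For example, a minimal sketch, assuming the loaded PNGs are grayscale so that trainImages ends up with shape (N, 32, 32):
import numpy as np

# Add the missing channels axis: (N, 32, 32) -> (N, 32, 32, 1)
trainImages = np.expand_dims(trainImages, axis=-1)
validationImages = np.expand_dims(validationImages, axis=-1)

# And adjust the first layer accordingly:
# model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))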
Hope it helps, otherwise let me have further details.
I'm new to PyTorch. Here's the architecture of a TensorFlow model that I'd like to convert into a PyTorch model.
I have done most of the code but am confused about a few places.
1) In TensorFlow, the Conv2D function takes the number of filters as an input. However, in PyTorch, the function takes the numbers of input channels and output channels as inputs. So how do I find the equivalent numbers of input and output channels, given the number of filters?
2) In TensorFlow, the dense layer has a parameter for the number of nodes. However, in PyTorch, the same layer takes two different arguments (the size of the input and the size of the output), so how do I determine them from the number of nodes?
Here's the tensorflow code.
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=X_train.shape[1:]))
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(43, activation='softmax'))
Here's my code:
import torch
import torch.nn as nn
import torch.nn.functional as F

# The network should inherit from nn.Module
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Define 2D convolution layers
        # 3: input channels, 32: output channels, 5: kernel size, 1: stride
        self.conv1 = nn.Conv2d(3, 32, 5, 1)  # 3 input channels because the images are in colour
        self.conv2 = nn.Conv2d(32, 64, 5, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 1)
        self.conv4 = nn.Conv2d(128, 256, 3, 1)
        # Dropout zeroes out part of the input with the given probability
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        # Fully connected layers: input size, output size
        self.fc1 = nn.Linear(36864, 128)
        self.fc2 = nn.Linear(128, 10)

    # forward() links all layers together
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
Thanks in advance!
1) In PyTorch, Conv2d takes the numbers of input channels and output channels as arguments. In your first layer, the input channels will be the number of colour channels in your image. After that, it is always the same as the output channels of your previous layer (the output channels correspond to the filters parameter in TensorFlow).
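For instance, a rough sketch (not a full conversion) of how the first two Keras layers above would map to PyTorch, assuming 3-channel colour input:
import torch.nn as nn

# Keras: Conv2D(filters=32, kernel_size=(5,5)) on an RGB input
conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5)
# Keras: Conv2D(filters=32, kernel_size=(5,5)) stacked on the previous layer
conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)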
2) PyTorch is slightly annoying in that, when flattening your conv outputs, you have to calculate the shape yourself. You can either use the formula Out = (W - F + 2P)/S + 1 for each conv/pool layer (where W is the input size, F the kernel size, P the padding, and S the stride), or write a shape-calculating function that passes a dummy image through the conv part of the network. That value is your size-of-input argument; the size-of-output argument is just the number of nodes you want in the next fully connected layer.
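As an illustration of the second option, here is a minimal sketch that passes a dummy tensor through the conv part to find the flatten size; it assumes 32x32 RGB inputs (adjust the dummy size to your data) and omits dropout, since dropout does not change shapes:
import torch
import torch.nn as nn

# Conv/pool part mirroring the Keras model above (no padding, stride 1)
conv_part = nn.Sequential(
    nn.Conv2d(3, 32, 5), nn.ReLU(),
    nn.Conv2d(32, 32, 5), nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3), nn.ReLU(),
    nn.Conv2d(64, 64, 3), nn.ReLU(),
    nn.MaxPool2d(2),
)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 32, 32)                    # one fake image, batch size 1
    flat_features = conv_part(dummy).flatten(1).shape[1]

print(flat_features)  # use this value as in_features of the first nn.Linear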
I am new to TensorFlow and semantic segmentation.
I am designing a U-Net for semantic segmentation. Each image has one object that I want to classify, but in total I have images of 10 different objects. I am confused: how should I prepare my mask input? Is this considered multi-label segmentation, or segmentation for only one class?
Should I convert my input to one-hot encoding? Should I use to_categorical? I find examples for multi-class segmentation, but I don't know if that's the case here, because in one image I only have one object to detect/classify.
I tried using this as my code for the input, but I am not sure whether what I am doing is right or not.
#Generation of batches of image and mask
class DataGen(keras.utils.Sequence):
    def __init__(self, image_names, path, batch_size, image_size=128):
        self.image_names = image_names
        self.path = path
        self.batch_size = batch_size
        self.image_size = image_size

    def __load__(self, image_name):
        # Paths
        image_path = os.path.join(self.path, "images/aug_test", image_name) + ".png"
        mask_path = os.path.join(self.path, "masks/aug_test", image_name) + ".png"

        # Reading image
        image = cv2.imread(image_path, 1)
        image = cv2.resize(image, (self.image_size, self.image_size))

        # Reading mask
        mask = cv2.imread(mask_path, -1)
        mask = cv2.resize(mask, (self.image_size, self.image_size))

        # Normalizing
        image = image/255.0
        mask = mask/255.0

        return image, mask

    def __getitem__(self, index):
        if (index+1)*self.batch_size > len(self.image_names):
            self.batch_size = len(self.image_names) - index*self.batch_size
        image_batch = self.image_names[index*self.batch_size : (index+1)*self.batch_size]

        image = []
        mask = []
        for image_name in image_batch:
            _img, _mask = self.__load__(image_name)
            image.append(_img)
            mask.append(_mask)

        # This is where I am defining my input
        image = np.array(image)
        mask = np.array(mask)
        mask = tf.keras.utils.to_categorical(mask, num_classes=10, dtype='float32')  # Is this true?
        return image, mask

    def __len__(self):
        return int(np.ceil(len(self.image_names)/float(self.batch_size)))
Is this right? If it is, then what should I change in my input to get the label/class as output? Should I change the pixel values of my mask according to my class?
Here is my U-Net architecture.
# Convolution and deconvolution Blocks
def down_scaling_block(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(x)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    pool = keras.layers.MaxPool2D((2, 2), (2, 2))(conv)
    return conv, pool

def up_scaling_block(x, skip, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv_t = keras.layers.UpSampling2D((2, 2))(x)
    concat = keras.layers.Concatenate()([conv_t, skip])
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(concat)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    return conv

def bottleneck(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(x)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    return conv

def UNet():
    filters = [16, 32, 64, 128, 256]
    inputs = keras.layers.Input((image_size, image_size, 3))
    '''inputs2 = keras.layers.Input((image_size, image_size, 1))
    conv1_2, pool1_2 = down_scaling_block(inputs2, filters[0])'''
    Input = inputs
    conv1, pool1 = down_scaling_block(Input, filters[0])
    conv2, pool2 = down_scaling_block(pool1, filters[1])
    conv3, pool3 = down_scaling_block(pool2, filters[2])
    '''conv3 = keras.layers.Conv2D(filters[2], kernel_size=(3,3), padding="same", strides=1, activation="relu")(pool2)
    conv3 = keras.layers.Conv2D(filters[2], kernel_size=(3,3), padding="same", strides=1, activation="relu")(conv3)
    drop3 = keras.layers.Dropout(0.5)(conv3)
    pool3 = keras.layers.MaxPooling2D((2,2), (2,2))(drop3)'''
    conv4, pool4 = down_scaling_block(pool3, filters[3])

    bn = bottleneck(pool4, filters[4])

    deConv1 = up_scaling_block(bn, conv4, filters[3])       # 8 -> 16
    deConv2 = up_scaling_block(deConv1, conv3, filters[2])  # 16 -> 32
    deConv3 = up_scaling_block(deConv2, conv2, filters[1])  # 32 -> 64
    deConv4 = up_scaling_block(deConv3, conv1, filters[0])  # 64 -> 128

    outputs = keras.layers.Conv2D(10, (1, 1), padding="same", activation="softmax")(deConv4)
    model = keras.models.Model(inputs, outputs)
    return model

model = UNet()
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=["acc"])

train_gen = DataGen(train_img, train_path, image_size=image_size, batch_size=batch_size)
valid_gen = DataGen(valid_img, train_path, image_size=image_size, batch_size=batch_size)
test_gen = DataGen(test_img, test_path, image_size=image_size, batch_size=batch_size)

train_steps = len(train_img)//batch_size
valid_steps = len(valid_img)//batch_size

model.fit_generator(train_gen, validation_data=valid_gen, steps_per_epoch=train_steps, validation_steps=valid_steps,
                    epochs=epochs)
I hope that I explained my question properly. Any help appreciated!
UPDATE: I changed the value of each pixel in the mask according to the object class. (If the image contains the object I want to classify as object no. 2, then I set the mask pixels to 2; the whole mask array then contains 0 (background) and 2 (object). Accordingly, for each object, the mask contains 0 and 3, 0 and 10, etc.)
Here I first converted the mask to binary and then, if a pixel value was greater than 1, I changed it to 1, 2, or 3 (according to the object/class number).
Then I converted them to one-hot with to_categorical as shown in my code. Training runs, but the network doesn't learn anything; accuracy and loss keep swinging between two values. What is my mistake here? Am I making a mistake when generating the mask (changing the pixel values), or in the call to to_categorical?
PROBLEM FOUND:
I was making an error while creating the mask. I was reading the image with cv2, which returns arrays as height x width, but I was assigning per-class pixel values to the mask while assuming my image dimensions were width x height. That mismatch kept the network from learning anything. It is working now.
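For anyone hitting the same issue, a small sketch of the conventions involved (the file name is just a placeholder): cv2 returns arrays as (height, width, channels), while cv2.resize expects the target size as (width, height).
import cv2

img = cv2.imread('some_image.png')   # hypothetical file
print(img.shape)                     # (height, width, 3)

img = cv2.resize(img, (128, 64))     # target size given as (width, height)
print(img.shape)                     # (64, 128, 3): height 64, width 128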
Each image has one object that I want to classify. But in total I have images of 10 different objects. I am confused, how can I prepare my mask input? Is it considered as multi-label segmentation or only for one class?
If your dataset has N different labels (e.g. 0 - background, 1 - dogs, 2 - cats, ...), you have a multi-class problem, even if each of your images contains only one kind of object.
Should I convert my input to one hot encoded? Should I use to_categorical?
Yes, you should one-hot encode your labels. Whether to_categorical works directly depends on the source format of your labels. Say you have N classes and your labels have shape (height, width, 1), where each pixel holds a value in the range [0, N). In that case keras.utils.to_categorical(label, N) will produce a float (height, width, N) label, where each pixel is 0 or 1. And you don't have to divide by 255.
If your source format is different, you may have to write a custom function to get the same output format.
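As a rough illustration (a toy mask, not your data), assuming the pixels already hold integer class ids in [0, N):
import numpy as np
from tensorflow.keras.utils import to_categorical

mask = np.array([[0, 2],
                 [1, 0]])                        # 2x2 mask with 3 classes
one_hot = to_categorical(mask, num_classes=3)
print(one_hot.shape)                             # (2, 2, 3): each pixel becomes a one-hot vector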
Check out this repo (not my work): keras-unet. The notebooks folder contains two examples of training a U-Net on small datasets. They are not multi-class, but it is easy to go step by step to use your own dataset. Start by loading your labels as:
im = Image.open(mask).resize((512,512))
im = to_categorical(im,NCLASSES)
reshape and normalize like this:
x = np.asarray(imgs_np, dtype=np.float32)/255
y = np.asarray(masks_np, dtype=np.float32)
y = y.reshape(y.shape[0], y.shape[1], y.shape[2], NCLASSES)
x = x.reshape(x.shape[0], x.shape[1], x.shape[2], 3)
adapt your model to NCLASSES
model = custom_unet(
    input_shape,
    use_batch_norm=False,
    num_classes=NCLASSES,
    filters=64,
    dropout=0.2,
    output_activation='softmax')
select the correct loss:
from keras.losses import categorical_crossentropy
model.compile(
    optimizer=SGD(lr=0.01, momentum=0.99),
    loss='categorical_crossentropy',
    metrics=[iou, iou_thresholded])
Hope it helps
I have trained a model for Handwritten digit recognition from MNIST.
The input shape of 1st Conv2D is (28, 28, 1)
After training, I wanted to predict a downloaded image of a digit.
The shape of the image was (1024, 791, 3). Using the following code I resized the image to (28, 28, 1) and called model.predict():
resized_image = cv2.resize(image, (28, 28))
#resized_image = tf.image.resize(image, size = (28,28))
resized_image = resized_image / 225.0
resized_image = resized_image[:,:,:1]
prediction = model.predict(resized_image)
#prediction = model.predict(resized_image, batch_size = 1, verbose = 1, steps = 1)
print(labels[np.argmax(prediction)])
But I am getting the following error:
Error when checking input: expected conv2d_17_input to have 4 dimensions, but got array with shape (28, 28, 1)
model is :
model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same', activation = 'relu', input_shape = (28,28,1)),
tf.keras.layers.MaxPool2D(pool_size = (2,2)),
tf.keras.layers.Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation = 'relu'),
tf.keras.layers.MaxPool2D(pool_size = (2,2)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128, activation = 'relu'),
tf.keras.layers.Dense(10, activation = "softmax")
])
I have also tried uncommenting
resized_image = tf.image.resize(image, size = (28,28))
prediction = model.predict(resized_image, batch_size = 1, verbose = 1, steps = 1)
Yet I received the same error.
You need to consider the batch dimension: your input must have shape [batch_size, width, height, channels]. If you have just one image, the batch size is 1, so in your case the input should be [1, 28, 28, 1].
Also, make sure your input is a tf.tensor.
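A minimal sketch of what that looks like, assuming resized_image already has shape (28, 28, 1):
import numpy as np

batched = np.expand_dims(resized_image, axis=0)   # (28, 28, 1) -> (1, 28, 28, 1)
prediction = model.predict(batched)
print(labels[np.argmax(prediction)])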
I am trying to train a CNN on my own data for a binary classification problem, but I am running into a problem with the expected input size, which I thought was (224, 224, 3). I searched for this case and found suggestions to reshape the image from (224, 224, 3) to (1, 224, 224, 3), but that did not work.
Here is my code:
import scipy.io
import tensorflow as tf
import cv2
# Parameters
img_height = 224
img_width = 224
img_depth = 3
classes = 2
# Load Data
db_name = 'polo'
db_path = 'D:/databases/' + db_name + '/'
db_data = scipy.io.loadmat(db_path + 'db_py.mat')
db_size = len(db_data['db']['images'][0][0][0])
faces_path = 'data/' + db_name + '/faces/'
images = []
labels = [0] * db_size
for i in range(0,db_size):
    filename = 'data/' + db_name + '/faces/' + db_data['db']['images'][0][0][0][i][2][0]
    image = cv2.imread(filename)
    image = cv2.resize(image, (img_height, img_width))
    images.append(image)
    labels[i] = db_data['db']['subjects'][0][0][0][i][4][0][0][0][0][0]
inputs = tf.keras.layers.Input(shape=(img_height,img_width,img_depth))
layers = tf.keras.layers.Conv2D(32, (3, 3), padding="same")(inputs)
layers = tf.keras.layers.Activation("relu")(layers)
layers = tf.keras.layers.BatchNormalization(axis=-1)(layers)
layers = tf.keras.layers.Conv2D(32, (3, 3), padding="same")(layers)
layers = tf.keras.layers.Activation("relu")(layers)
layers = tf.keras.layers.BatchNormalization(axis=-1)(layers)
layers = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(layers)
layers = tf.keras.layers.Dropout(0.25)(layers)
layers = tf.keras.layers.Conv2D(64, (3, 3), padding="same")(layers)
layers = tf.keras.layers.Activation("relu")(layers)
layers = tf.keras.layers.BatchNormalization(axis=-1)(layers)
layers = tf.keras.layers.Conv2D(64, (3, 3), padding="same")(layers)
layers = tf.keras.layers.Activation("relu")(layers)
layers = tf.keras.layers.BatchNormalization(axis=-1)(layers)
layers = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(layers)
layers = tf.keras.layers.Dropout(0.25)(layers)
layers = tf.keras.layers.Flatten()(layers)
layers = tf.keras.layers.Dense(512)(layers)
layers = tf.keras.layers.Activation("relu")(layers)
layers = tf.keras.layers.BatchNormalization()(layers)
layers = tf.keras.layers.Dropout(0.5)(layers)
layers = tf.keras.layers.Dense(classes)(layers)
layers = tf.keras.layers.Activation("softmax")(layers)
InitialLearnRate = 0.03
MaxEpochs = 30
MiniBatchSize = 32
opt = tf.keras.optimizers.SGD(lr=InitialLearnRate, decay=InitialLearnRate / MaxEpochs)
model = tf.keras.Model(inputs, layers , name="net")
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])
model.summary()
H = model.fit(images, labels,
              batch_size=MiniBatchSize, epochs=MaxEpochs, verbose=1, steps_per_epoch=10)
If you go to the official documentation and search for the conv2d input shape you'll see:
4D tensor with shape: (batch, channels, rows, cols) if data_format is "channels_first" or
4D tensor with shape: (batch, rows, cols, channels) if data_format is "channels_last"
Alternatively, here's the detailed answer on input formatting.
If you have multiple images, you'd have an input of size (batch_size, 224, 224, 3) in your case. What I see you doing is creating a Python list containing all these images. I would try:
import numpy as np

# Preallocate one array for all images (db_size of them, each img_height x img_width x img_depth)
images = np.empty((db_size, img_height, img_width, img_depth))
for i in range(0, db_size):
    filename = ('data/'
                + db_name
                + '/faces/'
                + db_data['db']['images'][0][0][0][i][2][0])
    image = cv2.imread(filename)
    images[i] = cv2.resize(image, (img_height, img_width))
If this is not helpful, the error message you receive could help other people to answer your question.