Tensorflow U-Net segmentation mask input - python

I am new to TensorFlow and semantic segmentation.
I am designing a U-Net for semantic segmentation. Each image has one object that I want to classify, but in total I have images of 10 different objects. I am confused about how to prepare my mask input: is this considered multi-label segmentation, or segmentation for only one class?
Should I convert my input to one-hot encoding? Should I use to_categorical? I find examples for multi-class segmentation, but I don't know if that's the case here, because in each image I only have one object to detect/classify.
I tried using the following code for my input, but I am not sure whether what I am doing is right.
# Generation of batches of images and masks
class DataGen(keras.utils.Sequence):
    def __init__(self, image_names, path, batch_size, image_size=128):
        self.image_names = image_names
        self.path = path
        self.batch_size = batch_size
        self.image_size = image_size

    def __load__(self, image_name):
        # Paths
        image_path = os.path.join(self.path, "images/aug_test", image_name) + ".png"
        mask_path = os.path.join(self.path, "masks/aug_test", image_name) + ".png"

        # Reading the image
        image = cv2.imread(image_path, 1)
        image = cv2.resize(image, (self.image_size, self.image_size))

        # Reading the mask
        mask = cv2.imread(mask_path, -1)
        mask = cv2.resize(mask, (self.image_size, self.image_size))

        ## Normalizing
        image = image/255.0
        mask = mask/255.0

        return image, mask

    def __getitem__(self, index):
        if (index+1)*self.batch_size > len(self.image_names):
            self.batch_size = len(self.image_names) - index*self.batch_size

        image_batch = self.image_names[index*self.batch_size : (index+1)*self.batch_size]

        image = []
        mask = []
        for image_name in image_batch:
            _img, _mask = self.__load__(image_name)
            image.append(_img)
            mask.append(_mask)

        # This is where I am defining my input
        image = np.array(image)
        mask = np.array(mask)
        mask = tf.keras.utils.to_categorical(mask, num_classes=10, dtype='float32')  # Is this true?

        return image, mask

    def __len__(self):
        return int(np.ceil(len(self.image_names)/float(self.batch_size)))
Is this right? If it is, then what should I change in my input to get the label/class as output? Should I change the pixel values of my mask according to my class?
Here is my U-Net architecture.
# Convolution and deconvolution blocks
def down_scaling_block(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(x)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    pool = keras.layers.MaxPool2D((2, 2), (2, 2))(conv)
    return conv, pool

def up_scaling_block(x, skip, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv_t = keras.layers.UpSampling2D((2, 2))(x)
    concat = keras.layers.Concatenate()([conv_t, skip])
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(concat)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    return conv

def bottleneck(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(x)
    conv = keras.layers.Conv2D(filters, kernel_size, padding=padding, strides=strides, activation="relu")(conv)
    return conv

def UNet():
    filters = [16, 32, 64, 128, 256]
    inputs = keras.layers.Input((image_size, image_size, 3))

    '''inputs2 = keras.layers.Input((image_size, image_size, 1))
    conv1_2, pool1_2 = down_scaling_block(inputs2, filters[0])'''

    Input = inputs
    conv1, pool1 = down_scaling_block(Input, filters[0])
    conv2, pool2 = down_scaling_block(pool1, filters[1])
    conv3, pool3 = down_scaling_block(pool2, filters[2])
    '''conv3 = keras.layers.Conv2D(filters[2], kernel_size=(3,3), padding="same", strides=1, activation="relu")(pool2)
    conv3 = keras.layers.Conv2D(filters[2], kernel_size=(3,3), padding="same", strides=1, activation="relu")(conv3)
    drop3 = keras.layers.Dropout(0.5)(conv3)
    pool3 = keras.layers.MaxPooling2D((2,2), (2,2))(drop3)'''
    conv4, pool4 = down_scaling_block(pool3, filters[3])

    bn = bottleneck(pool4, filters[4])

    deConv1 = up_scaling_block(bn, conv4, filters[3])       # 8 -> 16
    deConv2 = up_scaling_block(deConv1, conv3, filters[2])  # 16 -> 32
    deConv3 = up_scaling_block(deConv2, conv2, filters[1])  # 32 -> 64
    deConv4 = up_scaling_block(deConv3, conv1, filters[0])  # 64 -> 128

    outputs = keras.layers.Conv2D(10, (1, 1), padding="same", activation="softmax")(deConv4)
    model = keras.models.Model(inputs, outputs)
    return model

model = UNet()
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=["acc"])

train_gen = DataGen(train_img, train_path, image_size=image_size, batch_size=batch_size)
valid_gen = DataGen(valid_img, train_path, image_size=image_size, batch_size=batch_size)
test_gen = DataGen(test_img, test_path, image_size=image_size, batch_size=batch_size)

train_steps = len(train_img)//batch_size
valid_steps = len(valid_img)//batch_size

model.fit_generator(train_gen, validation_data=valid_gen, steps_per_epoch=train_steps,
                    validation_steps=valid_steps, epochs=epochs)
I hope I explained my question properly. Any help is appreciated!
UPDATE: I changed the value of each pixel in the mask according to its object class. (If the image contains an object that I want to classify as object no. 2, then I set the mask pixel values to 2; the whole mask array then contains 0 (background) and 2 (object). Accordingly, for the other objects the mask contains 0 and 3, 0 and 10, etc.)
Here I first converted the mask to binary, and then, where the pixel value was greater than 1, I changed it to 1, 2, or 3 (according to the object/class number).
Then I converted the masks to one-hot with to_categorical as shown in my code. Training runs, but the network doesn't learn anything: accuracy and loss keep swinging between two values. What is my mistake here? Am I making a mistake when generating the masks (changing the pixel values), or in the to_categorical call?
PROBLEM FOUND:
I was making an error while creating the mask: I was reading the image with cv2, which stores it as height x width, but I was assigning per-class pixel values under the assumption that my image dimensions were width x height. That mismatch was causing the problem and keeping the network from learning anything. It is working now.
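For anyone hitting the same pitfall, a minimal illustration (the file name is hypothetical): NumPy arrays returned by cv2 are indexed (height, width), while cv2.resize takes its size argument as (width, height).

import cv2

img = cv2.imread("example.png")        # hypothetical file; img.shape is (height, width, 3)
resized = cv2.resize(img, (200, 100))  # size argument is (width, height)
print(resized.shape)                   # -> (100, 200, 3): height 100, width 200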

Each image has one object that I want to classify, but in total I have images of 10 different objects. I am confused about how to prepare my mask input: is this considered multi-label segmentation, or segmentation for only one class?
If your dataset has N different labels (e.g. 0 - background, 1 - dogs, 2 - cats, ...), you have a multi-class problem, even if each of your images contains only one kind of object.
Should I convert my input to one-hot encoding? Should I use to_categorical?
Yes, you should one-hot encode your labels. Whether to_categorical does the job boils down to the source format of your labels. Say you have N classes and your labels are (height, width, 1), where each pixel has a value in the range [0, N). In that case keras.utils.to_categorical(label, N) will give you a float (height, width, N) label, where each pixel is 0 or 1. And you don't have to divide by 255.
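For instance, a minimal sketch of that conversion (the 128x128 size is made up):

import numpy as np
from tensorflow.keras.utils import to_categorical

N = 10                                               # number of classes
label = np.random.randint(0, N, size=(128, 128, 1))  # integer label map
one_hot = to_categorical(label, num_classes=N)       # float array, one channel per class
assert one_hot.shape == (128, 128, N)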
If your source format is different, you may have to use a custom function to get the same output format.
Check out this repo (not my work): keras-unet. The notebooks folder contains two examples for training a U-Net on small datasets. They are not multi-class, but it is easy to follow them step by step with your own dataset. Start by loading your labels as:
from PIL import Image
from keras.utils import to_categorical

im = Image.open(mask).resize((512,512))
im = to_categorical(im, NCLASSES)
reshape and normalize like this:
x = np.asarray(imgs_np, dtype=np.float32)/255
y = np.asarray(masks_np, dtype=np.float32)
y = y.reshape(y.shape[0], y.shape[1], y.shape[2], NCLASSES)
x = x.reshape(x.shape[0], x.shape[1], x.shape[2], 3)
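With the 512x512 resize above and NCLASSES classes, y ends up with shape (num_masks, 512, 512, NCLASSES), and x with shape (num_images, 512, 512, 3).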
adapt your model to NCLASSES
model = custom_unet(
    input_shape,
    use_batch_norm=False,
    num_classes=NCLASSES,
    filters=64,
    dropout=0.2,
    output_activation='softmax')
select the correct loss:
from keras.losses import categorical_crossentropy

model.compile(
    optimizer=SGD(lr=0.01, momentum=0.99),
    loss='categorical_crossentropy',
    metrics=[iou, iou_thresholded])
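(iou and iou_thresholded here are the metrics shipped with that repo; as far as I can tell they come from keras_unet.metrics.)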
Hope it helps

Related

Satellite image reconstruction

I have to carry out a project on image reconstruction. I have a 1024x1024 image which represents a place before a natural disaster, and I have the same image post-disaster. The goal is to reconstruct the post-disaster image using an autoencoder.
First I split my 1024x1024 image into several tiles of size 16x16. Then I created the layers for my autoencoder.
I guess I need to fit my model on every tile to calculate the weights, but can I? For example, create a for loop and fit my model on every tile.
But when I run the prediction, it's wrong, and the result is the same for each tile.
I have no clue how to do this, and I'm new to image processing and deep learning, so I need help.
I share some pieces of code below. Thanks!
My autoencoder layers below:
from tensorflow.keras import layers, models

input_layer = layers.Input(shape=(16,16,1))
encoded = layers.Conv2D(64, (4,4), activation = 'relu' , padding = 'same')(input_layer)
encoded = layers.MaxPooling2D((2,2), padding='same')(encoded)
encoded_1 = layers.Conv2D(32, (4,4), activation = 'relu' , padding = 'same')(encoded)
encoded_1 = layers.MaxPooling2D((2,2), padding='same')(encoded_1)
encoded_2 = layers.Conv2D(16, (4,4), activation = 'relu' , padding = 'same')(encoded_1)
encoded_2 = layers.MaxPooling2D((2,2), padding='same')(encoded_2)
encoded_3 = layers.Conv2D(8, (4,4), activation = 'relu' , padding = 'same')(encoded_2)
encoded_3 = layers.MaxPooling2D((2,2), padding='same')(encoded_3)
encoding_layer = layers.Conv2D(4,(4,4), activation = 'relu', padding='same')(encoded_3)
decoded = layers.Conv2D(8, (4,4), activation='relu', padding='same')(encoding_layer)
decoded = layers.UpSampling2D((2,2))(decoded)
decoded_1 = layers.Conv2D(16,(4,4), activation = 'relu', padding='same')(decoded)
decoded_1 = layers.UpSampling2D((2,2))(decoded_1)
decoded_2 = layers.Conv2D(32, (4,4), activation='relu', padding='same')(decoded_1)
decoded_2 = layers.UpSampling2D((2,2))(decoded_2)
decoded_3 = layers.Conv2D(64,(4,4), activation = 'relu', padding='same')(decoded_2)
decoded_3 = layers.UpSampling2D((2,2))(decoded_3)
decoding_layer = layers.Conv2D(1,(4,4), activation='sigmoid', padding='same')(decoded_3)
autoencoder = models.Model(input_layer, decoding_layer)
My training block below:
number_epochs = 25
autoencoder.compile('adam', loss='binary_crossentropy')
for i in range(0,64):
    for j in range(0,64):
        history = autoencoder.fit(x=bands_split_train[i,j], y=bands_split_test[i,j], batch_size=256, epochs=number_epochs, shuffle=True)
The bands_split_train variable is an array with a shape of (64,64,16,16,1).
And below is how I predict one tile:
predicted_tiles = autoencoder.predict(bands_split_train[0,0])
With that I get the same result for every single tile in the image.
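As an aside, a sketch (mine, assuming only the (64,64,16,16,1) layout stated above) of how the grid of tiles could be collapsed into one batch, so that fit is called once over all tiles rather than once per tile:

# Collapse the 64x64 grid of 16x16 tiles into a single batch of 4096 tiles.
tiles_train = bands_split_train.reshape(-1, 16, 16, 1)   # -> (4096, 16, 16, 1)
tiles_target = bands_split_test.reshape(-1, 16, 16, 1)
history = autoencoder.fit(x=tiles_train, y=tiles_target, batch_size=256,
                          epochs=number_epochs, shuffle=True)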

Keras crop input image to origin that is predicted by model

I would like to build a model in Keras that predicts what regions of an image are important, using this model:
crop_points = keras.Sequential([
    Conv2D(8, (3,3), input_shape=(28, 28, 1)),
    MaxPooling2D(),
    Conv2D(8, (3,3)),
    MaxPooling2D(),
    Conv2D(8, (3,3)),
    Flatten(),
    Dense(16),
    RepeatVector(num_samples),
    LSTM(32, return_sequences=True),
    TimeDistributed(Dense(2))
])
The model predicts a tensor of length num_samples containing the origins of the cropping regions, i.e. shape = (num_samples, 2). I would now like to have a Lambda layer (or a custom layer if that works better) that crops the input image at each of those predicted origins for further processing by another model. This needs to be done in Keras, as the model should be trained end-to-end and, in the end, be exported to CoreML.
My current Lambda layer looks like this:
# Lambda layer
def crop_image(tensor):
    image = tensor[0]
    point = tensor[1]
    x_location = int(K.cast(point[0], "int32"))
    y_location = int(K.cast(point[1], "int32"))
    print("x: {}, y: {}".format(x, y))
    chunk = x_test[i][x_location:x_location + chunk_size, y_location:y_location + chunk_size]
    flattened = K.concatenate(chunk).ravel()
    flattened = K.append(chunk, x_location)
    flattened = K.append(chunk, y_location)
    #flattened = np.array(flattened)
    #return tf.convert_to_tensor(flattened)
My training data looks like this:
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
x_train = x_train / 255
x_test = x_test / 255
However, Keras complains "int() argument must be a string, a bytes-like object or a number, not 'Tensor'", as it is unable to convert the tensors to the integers I need for cropping the image. How should I write my Lambda layer to crop the image dynamically based on the predicted origins?
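One possible direction (my sketch, not a confirmed solution): keep everything as tensor operations instead of converting to Python ints. tf.image.extract_glimpse accepts a per-sample (batch, 2) float tensor of window positions, so something along these lines could replace the manual slicing; chunk_size and the layer input names here are assumptions, and whether this exports cleanly to CoreML is a separate question.

import tensorflow as tf
from tensorflow import keras

chunk_size = 8  # assumed crop size

def crop_image(tensors):
    images, points = tensors              # images: (B, 28, 28, 1), points: (B, 2)
    # extract_glimpse expects the (y, x) centers of the windows; convert the
    # predicted top-left origins to centers.
    offsets = points + chunk_size / 2.0
    return tf.image.extract_glimpse(images, size=[chunk_size, chunk_size],
                                    offsets=offsets, centered=False, normalized=False)

cropped = keras.layers.Lambda(crop_image)([image_input, predicted_origins])  # hypothetical names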

How do I get fixed feature output from a Keras CNN layer

I'm trying to get a one-dimensional, fixed output from the same image
by passing it through a Conv2D layer, a MaxPooling2D layer, and a Flatten layer from keras.layers.
I don't know whether it's because of the weights of the Conv2D kernel; although I supplied an initializers.Ones(),
the weight values keep changing. What did I miss?
Is there a good way to get a fixed value from the same image through Keras layers?
Here is my code:
## get image
img = cv2.imread("puppy.jpg")
resize_img = cv2.resize(img, (128, 128))
resize_img = resize_img / 255.0
reshape_img = resize_img.reshape((-1, 128, 128, 3))
## set layers
initializer = tf.keras.initializers.Ones()
conv_layer = tf.keras.layers.Conv2D(1, (3, 3), input_shape=(128, 128, 3), kernel_regularizer=initializer)
polling_layer = tf.keras.layers.MaxPooling2D((2, 2))
flatten_layer = tf.keras.layers.Flatten()
## set model
model = Sequential([
    conv_layer,
    polling_layer,
    flatten_layer,
])
model.trainable = False
weight = model.get_weights()
print(weight)
g = model(reshape_img)
print(g)
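A hedged guess at the cause (my reading, not confirmed in the thread): the code above passes the initializer as kernel_regularizer, but Conv2D takes it as kernel_initializer; as written, the kernel keeps its default random (Glorot) initialization and therefore differs between runs. A sketch of the corrected layer:

conv_layer = tf.keras.layers.Conv2D(
    1, (3, 3), input_shape=(128, 128, 3),
    kernel_initializer=tf.keras.initializers.Ones(),  # initializer, not regularizer
    bias_initializer=tf.keras.initializers.Zeros())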

Tensorflow can't append batches together after doing the first epoch

I am running into problems with my code after I removed the loss function from the compile step (set it to loss=None) and added one through the add_loss method instead, with the intention of adding a second loss function later. I can call fit and it trains for one epoch, but then I get this error:
ValueError: operands could not be broadcast together with shapes (128,) (117,) (128,)
My batch size is 128. The 117 appears to depend on the number of examples I am using: when I vary the number of examples, I get different numbers, and they are always my number of examples mod my batch size. I am at a loss about how to fix this issue. I am using tf.data.TFRecordDataset as input.
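(For instance, with hypothetical counts: 11,637 examples and a batch size of 128 leave a final partial batch of 11,637 mod 128 = 117 examples.)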
I have the following simplified model:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        encoder_input = layers.Input(shape=INPUT_SHAPE, name='encoder_input')
        x = encoder_input
        x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', strides=2)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Conv2D(32, (3, 3), activation='relu', padding='same', strides=2)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Flatten()(x)
        encoded = layers.Dense(LATENT_DIM, name='encoded')(x)
        self.encoder = Model(encoder_input, outputs=[encoded])

        self.decoder = tf.keras.Sequential([
            layers.Input(shape=LATENT_DIM),
            layers.Dense(32 * 32 * 32),
            layers.Reshape((32, 32, 32)),
            layers.Conv2DTranspose(32, kernel_size=3, strides=2, activation='relu', padding='same'),
            layers.Conv2DTranspose(64, kernel_size=3, strides=2, activation='relu', padding='same'),
            layers.Conv2D(3, kernel_size=(3, 3), activation='sigmoid', padding='same')])

    def call(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        # Loss function. Has to be here because I intend to add another, more layer-interdependent, loss function.
        r_loss = tf.math.reduce_sum(tf.math.square(x - decoded), axis=[1, 2, 3])
        self.add_loss(r_loss)
        return decoded

def read_tfrecord(example):
    example = tf.io.parse_single_example(example, CELEB_A_FORMAT)
    image = decode_image(example['image'])
    return image, image

def load_dataset(filenames, func):
    dataset = tf.data.TFRecordDataset(
        filenames
    )
    dataset = dataset.map(partial(func), num_parallel_calls=tf.data.AUTOTUNE)
    return dataset

def train_autoencoder():
    filenames_train = glob.glob(TRAIN_PATH)
    train_dataset_x_x = load_dataset(filenames_train[:4], func=read_tfrecord)

    autoencoder = MyModel()

    # The loss function used to be defined here and everything worked fine before.
    def r_loss(y_true, y_pred):
        return tf.math.reduce_sum(tf.math.square(y_true - y_pred), axis=[1, 2, 3])

    optimizer = tf.keras.optimizers.Adam(1e-4)
    autoencoder.compile(optimizer=optimizer, loss=None)

    autoencoder.fit(train_dataset_x_x.batch(AUTOENCODER_BATCH_SIZE),
                    epochs=AUTOENCODER_NUM_EPOCHS,
                    shuffle=True)
If you only want to get rid of the error and don't care about the last "remainder" batch of your dataset, you can pass the keyword argument drop_remainder=True to train_dataset_x_x.batch(); that way all of your batches will be the same size.
FYI, it's usually better practice to batch your dataset outside of the call to fit:
data = data.batch(32)
model.fit(data)
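For example, combining both suggestions with the names from the question:

dataset = train_dataset_x_x.batch(AUTOENCODER_BATCH_SIZE, drop_remainder=True)
autoencoder.fit(dataset, epochs=AUTOENCODER_NUM_EPOCHS)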
The loss function cannot be set in the call method.
The call method is intended to perform a forward pass, not to compute the loss.
You need to add the loss function in the compile method or after it.

How to convert a tensorflow model to a pytorch model?

I'm new to PyTorch. Here's the architecture of a TensorFlow model, and I'd like to convert it into a PyTorch model.
I have done most of the code but am confused about a few places.
1) In TensorFlow, the Conv2D function takes filters as an input. However, in PyTorch, the function takes the numbers of input channels and output channels as inputs. So how do I find the equivalent numbers of input and output channels, given the filters parameter?
2) In TensorFlow, the dense layer has a parameter for the number of nodes. However, in PyTorch, the same layer takes 2 different inputs (the input size and the output size). How do I determine them based on the number of nodes?
Here's the tensorflow code.
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=X_train.shape[1:]))
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(43, activation='softmax'))
Here's my code:
import torch
import torch.nn as nn
import torch.nn.functional as F

# The network should inherit from nn.Module
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Define 2D convolution layers
        # 3: input channels, 32: output channels, 5: kernel size, 1: stride
        self.conv1 = nn.Conv2d(3, 32, 5, 1)  # The number of input channels is 3 because all images are coloured
        self.conv2 = nn.Conv2d(32, 64, 5, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 1)
        self.conv4 = nn.Conv2d(128, 256, 3, 1)
        # Dropout 'filters out' some of the input with the given probability (assigns zero)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        # Fully connected layers: input size, output size
        self.fc1 = nn.Linear(36864, 128)
        self.fc2 = nn.Linear(128, 10)

    # forward() links all layers together
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
Thanks in advance!
1) In PyTorch, we specify input channels and output channels. In your first layer, the input channels will be the number of color channels in your image. After that, it's always the same as the output channels of the previous layer (output channels are specified by the filters parameter in TensorFlow).
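For instance, for the first two layers of the Keras model above (assuming RGB input):

import torch.nn as nn

# Keras: Conv2D(filters=32, kernel_size=(5,5)) on an RGB image
conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5)
# Keras: Conv2D(filters=32, kernel_size=(5,5)) stacked on the previous layer
conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)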
2) PyTorch is slightly annoying in that when flattening your conv outputs you'll have to calculate the shape yourself. You can either use an equation to calculate this, Out = (W - F + 2P)/S + 1, or make a shape-calculating function that passes a dummy image through the conv part of the network. This value will be your input size argument; the output size argument will just be the number of nodes you want in your next fully connected layer.
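A minimal sketch of such a shape-calculating function (the function name and example input shape are my own):

import torch
import torch.nn as nn

def conv_output_size(conv_part: nn.Module, input_shape=(3, 32, 32)) -> int:
    """Number of features per sample after flattening conv_part's output."""
    with torch.no_grad():
        dummy = torch.zeros(1, *input_shape)  # a batch with one all-zero image
        out = conv_part(dummy)
    return out.flatten(1).shape[1]

# e.g. fc1_in = conv_output_size(nn.Sequential(net.conv1, nn.ReLU(), net.conv2))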
