How to create a Keras Conv2D layer on a grayscale image set - Python

I have created this NN:
#Encoder
encoder_input = Input(shape=(1,height, width))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=encoder_input, outputs=decoder_output)
model.compile(optimizer='adam', loss='mse' , metrics=['accuracy'])
clean_images = model.fit(train_images,y_train_red, epochs=200)
and train_images is created by:
train_images = np.array([ImageOperation.resizeImage(cv2.imread(train_path + str(i) + ".jpg"), height, width)
                         for i in range(train_size)])
y_train_red = [img[:, :, 2]/255 for img in train_images]
train_images = np.array([ImageOperation.grayImg(item) for item in train_images])
and when I execute the code I receive the following error:
Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (10, 200, 200)
How can I solve it?

Your images are 2D (height x width), whereas Conv2D expects each image to be 3D (height x width x channels). Reshape your images to add the missing channel dimension:
train_images = train_images.reshape(train_size, height, width, 1)
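Note that the model's Input layer has to match this layout as well; with the channels-last reshape above, the encoder's first line would look like this (a sketch, the rest of the model stays unchanged):
encoder_input = Input(shape=(height, width, 1))  # channels-last, matching the reshaped array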

As the documentation says (https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D),
you need a 4-dimensional input for a Conv2D layer (batch, plus three image dimensions). You have to add a channel dimension either after or before the two spatial dimensions of the image:
train_images = train_images.reshape(train_size, height, width, 1)
or
train_images = train_images.reshape(train_size, 1, height, width)
In both cases you have to declare the input layout in every layer of the network with data_format="channels_first" or data_format="channels_last".
For example:
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2, data_format="channels_last")(encoder_input)
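Alternatively, instead of repeating the argument on every layer, the layout can be set once globally (a small sketch using the Keras backend):
import tensorflow.keras.backend as K
K.set_image_data_format('channels_last')  # applies to all layers created afterwards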

Related

Tensorflow input tensor cannot be reached given provided output tensors

I have a U-Net model trained on cloud segmentation images with the following structure:
inputs = keras.Input(input_shape)
conv1 = layers.Conv2D(64, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv1')(inputs)
conv2 = layers.Conv2D(64, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv2')(conv1)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = layers.Conv2D(128, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv3')(pool1)
conv4 = layers.Conv2D(128, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv4')(conv3)
pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
conv5 = layers.Conv2D(256, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv5')(pool2)
conv6 = layers.Conv2D(256, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv6')(conv5)
pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv6)
conv7 = layers.Conv2D(512, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv7')(pool3)
conv8 = layers.Conv2D(512, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv8')(conv7)
drop4 = layers.Dropout(0.5)(conv8)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(drop4)
conv9 = layers.Conv2D(1024, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv9')(pool4)
conv10 = layers.Conv2D(1024, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv19')(conv9)
drop5 = layers.Dropout(0.5)(conv10)
up6 = layers.Conv2D(512, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up6')(
    layers.UpSampling2D(size=(2, 2))(drop5))
merge6 = layers.concatenate([drop4, up6], axis=3)
conv11 = layers.Conv2D(512, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv11')(merge6)
conv12 = layers.Conv2D(512, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv12')(conv11)
up7 = layers.Conv2D(256, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up7')(
    layers.UpSampling2D(size=(2, 2))(conv12))
merge7 = layers.concatenate([conv6, up7], axis=3)
conv13 = layers.Conv2D(256, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv13')(merge7)
conv14 = layers.Conv2D(256, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv14')(conv13)
up8 = layers.Conv2D(128, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up8')(
    layers.UpSampling2D(size=(2, 2))(conv14))
merge8 = layers.concatenate([conv4, up8], axis=3)
conv15 = layers.Conv2D(128, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv15')(merge8)
conv16 = layers.Conv2D(128, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv16')(conv15)
up9 = layers.Conv2D(64, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up9')(
    layers.UpSampling2D(size=(2, 2))(conv16))
merge9 = layers.concatenate([conv2, up9], axis=3)
conv17 = layers.Conv2D(64, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv17')(merge9)
conv18 = layers.Conv2D(1, 1, activation='sigmoid', name='conv_out')(conv17)
model = keras.Model(inputs=inputs, outputs=conv18)
I want to use Keras functions to split the computation; to do so I have the following functions:
selected_layer_idx = 1
get_first_part = K.function([model.layers[0].input], [model.layers[selected_layer_idx].output])
output_first_part = get_first_part([np.expand_dims(img, 0)])[0]
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
final_output = get_second_part(output_first_part)
If I select one of the first two convolutional layers everything goes fine, but when selecting any other layer I get the following error:
ValueError: Found input tensor cannot be reached given provided output tensors. Please make sure the tensor KerasTensor(type_spec=TensorSpec(shape=(None, 384, 384, 4), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") is included in the model inputs when building functional model.
I am having trouble understanding what the problem is here. I have already seen this question, but the highlighted problem should be different, since here I don't have any shared names between different layers.
How could I fix this behavior?
EDIT
I have found the error: this function
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
cannot be created, since we need merge9 to compute the final output. But in order to produce merge9 we need conv2's output, which is not known to the function if selected_layer_idx is greater than 2. To fix this behavior we need to create a third function that computes conv2's output, and pass its result to a fourth function that merges the two values and produces the final output, as sketched below.
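A minimal sketch of that fix, reusing the K.function pattern from the question (the index 3 is just an illustrative choice; for splits deeper in the encoder, the other skip tensors conv4, conv6 and drop4 would have to be passed through in the same way):
selected_layer_idx = 3  # any layer past conv2 but before the deeper skip connections

# First function: input image -> output of the selected layer
get_first_part = K.function([model.layers[0].input],
                            [model.layers[selected_layer_idx].output])
# Third function: input image -> conv2's output, the skip tensor merge9 needs
get_skip_part = K.function([model.layers[0].input],
                           [model.get_layer('conv2').output])
# Fourth function: takes both intermediate tensors and produces the final output
get_second_part = K.function([model.layers[selected_layer_idx + 1].input,
                              model.get_layer('conv2').output],
                             [model.layers[-1].output])

x = np.expand_dims(img, 0)
output_first_part = get_first_part([x])[0]
skip_output = get_skip_part([x])[0]
final_output = get_second_part([output_first_part, skip_output])[0]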

he_normal kernel initialization and global average pooling

I'm trying to implement he_normal kernel initialization and global average pooling in my model, but I don't know how to do it.
#beginmodel
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(100, 100, 1)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(215, activation='relu'),
    Dense(10)
])
Every Keras layer has an initializer argument, so you can use it to pass your initializer method (he_normal is available in Keras).
Global average pooling for images reduces the dimensionality of the network to 2D (batch, channels); it can be used instead of the flatten operation.
I also suggest using a softmax activation in your last layer to get probability scores if you are carrying out a classification problem.
Here's an example:
n_class, n_samples = 10, 3
X = np.random.uniform(0, 1, (n_samples, 100, 100, 1))
y = np.random.randint(0, n_class, n_samples)

model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', kernel_initializer='he_normal',
           input_shape=(100, 100, 1)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    GlobalAvgPool2D(),
    Dense(215, activation='relu'),
    Dense(n_class, activation='softmax')
])
model.compile('adam', 'sparse_categorical_crossentropy')
model.fit(X, y, epochs=3)

restricting output values in keras layer

I have written this NN:
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(encoder_input)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='sigmoid', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Flatten()(decoder_output)
decoder_output = Dense(height*width, activation='relu')(decoder_output)
model = Model(inputs=encoder_input, outputs=decoder_output)
model.compile(optimizer='adam', loss='mse')
clean_images = model.fit(train_images, y_train_red, epochs=10,validation_data=(validation_images,y_validation_red))
which is supposed to return image values.
Is there a way to restrict the returned values to be integers and/or cap the output layer's values at 255?
What should happen is that your model will learn not to output values above 255 or below 0. However, in the instances where it does, you could clip the values to be between 0 and 255 when you are predicting. Regarding integer outputs, there isn't a way that I know of; however, you could round the outputs when you are predicting.
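For example, a minimal post-processing sketch (this assumes the targets were scaled to [0, 1], as the y_train_red = img[:, :, 2] / 255 preprocessing in the related question suggests):
import numpy as np

preds = model.predict(validation_images)
preds = np.clip(preds * 255.0, 0, 255)    # cap values to the valid pixel range
preds = np.rint(preds).astype(np.uint8)   # round to integer pixel values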

Convolutional Neural Net Architecture - correct?

I am trying to train a convolutional neural net. For this I am using a dataset of 646 images of license plates, each containing 8 characters (0-9, A-Z; without the letter 'O' or blank spaces; 36 possible characters in total). These are my training data X_train. Their shape is (646, 40, 200, 3) with 3 color channels. I resized them all to the same shape.
I also have a dataset containing the labels of these images, which I one-hot-encoded to a numpy array of shape (646, 8, 36). This is my y_train data.
Now, I am trying to apply a Neural Network which looks like this:
The architecture is taken from this paper: https://ieeexplore.ieee.org/abstract/document/8078501
I excluded the batch normalization part, because it is not the most interesting one for me. But I am very unsure about the top of the network, that is, the part after the last pooling layer, beginning with model.add(Flatten())...
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape = (40, 200, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(16000, activation = "relu"))
model.add(Dense(128, activation = "relu"))
model.add(Dense(36, activation = "relu"))
model.add(Dense(8*36, activation="Softmax"))
model.add(keras.layers.Reshape((8, 36)))
Thank you very much in advance!
Assuming the image below matches your model architecture, the following code can be used to create the model. Ensure you have some padding for the input images.
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Input, Reshape, Concatenate

def create_model(input_shape=(40, 200, 3)):
    input_img = Input(shape=input_shape)
    model = Conv2D(32, kernel_size=(3, 3), input_shape=(40, 200, 3), activation="relu")(input_img)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    backbone = Flatten()(model)
    branches = []
    for i in range(8):
        branches.append(backbone)
        branches[i] = Dense(16000, activation="relu", name="branch_" + str(i) + "_Dense_16000")(branches[i])
        branches[i] = Dense(128, activation="relu", name="branch_" + str(i) + "_Dense_128")(branches[i])
        branches[i] = Dense(36, activation="softmax", name="branch_" + str(i) + "_output")(branches[i])
    output = Concatenate(axis=1)(branches)
    output = Reshape((8, 36))(output)
    model = Model(input_img, output)
    return model
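A hedged usage sketch (the optimizer, loss and hyperparameters below are assumptions, not part of the answer; with one-hot labels of shape (646, 8, 36), categorical cross-entropy applied over the last axis is a natural fit):
model = create_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, epochs=30, batch_size=32)  # placeholder hyperparameters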

U-net: how to improve accuracy of multiclass segmentation?

I have been using U-nets for a while now, and notice that in most of my applications, it generates an over-estimation surrounding a specific class.
For example, here's a grayscale image:
And a manual segmentation of 3 classes (lesion [green], tissue [magenta], background [all else]):
The issue I notice on prediction (over-estimation at boundaries):
The typical architecture used looks something like this:
def get_unet(dim=128, dropout=0.5, n_classes=3):
    inputs = Input((dim, dim, 1))
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = Dropout(dropout)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = Dropout(dropout)(conv5)
    up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=3)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=3)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=3)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=3)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Conv2D(n_classes, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Reshape((dim * dim, n_classes))(conv10)
    output = Activation('softmax')(conv10)
    model = Model(inputs=[inputs], outputs=[output])
    return model
Plus:
mgpu_model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                   metrics=['accuracy'], sample_weight_mode='temporal')
open(p, 'w').write(json_string)
model_checkpoint = callbacks.ModelCheckpoint(f, save_best_only=True)
reduce_lr_cback = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2,
    patience=5, verbose=1,
    min_lr=0.05 * 0.0001)
h = mgpu_model.fit(train_gray, train_masks,
                   batch_size=64, epochs=50,
                   verbose=1, shuffle=True, validation_split=0.2, sample_weight=sample_weights,
                   callbacks=[model_checkpoint, reduce_lr_cback])
My Question:
Do you have any insight or suggestion on how to change either the architecture or hyperparameters to mitigate the over-estimation? This could include even using a different architecture that may be better at more precise segmentation. (Please note I already do class balancing/weighting to compensate for imbalances in class frequency)
You can experiment with various loss functions instead of cross entropy. For multi-class segmentation, you can try:
generalized dice loss
dice loss (summed across all classes; see the sketch after this list)
categorical focal loss
boundary loss
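As an illustration, here is a minimal sketch of a Dice loss summed across classes, assuming the (batch, dim * dim, n_classes) output shape produced by the model above:
import tensorflow.keras.backend as K

def dice_loss(y_true, y_pred, smooth=1.0):
    # Per-class Dice over the flattened pixel axis; tensors are (batch, dim*dim, n_classes)
    intersection = K.sum(y_true * y_pred, axis=1)
    denom = K.sum(y_true, axis=1) + K.sum(y_pred, axis=1)
    dice = (2.0 * intersection + smooth) / (denom + smooth)
    return 1.0 - K.mean(dice)  # averaged over classes and the batch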
The winner of BraTS 2018 used autoencoder regularization (https://github.com/IAmSuyogJadhav/3d-mri-brain-tumor-segmentation-using-autoencoder-regularization); you could try this as well. The idea in that paper is that the model also learns how to better encode the features in the latent space, and that somehow helps the model with segmentation.
