I have been using U-Nets for a while now, and notice that in most of my applications they produce an over-estimation surrounding a specific class.
For example, here's a grayscale image:
And a manual segmentation of 3 classes (lesion [green], tissue [magenta], background [all else]):
The issue I notice on prediction (over-estimation at boundaries):
The typical architecture used looks something like this:
from keras.models import Model
from keras.layers import (Input, Conv2D, MaxPooling2D, Dropout,
                          UpSampling2D, concatenate, Reshape, Activation)

def get_unet(dim=128, dropout=0.5, n_classes=3):
    inputs = Input((dim, dim, 1))
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = Dropout(dropout)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = Dropout(dropout)(conv5)
    up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=3)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=3)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=3)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=3)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Conv2D(n_classes, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Reshape((dim * dim, n_classes))(conv10)
    output = Activation('softmax')(conv10)
    model = Model(inputs=[inputs], outputs=[output])
    return model
Plus:
mgpu_model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                   metrics=['accuracy'], sample_weight_mode='temporal')
open(p, 'w').write(json_string)
model_checkpoint = callbacks.ModelCheckpoint(f, save_best_only=True)
reduce_lr_cback = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2,
    patience=5, verbose=1,
    min_lr=0.05 * 0.0001)
h = mgpu_model.fit(train_gray, train_masks,
                   batch_size=64, epochs=50,
                   verbose=1, shuffle=True, validation_split=0.2, sample_weight=sample_weights,
                   callbacks=[model_checkpoint, reduce_lr_cback])
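For context, sample_weight_mode='temporal' expects sample_weights of shape (n_samples, dim * dim), one weight per pixel. A minimal sketch of how such weights could be built from the one-hot masks (make_sample_weights and the example class weights are hypothetical, not the actual values used):

import numpy as np

def make_sample_weights(masks, class_weights):
    # masks: (n_samples, dim * dim, n_classes) one-hot targets
    # class_weights: array of length n_classes
    labels = masks.argmax(axis=-1)  # (n_samples, dim * dim) class indices
    return np.take(class_weights, labels).astype(np.float32)

sample_weights = make_sample_weights(train_masks, np.array([3.0, 1.0, 0.2]))  # example values only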
My Question:
Do you have any insight or suggestions on how to change either the architecture or hyperparameters to mitigate the over-estimation? This could even include using a different architecture that may be better at more precise segmentation. (Please note I already do class balancing/weighting to compensate for imbalances in class frequency.)
You can experiment with various loss functions instead of cross-entropy. For multi-class segmentation, you can try (a sketch of one option follows the list):
generalized dice loss
dice loss (summed across all classes)
categorical focal loss
boundary loss
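For illustration, here is a minimal sketch of a soft Dice loss summed across classes, assuming one-hot targets with the class axis last; the function name and smoothing constant are my own choices, not taken from a specific library:

from tensorflow.keras import backend as K

def soft_dice_loss(y_true, y_pred, smooth=1e-6):
    # Reduce over every axis except batch and class, so this works for
    # (batch, H*W, n_classes) outputs like the model above.
    axes = tuple(range(1, K.ndim(y_pred) - 1))
    intersection = K.sum(y_true * y_pred, axis=axes)
    denom = K.sum(y_true, axis=axes) + K.sum(y_pred, axis=axes)
    dice = (2.0 * intersection + smooth) / (denom + smooth)  # per-class Dice
    return 1.0 - K.mean(dice)  # average over classes and batch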
The winner of BraTS 2018 used autoencoder regularization (https://github.com/IAmSuyogJadhav/3d-mri-brain-tumor-segmentation-using-autoencoder-regularization). You could try this as well. The idea in that paper is that jointly learning to reconstruct the input forces the model to encode the features better in the latent space, which in turn helps the segmentation head.
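A rough sketch of that idea grafted onto the U-Net above (this is not the paper's architecture; it assumes the tensors from inside get_unet are in scope, the reconstruction decoder is deliberately minimal, and the 0.1 loss weight is an arbitrary choice):

# Auxiliary reconstruction branch from the bottleneck (conv5).
recon = UpSampling2D(size=(16, 16))(conv5)  # back to (dim, dim) resolution
recon = Conv2D(1, 1, activation='linear', name='recon')(recon)  # reconstruct the grayscale input
model = Model(inputs=[inputs], outputs=[output, recon])
model.compile(optimizer='adadelta',
              loss=['categorical_crossentropy', 'mse'],
              loss_weights=[1.0, 0.1])
# fit with y = [train_masks, train_gray] so the second head reconstructs the input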
Related
I have a U-Net model trained on cloud segmentation images with the following structure:
inputs = keras.Input(input_shape)
conv1 = layers.Conv2D(64, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv1')(inputs)
conv2 = layers.Conv2D(64, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv2')(conv1)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = layers.Conv2D(128, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv3')(pool1)
conv4 = layers.Conv2D(128, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv4')(conv3)
pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
conv5 = layers.Conv2D(256, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv5')(pool2)
conv6 = layers.Conv2D(256, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv6')(conv5)
pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv6)
conv7 = layers.Conv2D(512, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv7')(pool3)
conv8 = layers.Conv2D(512, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv8')(conv7)
drop4 = layers.Dropout(0.5)(conv8)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(drop4)
conv9 = layers.Conv2D(1024, 3, activation='relu', padding='same',
                      kernel_initializer='he_normal', name='conv9')(pool4)
conv10 = layers.Conv2D(1024, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv19')(conv9)
drop5 = layers.Dropout(0.5)(conv10)
up6 = layers.Conv2D(512, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up6')(
    layers.UpSampling2D(size=(2, 2))(drop5))
merge6 = layers.concatenate([drop4, up6], axis=3)
conv11 = layers.Conv2D(512, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv11')(merge6)
conv12 = layers.Conv2D(512, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv12')(conv11)
up7 = layers.Conv2D(256, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up7')(
    layers.UpSampling2D(size=(2, 2))(conv12))
merge7 = layers.concatenate([conv6, up7], axis=3)
conv13 = layers.Conv2D(256, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv13')(merge7)
conv14 = layers.Conv2D(256, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv14')(conv13)
up8 = layers.Conv2D(128, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up8')(
    layers.UpSampling2D(size=(2, 2))(conv14))
merge8 = layers.concatenate([conv4, up8], axis=3)
conv15 = layers.Conv2D(128, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv15')(merge8)
conv16 = layers.Conv2D(128, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv16')(conv15)
up9 = layers.Conv2D(64, 2, activation='relu', padding='same',
                    kernel_initializer='he_normal', name='up9')(
    layers.UpSampling2D(size=(2, 2))(conv16))
merge9 = layers.concatenate([conv2, up9], axis=3)
conv17 = layers.Conv2D(64, 3, activation='relu', padding='same',
                       kernel_initializer='he_normal', name='conv17')(merge9)
conv18 = layers.Conv2D(1, 1, activation='sigmoid', name='conv_out')(conv17)
model = keras.Model(inputs=inputs, outputs=conv18)
I want to use Keras functions to split the computation; to do so, I have the following code:
selected_layer_idx = 1
get_first_part = K.function([model.layers[0].input], [model.layers[selected_layer_idx].output])
output_first_part = get_first_part([np.expand_dims(img, 0)])[0]
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
final_output = get_second_part(output_first_part)
If I select one of the first two convolutional layers, everything goes fine, but when selecting any other layer I get the following error:
ValueError: Found input tensor cannot be reached given provided output tensors. Please make sure the tensor KerasTensor(type_spec=TensorSpec(shape=(None, 384, 384, 4), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") is included in the model inputs when building functional model.
I am having trouble understanding what the problem is here. I have already seen this question, but the highlighted problem should be different, since here I don't have any shared names between different layers.
How could I fix this behavior?
EDIT
I have found the error: this function
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
can't be created, since we need merge9 to calculate the final output. But in order to produce merge9 we need the output of conv2, which the function cannot know about if selected_layer_idx is greater than 2. To fix this behavior we need to create a third function that calculates conv2's output, and pass its result to a fourth function that merges the two values and produces the final output, as sketched below.
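A rough, untested sketch of that fix (layer names follow the model above; whether K.function accepts an intermediate tensor as an input depends on the Keras/TF version, so treat this as an assumption, and note that selecting a layer past the later pooling stages would need the conv4/conv6/drop4 skip tensors handled the same way):

import numpy as np
from tensorflow.keras import backend as K

# First part: model input -> output of the selected layer.
get_first_part = K.function([model.layers[0].input],
                            [model.layers[selected_layer_idx].output])
# Third function: model input -> conv2, the skip tensor merge9 needs.
get_skip = K.function([model.layers[0].input],
                      [model.get_layer('conv2').output])
# Fourth function: takes both tensors the remaining graph depends on.
get_second_part = K.function(
    [model.layers[selected_layer_idx + 1].input,
     model.get_layer('conv2').output],
    [model.layers[-1].output])

x = np.expand_dims(img, 0)
deep = get_first_part([x])[0]
skip = get_skip([x])[0]
final_output = get_second_part([deep, skip])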
I'm trying to implement he_normal kernel initialization and global average pooling in my model, but I don't know how to do it.
#beginmodel
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(100, 100, 1)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(215, activation='relu'),
    Dense(10)
])
Every Keras layer has an initializer argument, so you can use it to pass your initializer method (he_normal is available in Keras).
Global average pooling for images collapses the spatial dimensions, reducing each feature map to a single value; it can be used instead of the flatten operation.
I also suggest using a softmax activation in your last layer to get probability scores if you are carrying out a classification problem.
Here is an example:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAvgPool2D, Dense

n_class, n_samples = 10, 3
X = np.random.uniform(0, 1, (n_samples, 100, 100, 1))
y = np.random.randint(0, n_class, n_samples)

model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', kernel_initializer='he_normal',
           input_shape=(100, 100, 1)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D(),
    Conv2D(128, 3, padding='same', activation='relu', kernel_initializer='he_normal'),
    GlobalAvgPool2D(),
    Dense(215, activation='relu'),
    Dense(n_class, activation='softmax')
])

model.compile('adam', 'sparse_categorical_crossentropy')
model.fit(X, y, epochs=3)
I am working on a biomedical image segmentation project. I am using the U-Net model for the job. The problem is, when I train the model, the validation loss doesn't behave as expected.
I used dice_coef_loss as the loss function, as well as in the metrics. The result of the training is the graph below. The graph clearly shows that the validation loss is not following my loss function, because the two curves are clearly different, although the train loss does follow the train dice_coef_loss values.
(The first image from the left is training and validation loss; the third one is training and validation dice_coef_loss as a metric.)
The history graph of training
(Sorry, I am not yet eligible to embed an image; please check the link.)
Here is my model:
def unet(input_size=(256, 256, 1)):
    inputs = Input(input_size)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
    return Model(inputs=[inputs], outputs=[conv10])
Here is the loss function:
import keras.backend as K

smooth = 100

def dice_coef(y_true, y_pred):
    y_truef = K.flatten(y_true)
    y_predf = K.flatten(y_pred)
    And = K.sum(y_truef * y_predf)
    return (2 * And + smooth) / (K.sum(y_truef) + K.sum(y_predf) + smooth)

def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)
Compilation
model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=["binary_accuracy", dice_coef_loss, iou])
Note
I have tried:
learning rate tuning
tuning batch size
data augmentation
changing the loss function
If anyone wants to look at the code, here is the link to the Kaggle kernel.
Additional Note
To make myself clear:
The 1st and 3rd graphs were generated from the same function using the same dataset (either at train or validation time), so I want the train_loss curve from the 1st plot to equal the train_dice_coef_loss curve from the 3rd plot, and the val_loss curve from the 1st plot to equal the val_dice_coef_loss curve from the 3rd plot.
But unfortunately the val_loss curve doesn't match the val_dice_coef_loss curve.
P.S. I am new here. Any suggestions to improve my question are appreciated. Thank you.
As ZabirAlNazi suggested, it was a problem with the library used. Changing the imports from keras to tensorflow.keras solved the issue:
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.layers import Input, Activation, BatchNormalization, Dropout, Lambda, Dense, Conv2D
I have created this NN:
#Encoder
encoder_input = Input(shape=(1,height, width))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=encoder_input, outputs=decoder_output)
model.compile(optimizer='adam', loss='mse' , metrics=['accuracy'])
clean_images = model.fit(train_images,y_train_red, epochs=200)
and train_images is created by:
train_images = np.array([ImageOperation.resizeImage(cv2.imread(train_path + str(i) + ".jpg"), height, width)
                         for i in range(train_size)])
y_train_red = [img[:, :, 2] / 255 for img in train_images]
train_images = np.array([ImageOperation.grayImg(item) for item in train_images])
and when I execute the code I receive the following error:
Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (10, 200, 200)
How can I solve it?
Your images are 2D (height × width), whereas the layer expects each sample to also have a channel dimension. Reshape your images to add the extra dimension, such as:
train_images = train_images.reshape(train_size, height, width, 1)
As the documentation says (https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D), you need a 4-dimensional input for a Conv2D layer. You have to add a channel dimension either after or before the two main dimensions of the image:
train_images = train_images.reshape(train_size, height, width, 1)
or
train_images = train_images.reshape(train_size, 1, height, width)
In both cases you have to specify the kind of input in every layer of the network with data_format="channels_first" or data_format="channels_last".
For example:
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2, data_format="channels_last")(encoder_input)
I am trying to train a convolutional neural net. For that I am using a dataset of 646 images of license plates, each containing 8 characters (0-9, A-Z; without the letter 'O' and without blank spaces, 36 possible characters in total). These are my training data X_train. Their shape is (646, 40, 200, 3) with 3 color channels. I resized them all to the same shape.
I also have a dataset containing the labels of these images, which I one-hot-encoded to a numpy array of shape (646, 8, 36); a sketch of that encoding is shown below. This data is my y_train data.
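For illustration, a minimal sketch of such an encoding, assuming plates is a list of 8-character strings and alphabet is a string of the 36 characters in use (both names are hypothetical):

import numpy as np

def encode_labels(plates, alphabet):
    # One-hot encode each of the 8 character positions over the alphabet.
    y = np.zeros((len(plates), 8, len(alphabet)), dtype=np.float32)
    for i, plate in enumerate(plates):
        for j, ch in enumerate(plate):
            y[i, j, alphabet.index(ch)] = 1.0
    return y  # shape (n_samples, 8, 36)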
Now, I am trying to apply a neural network which looks like this:
The architecture is taken from this paper: https://ieeexplore.ieee.org/abstract/document/8078501
I excluded the batch normalization part, because it is not the most interesting part for me. But I am very unsure about the top of the network, that is, the part after the last pooling layer, beginning with model.add(Flatten())...
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape = (40, 200, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(16000, activation = "relu"))
model.add(Dense(128, activation = "relu"))
model.add(Dense(36, activation = "relu"))
model.add(Dense(8*36, activation="softmax"))
model.add(keras.layers.Reshape((8, 36)))
Thank you very much in advance!
Assuming the image matches your model architecture, the code below can be used to create the model. Ensure you have some padding for the input images.
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Input, Reshape, Concatenate
def create_model(input_shape=(40, 200, 3)):
    input_img = Input(shape=input_shape)
    model = Conv2D(32, kernel_size=(3, 3), input_shape=(40, 200, 3), activation="relu")(input_img)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    backbone = Flatten()(model)
    branches = []
    for i in range(8):
        branches.append(backbone)
        branches[i] = Dense(16000, activation="relu", name="branch_"+str(i)+"_Dense_16000")(branches[i])
        branches[i] = Dense(128, activation="relu", name="branch_"+str(i)+"_Dense_128")(branches[i])
        branches[i] = Dense(36, activation="softmax", name="branch_"+str(i)+"_output")(branches[i])
    output = Concatenate(axis=1)(branches)
    output = Reshape((8, 36))(output)
    model = Model(input_img, output)
    return model
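A hedged usage sketch: since the model outputs shape (8, 36) with a softmax per character position, categorical cross-entropy against your (646, 8, 36) y_train should work; the optimizer, batch size, and epoch count below are placeholder choices:

model = create_model()
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.1)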