I'm trying to separate close objects as was shown in the U-Net paper (here). For this, one generates weight maps which can be used for pixel-wise losses. The following code describes the network I use from this blog post.
x_train_val = # list of images (imgs, 256, 256, 3)
y_train_val = # list of masks (imgs, 256, 256, 1)
y_weights = # list of weight maps (imgs, 256, 256, 1) according to the blog post
# visual inspection confirms the correct calculation of these maps
# Blog posts' loss function
def my_loss(target, output):
    """Return the negated sum of ``target * output`` over the last axis.

    Args:
        target: Ground-truth tensor.
        output: Model output tensor; its rank selects the axis to reduce.

    Returns:
        Tensor with the last axis of ``output`` summed out and negated.
    """
    last_axis = len(output.get_shape()) - 1
    weighted = target * output
    return -tf.reduce_sum(weighted, last_axis)
# Standard Unet model from blog post
# Keras backend epsilon as a float32 tensor; the model below clips the
# probabilities to [_epsilon, 1 - _epsilon] before taking their logarithm.
_epsilon = tf.convert_to_tensor(K.epsilon(), np.float32)
def make_weighted_loss_unet(input_shape, n_classes):
    """Build a U-Net that outputs weighted log-probabilities.

    The model takes an image and a weight map and returns
    ``log(p) * weight_map``, where ``p`` is the renormalised, clipped
    softmax output of the final 1x1 convolution.

    Args:
        input_shape: Shape of one input image; its first two entries are
            reused as the spatial size of the weight-map input.
        n_classes: Number of filters of the final 1x1 convolution and number
            of channels of the weight-map input.

    Returns:
        A ``Model`` with inputs ``[image, weight_map]`` and one output.
    """
    conv_kwargs = dict(activation='relu', padding='same',
                       kernel_initializer='he_normal')

    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))

    # Contracting path: conv, conv, dropout, then keep the tensor for the
    # skip connection before pooling.
    skips = []
    x = ip
    for filters, rate in ((64, 0.1), (128, 0.2), (256, 0.3), (512, 0.4)):
        x = L.Conv2D(filters, 3, **conv_kwargs)(x)
        x = L.Conv2D(filters, 3, **conv_kwargs)(x)
        x = L.Dropout(rate)(x)
        skips.append(x)
        x = L.MaxPool2D()(x)

    # Bottleneck.
    x = L.Conv2D(1024, 3, **conv_kwargs)(x)
    x = L.Conv2D(1024, 3, **conv_kwargs)(x)
    x = L.Dropout(0.5)(x)

    # Expanding path: upsample, concatenate with the matching skip tensor
    # (most recent first), conv, conv, dropout.
    for filters, rate in ((512, 0.4), (256, 0.3), (128, 0.2), (64, 0.1)):
        x = L.Conv2DTranspose(filters, 2, strides=2,
                              kernel_initializer='he_normal',
                              padding='same')(x)
        x = L.Concatenate()([x, skips.pop()])
        x = L.Conv2D(filters, 3, **conv_kwargs)(x)
        x = L.Conv2D(filters, 3, **conv_kwargs)(x)
        x = L.Dropout(rate)(x)

    c10 = L.Conv2D(n_classes, 1, activation='softmax',
                   kernel_initializer='he_normal')(x)

    # Mimic crossentropy loss
    def _renormalise(t):
        return t / tf.reduce_sum(t, len(t.get_shape()) - 1, True)

    def _clip(t):
        return tf.clip_by_value(t, _epsilon, 1. - _epsilon)

    def _log(t):
        return K.log(t)

    c11 = L.Lambda(_renormalise)(c10)
    c11 = L.Lambda(_clip)(c11)
    c11 = L.Lambda(_log)(c11)
    weighted_sm = L.multiply([c11, weight_ip])

    return Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
I then compile and fit the model as is shown below:
# Build the two-input training model for 256x256 RGB images.
# NOTE(review): with n_classes=1 the softmax of the output layer is taken over
# a single channel, which evaluates to 1 for every pixel — confirm this is
# intended.
model = make_weighted_loss_unet((256, 256, 3), 1) # shape of input, number of classes
# my_loss consumes the weighted log-probabilities that the model outputs.
model.compile(optimizer='adam', loss=my_loss, metrics=['acc'])
# The weight maps are passed as a second model input next to the images.
model.fit([x_train_val, y_weights], y_train_val, validation_split=0.1, epochs=1)
The model can then train as usual. However, the loss doesn't seem to improve much. Furthermore, when I try to predict on new images, I obviously don't have the weight maps (because they are calculated on the labeled masks). I tried to use empty / zero arrays shaped like the weight map but that only yields blank / zero predictions. I also tried different metrics and more standard losses without any success.
Did anyone face the same issue or have an alternative in implementing this weighted loss? Thanks in advance. BBQuercus
A simpler way to write custom loss with pixel weights
In your code, the loss is scattered around, between my_loss and make_weighted_loss_unet functions. You can add targets as an input and use model.add_loss to structure the code better :
def make_weighted_loss_unet(input_shape, n_classes):
    """Build a U-Net that carries its own pixel-weighted loss.

    Weight maps and targets are fed as extra model inputs and the loss is
    attached with ``model.add_loss``, so no ``loss`` argument is needed in
    ``model.compile``.

    Args:
        input_shape: Shape of one input image; its first two entries are
            reused as the spatial size of the weight and target inputs.
        n_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        A ``Model`` taking ``[image, weight_map, targets]`` and returning the
        softmax output.
    """
    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
    targets = L.Input(shape=input_shape[:2] + (n_classes,))
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)
    # The model has to exist before a loss can be attached to it.
    model = Model(inputs=[ip, weight_ip, targets], outputs=[c10])
    model.add_loss(pixel_weighted_cross_entropy(weight_ip, targets, c10))
    # NO NEED to specify loss in model.compile
    return model
def pixel_weighted_cross_entropy(weights, targets, predictions):
    """Return the mean categorical cross-entropy scaled by per-pixel weights.

    Args:
        weights: Weight tensor, either shaped like the per-pixel loss or with
            one extra trailing channel axis.
        targets: Ground-truth class probabilities; classes on the last axis.
        predictions: Predicted class probabilities, same shape as ``targets``.

    Returns:
        Scalar tensor: the mean of ``weights * cross_entropy``.
    """
    loss_val = keras.losses.categorical_crossentropy(targets, predictions)
    # The cross-entropy sums out the class axis. When the weights still carry
    # a trailing channel axis, give the loss one too so the product stays
    # element-wise instead of broadcasting across the wrong axes.
    if K.ndim(weights) == K.ndim(loss_val) + 1:
        loss_val = K.expand_dims(loss_val, axis=-1)
    weighted_loss_val = weights * loss_val
    return K.mean(weighted_loss_val)
If you don't refactor your code to the above approach, next section shows how to still run inference without issues
How to run your model in inference
Option 1 : Use another Model object for inference
You can create a Model used for training and another used for inference. Both are largely the same except that the inference Model does not take weights_ip, and gives an early output c10.
Here's an example code that adds an argument is_training=True to decide which Model to return :
def make_weighted_loss_unet(input_shape, n_classes, is_training=True):
    """Build the training or the inference variant of the U-Net.

    Args:
        input_shape: Shape of one input image; its first two entries are
            reused as the spatial size of the weight-map input.
        n_classes: Number of output channels of the final 1x1 convolution.
        is_training: When true, return the two-input model whose output is
            the weighted log-probability map; otherwise return the model
            that maps an image to the softmax output.

    Returns:
        A ``Model`` with inputs ``[image, weight_map]`` when training, or
        ``[image]`` for inference.
    """
    ip = L.Input(shape=input_shape)
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(ip)
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)

    if not is_training:
        # Inference needs neither the weight map nor the log/weighting head.
        # Every call creates fresh layers, so the trained weights have to be
        # transferred to this model before predicting.
        return Model(inputs=[ip], outputs=[c10])

    # Mimic crossentropy loss
    c11 = L.Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(c10)
    c11 = L.Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(c11)
    c11 = L.Lambda(lambda x: K.log(x))(c11)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
    weighted_sm = L.multiply([c11, weight_ip])
    return Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
Option 2 : Use K.function
If you don't want to mess with your Model definition method (make_weighted_loss_unet) and want to achieve the same result outside, you can use a function that extracts the subgraph relevant for inference.
In your inference function:
from keras import backend as K
model = make_weighted_loss_unet(input_shape, n_classes)
# Map the named image input straight to the named softmax layer, so the
# weight-map input is not needed at inference time.
inference_function = K.function(
    [model.get_layer("input_layer").input],
    [model.get_layer("output_softmax_layer").output])
# NOTE(review): K.function is documented as taking a list of inputs and
# returning a list of outputs — confirm new_image is passed in that form.
predicted_heatmap = inference_function(new_image)
Note that you'll have to give name= to your ip layer and c10 layer to be able to retrieve them via model.get_layer(name) :
# Name the image input so it can be retrieved with model.get_layer("input_layer").
ip = L.Input(shape=input_shape, name="input_layer")
# Name the softmax layer so its output can serve as the inference output.
c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal', name="output_softmax_layer")(conv9)
I have a U-Net model trained on cloud segmentation images with the following structure
# U-Net built with the functional API: four pooling stages, a 1024-filter
# bottleneck, and four upsampling stages that each concatenate an earlier
# encoder tensor before convolving again.
inputs = keras.Input(input_shape)
# Encoder stage 1 (64 filters).
conv1 = layers.Conv2D(64, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv1')(inputs)
conv2 = layers.Conv2D(64, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv2')(conv1)
pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
# Encoder stage 2 (128 filters).
conv3 = layers.Conv2D(128, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv3')(pool1)
conv4 = layers.Conv2D(128, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv4')(conv3)
pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)
# Encoder stage 3 (256 filters).
conv5 = layers.Conv2D(256, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv5')(pool2)
conv6 = layers.Conv2D(256, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv6')(conv5)
pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv6)
# Encoder stage 4 (512 filters), followed by dropout; the dropped-out tensor
# is the one reused by the first skip connection.
conv7 = layers.Conv2D(512, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv7')(pool3)
conv8 = layers.Conv2D(512, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv8')(conv7)
drop4 = layers.Dropout(0.5)(conv8)
pool4 = layers.MaxPooling2D(pool_size=(2, 2))(drop4)
# Bottleneck (1024 filters) with dropout.
conv9 = layers.Conv2D(1024, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv9')(pool4)
# NOTE(review): this layer is bound to `conv10` but named 'conv19', unlike the
# other conv layers whose names match their variables — presumably a typo.
# Renaming it would change the layer name of an already trained model.
conv10 = layers.Conv2D(1024, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv19')(conv9)
drop5 = layers.Dropout(0.5)(conv10)
# Decoder stage 1: upsample, 2x2 conv, concatenate with drop4.
up6 = layers.Conv2D(512, 2, activation='relu', padding='same',
kernel_initializer='he_normal', name='up6')(
layers.UpSampling2D(size=(2, 2))(drop5))
merge6 = layers.concatenate([drop4, up6], axis=3)
conv11 = layers.Conv2D(512, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv11')(merge6)
conv12 = layers.Conv2D(512, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv12')(conv11)
# Decoder stage 2: concatenate with conv6.
up7 = layers.Conv2D(256, 2, activation='relu', padding='same',
kernel_initializer='he_normal', name='up7')(
layers.UpSampling2D(size=(2, 2))(conv12))
merge7 = layers.concatenate([conv6, up7], axis=3)
conv13 = layers.Conv2D(256, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv13')(merge7)
conv14 = layers.Conv2D(256, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv14')(conv13)
# Decoder stage 3: concatenate with conv4.
up8 = layers.Conv2D(128, 2, activation='relu', padding='same',
kernel_initializer='he_normal', name='up8')(
layers.UpSampling2D(size=(2, 2))(conv14))
merge8 = layers.concatenate([conv4, up8], axis=3)
conv15 = layers.Conv2D(128, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv15')(merge8)
conv16 = layers.Conv2D(128, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv16')(conv15)
# Decoder stage 4: concatenate with conv2.
up9 = layers.Conv2D(64, 2, activation='relu', padding='same',
kernel_initializer='he_normal', name='up9')(
layers.UpSampling2D(size=(2, 2))(conv16))
merge9 = layers.concatenate([conv2, up9], axis=3)
conv17 = layers.Conv2D(64, 3, activation='relu', padding='same',
kernel_initializer='he_normal', name='conv17')(merge9)
# Output head: single-filter 1x1 convolution with sigmoid activation.
conv18 = layers.Conv2D(1, 1, activation='sigmoid', name='conv_out')(conv17)
model = keras.Model(inputs=inputs, outputs=conv18)
I want to use Keras functions to split the computation, to do so I have the following function
# Index into model.layers of the layer whose output ends the first half.
selected_layer_idx = 1
# First half: model input -> output of the selected layer.
get_first_part = K.function([model.layers[0].input], [model.layers[selected_layer_idx].output])
# Add a leading batch axis to the single image and take the only output.
output_first_part = get_first_part([np.expand_dims(img, 0)])[0]
# Second half: input of the next layer -> final model output.
# NOTE(review): this needs every downstream layer to depend only on that one
# input tensor. The concatenate layers of the U-Net also consume drop4, conv6,
# conv4 and conv2, so those tensors have to be supplied as well when the split
# point lies after them.
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
# NOTE(review): unlike the first call, the array is passed without a wrapping
# list and the result is not indexed — confirm this is intended.
final_output = get_second_part(output_first_part)
If I select one of the first two convolutional layers everything goes fine but when selecting any other layer I get the following error.
ValueError: Found input tensor cannot be reached given provided output tensors. Please make sure the tensor KerasTensor(type_spec=TensorSpec(shape=(None, 384, 384, 4), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") is included in the model inputs when building functional model.
I am having trouble understanding what the problem is here. I have already seen this question, but the problem highlighted there should be different, since here I don't have any shared names between different layers.
How could I fix this behavior?
I have found the error, this function
get_second_part = K.function([model.layers[selected_layer_idx + 1].input], [model.layers[-1].output])
can't be created since we need merge9 to calculate the final output. But in order to produce merge9 we need to have info about conv2 which is not known by the function if selected_layer_idx is greater than 2. To fix this behavior we need to create a third function that calculates conv2's output and pass it to a fourth function that merges the two values and produces the final output
I am currently training a large object detection model in Tensorflow 2 with a custom training loop using gradient tape. The problem is that the model is not improving the loss as the gradients are very low. I reproduced the problem on a simple classification task using cifar10 and discovered that a small model trains fine, while a larger model (VGG16) does not improve the loss at all. Below is some code for reproducing the problem.
VGG16 model:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D, BatchNormalization, Input, Concatenate
import os
def create_vgg16(number_classes, include_fully=True, input_shape=(300, 300, 3), input_tensor=None):
    """Build a VGG16-style network.

    Args:
        number_classes: Number of units of the softmax output layer.
        include_fully: Whether to append the flatten / fully connected /
            softmax head; without it the model ends at ``pool5``.
        input_shape: Shape of one input image; only used when
            ``input_tensor`` is None.
        input_tensor: Optional existing tensor to build the network on.

    Returns:
        A ``tf.keras`` model named ``'vgg16'``.
    """
    # Without the else branch a freshly created Input would be overwritten by
    # the None input_tensor.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # (filters, number of 3x3 convolutions) for the five blocks.
    blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    x = img_input
    for block_idx, (filters, n_convs) in enumerate(blocks, start=1):
        for conv_idx in range(1, n_convs + 1):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       kernel_initializer='he_normal',
                       name='conv%d_%d' % (block_idx, conv_idx))(x)
        if block_idx < len(blocks):
            x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same',
                             name='pool%d' % block_idx)(x)
    # The last pooling layer uses a 3x3 window with stride 1, so it does not
    # reduce the spatial size any further.
    x = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same', name='pool5')(x)

    if include_fully:
        x = Flatten(name='flatten')(x)
        x = Dense(4096, activation='relu', name='fc1')(x)
        x = Dense(4096, activation='relu', name='fc2')(x)
        x = Dense(number_classes, activation='softmax', name='predictions')(x)

    # Trace a caller-provided tensor back to its source inputs; otherwise the
    # Input created above is the model input.
    if input_tensor is not None:
        inputs = tf.keras.utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = tf.keras.models.Model(inputs, x, name='vgg16')
    return model
Small CNN model:
def create_small_cnn(n_classes, input_shape=(32, 32, 3)):
    """Build a small two-convolution classifier.

    Args:
        n_classes: Number of units of the softmax output layer.
        input_shape: Shape of one input image.

    Returns:
        A ``tf.keras.Model`` named ``'small_cnn'``.
    """
    kl = tf.keras.layers
    img_input = tf.keras.Input(shape=input_shape)
    stack = (
        kl.Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1'),
        kl.Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2'),
        kl.Flatten(name='flatten'),
        kl.Dense(16, activation='relu', name='fc1'),
        kl.Dense(n_classes, activation='softmax', name='softmax'),
    )
    features = img_input
    for layer in stack:
        features = layer(features)
    return tf.keras.Model(img_input, features, name='small_cnn')
Training loop:
def main():
    """Train a CIFAR-10 classifier with a custom ``tf.GradientTape`` loop.

    Loads CIFAR-10, one-hot encodes the labels, scales the images to
    [0, 1], and trains for 100 epochs. The training loss is printed every
    200 steps, and training and validation accuracy after every epoch.
    """
    number_classes = 10

    # Load and one hot encode data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    y_train = tf.one_hot(tf.reshape(y_train, [-1]), number_classes).numpy()
    y_test = tf.one_hot(tf.reshape(y_test, [-1]), number_classes).numpy()

    # Define model
    model = create_vgg16(number_classes, input_shape=(32, 32, 3))
    # model = create_small_cnn(number_classes, input_shape=(32, 32, 3))

    # Instantiate an optimizer to train the model. `learning_rate` replaces
    # the deprecated `lr` alias; `decay=0.0` was dropped as a no-op.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                         beta_2=0.999, epsilon=1e-08)

    # Instantiate a loss function. The models end in a softmax, so the
    # default from_logits=False applies.
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    # Prepare the metrics.
    train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
    val_acc_metric = tf.keras.metrics.CategoricalAccuracy()

    # Prepare the training dataset.
    batch_size = 64
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(x_train / 255, tf.float32), tf.cast(y_train, tf.float32)))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    # Prepare the validation dataset. Shuffling is dropped because sample
    # order does not affect the accuracy.
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(x_test / 255, tf.float32), tf.cast(y_test, tf.float32)))
    val_dataset = val_dataset.batch(batch_size)

    for epoch in range(100):
        print('Start of epoch %d' % (epoch,))

        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
            with tf.GradientTape() as tape:
                probs = model(x_batch_train, training=True)
                loss_value = loss_fn(y_batch_train, probs)
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            # Update the accuracy with the whole batch, not with one sample
            # and a truncated prediction vector.
            train_acc_metric(y_batch_train, probs)

            if step % 200 == 0:
                print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))

        # Display metrics at the end of each epoch.
        train_acc = train_acc_metric.result()
        print('Training acc over epoch: %s' % (float(train_acc),))
        # Reset training metrics at the end of each epoch
        train_acc_metric.reset_states()

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in val_dataset:
            val_probs = model(x_batch_val, training=False)
            val_acc_metric(y_batch_val, val_probs)
        val_acc = val_acc_metric.result()
        # Reset so each epoch reports its own validation accuracy.
        val_acc_metric.reset_states()
        print('Validation acc: %s' % (float(val_acc),))


if __name__ == '__main__':
    main()
If you run the code shown you will see the network training fine while using the small CNN model. But on the other hand it does not work on the exact same dataset with the same preprocessing using a standard VGG16 model. To make matters more confusing, the VGG model will train perfectly fine when using model.fit instead of custom training loop with gradient tape.
Does anybody have an idea why this is the case and how to fix this problem?
I have been using U-nets for a while now, and notice that in most of my applications, it generates an over-estimation surrounding a specific class.
For example, here's a grayscale image:
And a manual segmentation of 3 classes (lesion [green], tissue [magenta], background [all else]):
The issue I notice on prediction (over-estimation at boundaries):
The typical architecture used looks something like this:
def get_unet(dim=128, dropout=0.5, n_classes=3):
    """Build a U-Net for per-pixel classification of square grayscale images.

    Args:
        dim: Height and width of the single-channel input image.
        dropout: Dropout rate applied after the 512- and 1024-filter stages.
        n_classes: Number of output classes.

    Returns:
        A ``Model`` mapping ``(dim, dim, 1)`` images to a
        ``(dim * dim, n_classes)`` softmax output.
    """
    conv_kwargs = dict(activation='relu', padding='same',
                       kernel_initializer='he_normal')

    inputs = Input((dim, dim, 1))

    # Contracting path; the tensor kept for the skip connection is taken
    # after the optional dropout and before pooling.
    skips = []
    x = inputs
    for filters, use_dropout in ((64, False), (128, False), (256, False), (512, True)):
        x = Conv2D(filters, 3, **conv_kwargs)(x)
        x = Conv2D(filters, 3, **conv_kwargs)(x)
        if use_dropout:
            x = Dropout(dropout)(x)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Bottleneck.
    x = Conv2D(1024, 3, **conv_kwargs)(x)
    x = Conv2D(1024, 3, **conv_kwargs)(x)
    x = Dropout(dropout)(x)

    # Expanding path: upsample, concatenate with the matching encoder tensor
    # (most recent first), then two convolutions.
    for filters in (512, 256, 128, 64):
        x = concatenate([UpSampling2D(size=(2, 2))(x), skips.pop()], axis=3)
        x = Conv2D(filters, 3, **conv_kwargs)(x)
        x = Conv2D(filters, 3, **conv_kwargs)(x)

    # NOTE(review): the ReLU here clips negative class scores before the
    # softmax — confirm this is intended.
    conv10 = Conv2D(n_classes, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal')(x)
    # Flatten the spatial axes so every pixel becomes one row of class scores.
    conv10 = Reshape((dim * dim, n_classes))(conv10)
    output = Activation('softmax')(conv10)

    model = Model(inputs=[inputs], outputs=[output])
    return model
# 'temporal' sample weighting lets one weight be supplied per row of the
# reshaped (dim * dim, n_classes) output, i.e. per pixel.
mgpu_model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                   metrics=['accuracy'], sample_weight_mode='temporal')

# Write the model JSON through a context manager so the file is closed (and
# flushed) deterministically.
with open(p, 'w') as json_file:
    json_file.write(json_string)

# Keep only the best checkpoint.
model_checkpoint = callbacks.ModelCheckpoint(f, save_best_only=True)
# Multiply the learning rate by 0.2 after 5 epochs without val_loss
# improvement.
reduce_lr_cback = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2,
    patience=5, verbose=1,
    min_lr=0.05 * 0.0001)

h = mgpu_model.fit(train_gray, train_masks,
                   batch_size=64, epochs=50,
                   verbose=1, shuffle=True, validation_split=0.2, sample_weight=sample_weights,
                   callbacks=[model_checkpoint, reduce_lr_cback])
My Question:
Do you have any insight or suggestion on how to change either the architecture or hyperparameters to mitigate the over-estimation? This could include even using a different architecture that may be better at more precise segmentation. (Please note I already do class balancing/weighting to compensate for imbalances in class frequency)
You can experiment with various loss functions instead of cross entropy. For multi-class segmentation, you can try:
generalized dice loss
dice loss (summed across all classes)
categorical focal loss
boundary loss
The winner of BraTS 2018 used autoencoder regularization (https://github.com/IAmSuyogJadhav/3d-mri-brain-tumor-segmentation-using-autoencoder-regularization). You could try this as well. The idea in that paper is that the model also learns how to better encode the features in the latent space, and that somehow helps the model with segmentation.
I am trying to detect the single pixel location of a single object in an image. I have a keras CNN regression network with my image tensor as the input, and a 3 item vector as the output.
First item: Is a 1 (if an object was found) or 0 (no object was found)
Second item: Is a number between 0 and 1 which indicates how far along the x axis is the object
Third item: Is a number between 0 and 1 which indicates how far along the y axis is the object
I have trained the network on 2000 test images and 500 validation images, and the val_loss is far less than 1, and the val_acc is best at around 0.94. Excellent.
But then when I predict the output, I find the values for all three output items are not between 0 and 1, they are actually between -2 and 3 approximately. All three items should be between 0 and 1.
I have not used any non-linear activation functions on the output layer, and have used relus for all non-output layers. Should I be using a softmax, even though it is non-linear? The second and third items are predicting the x and y axis of the image, which appear to me as linear quantities.
Here is my keras network:
# CNN regressor: three conv stages on a 256x256 single-channel image,
# followed by a dense head with a 3-value output.
inputs = Input((256, 256, 1))
# Filter count of the first stage; later stages use multiples of it.
base_kernels = 64
# Stage 1, operating at 256 x 256.
conv1 = Conv2D(base_kernels, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
conv1 = BatchNormalization()(conv1)
conv1 = Conv2D(base_kernels, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
conv1 = BatchNormalization()(conv1)
conv1 = Dropout(0.2)(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
# Stage 2, operating at 128 x 128.
conv2 = Conv2D(base_kernels * 2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
conv2 = BatchNormalization()(conv2)
conv2 = Conv2D(base_kernels * 2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
conv2 = BatchNormalization()(conv2)
conv2 = Dropout(0.2)(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# Stage 3, operating at 64 x 64.
conv3 = Conv2D(base_kernels * 4, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
conv3 = BatchNormalization()(conv3)
conv3 = Conv2D(base_kernels * 4, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
conv3 = BatchNormalization()(conv3)
conv3 = Dropout(0.2)(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
# Dense head.
flat = Flatten()(pool3)
dense = Dense(256, activation='relu')(flat)
# NOTE(review): no activation is given, so the three outputs are linear and
# not bounded to any range.
output = Dense(3)(dense)
model = Model(inputs=[inputs], outputs=[output])
optimizer = Adam(lr=1e-4)
# NOTE(review): 'accuracy' is tracked next to a mean-absolute-error loss on
# continuous targets — confirm the metric is meaningful here.
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['accuracy'])
Can anyone please help? Thanks! :)
The sigmoid activation produces outputs between zero and one, so if you use it as activation of your last layer(the output), the network's output will be between zero and one.
# A sigmoid on the output layer bounds each of the three outputs to (0, 1).
output = Dense(3, activation="sigmoid")(dense)
Here is my discriminator architecture:
def build_discriminator(img_shape,embedding_shape):
    """Build a two-branch discriminator over an image and an embedding.

    Args:
        img_shape: Shape of one input image.
        embedding_shape: Shape of one embedding input.

    Returns:
        A ``Model`` taking ``[image, embedding]`` and returning one sigmoid
        validity score.
    """
    # Image branch: five stride-2 5x5 convolutions.
    model1 = Sequential()
    model1.add(Conv2D(32, kernel_size=5, strides=2, input_shape=img_shape, padding="same"))
    for n_filters in (48, 64, 128, 256):
        model1.add(Conv2D(n_filters, kernel_size=5, strides=2, padding="same"))

    # Embedding branch. It has to be created before layers are added to it.
    model2 = Sequential()
    model2.add(Dense(50, input_shape=embedding_shape))

    # NOTE(review): `Merge` is the legacy Keras 1 merging layer — confirm it
    # is available in the Keras version in use.
    merged_model = Sequential()
    merged_model.add(Merge([model1, model2], mode='concat'))
    merged_model.add(Dense(1, activation='sigmoid', name='output_layer'))

    img = Input(shape=img_shape)
    emb = Input(shape=embedding_shape)
    validity = merged_model([img,emb])
    return Model([img,emb],validity)
and here is the generator architecture:
def build_generator(latent_dim=484):
    """Build the generator that maps a latent vector to a 3-channel tensor.

    Args:
        latent_dim: Length of the latent input vector.

    Returns:
        A ``Model`` mapping the latent vector to the output of the final
        3-filter convolution.
    """
    model = Sequential()
    # Project the latent vector and fold it into a 2x2 feature map.
    model.add(Dense(624 * 2 * 2, activation="relu", input_dim=latent_dim))
    model.add(Reshape((2, 2, 624)))
    # Stack of 5x5 'same' convolutions narrowing down to 3 channels.
    for n_filters in (512, 256, 128, 64, 32, 3):
        model.add(Conv2D(n_filters, kernel_size=5, padding="same"))

    noise = Input(shape=(latent_dim,))
    generated = model(noise)
    return Model(noise, generated)
and here is how I am making the GAN network:
optimizer = Adam(0.0004, 0.5)
# Build the generator
generator = build_generator()
# The generator takes noise as input and generates imgs
# (100 + 384 matches the generator's default latent_dim of 484)
z = Input(shape=(100+384,))
img = generator(z)
# For the combined model we will only train the generator
# NOTE(review): `discriminator` is not created anywhere in this excerpt.
discriminator.trainable = False
# The discriminator takes generated images as input and determines validity
# NOTE(review): `temp` is not defined in this excerpt. It feeds the
# discriminator but is not listed among the inputs of `combined` below.
valid = discriminator([img,temp])
# The combined model (stacked generator and discriminator)
# Trains the generator to fool the discriminator
combined = Model(z, valid)
combined.compile(loss='binary_crossentropy', optimizer=optimizer)
The discriminator has 2 models; it gets as input an image of shape 128x128x3 and an embedding of shape 1x128x3, and both models are then merged. The generator model just gets noise and generates a 128x128x3 image. At the line combined = Model(z, valid) I am getting the following error:
RuntimeError: Graph disconnected: cannot obtain value for tensor Tensor("input_5:0", shape=(?, 1, 128, 3), dtype=float32) at layer "input_5". The following previous layers were accessed without issue: ['input_4', 'model_2']
which I think is because the discriminator can't find the embedding input, but I am feeding it a tensor of shape (1,128,3), just like noise is being fed to the generator model. Can anyone please help me find what I am doing wrong?
And after everything is set here is how I will generate images from noise and embedding vector merged together and discriminator will take image and vector to identify fakes:
# NOTE(review): this excerpt is incomplete — the loop bodies and the
# definitions of latent_code, t, vects, gen_pics, valid and fake are not shown.
#texts has embedding vectors
pics=np.array(pics) . #images
# Gaussian noise, 100 values per sample
noise = np.random.normal(0, 1, (batch_size, 100))
for j in range(len(texts)): #appending embedding at the end of noise
gen_imgs = generator.predict(latent_code) #gen making fakes
for im in gen_imgs:
t=np.expand_dims(t, axis=0)
vects=np.array(vects) #vector of ?,1,128,3
#disc marking fakes and reals
d_loss_real = discriminator.train_on_batch([pics,vects], valid)
d_loss_fake = discriminator.train_on_batch([gen_pics,vects], fake)
# Average of the real and fake discriminator losses
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# Generator step through the combined model
g_loss = combined.train_on_batch(latent_code, valid)
You have forgotten to add the temp as one of the inputs of the GAN (that's why the error says it can't feed the corresponding tensor since it is essentially disconnected):
# List `temp` next to `z` so it is a declared input of the combined model.
combined = Model([z, temp], valid)
As a side note, I highly recommend using the Keras Functional API for building complicated, multi-branch models like your discriminator. It is much easier to use, more flexible, and less error-prone.
For example, this is the discriminator you have written, rewritten using the Functional API. I personally think it is much easier to follow:
def build_discriminator(img_shape,embedding_shape):
    """Build the two-branch discriminator with the functional API.

    Args:
        img_shape: Shape of one input image.
        embedding_shape: Shape of one embedding input.

    Returns:
        A ``Model`` taking ``[image, embedding]`` and returning one sigmoid
        score from the layer named ``'output_layer'``.
    """
    # Image branch: five stride-2 conv blocks; every block except the first
    # applies batch normalisation before the LeakyReLU.
    input_img = Input(shape=img_shape)
    x = input_img
    for block_idx, n_filters in enumerate((32, 48, 64, 128, 256)):
        x = Conv2D(n_filters, kernel_size=5, strides=2, padding="same")(x)
        if block_idx > 0:
            x = BatchNormalization(momentum=0.8)(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = Dropout(0.25)(x)
    x = Flatten()(x)
    output_img = Dense(200)(x)

    # Embedding branch: three dense layers, then flatten.
    input_emb = Input(shape=embedding_shape)
    y = input_emb
    for units in (50, 100, 200):
        y = Dense(units)(y)
    output_emb = Flatten()(y)

    # Join both branches and score the pair.
    merged = concatenate([output_img, output_emb])
    output_merge = Dense(1, activation='sigmoid', name='output_layer')(merged)
    return Model([input_img, input_emb], output_merge)