I am currently training a large object detection model in TensorFlow 2 with a custom training loop using gradient tape. The problem is that the model is not improving the loss because the gradients are very low. I reproduced the problem on a simple classification task using CIFAR-10 and discovered that a small model trains fine with no problem, while a larger model (VGG16) does not improve the loss at all. Below is some code for reproducing the problem.
VGG16 model:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D, BatchNormalization, Input, Concatenate
import os
def create_vgg16(number_classes, include_fully=True, input_shape=(300, 300, 3), input_tensor=None):
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor
    x = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv1_1')(img_input)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv1_2')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv2_1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv2_2')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv3_1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv3_2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv3_3')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool3')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv4_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv4_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv4_3')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool4')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv5_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv5_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', name='conv5_3')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same', name='pool5')(x)
    if include_fully:
        x = Flatten(name='flatten')(x)
        x = Dense(4096, activation='relu', name='fc1')(x)
        x = Dense(4096, activation='relu', name='fc2')(x)
        x = Dense(number_classes, activation='softmax', name='predictions')(x)
    if input_tensor is not None:
        inputs = tf.keras.utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    model = tf.keras.models.Model(inputs, x, name='vgg16')
    return model
Small CNN model:
def create_small_cnn(n_classes, input_shape=(32, 32, 3)):
    img_input = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
    x = tf.keras.layers.Flatten(name='flatten')(x)
    x = tf.keras.layers.Dense(16, activation='relu', name='fc1')(x)
    x = tf.keras.layers.Dense(n_classes, activation='softmax', name='softmax')(x)
    model = tf.keras.Model(img_input, x, name='small_cnn')
    return model
Training loop:
def main():
    number_classes = 10

    # Load and one-hot encode data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    x_train, x_test = x_train, x_test
    y_train = tf.reshape(y_train, [-1])
    y_train = tf.one_hot(y_train, number_classes).numpy()
    y_test = tf.reshape(y_test, [-1])
    y_test = tf.one_hot(y_test, number_classes).numpy()

    # Define model
    model = create_vgg16(number_classes, input_shape=(32, 32, 3))
    # model = create_small_cnn(number_classes, input_shape=(32, 32, 3))

    # Instantiate an optimizer to train the model.
    optimizer = tf.keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # Instantiate a loss function.
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    # Prepare the metrics.
    train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
    val_acc_metric = tf.keras.metrics.CategoricalAccuracy()

    # Prepare the training dataset.
    batch_size = 64
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(x_train/255, tf.float32),
         tf.cast(y_train, tf.int64)))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    # Prepare the validation dataset.
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(x_test/255, tf.float32),
         tf.cast(y_test, tf.int64)))
    val_dataset = val_dataset.shuffle(buffer_size=1024).batch(batch_size)

    model.summary()

    for epoch in range(100):
        print('Start of epoch %d' % (epoch,))
        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
            with tf.GradientTape() as tape:
                logits = model(x_batch_train)
                loss_value = loss_fn(y_batch_train, logits)
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            train_acc_metric(y_batch_train[0], logits[0][:-1])
            if step % 200 == 0:
                print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))

        # Display metrics at the end of each epoch.
        train_acc = train_acc_metric.result()
        print('Training acc over epoch: %s' % (float(train_acc),))
        # Reset training metrics at the end of each epoch
        train_acc_metric.reset_states()

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in val_dataset:
            val_logits = model(x_batch_val)
            val_acc_metric(y_batch_val[0], val_logits[0][:-1])
        val_acc = val_acc_metric.result()
        val_acc_metric.reset_states()
        print('Validation acc: %s' % (float(val_acc),))


if __name__ == '__main__':
    main()
If you run the code shown, you will see that the network trains fine when using the small CNN model. But it does not work on the exact same dataset, with the same preprocessing, when using a standard VGG16 model. To make matters more confusing, the VGG model trains perfectly fine when using model.fit instead of the custom training loop with gradient tape.
Does anybody have an idea why this is the case and how to fix this problem?
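For reference, the model.fit variant I mean is essentially the following sketch (same model, optimizer, loss, and tf.data pipelines as in main() above):

# Sketch of the model.fit baseline that trains fine, reusing the objects built in main()
model = create_vgg16(number_classes, input_shape=(32, 32, 3))
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=[tf.keras.metrics.CategoricalAccuracy()])
model.fit(train_dataset, validation_data=val_dataset, epochs=100)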
Related
I am working on a biomedical image segmentation project and am using a U-Net model for the job. The problem is that when I train the model, the validation loss doesn't behave as expected.
I used dice_coef_loss as the loss function as well as a metric. The result of the training is the graph below. The graph clearly shows that the validation loss is not following my loss function, because the two curves are clearly different. The training loss, however, does follow the training dice_coef_loss values.
(The first image from the left is the training and validation loss; the third one is the training and validation dice_coef_loss as a metric.)
The history graph of training
(Sorry, I am not yet eligible to embed an image, please check the link.)
Here is my model
def unet(input_size=(256, 256, 1)):
    inputs = Input(input_size)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
    return Model(inputs=[inputs], outputs=[conv10])
Here is the loss function
import keras.backend as K
smooth=100
def dice_coef(y_true, y_pred):
    y_truef = K.flatten(y_true)
    y_predf = K.flatten(y_pred)
    And = K.sum(y_truef * y_predf)
    return (2 * And + smooth) / (K.sum(y_truef) + K.sum(y_predf) + smooth)

def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)
Compilation
model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=["binary_accuracy", dice_coef_loss, iou])
Note
I have tried:
learning rate tuning
tuning batch size
data augmentation
changing the loss function
If anyone wants to look at the code, here is the link to the Kaggle kernel.
Additional note
To make myself clear:
The first and second graphs were generated from the same function using the same dataset (either at training or validation time), so I want the train_loss curve from the 1st plot to equal the train_dice_coef_loss curve from the 3rd plot, and the val_loss curve from the 1st plot to equal the val_dice_coef_loss curve from the 3rd plot.
But unfortunately val_loss curve doesn't match val_dice_coef_loss curve.
P.S. I am new here. Any suggestion to improve my question is appreciated. Thank you.
As ZabirAlNazi suggested, it was a problem with the library used. Changing the imports from keras to tensorflow.keras solved the issue.
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.layers import Input, Activation, BatchNormalization, Dropout, Lambda, Dense, Conv2D
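For consistency, the remaining Keras symbols in the kernel can also be taken from tensorflow.keras; a sketch (adjust to whatever the kernel actually uses):

# Example of keeping every import on the tensorflow.keras side (adjust to the kernel's actual needs)
from tensorflow.keras.layers import Conv2DTranspose, MaxPooling2D, concatenate
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K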
I am trying to train a convolutional neural net. For this I am using a dataset of 646 images of license plates, each containing 8 characters (0-9, A-Z; without the letter 'O' and blank spaces, 36 possible characters in total). These are my training data X_train. Their shape is (646, 40, 200, 3) with 3 color channels. I resized them all to the same shape.
I also have a dataset containing the labels of these images, which I one-hot encoded into a numpy array of shape (646, 8, 36). This data is my y_train data.
Now, I am trying to apply a Neural Network which looks like this:
The architecture is taken from this paper: https://ieeexplore.ieee.org/abstract/document/8078501
I excluded the batch normalization part because it is not the most interesting one for me. But I am very unsure regarding the top of the network, that is, the part after the last pooling layer beginning with model.add(Flatten())...
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape = (40, 200, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(16000, activation = "relu"))
model.add(Dense(128, activation = "relu"))
model.add(Dense(36, activation = "relu"))
model.add(Dense(8*36, activation="Softmax"))
model.add(keras.layers.Reshape((8, 36)))
Thank you very much in advance!
Assuming the image below matches your model architecture, the code can be used to create the model. Ensure you have some padding for the input images.
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Input, Reshape, Concatenate
def create_model(input_shape=(40, 200, 3)):
    input_img = Input(shape=input_shape)
    model = Conv2D(32, kernel_size=(3, 3), activation="relu")(input_img)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(model)
    model = MaxPooling2D(pool_size=(2, 2))(model)
    backbone = Flatten()(model)

    branches = []
    for i in range(8):
        branches.append(backbone)
        branches[i] = Dense(16000, activation="relu", name="branch_" + str(i) + "_Dense_16000")(branches[i])
        branches[i] = Dense(128, activation="relu", name="branch_" + str(i) + "_Dense_128")(branches[i])
        branches[i] = Dense(36, activation="softmax", name="branch_" + str(i) + "_output")(branches[i])

    output = Concatenate(axis=1)(branches)
    output = Reshape((8, 36))(output)
    model = Model(input_img, output)
    return model
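Training it could then look roughly like the sketch below (assuming X_train of shape (646, 40, 200, 3) and one-hot y_train of shape (646, 8, 36), as described in the question; categorical crossentropy is applied over the last axis, i.e. the 36 classes per character):

# Minimal training sketch under the assumptions above
model = create_model(input_shape=(40, 200, 3))
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(X_train / 255.0, y_train, batch_size=32, epochs=10, validation_split=0.1)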
I'm trying to separate close objects as was shown in the U-Net paper (here). For this, one generates weight maps which can be used for pixel-wise losses. The following code describes the network I use from this blog post.
x_train_val = # list of images (imgs, 256, 256, 3)
y_train_val = # list of masks (imgs, 256, 256, 1)
y_weights = # list of weight maps (imgs, 256, 256, 1) according to the blog post
# visual inspection confirms the correct calculation of these maps
# Blog posts' loss function
def my_loss(target, output):
    return -tf.reduce_sum(target * output,
                          len(output.get_shape()) - 1)
# Standard Unet model from blog post
_epsilon = tf.convert_to_tensor(K.epsilon(), np.float32)
def make_weighted_loss_unet(input_shape, n_classes):
    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))

    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(ip)
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    conv1 = L.Dropout(0.1)(conv1)
    mpool1 = L.MaxPool2D()(conv1)

    conv2 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool1)
    conv2 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    conv2 = L.Dropout(0.2)(conv2)
    mpool2 = L.MaxPool2D()(conv2)

    conv3 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool2)
    conv3 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    conv3 = L.Dropout(0.3)(conv3)
    mpool3 = L.MaxPool2D()(conv3)

    conv4 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool3)
    conv4 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = L.Dropout(0.4)(conv4)
    mpool4 = L.MaxPool2D()(conv4)

    conv5 = L.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool4)
    conv5 = L.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = L.Dropout(0.5)(conv5)

    up6 = L.Conv2DTranspose(512, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv5)
    conv6 = L.Concatenate()([up6, conv4])
    conv6 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = L.Dropout(0.4)(conv6)

    up7 = L.Conv2DTranspose(256, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv6)
    conv7 = L.Concatenate()([up7, conv3])
    conv7 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = L.Dropout(0.3)(conv7)

    up8 = L.Conv2DTranspose(128, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv7)
    conv8 = L.Concatenate()([up8, conv2])
    conv8 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = L.Dropout(0.2)(conv8)

    up9 = L.Conv2DTranspose(64, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv8)
    conv9 = L.Concatenate()([up9, conv1])
    conv9 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = L.Dropout(0.1)(conv9)

    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)

    # Mimic crossentropy loss
    c11 = L.Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(c10)
    c11 = L.Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(c11)
    c11 = L.Lambda(lambda x: K.log(x))(c11)
    weighted_sm = L.multiply([c11, weight_ip])

    model = Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
    return model
I then compile and fit the model as is shown below:
model = make_weighted_loss_unet((256, 256, 3), 1) # shape of input, number of classes
model.compile(optimizer='adam', loss=my_loss, metrics=['acc'])
model.fit([x_train_val, y_weights], y_train_val, validation_split=0.1, epochs=1)
The model then trains as usual. However, the loss doesn't seem to improve much. Furthermore, when I try to predict on new images, I obviously don't have the weight maps (because they are calculated on the labeled masks). I tried to use empty / zero arrays shaped like the weight maps, but that only yields blank / zero predictions. I also tried different metrics and more standard losses without any success.
Did anyone face the same issue or have an alternative in implementing this weighted loss? Thanks in advance. BBQuercus
A simpler way to write custom loss with pixel weights
In your code, the loss is scattered between the my_loss and make_weighted_loss_unet functions. You can add the targets as an input and use model.add_loss to structure the code better:
def make_weighted_loss_unet(input_shape, n_classes):
    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
    targets = L.Input(shape=input_shape[:2] + (n_classes,))
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)
    model.add_loss(pixel_weighted_cross_entropy(weight_ip, targets, c10))
    # .... return Model .... NO NEED to specify loss in model.compile

def pixel_weighted_cross_entropy(weights, targets, predictions):
    loss_val = keras.losses.categorical_crossentropy(targets, predictions)
    weighted_loss_val = weights * loss_val
    return K.mean(weighted_loss_val)
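Assuming the elided part returns Model(inputs=[ip, weight_ip, targets], outputs=c10), compilation and training could then look roughly like this sketch (using the arrays from the question):

# Sketch: with add_loss, compile needs no loss= and fit needs no separate targets
model = make_weighted_loss_unet((256, 256, 3), 1)
model.compile(optimizer='adam')
model.fit([x_train_val, y_weights, y_train_val],  # image, weight map and target are all model inputs
          validation_split=0.1, epochs=1)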
If you don't refactor your code to the above approach, the next section shows how you can still run inference without issues.
How to run your model in inference
Option 1: Use another Model object for inference
You can create a Model used for training and another used for inference. Both are largely the same, except that the inference Model does not take weight_ip and gives an early output, c10.
Here's example code that adds an is_training=True argument to decide which Model to return:
def make_weighted_loss_unet(input_shape, n_classes, is_training=True):
    ip = L.Input(shape=input_shape)
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(ip)
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)

    if is_training:
        # Mimic crossentropy loss
        c11 = L.Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(c10)
        c11 = L.Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(c11)
        c11 = L.Lambda(lambda x: K.log(x))(c11)
        weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
        weighted_sm = L.multiply([c11, weight_ip])
        return Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
    else:
        return Model(inputs=[ip], outputs=[c10])
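Note that calling the function twice builds two separate sets of layers, so the trained weights have to be copied into the inference model. A sketch, assuming the layers are created in the same order in both calls (the Lambda/Input/multiply layers carry no weights, so the flat weight lists line up):

# Sketch: transfer trained weights from the training model to the inference model
n_classes = 1  # as in the question
train_model = make_weighted_loss_unet((256, 256, 3), n_classes, is_training=True)
# ... train train_model as before ...
infer_model = make_weighted_loss_unet((256, 256, 3), n_classes, is_training=False)
infer_model.set_weights(train_model.get_weights())
predicted_heatmap = infer_model.predict(new_image)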
Option 2: Use K.function
If you don't want to mess with your Model definition method (make_weighted_loss_unet) and want to achieve the same result outside, you can use a function that extracts the subgraph relevant for inference.
In your inference function:
from keras import backend as K
model = make_weighted_loss_unet(input_shape, n_classes)
inference_function = K.function([model.get_layer("input_layer").input],
                                [model.get_layer("output_softmax_layer").output])
predicted_heatmap = inference_function([new_image])[0]  # K.function takes and returns lists
Note that you'll have to give a name= to your ip layer and c10 layer to be able to retrieve them via model.get_layer(name):
ip = L.Input(shape=input_shape, name="input_layer")
and
c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal', name="output_softmax_layer")(conv9)
I'm trying to train a CNN with one output using two datasets. In each batch I want to feed some data from the first dataset and some data from the second dataset, and each dataset has a different corresponding loss function. I've read articles saying this is possible if the neural network has two heads, but I'm not sure how to do two datasets with two losses on the same neural network with one output. I would also be open to using PyTorch if there are some existing examples.
Two datasets with two losses on the same neural network with one output.
You can use multiple loss functions if you have multiple outputs. You can use multiple loss functions in the following scenarios:
Single Input Multi Output
Multi Input Multi Output
Please refer to the Single Input Multi Output network below:
# Convolutional Encoder
input_img = Input(shape=(img_rows, img_cols, 1))
conv_1 = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
pool_1 = MaxPooling2D((2, 2), padding='same')(conv_1)
conv_2 = Conv2D(8, (3, 3), activation='relu', padding='same')(pool_1)
pool_2 = MaxPooling2D((2, 2), padding='same')(conv_2)
conv_3 = Conv2D(8, (3, 3), activation='relu', padding='same')(pool_2)
encoded= MaxPooling2D((2, 2), padding='same')(conv_3)
# Classification
flatten = Flatten()(encoded)
fc = Dense(128, activation='relu')(flatten)
Output1 = Dense(num_classes, activation='softmax', name='classification')(fc)
# Decoder
conv_4 = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
up_1 = UpSampling2D((2, 2))(conv_4)
conv_5 = Conv2D(8, (3, 3), activation='relu', padding='same')(up_1)
up_2 = UpSampling2D((2, 2))(conv_5)
conv_6 = Conv2D(16, (3, 3), activation='relu')(up_2)
up_3 = UpSampling2D((2, 2))(conv_6)
Output2 = Conv2D(1, (3, 3), activation='sigmoid', padding='same', name='autoencoder')(up_3)
model = Model(inputs=input_img, outputs=[Output1 , Output2])
model.compile(loss={'classification': 'categorical_crossentropy',
                    'autoencoder': 'binary_crossentropy'},
              loss_weights={'classification': 1.0,
                            'autoencoder': 0.5},
              optimizer='adam',
              metrics={'classification': 'accuracy', 'autoencoder': ['mse']})

model.fit(x_train,
          {'classification': y_train, 'autoencoder': y_train},
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, {'classification': y_test, 'autoencoder': y_test}),
          verbose=1)
Please refer to the Multi Input Multi Output network below:
main_input = Input(shape=(100,), dtype='int32', name='main_input')
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
lstm_out = LSTM(32)(x)
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
model.compile(optimizer='rmsprop',
              loss={'main_output': 'binary_crossentropy', 'aux_output': 'binary_crossentropy'},
              loss_weights={'main_output': 1., 'aux_output': 0.2})

model.fit({'main_input': headline_data, 'aux_input': additional_data},
          {'main_output': headline_labels, 'aux_output': additional_labels},
          epochs=50, batch_size=32)
For more information, please refer to this link.
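If you really want to keep a single output and only switch the loss depending on which dataset a batch comes from, another option is a custom training loop that alternates between the two datasets. Below is a minimal, self-contained sketch with toy stand-ins for the model, the two datasets, and the two losses:

import numpy as np
import tensorflow as tf

# Toy stand-ins: a single-output model and two datasets, each with its own loss
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam()
loss_fn_a = tf.keras.losses.MeanSquaredError()
loss_fn_b = tf.keras.losses.MeanAbsoluteError()

dataset_a = tf.data.Dataset.from_tensor_slices(
    (np.random.rand(64, 4).astype('float32'), np.random.rand(64, 1).astype('float32'))).batch(8)
dataset_b = tf.data.Dataset.from_tensor_slices(
    (np.random.rand(64, 4).astype('float32'), np.random.rand(64, 1).astype('float32'))).batch(8)

def train_step(x, y, loss_fn):
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = loss_fn(y, pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

for epoch in range(5):
    # Alternate: one batch from dataset A with loss A, then one from dataset B with loss B
    for (xa, ya), (xb, yb) in zip(dataset_a, dataset_b):
        loss_a = train_step(xa, ya, loss_fn_a)
        loss_b = train_step(xb, yb, loss_fn_b)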
I have images with shape (3600, 3600, 3). I'd like to use an autoencoder on them. My code is:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
input_img = Input(shape=(3600, 3600, 3))
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
batch_size=2
datagen = ImageDataGenerator(rescale=1. / 255)
# dimensions of our images.
img_width, img_height = 3600, 3600
train_data_dir = 'train'
validation_data_dir = 'validation'

generator_train = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
)

generator_valid = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

autoencoder.fit_generator(generator=generator_train,
                          validation_data=generator_valid,
                          )
When I run the code I get this error message:
ValueError: Error when checking target: expected conv2d_21 to have 4 dimensions, but got array with shape (26, 1)
I know the problem is somewhere in the shape of the layers, but I couldn't find it. Can someone please help me and explain the solution?
There are the following issues in your code:
Pass class_mode='input' to the flow_from_directory method to give the input images as the labels as well (since you are creating an autoencoder).
Pass padding='same' to the third Conv2D layer in the decoder:
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
Use three filters in the last layer since your images are RGB:
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
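Putting the first point into code, the training generator could be set up roughly like this sketch (the validation generator would be changed the same way):

# Sketch: class_mode='input' makes the generator yield each image as its own target
generator_train = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='input')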