VAE in Keras: how to define the end-to-end model? - python

I am learning the tutorial here. My Model part is:
input_img = keras.Input(shape=img_shape)
x = layers.Conv2D(32, (3, 3),
padding='same', activation='relu')(input_img)
...
x = layers.Conv2D(64, (3, 3),
padding='same', activation='relu')(x)
shape_before_flattening = K.int_shape(x)
x = layers.Flatten()(x)
x = layers.Dense(32, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)
def sampling(args):
...
z = layers.Lambda(sampling)([z_mean, z_log_var])
decoder_input = layers.Input(K.int_shape(z)[1:])
x = layers.Dense(np.prod(shape_before_flattening[1:]),
activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Conv2DTranspose(32, 3,
padding='same', activation='relu',
strides=(2, 2))(x)
x = layers.Conv2D(1, 3,
padding='same', activation='sigmoid')(x)
# This is our decoder model from letent space to reconstructed images
decoder = Model(decoder_input, x)
# We then apply it to `z` to recover the decoded `z`.
z_decoded = decoder(z)
def vae_loss(self, x, z_decoded):
...
# Fit the end-to-end model
vae = Model(input_img, z_decoded) # vae = Model(input_img, x)
vae.compile(optimizer='rmsprop', loss=vae_loss)
vae.summary()
My question is: the end-to-end is vae = Model(input_img, z_decoded) or vae = Model(input_img, x). Should we compute loss on input_img and z_decoded OR between input_img and x? Thanks

x is changing throughout the model, where x = layers.Conv2D(1, 3,padding='same', activation='sigmoid')(x) you set x to be the last layer of your decoder model.
When doing z_decoded = decoder(z) you chain your decoder straight after the encoder, z_decoded is actually the output layer of your decoder, thus, the same x as earlier. Also, you create the link between the actual input and the output.
Computing the loss would yield the same results on both (as they both represent the same layer).
In short - Both vae = Model(input_img, z_decoded) and vae = Model(input_img, x) are the end to end model, i would suggest using the z_decoded version, for readability.

Related

How to correctly save and load a model with custom CTC layer (Keras example)

I am following this tutorial on Keras , but I don't know how to correctly save this model with custom layer after the training and load it.
This problem has been mentioned in here and here but apparently non of those solutions work for this Keras example. Can anyone point me in the right direction?
P.S: here is the main part of the code:
class CTCLayer(layers.Layer):
def __init__(self, name=None):
super().__init__(name=name)
self.loss_fn = keras.backend.ctc_batch_cost
def call(self, y_true, y_pred):
# Compute the training-time loss value and add it
# to the layer using `self.add_loss()`.
batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
loss = self.loss_fn(y_true, y_pred, input_length, label_length)
self.add_loss(loss)
# At test time, just return the computed predictions
return y_pred
def build_model():
# Inputs to the model
input_img = layers.Input(
shape=(img_width, img_height, 1), name="image", dtype="float32"
)
labels = layers.Input(name="label", shape=(None,), dtype="float32")
# First conv block
x = layers.Conv2D(
32,
(3, 3),
activation="relu",
kernel_initializer="he_normal",
padding="same",
name="Conv1",
)(input_img)
x = layers.MaxPooling2D((2, 2), name="pool1")(x)
# Second conv block
x = layers.Conv2D(
64,
(3, 3),
activation="relu",
kernel_initializer="he_normal",
padding="same",
name="Conv2",
)(x)
x = layers.MaxPooling2D((2, 2), name="pool2")(x)
# We have used two max pool with pool size and strides 2.
# Hence, downsampled feature maps are 4x smaller. The number of
# filters in the last layer is 64. Reshape accordingly before
# passing the output to the RNN part of the model
new_shape = ((img_width // 4), (img_height // 4) * 64)
x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
x = layers.Dense(64, activation="relu", name="dense1")(x)
x = layers.Dropout(0.2)(x)
# RNNs
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)
# Output layer
x = layers.Dense(len(characters) + 1, activation="softmax", name="dense2")(x)
# Add CTC layer for calculating CTC loss at each step
output = CTCLayer(name="ctc_loss")(labels, x)
# Define the model
model = keras.models.Model(
inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
)
# Optimizer
opt = keras.optimizers.Adam()
# Compile the model and return
model.compile(optimizer=opt)
return model
# Get the model
model = build_model()
model.summary()class CTCLayer(layers.Layer):
def __init__(self, name=None):
super().__init__(name=name)
self.loss_fn = keras.backend.ctc_batch_cost
def call(self, y_true, y_pred):
# Compute the training-time loss value and add it
# to the layer using `self.add_loss()`.
batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
loss = self.loss_fn(y_true, y_pred, input_length, label_length)
self.add_loss(loss)
# At test time, just return the computed predictions
return y_pred
def build_model():
# Inputs to the model
input_img = layers.Input(
shape=(img_width, img_height, 1), name="image", dtype="float32"
)
labels = layers.Input(name="label", shape=(None,), dtype="float32")
# First conv block
x = layers.Conv2D(
32,
(3, 3),
activation="relu",
kernel_initializer="he_normal",
padding="same",
name="Conv1",
)(input_img)
x = layers.MaxPooling2D((2, 2), name="pool1")(x)
# Second conv block
x = layers.Conv2D(
64,
(3, 3),
activation="relu",
kernel_initializer="he_normal",
padding="same",
name="Conv2",
)(x)
x = layers.MaxPooling2D((2, 2), name="pool2")(x)
# We have used two max pool with pool size and strides 2.
# Hence, downsampled feature maps are 4x smaller. The number of
# filters in the last layer is 64. Reshape accordingly before
# passing the output to the RNN part of the model
new_shape = ((img_width // 4), (img_height // 4) * 64)
x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
x = layers.Dense(64, activation="relu", name="dense1")(x)
x = layers.Dropout(0.2)(x)
# RNNs
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)
# Output layer
x = layers.Dense(len(characters) + 1, activation="softmax", name="dense2")(x)
# Add CTC layer for calculating CTC loss at each step
output = CTCLayer(name="ctc_loss")(labels, x)
# Define the model
model = keras.models.Model(
inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
)
# Optimizer
opt = keras.optimizers.Adam()
# Compile the model and return
model.compile(optimizer=opt)
return model
# Get the model
model = build_model()
model.summary()
epochs = 100
early_stopping_patience = 10
# Add early stopping
early_stopping = keras.callbacks.EarlyStopping(
monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
)
# Train the model
history = model.fit(
train_dataset,
validation_data=validation_dataset,
epochs=epochs,
callbacks=[early_stopping],
)
# Get the prediction model by extracting layers till the output layer
prediction_model = keras.models.Model(
model.get_layer(name="image").input, model.get_layer(name="dense2").output
)
prediction_model.summary()
The problem is not actually with Keras's saving methods. The characters set is inconsistent and does not keep ordering. Add the below code after creating the characters set to solve the issue:
characters = sorted(list(characters))
#Amirhosein, check out this function in the Horovod repository:
Serialize:
https://github.com/horovod/horovod/blob/6f0bb9fae826167559501701d4a5a0380284b5f0/horovod/spark/keras/util.py#L115
Deserialize:
https://github.com/horovod/horovod/blob/6f0bb9fae826167559501701d4a5a0380284b5f0/horovod/spark/keras/remote.py#L267
Example of use for deserialization:
https://github.com/horovod/horovod/blob/6f0bb9fae826167559501701d4a5a0380284b5f0/horovod/spark/keras/remote.py#L118
If you are using custom objects like custom metrics or custom Loss function, you will need to use custom_object_scope as in the example.
It used a package called cloudpickle (https://pypi.org/project/cloudpickle/) under the hood to convert the KerasModel to a string and vice versa.

multiple loss functions , None for gradient

I am using the custom loss function in addition to the mean squared error loss function in my Keras model. Code for the custom loss function is given below:
def grad1(matrix):
dx = 1.0
u_x = np.gradient(matrix,dx,axis=0)
u_xx = np.gradient(u_x,dx,axis=0)
return u_xx
def artificial_diffusion(y_true, y_pred):
u_xxt = tf.py_func(grad1,[y_true],tf.float32)
u_xxp = tf.py_func(grad1,[y_pred],tf.float32)
lap_mse = tf.losses.mean_squared_error(u_xxt,u_xxp) + K.epsilon()
I have the 1D CNN model.
input_img = Input(shape=(n_states,n_features))
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(input_img)
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(x)
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(x)
decoded1 = Conv1D(n_outputs, kernel_size=3, activation='linear', padding='same',
name='regression')(x)
decoded2 = Conv1D(n_outputs, kernel_size=3, activation='linear', padding='same',
name='diffusion')(x)
model = Model(inputs=input_img, outputs=[decoded1,decoded2])
model.compile(loss=['mse',artificial_diffusion],
loss_weights=[1, 1],
optimizer='adam',metrics=[coeff_determination])
When I compile and run the model, I get an error An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.. If I create the model as model = Model(inputs=input_img, outputs=[decoded1,decoded1]), then there is no error. But, then I can't monitor two losses separately. Am I doing any mistake while constructing the model?

SVM on top of an autoencoder for image classification: 100% accuracy on train and test data

I have a small image data set with 4 classes. So I've used an auto-encoder to extract features automatically.
These features are fed to an Support Vector Machine classifier in order to do the classification task.
After training the auto encoder for 10 epochs and training the SVM model on the extracted features I've got these confusion matrices:
My concern is about if the model isn't as general as possible in order to be applicable on a new data.
My question is: Is there any method to know if the model is over fitting except testing it on some other samples?
EDIT:
This is the code for the auto encoder:
encoder_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name='original_img')
x = Conv2D(filters=32, kernel_size=3, padding="same", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))(encoder_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(filters=64, kernel_size=3, padding="same", activation='relu')(x)
encoder_output = MaxPooling2D((2, 2))(x)
encoder = Model(encoder_input, encoder_output, name='encoder')
encoder.summary()
decoder_input = Input(shape=(32, 35, 64,), name='encoded_img')
x = Conv2D(filters=64, kernel_size=3, padding="same", activation='relu') (decoder_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(filters=32, kernel_size=3, padding="same", activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoder_output = Conv2D(filters=3, kernel_size=3, padding='same', activation='relu')(x)
decoder = Model(decoder_input, decoder_output, name='decoder')
decoder.summary()
autoencoder_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name='img')
encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)
Autoencoder = Model(autoencoder_input, decoded_img, name='autoencoder')
Autoencoder.summary()
total_train = 80
total_val = 40
Autoencoder.compile(optimizer='adam',
loss=tf.keras.losses.MeanSquaredError())
history = Autoencoder.fit_generator(
train_data_gen,
steps_per_epoch=total_train // batch_size,
epochs=epochs,
validation_data=val_data_gen,
validation_steps=total_val // batch_size
)
The code for the SVM and confusion matrices:
samples, y = next(train_data_gen_svm)
samples_val, y_val = next(val_data_gen_svm)
#Get features
X = encoder.predict(samples)
X_val = encoder.predict(samples_val)
# Reshape for the SVM input
X_reshape = X.reshape(80,32*35*64)
X_val_reshape = X_val.reshape(40,32*35*64)
clf = SVC(kernel='poly', degree=3)
print('X shape:', X_reshape.shape)
print('y shape: ', X_val_reshape.shape)
clf.fit(X_reshape, y)
y_pred_val = clf.predict(X_val_reshape)
y_pred_train = clf.predict(X_reshape)
cnf_matrix_val = confusion_matrix(y_val, y_pred_val,normalize='true')
cnf_matrix_train = confusion_matrix(y,y_pred_train,normalize='true')

How can I concatenate dynamic values with the flatten vector in CNN

I'm trying to increase the accuracy of CNN by computing some dynamic values such as Hu moments of the images during the training phase and then feed them to the fully connected layer with the flatten vector as shown in the image of my model:
I want to compute Hu moments for each image in the dataset then after the flatten operation, I want to concatenate the values of the Hu moments with the fatten vector and feed it to the fully connected layer.
This is the model I'm using (Tensorflow Keras):
layer1 = Conv2D(16, (3, 3),padding="same", activation='relu')(inpx)
layer2 = Conv2D(32, kernel_size=(3, 3),padding="same", activation='relu')(layer1)
layer3 = MaxPooling2D(pool_size=(2, 2))(layer2)
layer4 = Conv2D(64, kernel_size=(5, 5),padding="same", activation='relu')(layer3)
layer5 = Conv2D(128, kernel_size=(5, 5),padding="same", activation='relu')(layer4)
layer6 = MaxPooling2D(pool_size=(2, 2))(layer5)
layer7 = Dropout(0.5)(layer6)
layer8 = Flatten()(layer7)
layer9 = Dense(250, activation='sigmoid')(layer8)
layer10 = Dense(10, activation='softmax')(layer9)
model = Model([inpx], layer10)
model.compile(optimizer=keras.optimizers.Adadelta(),
loss=keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=500)
score = model.evaluate(x_test, y_test, verbose=0)
The dataset I'm using is MNIST handwritten digits.
Hmm I don't know what the Hu moments and the Extend and Soidty are, but I'm assuming they're 1dimensional:
# image = tf.Tensor
#tf.function
def calc_hu(image):
""" calculate hu """
hu = ...
return hu
class HuLayer(tf.keras.layers.Layer):
def call(self, inputs):
return calc_hu(inputs)
#tf.function
def calc_extend(image):
""" calculate extend """
extend = ...
return extend
class ExtendLayer(tf.keras.layers.Layer):
def call(self, inputs):
return calc_extend(inputs)
layer1 = Conv2D(16, (3, 3),padding="same", activation='relu')(inpx)
layer2 = Conv2D(32, kernel_size=(3, 3),padding="same", activation='relu')(layer1)
layer3 = MaxPooling2D(pool_size=(2, 2))(layer2)
layer4 = Conv2D(64, kernel_size=(5, 5),padding="same", activation='relu')(layer3)
layer5 = Conv2D(128, kernel_size=(5, 5),padding="same", activation='relu')(layer4)
layer6 = MaxPooling2D(pool_size=(2, 2))(layer5)
layer7 = Dropout(0.5)(layer6)
layer8 = Flatten()(layer7)
layer8_ = tf.layers.keras.concatenate([layer_8, HuLayer()(tf.keras.layers.Input(input_shape)(inpx)), ExtendLayer()(tf.keras.layers.Input(input_shape)(inpx))])
layer9 = Dense(250, activation='sigmoid')(layer8_)
layer10 = Dense(10, activation='softmax')(layer9)
I didn't test this code but it should set you on your way. Hope it helps you enough to get going!

How to remove the FC layer off of a fine turned model keras

So I have finetuned a Resnet50 model with the following architecture:
model = models.Sequential()
model.add(resnet)
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(layers.Dense(2048, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(736, activation='softmax')) # Output layer
So now I have a saved model (.h5) which I want to use as input into another model. But I don't want the last layer. I would normally do it like this with a base resnet50 model:
def base_model():
resnet = resnet50.ResNet50(weights="imagenet", include_top=False)
x = resnet.output
x = GlobalAveragePooling2D()(x)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.6)(x)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.6)(x)
x = Lambda(lambda x_: K.l2_normalize(x,axis=1))(x)
return Model(inputs=resnet.input, outputs=x)
but that does not work for the model as it gives me an error. I am trying it like this right now but still, it does not work.
def base_model():
resnet = load_model("../Models/fine_tuned_model/fine_tuned_resnet50.h5")
x = resnet.layers.pop()
#resnet = resnet50.ResNet50(weights="imagenet", include_top=False)
#x = resnet.output
#x = GlobalAveragePooling2D()(x)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.6)(x)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.6)(x)
x = Lambda(lambda x_: K.l2_normalize(x,axis=1))(x)
return Model(inputs=resnet.input, outputs=x)
enhanced_resent = base_model()
This is the error that it gives me.
Layer dense_3 was called with an input that isn't a symbolic tensor. Received type: <class 'keras.layers.core.Dense'>. Full input: [<keras.layers.core.Dense object at 0x000001C61E68E2E8>]. All inputs to the layer should be tensors.
I don't know if I can do this or not.
I have finally figured it out after quitting for an hour. So this is how you will do it.
def base_model():
resnet = load_model("../Models/fine_tuned_model/42-0.85.h5")
x = resnet.layers[-2].output
x = Dense(4096, activation='relu', name="FC1")(x)
x = Dropout(0.6, name="FCDrop1")(x)
x = Dense(4096, activation='relu', name="FC2")(x)
x = Dropout(0.6, name="FCDrop2")(x)
x = Lambda(lambda x_: K.l2_normalize(x,axis=1))(x)
return Model(inputs=resnet.input, outputs=x)
enhanced_resent = base_model()
And this works perfectly. I hope this helps out someone else as I have never seen this done in any tutorial before.
x = resnet.layers[-2].output
This will get the layer you want, but you need to know which index the layer you want is at. -2 is the 2nd to last FC layer that I wanted as I wanted the feature extractions, not the final classification. This can be found doing a
model.summary()

Categories