I am working on an image similarity problem and want to save the image embeddings the model creates during training. Is there a way I can capture the embeddings before they are passed into the loss function?
Here is my model:
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(filters=128, kernel_size=2, padding='same', activation='relu', input_shape=(32,32,3)),
tf.keras.layers.MaxPooling2D(pool_size=2),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=2),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32, activation=None), # No activation on final dense layer
tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)) # L2 normalize embeddings
])
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tfa.losses.TripletSemiHardLoss(),
metrics = ["accuracy"])
history = model.fit(train_dataset, epochs=3, validation_data=test_dataset)
To be clear, I do not want only the outputs from the final layer shown here; I want to save the final resulting vector that my model outputs.
You can create a custom callback and, at the end of each training batch, call your model on your data and save the output. One catch: the `batch` argument Keras passes to `on_train_batch_end` is only the integer batch index, not the batch data, so the callback needs its own reference to the inputs:
class SaveEmbeddingCallback(tf.keras.callbacks.Callback):
    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset  # the inputs you want embeddings for

    def on_train_batch_end(self, batch, logs=None):
        # `batch` is only the integer batch index, not the data itself,
        # so run the model on the stored dataset to get the embeddings
        embedding = self.model.predict(self.dataset, verbose=0)
        # AT THIS STAGE YOU HAVE THE OUTPUT OF THE MODEL
        # YOU CAN SAVE IT OR DO WHATEVER YOU WANT WITH IT
        ...
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tfa.losses.TripletSemiHardLoss(),
    metrics=["accuracy"])
history = model.fit(train_dataset,
                    epochs=3,
                    validation_data=test_dataset,
                    callbacks=[SaveEmbeddingCallback(train_dataset)])
For more information on Keras custom callbacks, see this TensorFlow tutorial.
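If you only need the embeddings of the fully trained model, a simpler alternative (a minimal sketch; the filename embeddings.npy is just an example) is to run one predict pass after fit and persist the result with NumPy:
import numpy as np

# the model's output is already the 32-dim L2-normalized embedding,
# so one predict pass yields one vector per training image
embeddings = model.predict(train_dataset)
np.save("embeddings.npy", embeddings)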
Related
I built a CNN model in Keras. I want to save this model and reuse it for transfer learning as a pre-trained model. I do not understand the proper way to save the model for reuse, or how to load my pre-trained model in Keras so that I can add some layers after loading it.
This is the model that I built:
model_cnn = Sequential()  # initializing the Sequential CNN model
# Add the embedding layer, which takes in a maximum of 450 words as input and provides a 32-dimensional output for the words that belong in the top_words dictionary
model_cnn.add(Embedding(vocab_size, embedding_size, input_length=max_len))
model_cnn.add(Conv1D(32, 3, padding='same', activation='relu'))
model_cnn.add(Conv1D(64, 3, padding='same', activation='relu'))
model_cnn.add(MaxPooling1D())
model_cnn.add(Flatten())
model_cnn.add(Dense(250, activation='relu'))
model_cnn.add(Dense(2, activation='softmax'))
# optimizer = keras.optimizers.Adam(lr=0.001)
model_cnn.compile(
loss='categorical_crossentropy',
optimizer=sgd,
metrics=['acc',Precision(),Recall(),]
)
history_cnn = model_cnn.fit(
X_train, y_train,
validation_data=(X_val, y_val),
batch_size = 64,
epochs=epochs,
verbose=1
)
The recommended way to save a model is the SavedModel format:
dir = "target_directory"
model_cnn.save(dir) # it will save a .pb file with assets and variables folders
Then you can load it:
model_cnn = tf.keras.models.load_model(dir)
Now, you can add some layers and make another model. For example:
input = tf.keras.Input(shape=(max_len,))  # must match the sequence length the saved text model expects
x = model_cnn(input)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
new_model = tf.keras.models.Model(inputs=input, outputs=x)
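If you want the loaded model to act purely as a frozen feature extractor, a common transfer-learning setup (whether to freeze is your choice; this is a sketch, not the only option), disable training of its weights before building the new model:
# freeze the pre-trained weights so only the newly added layers are trained
model_cnn.trainable = False

input = tf.keras.Input(shape=(max_len,))
x = model_cnn(input, training=False)  # keep dropout etc. in inference mode
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
new_model = tf.keras.models.Model(inputs=input, outputs=x)
new_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])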
I would like to integrate tf.nn.weighted_cross_entropy_with_logits to deal with data imbalance, but I am not sure how to do it. Class 0 has 10K images, while class 1 has 500. Here is my code.
model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, (3, 3), input_shape=(dim, dim, 3), activation='relu'),
....
tf.keras.layers.Dense(2, activation='softmax')
])
model.compile(optimizer="nadam",
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['accuracy'])
class_weight = {0: 1.,
1: 20.}
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epc,
    verbose=1,
    class_weight=class_weight)
You can simply wrap tf.nn.weighted_cross_entropy_with_logits inside a custom loss function.
Remember that tf.nn.weighted_cross_entropy_with_logits expects logits, so your network must output logits rather than probabilities (remove the softmax activation from the last layer).
Here is a dummy example:
import numpy as np
import tensorflow as tf

X = np.random.uniform(0, 1, (10, 32, 32, 3))
y = np.random.randint(0,2, (10,))
y = tf.keras.utils.to_categorical(y)
model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, (3, 3), input_shape=(32, 32, 3), activation='relu'),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(2) ### must be logits (remove softmax)
])
def my_loss(weight):
def weighted_cross_entropy_with_logits(labels, logits):
loss = tf.nn.weighted_cross_entropy_with_logits(
labels, logits, weight
)
return loss
return weighted_cross_entropy_with_logits
model.compile(optimizer="nadam",
loss=my_loss(weight=0.8),
metrics=['accuracy'])
model.fit(X,y, epochs=3)
At inference time you obtain the probabilities in this way:
tf.nn.softmax(model.predict(X))
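For the class counts in the question (10,000 images in class 0 vs. 500 in class 1), a common heuristic for the weight (a starting point, not a fixed rule) is the negative/positive ratio:
# class counts from the question: 10,000 in class 0 vs. 500 in class 1
num_class0, num_class1 = 10_000, 500

# pos_weight > 1 penalizes errors on the rare positive class more heavily
pos_weight = num_class0 / num_class1  # 20.0

model.compile(optimizer="nadam",
              loss=my_loss(weight=pos_weight),
              metrics=['accuracy'])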
I'm trying to add conv layers to the transfer learning code mentioned below, but I am not sure how to proceed. I want to add either
a conv layer with max pooling, a 3x3 filter, stride 3, and ReLU activation, or
a conv layer with max pooling, a 3x3 filter, stride 3, and LeakyReLU activation.
Let me know if this is possible and, if so, how.
CLASSES = 2
# setup model
base_model = MobileNet(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dropout(0.4)(x)
predictions = Dense(CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
# transfer learning
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
"""##Data augmentation"""
# data prep
"""
## Transfer learning
"""
from tensorflow.keras.callbacks import ModelCheckpoint
filepath="mobilenet/my_model.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
EPOCHS = 1
BATCH_SIZE = 32
STEPS_PER_EPOCH = 5
VALIDATION_STEPS = 32
MODEL_FILE = 'mobilenet/filename.model'
history = model.fit_generator(
train_generator,
epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
validation_data=validation_generator,
validation_steps=VALIDATION_STEPS,
callbacks=callbacks_list)
model.save(MODEL_FILE)
backup_model = model
model.summary()
You can do this in several ways; one of them is shown below. Note that the added conv layers must come before GlobalAveragePooling2D, since the pooling collapses the spatial dimensions that a conv layer needs:
model = Sequential([
    base_model,
    Conv2D(...),  # the layers you would like to add on top of the base model
    MaxPooling2D(...),
    GlobalAveragePooling2D(name='avg_pool'),
    Dropout(0.4),
    ...
])
model.compile(...)
I think this is what you are after:
CLASSES = 2
new_filters = 256  # specify the number of filters you want in the added convolutional layer
img_shape=(224,224,3)
base_model=tf.keras.applications.mobilenet.MobileNet( include_top=False, input_shape=img_shape, weights='imagenet',dropout=.4)
x=base_model.output
x= Conv2D(new_filters, 3, padding='same', strides= (3,3), activation='relu', name='added')(x)
x= GlobalAveragePooling2D(name='avg_pool')(x)
x= Dropout(0.4)(x)
predictions= Dense(CLASSES, activation='softmax', name='output')(x)
model=Model(inputs=base_model.input, outputs=predictions)
model.summary()
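One detail this answer omits: if, as in the question's original code, you want the pre-trained MobileNet weights frozen so only the added conv layer and the head are trained, set that before compiling:
# freeze the pre-trained MobileNet layers; only the added conv layer,
# the pooling head, and the output layer will be updated during training
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])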
print("Building model...")
ques1_enc = Sequential()
ques1_enc.add(Embedding(output_dim=64, input_dim=vocab_size, weights=[embedding_weights], mask_zero=True))
ques1_enc.add(LSTM(100, input_shape=(64, seq_maxlen), return_sequences=False))
ques1_enc.add(Dropout(0.3))
ques2_enc = Sequential()
ques2_enc.add(Embedding(output_dim=64, input_dim=vocab_size, weights=[embedding_weights], mask_zero=True))
ques2_enc.add(LSTM(100, input_shape=(64, seq_maxlen), return_sequences=False))
ques2_enc.add(Dropout(0.3))
model = Sequential()
model.add(Merge([ques1_enc, ques2_enc], mode="sum"))
model.add(Dense(2, activation="softmax"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
print("Building model costs:", time.time() - start)
print("Training...")
checkpoint = ModelCheckpoint(filepath=os.path.join("C:/Users/", "quora_dul_best_lstm.hdf5"), verbose=1, save_best_only=True)
model.fit([x_ques1train, x_ques2train], ytrain, batch_size=32, epochs=1, validation_split=0.1, verbose=2, callbacks=[checkpoint])
print("Training neural network costs:", time.time() - start)
I want to convert the above code to the functional API in Keras, since the Merge() function is not supported in the sequential API. I have been trying for a long time but keep getting errors. About the attributes:
ques_pairs contains the preprocessed data,
word2index contains the word count,
seq_maxlen contains the maximum length of question one or two.
I am trying to implement this model on the Quora Question Pairs dataset: https://www.kaggle.com/c/quora-question-pairs
I will give you a small example that you can apply to your own model:
from keras.layers import Input, Dense, Add
from keras.models import Model

input1 = Input(shape=(16,))
output1 = Dense(8, activation='relu')(input1)
output1 = Dense(4, activation='relu')(output1)  # add as many layers as you like, like this
input2 = Input(shape=(16,))
output2 = Dense(8, activation='relu')(input2)
output2 = Dense(4, activation='relu')(output2)  # add as many layers as you like, like this
output_full = Add()([output1, output2])
output_full = Dense(1, activation='sigmoid')(output_full)
model_full = Model(inputs=[input1, input2], outputs=output_full)
You need to define an Input for each of your model parts first, then add layers (as shown in the code) to both branches. Then you can combine them using the Add layer. Finally, you call Model with a list of the input layers and the output layer.
model_full can then be compiled and trained like any other model.
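For completeness, a hedged sketch of that last step (x1, x2, and y are hypothetical NumPy arrays shaped to match the two inputs):
model_full.compile(optimizer='adam',
                   loss='binary_crossentropy',  # matches the sigmoid output above
                   metrics=['accuracy'])

# a two-input model is trained on a list with one array per Input
model_full.fit([x1, x2], y, batch_size=32, epochs=3, validation_split=0.1)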
Are you trying to achieve something like the following?
import numpy as np
from keras.layers import *
from keras.models import Sequential, Model
vocab_size = 1000
seq_maxlen = 32
embedding_weights = np.zeros((vocab_size, 64))
print("Building model...")
ques1_enc = Sequential()
ques1_enc.add(Embedding(output_dim=64, input_dim=vocab_size, weights=[embedding_weights], mask_zero=True))
ques1_enc.add(LSTM(100, input_shape=(64, seq_maxlen), return_sequences=False))
ques1_enc.add(Dropout(0.3))
ques2_enc = Sequential()
ques2_enc.add(Embedding(output_dim=64, input_dim=vocab_size, weights=[embedding_weights], mask_zero=True))
ques2_enc.add(LSTM(100, input_shape=(64, seq_maxlen), return_sequences=False))
ques2_enc.add(Dropout(0.3))
merge = Concatenate(axis=1)([ques1_enc.output, ques2_enc.output])
output = Dense(2, activation="softmax")(merge)
model = Model([ques1_enc.input, ques2_enc.input], output)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()
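Training then works exactly as in your original fit call, passing one array per input (a sketch reusing the question's variable names and checkpoint callback):
model.fit([x_ques1train, x_ques2train], ytrain,
          batch_size=32, epochs=1,
          validation_split=0.1, verbose=2,
          callbacks=[checkpoint])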
I am trying to learn how VGG16 works. Below is my code, using VGG16 for a different classification task.
# Generate a model with all layers (with top)
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)
model_vgg16_conv.summary()
# create your own input format
input = Input(shape=(128,128,3),name = 'image_input')
# Use the generated model
output_vgg16_conv = model_vgg16_conv(input)
# Add the fully-connected layers
x = Flatten(name='flatten')(output_vgg16_conv)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = Dense(5, activation='softmax', name='predictions')(x)
#Create your own model
model = Model(inputs=input, outputs=x)
#In the summary, weights and layers from VGG part will be hidden, but they will be fit during the training
model.summary()
# Specify an optimizer to use
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
# Choose loss function, optimization method, and metrics (which results to display)
model.compile(
optimizer = adam,
loss='categorical_crossentropy',
metrics=['accuracy']
)
model.fit(X_train,y_train,epochs=10,batch_size=10,verbose=2)
# model.fit(X_train,y_train,epochs=30,batch_size=100,verbose=2)
result = model.predict(y_test) # same result
For some reason, using different epoch and batch sizes generates exactly the same result. Am I doing something wrong?