How to load a model with another pre-trained model as a parameter - Python

I have a pre-trained model, and I created a model based on one layer of the pre-trained model, as below:
def create_ds_Pretrained(pretrained, maxlen, vocab_size=None, segment_size=2):
    # vocab_size is unused in the body, so it defaults to None here;
    # this also matches the two-argument call further down.
    input_wtoken = layers.Input((maxlen,), dtype=tf.int64, name='input_ids')
    input_segment = layers.Input((maxlen,), dtype=tf.int64, name='segment_ids')
    input_seq = layers.Input((maxlen, 5), dtype=tf.float32, name='seq')

    Pretrained_output = pretrained(inputs=[input_wtoken, input_segment])
    combined_output = layers.Concatenate()([Pretrained_output, input_seq])
    normalized_output = layers.LayerNormalization(epsilon=1e-6)(combined_output)
    pooled_output = layers.GlobalMaxPooling1D()(normalized_output)
    regularization_layer = layers.LeakyReLU(alpha=0.1)(pooled_output)
    x1 = layers.Dense(1024, activation="relu")(regularization_layer)
    x1 = layers.Dense(1024, activation="relu")(x1)
    x1 = layers.Dense(512, activation="relu")(x1)
    output_1 = layers.Dense(1, activation="linear", name='output_1')(x1)

    model = keras.Model(
        inputs=[input_wtoken, input_segment, input_seq],
        outputs=[output_1])
    return model
I trained the model with the code below:
savepath_ds = 'downstream.h5'
savepath_ds_weights = savepath_ds.replace('.h5', '_weights.h5')
savepath_ds_checkpoint = savepath_ds_weights.replace('.h5', '_cp.h5')

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    savepath_ds_checkpoint, monitor="val_loss", verbose=0, mode="min",
    save_best_only=True, save_weights_only=True, save_freq="epoch")
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=config.patience, verbose=1, mode="min",
    restore_best_weights=True)

# Load pre-trained upstream model if it is not already in memory
try:
    pretrained_mdl
except NameError:
    savepath_pre = 'pretrained_mdl.h5'
    pretrained_mdl = tf.keras.models.load_model(savepath_pre)

with strategy.scope():
    pretrained_layer = tf.keras.Model(
        pretrained_mdl.input,
        pretrained_mdl.get_layer(
            "encoder_" + str(config.NUM_LAYERS - 1) + "/ffn_layernormalization").output)
    pretrained_layer.trainable = True
    ds_mdl = create_ds_Pretrained(pretrained_layer, config.maxlen)
    ds_mdl.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.00001, decay=1e-6),
        loss={"output_1": keras.losses.MeanSquaredError()},
        metrics={"output_1": tf.keras.metrics.RootMeanSquaredError()},
    )

ds_mdl.fit(
    ds_train,
    epochs=config.EPOCH,
    validation_data=ds_valid,
    callbacks=[checkpoint_cb, early_stop]
)
ds_mdl.save(savepath_ds)
ds_mdl.save_weights(savepath_ds_weights)
Now I want to load ds_mdl from the saved path:

loading_ds_mdl = tf.keras.models.load_model(savepath_ds)

but I get this error:

"unable to open object (component not found)"

I think I need to specify custom_objects, but I did not define a custom class or function for the pretrained layer. How can I load the model in this situation?
Thank you!
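A workaround that avoids deserializing the nested pretrained sub-model entirely is to rebuild the same architecture in code and restore only the weights. Below is a minimal sketch based on the snippets above; it assumes 'pretrained_mdl.h5' and the config object are still available:

# Rebuild the exact same graph, then load the weights-only file saved earlier.
pretrained_mdl = tf.keras.models.load_model('pretrained_mdl.h5')
pretrained_layer = tf.keras.Model(
    pretrained_mdl.input,
    pretrained_mdl.get_layer(
        "encoder_" + str(config.NUM_LAYERS - 1) + "/ffn_layernormalization").output)
loading_ds_mdl = create_ds_Pretrained(pretrained_layer, config.maxlen)
loading_ds_mdl.load_weights(savepath_ds_weights)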

Related

Getting Error "Can't pickle" while saving keras model

I built a model on top of the BERT pre-trained model; then I tried to save it using pickle and got this error:

Can't pickle <function LayerCall.call at 0x7f3f6a88a8c0>: it's not the same object as tensorflow.python.keras.saving.saved_model.save_impl.LayerCall.call

UPDATE:
I tried to save it with the model.save() method, but I got: IndexError: list index out of range
Here is my code to build the model:
# imports assumed by the snippet below
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

MAX_LENGTH = 35
INPUT_IDs = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids")
INPUT_MASK = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_mask")
embeddings = bert_model([INPUT_IDs, INPUT_MASK])[1]  # 0 is hidden state, 1 is pooler_output
out = tf.keras.layers.Dropout(0.1)(embeddings)
out = Dense(128, activation="relu")(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation="relu")(out)
y = Dense(1, activation='sigmoid')(out)

model = tf.keras.Model(inputs=[INPUT_IDs, INPUT_MASK], outputs=y)
model.layers[2].trainable = True

optimizer = Adam(learning_rate=5e-06,
                 epsilon=1e-08,
                 decay=0.01,
                 clipnorm=1.0)
# Note: from_logits=True is inconsistent with the sigmoid output above;
# the final layer already applies the activation.
loss = BinaryCrossentropy(from_logits=True)
metric = BinaryAccuracy('accuracy')
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)

train_history = model.fit(
    x={'input_ids': X_train["input_ids"], 'attention_mask': X_train["attention_mask"]},
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=24)
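Keras models built around custom layers like this generally cannot be pickled. Here is a sketch of two alternatives that usually sidestep the problem (the file names are illustrative, and it assumes bert_model is a Hugging Face transformer or similar custom layer):

# Option 1: TensorFlow SavedModel format (a directory, not .h5),
# which serializes custom layers as traced graphs.
model.save("bert_classifier_sm")
restored = tf.keras.models.load_model("bert_classifier_sm")

# Option 2: save only the weights, then rebuild the architecture in code
# and reload them; this avoids serializing the custom layer entirely.
model.save_weights("bert_classifier_weights.h5")
# ...later, after rebuilding `model` with the same code as above:
# model.load_weights("bert_classifier_weights.h5")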

Validation accuracy initially high then low

I am training a model for text sentiment classification with a CNN. The validation accuracy is initially higher than the training accuracy and then decreases. Is this behavior acceptable? If not, what could be the reason, and how do I address it?
My model:
class hyper():
    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob, hidden_dims, batch_size, num_epochs):
        # Model hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self, sequenceLength, max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words

m_hyper = hyper(embedding_dim=embed_dim, filter_sizes=(3, 4, 5, 6, 8), num_filters=80,
                dropout_prob=(0.2, 0.5), hidden_dims=50, batch_size=128, num_epochs=30)
pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
Model architecture:

def build_model(pr_hyper, m_hyper):
    # Convolutional block
    model_input = Input(shape=(pr_hyper.sequenceLength,))
    # initialize the embedding layer from a pre-computed matrix, frozen during training
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim, weights=[emb], trainable=False)(model_input)
    # x = SpatialDropout1D(m_hyper.dropout_prob[0])(x)
    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)
    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             # padding="same",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg
                             )(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)
    # merge the parallel convolutional branches
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]
    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)
    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                  metrics=["accuracy"])
    print(model.summary())
    tf.keras.utils.plot_model(model, show_shapes=True)  # , to_file='multichannel.png'
    return model
Initial epochs: (training/validation curves omitted; validation accuracy starts above training accuracy and then decreases.)
There are several reasons this can happen. For example, dropout layers are active during training but disabled during validation, so early in training the full network evaluated on the validation set can outperform the partially dropped network measured on the training batches. Other causes include a small or unrepresentative validation set, and the fact that training accuracy is averaged over the whole epoch while validation accuracy is computed at the epoch's end.
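To see the dropout effect in isolation, here is a small self-contained sketch (toy data, not the model above): the same network scores differently with dropout active versus disabled, which is exactly the gap between training-time and validation-time metrics.

import numpy as np
import tensorflow as tf

# Toy binary-classification data.
x = np.random.rand(256, 20).astype("float32")
y = (x.sum(axis=1) > 10).astype("float32")

mdl = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
mdl.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
mdl.fit(x, y, epochs=3, verbose=0)

# Same data, dropout on vs. off: the deterministic pass usually scores better,
# just as validation (training=False) can beat training-time metrics early on.
pred_train_mode = mdl(x, training=True)    # dropout active
pred_infer_mode = mdl(x, training=False)   # dropout disabled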

Tensorflow Prune Layer Not Supported

I am trying to prune a model in TensorFlow but am running into an error I don't know how to tackle:

ValueError: Please initialize "Prune" with a supported layer. Layers should either be a "PrunableLayer" instance, or should be supported by the PruneRegistry. You passed: <class 'base_transformer_tf.TransformerEncoder'>
The model is created as follows:

def transformer_encoder(num_columns, num_labels, num_layers, d_model, num_heads,
                        dff, window_size, dropout_rate, weight_decay,
                        label_smoothing, learning_rate):
    inp = tf.keras.layers.Input(shape=(window_size, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dense(d_model)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('swish')(x)
    x = tf.keras.layers.SpatialDropout1D(dropout_rate)(x)
    x = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(x)
    out = tf.keras.layers.Dense(num_labels, activation='sigmoid', dtype=tf.float32)(x[:, -1, :])
    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=tfa.optimizers.AdamW(weight_decay=weight_decay, learning_rate=learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=tf.keras.metrics.AUC(name='AUC'))
    return model
The pruning portion of the code is as follows:

pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.00,
                                                             final_sparsity=0.50,
                                                             begin_step=0,
                                                             end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
# Note: this recompile uses a different loss than the original model
# (SparseCategoricalCrossentropy vs. the binary loss above).
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])

logdir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(np.concatenate((X_tr2, X_val)), np.concatenate((y_tr2, y_val)),
                      batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                      callbacks=callbacks)

Any help would be appreciated.
TensorFlow does not know how to prune your custom TransformerEncoder Keras layer. You should specify which weights to sparsify, as in this example: Prune custom Keras layer or modify parts of layer to prune.
That would look like:

class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):

    def get_prunable_weights(self):
        return [self.my_weight, ...]
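A more complete sketch, assuming the encoder wraps Dense sublayers whose kernels you want to sparsify (the class and attribute names here are illustrative, not from the original code):

import tensorflow as tf
import tensorflow_model_optimization as tfmot

class PrunableDenseBlock(tf.keras.layers.Layer,
                         tfmot.sparsity.keras.PrunableLayer):
    """Toy custom layer that tells TF-MOT which weights may be pruned."""

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(units)

    def call(self, inputs):
        return self.dense(inputs)

    def get_prunable_weights(self):
        # Sparsify only the kernel; leave the bias dense.
        return [self.dense.kernel]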

I am getting 0% stack test accuracy for ensemble neural network where base learners are CNN and BiLSTM

I am performing binary classification of reviews according to polarity.
I have created an ensemble model in Keras using a CNN and a BiLSTM as base models and a neural network as the meta-learner, following a stacking approach. But the stacked ensemble accuracy comes out as 0%. What changes should I make to my code so it runs correctly? Where am I going wrong?
text_input_layer = Input(shape=(length_trainshuffle,))
embedding_layer = Embedding(vocab_size, 100)(text_input_layer)

text_layer_cnn = Conv1D(128, 5, activation='relu')(embedding_layer)
text_layer_cnn = GlobalMaxPooling1D()(text_layer_cnn)
text_layer_cnn = Dropout(0.2)(text_layer_cnn)
text_layer_cnn = Dense(5, kernel_initializer='glorot_uniform', activation='tanh')(text_layer_cnn)
output_layer_cnn = Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')(text_layer_cnn)
model_cnn = Model(text_input_layer, output_layer_cnn)

optimizer = Adamax(lr=0.001, decay=0.0001)
model_cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
filepath_cnn = "cnnmodel.best.hdf5"
checkpoint_cnn = ModelCheckpoint(filepath_cnn, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_cnn = [checkpoint_cnn]

# Fit the model
model_cnn.fit(trainX, array(trainlabelshuffle), epochs=10, batch_size=80,
              validation_data=(validateX, array(validatelabelshuffle)),
              callbacks=callbacks_list_cnn, verbose=1)
model_cnn.save(filepath_cnn)
print('>Saved %s' % filepath_cnn)

loss_cnn, acc_cnn = model_cnn.evaluate(testX, array(testlabelshuffle), verbose=0)
print('Test Accuracy CNN: %f' % (acc_cnn*100))
print('Loss CNN: %f' % (loss_cnn))

text_layer_bilstm = Bidirectional(CuDNNLSTM(256))(embedding_layer)
output_layer_bilstm = Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')(text_layer_bilstm)
model_bilstm = Model(text_input_layer, output_layer_bilstm)

optimizer_bilstm = Adamax(lr=0.001, decay=0.0001)
model_bilstm.compile(optimizer=optimizer_bilstm, loss='binary_crossentropy', metrics=['accuracy'])
filepath_bilstm = "bilstm_model.best.hdf5"
checkpoint_bilstm = ModelCheckpoint(filepath_bilstm, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_bilstm = [checkpoint_bilstm]

# Fit the model
model_bilstm.fit(trainX, array(trainlabelshuffle), epochs=10, batch_size=80,
                 validation_data=(validateX, array(validatelabelshuffle)),
                 callbacks=callbacks_list_bilstm, verbose=1)
model_bilstm.save(filepath_bilstm)
print('>Saved %s' % filepath_bilstm)

loss_bilstm, acc_bilstm = model_bilstm.evaluate(testX, array(testlabelshuffle), verbose=0)
print('Test Accuracy bilstm: %f' % (acc_bilstm*100))
print('Loss bilstm: %f' % (loss_bilstm))

all_models = list()
cnnmodel = load_model(filepath_cnn)
# add to list of members
all_models.append(cnnmodel)
print('>loaded %s' % filepath_cnn)
bilstmmodel = load_model(filepath_bilstm)
# add to list of members
all_models.append(bilstmmodel)
print('>loaded %s' % filepath_bilstm)
def define_stacked_model(all_models):
    # update all layers in all models to not be trainable
    for i in range(len(all_models)):
        model = all_models[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            # (newer Keras versions disallow assigning layer.name directly;
            # there, layer._name = ... is the usual workaround)
            layer.name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in all_models]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in all_models]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(10, activation='relu')(merge)
    # hidden = Flatten()(hidden)
    output = Dense(2, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def fit_stacked_model(model, inputX, inputy):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # encode output data
    inputy_enc = to_categorical(inputy)
    # fit model
    model.fit(X, inputy_enc, epochs=10, verbose=1)

# make a prediction with a stacked model
def predict_stacked_model(model, inputX):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # make prediction
    return model.predict(X, verbose=0)
stacked_model = define_stacked_model(all_models)
stacked_model.summary()
# fit stacked model on the validation dataset
fit_stacked_model(stacked_model,validateX,array(validatelabelshuffle))
#stacked_model.fit(X=testX,y=array(testlabelshuffle),epochs=10, verbose=1)
# make predictions and evaluate
yhat = predict_stacked_model(stacked_model, testX)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(array(testlabelshuffle), yhat)
print('Stacked Test Accuracy: %.3f' % acc)
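One quick sanity check (a sketch, assuming testlabelshuffle is meant to hold 0/1 integer labels): accuracy_score compares elements exactly, so if the ground-truth labels are strings or use a different encoding than the argmax indices, the reported accuracy collapses toward zero.

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.asarray(testlabelshuffle)
print(y_true.dtype, np.unique(y_true))    # expect integer labels {0, 1}

yhat = predict_stacked_model(stacked_model, testX)
yhat = np.argmax(yhat, axis=1)            # predicted class indices {0, 1}
print(np.unique(yhat))

# Encodings must match exactly for accuracy_score to be meaningful.
print(accuracy_score(y_true.astype(int), yhat))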

How to inform class weights when using `tensorflow.python.keras.estimator.model_to_estimator` to convert Keras Models to Estimator API?

I'm having some trouble converting a pure Keras model to the TensorFlow Estimator API on an unbalanced dataset.
When using the pure Keras API, the class_weight parameter is available in the model.fit method, but when converting a Keras model to a TensorFlow Estimator with tensorflow.python.keras.estimator.model_to_estimator there is no place to pass class_weights.
How can I overcome this?
I'm using TF 1.12 on Ubuntu 18, CUDA 9, cuDNN 7.
Pure Keras model:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')
        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])
        return model

model = keras_model(5)
model.fit(train_X, train_Y_onehot,
          batch_size=128,
          epochs=10,
          validation_data=(eval_X, eval_Y_onehot),
          class_weight=class_weights,
          verbose=1)
The Keras model converted to a TensorFlow Estimator:

def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')
        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])
        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
I've coded a workaround and it seems to work:
import tensorflow as tf
from tensorflow.python.keras import backend as K

def weighted_loss_fn(class_weights):
    # Build the weight lookup tensor once, outside the per-batch loss call.
    class_weights_tensor = K.constant(class_weights)

    def _loss_fn(y_true, y_pred):
        y_true_labels = K.argmax(y_true, axis=1)
        weights = K.gather(class_weights_tensor, y_true_labels)
        # Note: tf.losses.softmax_cross_entropy expects logits; y_pred here is
        # already softmaxed, so strictly the final Dense layer should have no
        # activation when using this loss.
        return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
    return _loss_fn

def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')
        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        loss_fn = weighted_loss_fn(class_weights)
        model.compile(loss=loss_fn,
                      optimizer=optz, metrics=['categorical_accuracy'])
        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

# class_weights must be passed through, otherwise it stays None.
estimator_model = keras_estimator_model(5, class_weights=class_weights)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
In my case, class_weights = [0.17041813, 42.00318471, 35.26470588, 29.70495495, 42.00318471, 44.55743243].
The class_weight param is used for weighting the loss function (during training only), so to get the same effect you have to alter your loss function. I did not find any existing TensorFlow loss function with class-weight support, only a feature request for cross-entropy loss with class weights, where a workaround like the one above is mentioned.
By the way, that issue was closed because it is possible in Keras :)
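For newer TF2 code (a sketch, independent of the Estimator API), the same effect can also be had by attaching per-example weights in a tf.data pipeline; model.fit consumes them as a third tuple element:

import tensorflow as tf

# Per-class weights (values here are rounded from the list above).
class_weights = tf.constant([0.17, 42.0, 35.26, 29.70, 42.0, 44.56])

def add_sample_weight(features, label_onehot):
    # Look up the weight for the example's true class.
    label_idx = tf.argmax(label_onehot, axis=-1)
    return features, label_onehot, tf.gather(class_weights, label_idx)

# ds yields (features, one-hot labels); weights ride along as the 3rd element:
# ds = ds.map(add_sample_weight).batch(128)
# model.fit(ds, epochs=10)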
