I built a model on top of a pre-trained BERT model. When I tried to save it using pickle, I got this error:
Can't pickle <function LayerCall.call at 0x7f3f6a88a8c0>: it's not the same object as tensorflow.python.keras.saving.saved_model.save_impl.LayerCall.call
UPDATE:
I tried to save it with model.save() method but I got: IndexError: list index out of range
Here is my Code to build the model:
# Build, compile and train a binary classifier on top of the pooled BERT output.
MAX_LENGTH = 35
INPUT_IDs = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids")
INPUT_MASK = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_mask")
embeddings = bert_model([INPUT_IDs, INPUT_MASK])[1]  # 0 is hidden state, 1 is pooler_output
out = tf.keras.layers.Dropout(0.1)(embeddings)
out = Dense(128, activation="relu")(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation="relu")(out)
y = Dense(1, activation='sigmoid')(out)
model = tf.keras.Model(inputs=[INPUT_IDs, INPUT_MASK], outputs=y)
# NOTE(review): index 2 is presumably the BERT layer (after the two Input
# layers) -- confirm against model.summary().
model.layers[2].trainable = True

optimizer = Adam(
    learning_rate=5e-06,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)
# The output layer already applies a sigmoid, so the loss receives
# probabilities. from_logits=True would treat them as raw logits and apply a
# second sigmoid inside the loss.
loss = BinaryCrossentropy(from_logits=False)
metric = BinaryAccuracy('accuracy')
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)

train_history = model.fit(
    x={'input_ids': X_train["input_ids"], 'attention_mask': X_train["attention_mask"]},
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=24)
Related
I got the following error: RuntimeError: You must compile your model before training/testing. Use model.compile(optimizer, loss). However, I have already compiled my model, so I don't understand what the problem is.
`vgg = VGG16(weights='imagenet',include_top=False,input_shape=(224,224,3))
for layer in vgg.layers:
layer.trainable = False #making all the layers non-trainable
x = Flatten()(vgg.output) #flattening out the last layer
predictions = Dense(2,activation='sigmoid')(x) #Dense layer to predict wether there is pneumonia or not
model = Model(inputs=vgg.input, outputs=predictions)
early_stopping_callbacks = tensorflow.keras.callbacks.EarlyStopping(patience = 15,
restore_best_weights = True,
verbose = 1)
base_model1 = VGG16(include_top = False, weights = "imagenet", input_shape = (224, 224, 3), pooling = "max",
classes = 2)
#base_model1.load_weights("../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5")
base_model1.summary()
model2 = Sequential()
model2.add(base_model1)
model2.add(Flatten())
model2.add(Dense(128, activation = "relu"))
model2.add(Dense(64, activation = "relu"))
model2.add(Dense(32, activation = "relu"))
model2.add(Dense(1, activation = "sigmoid"))
# freeze the layers
for layer in base_model1.layers:
layer.trainable = False
model2.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
history = model2.fit_generator(train_generator, epochs = EPOCH, validation_data = val_generator, steps_per_epoch = 10,
callbacks = [early_stopping_callbacks])
test_loss, test_accuracy = base_model1.evaluate(test_generator, steps = 50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss * 100, "%")`
base_model1 is my vgg16 model and test_generator is my test set
I have a pre-trained model, and I created a new model based on one layer of the pre-trained model, as shown below:
def create_ds_Pretrained(pretrained, maxlen, vocab_size=None, segment_size=2):
    """Build the downstream regression model on top of a pre-trained encoder.

    The pre-trained model is applied to the token and segment inputs; its
    output is concatenated with the extra ``seq`` features, layer-normalized,
    max-pooled, passed through a LeakyReLU and a three-layer dense head, and
    finally projected to a single linear unit named ``output_1``.

    Args:
        pretrained: Keras model/layer called as
            ``pretrained(inputs=[input_ids, segment_ids])``.
        maxlen: Length of the first axis of every input.
        vocab_size: Not used by this function; optional so callers may omit it.
        segment_size: Not used by this function.

    Returns:
        An uncompiled ``keras.Model`` with inputs ``input_ids``,
        ``segment_ids`` and ``seq`` and the single output ``output_1``.
    """
    input_wtoken = layers.Input((maxlen,), dtype=tf.int64, name='input_ids')
    input_segment = layers.Input((maxlen,), dtype=tf.int64, name='segment_ids')
    input_seq = layers.Input((maxlen, 5), dtype=tf.float32, name='seq')

    # NOTE(review): assumes the pre-trained output is (batch, maxlen, features)
    # so it can be concatenated with input_seq -- confirm.
    pretrained_output = pretrained(inputs=[input_wtoken, input_segment])
    combined_output = layers.Concatenate()([pretrained_output, input_seq])
    normalized_output = layers.LayerNormalization(epsilon=1e-6)(combined_output)
    pooled_output = layers.GlobalMaxPooling1D()(normalized_output)
    activated_output = layers.LeakyReLU(alpha=0.1)(pooled_output)

    x1 = layers.Dense(1024, activation="relu")(activated_output)
    x1 = layers.Dense(1024, activation="relu")(x1)
    x1 = layers.Dense(512, activation="relu")(x1)
    output_1 = layers.Dense(1, activation="linear", name='output_1')(x1)

    model = keras.Model(
        inputs=[input_wtoken, input_segment, input_seq],
        outputs=[output_1])
    return model
I trained the model with the code below:
# Output paths: full model, final weights, and best-epoch weights checkpoint.
savepath_ds = 'downstream.h5'
savepath_ds_weights = savepath_ds.replace('.h5', '_weights.h5')
savepath_ds_checkpoint = savepath_ds_weights.replace('.h5', '_cp.h5')
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(savepath_ds_checkpoint, monitor="val_loss", verbose=0, mode="min", save_best_only=True, save_weights_only=True, save_freq="epoch" )
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=config.patience, verbose=1, mode="min", restore_best_weights=True)
# Load pre-trained upstream model
try:
    # Reuse the model if it is already defined in this session.
    pretrained_mdl = pretrained_mdl
except NameError:
    # Only an undefined name should trigger the reload; any other error
    # must surface instead of being swallowed.
    savepath_pre = 'pretrained_mdl.h5'
    pretrained_mdl = tf.keras.models.load_model(savepath_pre)
with strategy.scope():
    # Sub-model that ends at the last encoder block's layer-normalized output.
    pretrained_layer = tf.keras.Model(pretrained_mdl.input, pretrained_mdl.get_layer("encoder_"+str(config.NUM_LAYERS-1)+"/ffn_layernormalization").output)
    pretrained_layer.trainable = True
    # NOTE(review): only two positional arguments are passed here; verify
    # this against the signature of create_ds_Pretrained.
    ds_mdl = create_ds_Pretrained(pretrained_layer, config.maxlen)
    ds_mdl.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.00001, decay=1e-6),
        loss={
            "output_1": keras.losses.MeanSquaredError()
        },
        metrics={
            "output_1": tf.keras.metrics.RootMeanSquaredError(),
        }
    )
ds_mdl.fit(
    ds_train,
    epochs=config.EPOCH,
    validation_data=ds_valid,
    callbacks=[checkpoint_cb, early_stop]
)
# Persist both the full model and a weights-only file.
ds_mdl.save(savepath_ds)
ds_mdl.save_weights(savepath_ds_weights)
Now, I want to load the ds_mdl from the saved path..
# Reload the complete model from the path it was written to with ds_mdl.save().
loading_ds_mdl = tf.keras.models.load_model(savepath_ds)
I got error:
"unable to open object (component not found)"
I think I need to specify custom_objects, but I did not declare a function for the pretrained layer. How do I load the model in this situation?
Thank you!
I'm training a Keras model for token classification with an ELMO layer. I will need to save the model for future use, I've tried with model.save_weights("model_weights.h5"),
but then if I load them into a new model that I build, and then I call model.predict(...), I get results as if the model has never been trained. It looks like the configurations are not saved properly.
I am new to Keras and TensorFlow 1, and I'm not sure if this is the way to do it. Any help is welcome! I'm obviously missing something here, but I couldn't find sufficient information on saving models with an ELMo layer.
I am defining the model like this :
def ElmoEmbedding(x):
    """Run the module-level ``elmo_model`` on ``x`` and return its "elmo" output.

    ``x`` is cast to string and squeezed before being fed as tokens. The
    sequence lengths are a constant of ``batch_size`` entries, each equal to
    ``max_len`` (both read from module scope).
    """
    tokens = tf.squeeze(tf.cast(x, tf.string))
    sequence_lengths = tf.constant(batch_size*[max_len])
    elmo_outputs = elmo_model(
        inputs={"tokens": tokens, "sequence_len": sequence_lengths},
        signature="tokens",
        as_dict=True,
    )
    return elmo_outputs["elmo"]
def build_model(max_len, n_words, n_tags):
    """Assemble the two-input tagging network (uncompiled).

    One branch passes the ``(max_len, 40)`` feature input through a softmax
    Dense layer; the other maps the string input through ``ElmoEmbedding``.
    The branches are concatenated, batch-normalized, fed to a bidirectional
    LSTM and finally to a time-distributed softmax over ``n_tags``.
    ``n_words`` is accepted but not used.

    Returns:
        A ``Model`` taking ``[string_input, feature_input]``.
    """
    features_in = Input(shape=(max_len, 40, ))
    tokens_in = Input(shape=(max_len,), dtype=tf.string)

    features_branch = Dense(n_tags, activation = 'softmax')(features_in)
    elmo_branch = Lambda(ElmoEmbedding, output_shape=(1, 1024))(tokens_in)

    hidden = Concatenate()([features_branch, elmo_branch])
    hidden = BatchNormalization()(hidden)

    recurrent_cell = LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2)
    hidden = Bidirectional(recurrent_cell)(hidden)

    tag_classifier = Dense(n_tags, activation='softmax')
    tag_scores = TimeDistributed(tag_classifier)(hidden)

    return Model([tokens_in, features_in], tag_scores)
And then I run the training like this:
# TF1-style setup: graph mode, ELMo hub module, and a session shared with Keras.
tf.disable_eager_execution()
elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=False)
sess = tf.Session()
K.set_session(sess)
sess.run([tf.global_variables_initializer(), tf.tables_initializer()])

model = build_model(max_len, n_words, n_tags)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Each input pair is [token strings, features reshaped to (n, max_len, 40)].
train_inputs = [
    np.array(X1_train),
    np.array(X2_train).reshape((len(X2_train), max_len, 40)),
]
valid_inputs = [
    np.array(X1_valid),
    np.array(X2_valid).reshape((len(X2_valid), max_len, 40)),
]
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(valid_inputs, y_valid),
    batch_size=batch_size,
    epochs=5,
    verbose=1,
)
model.save_weights("model_weights.h5")
If I try to load the weights in another session like the following, I get zero accuracy:
# Rebuild the same graph in a fresh session, then restore the trained weights.
tf.disable_eager_execution()
elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=False)
sess = tf.Session()
K.set_session(sess)
sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
model = build_model(max_len, n_words, n_tags)
model.load_weights("model_weights.h5")
# ElmoEmbedding feeds a constant of exactly `batch_size` sequence lengths, so
# prediction must use the same batch size as training rather than Keras'
# default. The token input is wrapped in np.array, as it is for training.
# NOTE(review): a final partial batch would still not match that constant --
# confirm len(X1_test) is a multiple of batch_size.
y_pred = model.predict(
    [np.array(X1_test), np.array(X2_test).reshape((len(X2_test), max_len, 40))],
    batch_size=batch_size)
I am performing binary classification for classifying the reviews according to polarity.
I have created an ensemble model in Keras using a CNN and a BiLSTM as base models and a neural network as the meta-learner. I am using stacking, but the stacked ensemble's accuracy is 0%. What changes should I make so that my code runs correctly? Where am I going wrong?
# Train two base classifiers (CNN and BiLSTM) on the same input, save each to
# disk, then reload both as members of the ensemble.
text_input_layer = Input(shape=(length_trainshuffle,))
embedding_layer = Embedding(vocab_size, 100)(text_input_layer)
# --- CNN branch ---
text_layer_cnn = Conv1D(128, 5, activation='relu')(embedding_layer)
text_layer_cnn = GlobalMaxPooling1D()(text_layer_cnn)
text_layer_cnn = Dropout(0.2)(text_layer_cnn)
text_layer_cnn = Dense(5,kernel_initializer='glorot_uniform', activation='tanh')(text_layer_cnn)
output_layer_cnn = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_cnn)
model_cnn = Model(text_input_layer, output_layer_cnn)
optimizer = Adamax(lr=0.001,decay=0.0001)
model_cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
filepath_cnn="cnnmodel.best.hdf5"
# NOTE(review): the metric is registered as 'accuracy'; depending on the Keras
# version the logged key may be 'val_accuracy' rather than 'val_acc' -- confirm.
checkpoint_cnn = ModelCheckpoint(filepath_cnn, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_cnn = [checkpoint_cnn]
# Fit the model
model_cnn.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_cnn, verbose=1)
# NOTE(review): this writes the final-epoch model to the same path the
# checkpoint callback uses, replacing any best-epoch file saved there.
model_cnn.save(filepath_cnn)
print('>Saved %s' % filepath_cnn)
loss_cnn, acc_cnn = model_cnn.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy CNN: %f' % (acc_cnn*100))
print('Loss CNN: %f' %(loss_cnn))
# --- BiLSTM branch: reuses text_input_layer and embedding_layer from above,
# so both models share the same input tensor and Embedding layer. ---
text_layer_bilstm = Bidirectional(CuDNNLSTM(256))(embedding_layer)
output_layer_bilstm = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_bilstm)
model_bilstm = Model(text_input_layer, output_layer_bilstm)
optimizer_bilstm = Adamax(lr=0.001,decay=0.0001)
model_bilstm.compile(optimizer=optimizer_bilstm, loss='binary_crossentropy', metrics=['accuracy'])
filepath_bilstm="bilstm_model.best.hdf5"
checkpoint_bilstm = ModelCheckpoint(filepath_bilstm, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_bilstm = [checkpoint_bilstm]
# Fit the model
model_bilstm.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_bilstm, verbose=1)
# NOTE(review): same path as the checkpoint callback above; see the CNN note.
model_bilstm.save(filepath_bilstm)
print('>Saved %s' % filepath_bilstm)
loss_bilstm, acc_bilstm = model_bilstm.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy bilstm: %f' % (acc_bilstm*100))
print('Loss bilstm: %f' %(loss_bilstm))
# Reload both saved models from disk as the ensemble members.
all_models = list()
cnnmodel = load_model(filepath_cnn)
# add to list of members
all_models.append(cnnmodel)
print('>loaded %s' % filepath_cnn)
bilstmmodel = load_model(filepath_bilstm)
# add to list of members
all_models.append(bilstmmodel)
print('>loaded %s' % filepath_bilstm)
def define_stacked_model(all_models):
    """Combine the member models into one multi-input stacked model.

    Every layer of every member is frozen and renamed with an
    ``ensemble_<k>_`` prefix (k starting at 1). The member outputs are
    concatenated and fed to a 10-unit ReLU layer followed by a 2-unit
    softmax. The graph is plotted to ``model_graph.png`` and the model is
    compiled before being returned.
    """
    for position, member in enumerate(all_models, start=1):
        prefix = 'ensemble_' + str(position) + '_'
        for member_layer in member.layers:
            # Members stay fixed; only the new head is trained.
            member_layer.trainable = False
            # A unique prefix avoids clashes between layer names of members.
            member_layer.name = prefix + member_layer.name

    # One input head per member model.
    ensemble_visible = [member.input for member in all_models]
    merged = concatenate([member.output for member in all_models])

    hidden = Dense(10, activation='relu')(merged)
    head = Dense(2, activation='softmax')(hidden)
    stacked = Model(inputs=ensemble_visible, outputs=head)

    plot_model(stacked, show_shapes=True, to_file='model_graph.png')
    stacked.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return stacked
def fit_stacked_model(model, inputX, inputy):
    """Train the stacked model for 10 epochs.

    ``inputX`` is supplied once per model input, and ``inputy`` is one-hot
    encoded with ``to_categorical`` before fitting.
    """
    head_count = len(model.input)
    replicated_inputs = [inputX] * head_count
    one_hot_targets = to_categorical(inputy)
    model.fit(replicated_inputs, one_hot_targets, epochs=10, verbose=1)
# Predict with the stacked model, feeding the same data to every input head.
def predict_stacked_model(model, inputX):
    """Return ``model.predict`` on ``inputX`` repeated once per model input."""
    replicated_inputs = [inputX] * len(model.input)
    return model.predict(replicated_inputs, verbose=0)
# Build the stacked ensemble from the loaded members and print its layout.
stacked_model = define_stacked_model(all_models)
stacked_model.summary()

# Fit the stacked model on the validation split.
fit_stacked_model(stacked_model, validateX, array(validatelabelshuffle))
#stacked_model.fit(X=testX,y=array(testlabelshuffle),epochs=10, verbose=1)

# Predict on the test split; the class is the index of the largest output.
yhat = argmax(predict_stacked_model(stacked_model, testX), axis=1)
acc = accuracy_score(array(testlabelshuffle), yhat)
print('Stacked Test Accuracy: %.3f' % acc)
I'm having some trouble converting a pure Keras model to the TensorFlow Estimator API on an unbalanced dataset.
When using pure Keras API, the class_weight parameter is available at model.fit method, but when converting a Keras model to TensorFlow Estimator with tensorflow.python.keras.estimator.model_to_estimator there is no place to inform class_weights.
How can I overcome this?
I'm using TF 1.12 on Ubuntu 18, Cuda 9, Cudnn 7
Pure Keras model:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
    """Build and compile the raw-text recurrent classifier.

    The ``word_raw`` input is embedded, passed through dropout, a
    bidirectional LSTM, a bidirectional GRU and a 16-unit ReLU layer, and
    ends in an ``n_classes``-way softmax. ``model_dir`` and ``config`` are
    accepted but not used. Sizes such as ``max_len`` and ``EMBED_SIZE`` are
    read from module scope.

    Returns:
        The compiled Keras ``Model``.
    """
    with tf.device('/gpu:0'):
        raw_input = Input(shape=(max_len,), name='word_raw')

        # Raw-text recurrent stack.
        embedding = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')
        net = embedding(raw_input)
        net = Dropout(rate=dropout_rate)(net)
        net = Bidirectional(LSTM(48, return_sequences=True))(net)
        net = Bidirectional(GRU(48, return_sequences=False))(net)
        net = Dense(16, activation='relu')(net)
        class_probabilities = Dense(n_classes, activation='softmax')(net)

        network = Model(inputs=raw_input, outputs=class_probabilities)
        rmsprop = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        network.compile(
            loss='categorical_crossentropy',
            optimizer=rmsprop,
            metrics=['categorical_accuracy'])
    return network
# Train the 5-class Keras model, weighting the loss per class.
model = keras_model(n_classes=5)
model.fit(
    train_X,
    train_Y_onehot,
    validation_data=(eval_X, eval_Y_onehot),
    class_weight=class_weights,
    epochs=10,
    batch_size=128,
    verbose=1,
)
Keras model to TensorFlow Estimator:
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
    """Build the raw-text classifier and wrap it as a TensorFlow Estimator.

    The network is: ``word_raw`` input, embedding, dropout, bidirectional
    LSTM, bidirectional GRU, 16-unit ReLU layer, ``n_classes``-way softmax.
    It is compiled with categorical cross-entropy and RMSprop, then
    converted with ``model_to_estimator`` using ``model_dir`` and ``config``.

    Returns:
        The estimator produced by ``model_to_estimator``.
    """
    with tf.device('/gpu:0'):
        raw_input = Input(shape=(max_len,), name='word_raw')

        # Raw-text recurrent stack.
        embedding = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')
        net = embedding(raw_input)
        net = Dropout(rate=dropout_rate)(net)
        net = Bidirectional(LSTM(48, return_sequences=True))(net)
        net = Bidirectional(GRU(48, return_sequences=False))(net)
        net = Dense(16, activation='relu')(net)
        class_probabilities = Dense(n_classes, activation='softmax')(net)

        network = Model(inputs=raw_input, outputs=class_probabilities)
        rmsprop = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        network.compile(
            loss='categorical_crossentropy',
            optimizer=rmsprop,
            metrics=['categorical_accuracy'])

        estimator = model_to_estimator(
            keras_model=network, model_dir=model_dir, config=config)
    return estimator
# Train for 10 steps and evaluate the 5-class estimator.
estimator_model = keras_estimator_model(n_classes=5)
train_spec = tf.estimator.TrainSpec(
    input_fn=train_input_fn,
    max_steps=10,
)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10,
)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
I've coded a workaround and it seems to work.
import tensorflow as tf
from tensorflow.python.keras import backend as K
def weighted_loss_fn(class_weights):
    """Return a loss that weights softmax cross-entropy by the true class.

    Args:
        class_weights: Per-class weights, indexed by class id.

    Returns:
        A function ``(y_true, y_pred) -> loss`` for ``model.compile``.
        ``y_true`` is expected to be one-hot; ``y_pred`` is passed to
        ``tf.losses.softmax_cross_entropy`` as logits.

    Raises:
        ValueError: If ``class_weights`` is None.
    """
    if class_weights is None:
        # Fail here with a clear message instead of later, when the loss is built.
        raise ValueError("class_weights must be provided")

    def _loss_fn(y_true, y_pred):
        # The weights are fixed, so a constant is enough. It is created here
        # so that it is built together with the other loss ops.
        class_weights_tensor = K.constant(class_weights)
        # Look up one weight per example from its true class index.
        y_true_labels = K.argmax(y_true, axis=1)
        weights = K.gather(class_weights_tensor, y_true_labels)
        # NOTE(review): y_pred is treated as logits here -- confirm that the
        # model's output layer does not already apply a softmax.
        return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
    return _loss_fn
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
    """Build the raw-text classifier with an optional class-weighted loss.

    The network is: ``word_raw`` input, embedding, dropout, bidirectional
    LSTM, bidirectional GRU, 16-unit ReLU layer, ``n_classes``-way softmax.

    Args:
        n_classes: Number of output classes.
        model_dir: Directory passed to ``model_to_estimator``.
        config: Run configuration passed to ``model_to_estimator``.
        class_weights: Per-class loss weights. When None, the plain
            ``'categorical_crossentropy'`` loss is used.

    Returns:
        The estimator produced by ``model_to_estimator``.
    """
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')
        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)
        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        # Without weights there is nothing to gather per class, so use the
        # unweighted loss rather than building the weighted one from None.
        if class_weights is None:
            loss_fn = 'categorical_crossentropy'
        else:
            loss_fn = weighted_loss_fn(class_weights)
        model.compile(loss=loss_fn,
                      optimizer=optz, metrics=['categorical_accuracy'])
        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
    return model_estimator
# Pass the class weights explicitly: keras_estimator_model defaults
# class_weights to None, so omitting them leaves the weighted loss without
# any weights to apply.
estimator_model = keras_estimator_model(5, class_weights=class_weights)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
In my case class_weights = [ 0.17041813 42.00318471 35.26470588 29.70495495 42.00318471 44.55743243]
The class_weight parameter is used for weighting the loss function (during training only). So, to get the same effect, you have to alter your loss function. I did not find any existing TensorFlow loss function that supports this, only a feature request for a cross-entropy loss with class weights. There is a workaround mentioned:
Here
By the way, the issue was closed, because it is possible in Keras :)