I'm training a Keras model for token classification with an ELMo layer. I need to save the model for future use, so I tried model.save_weights("model_weights.h5"); but if I then load the weights into a newly built model and call model.predict(...), I get results as if the model had never been trained. It looks like the configuration is not saved properly.
I am new to Keras and TensorFlow 1, and I'm not sure this is the right way to do it. Any help is welcome! I'm obviously missing something here, but I couldn't find sufficient information on saving models with an ELMo layer.
I define the model like this:
def ElmoEmbedding(x):
    return elmo_model(inputs={"tokens": tf.squeeze(tf.cast(x, tf.string)),
                              "sequence_len": tf.constant(batch_size * [max_len])},
                      signature="tokens",
                      as_dict=True)["elmo"]
def build_model(max_len, n_words, n_tags):
    word_input_layer = Input(shape=(max_len, 40,))
    elmo_input_layer = Input(shape=(max_len,), dtype=tf.string)
    word_output_layer = Dense(n_tags, activation='softmax')(word_input_layer)
    elmo_output_layer = Lambda(ElmoEmbedding, output_shape=(1, 1024))(elmo_input_layer)
    output_layer = Concatenate()([word_output_layer, elmo_output_layer])
    output_layer = BatchNormalization()(output_layer)
    output_layer = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(output_layer)
    output_layer = TimeDistributed(Dense(n_tags, activation='softmax'))(output_layer)
    model = Model([elmo_input_layer, word_input_layer], output_layer)
    return model
And then I run the training like this:
tf.disable_eager_execution()
elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=False)
sess = tf.Session()
K.set_session(sess)
sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
model = build_model(max_len, n_words, n_tags)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
history = model.fit([np.array(X1_train), np.array(X2_train).reshape((len(X2_train), max_len, 40))],
                    y_train,
                    validation_data=([np.array(X1_valid), np.array(X2_valid).reshape((len(X2_valid), max_len, 40))], y_valid),
                    batch_size=batch_size, epochs=5, verbose=1)
model.save_weights("model_weights.h5")
If I try to load the weights in another session like the following, I get zero accuracy:
tf.disable_eager_execution()
elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=False)
sess = tf.Session()
K.set_session(sess)
sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
model = build_model(max_len, n_words, n_tags)
model.load_weights("model_weights.h5")
y_pred = model.predict([X1_test, np.array(X2_test).reshape((len(X2_test), max_len, 40))])
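For what it's worth, here is a minimal sanity check I could run (a sketch reusing the variables defined above) to narrow down whether the problem is in saving/loading the weights or somewhere else:

# Sanity check: do the reloaded weights match the trained ones?
trained_weights = model.get_weights()
model.save_weights("model_weights.h5")

new_model = build_model(max_len, n_words, n_tags)
new_model.load_weights("model_weights.h5")
reloaded_weights = new_model.get_weights()

for w_trained, w_reloaded in zip(trained_weights, reloaded_weights):
    print(np.allclose(w_trained, w_reloaded))
# All True  -> the HDF5 file is fine and the issue is in how the new
#              session/graph is set up before predict()
# Any False -> the weights themselves are not round-tripping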
I'm trying to implement a bespoke loss calculation model but keep getting recognition failures, e.g.: "ValueError: Unknown loss function: root_mean_squared_error_fraction. Please ensure this object is passed to the custom_objects argument."
The function root_mean_squared_error_fraction exists and works; I know this because I call it elsewhere outside of the Keras model and it behaves as expected. I'm clearly not understanding something about injecting it into the model definition and would appreciate any advice. Thanks.
from keras.models import load_model
from keras import backend as K

def root_mean_squared_error_fraction(y_true, y_pred):
    return K.sqrt(K.mean(K.square((y_pred - y_true) / y_true)))
This is my model routine, which does work when loss=rmsef in model.compile is replaced with loss='mse':
def ResModel(Svect, Xvect, Yvect, dtrain):
    model = Sequential()
    model.add(LSTM(64, activation='relu',
                   input_shape=(Xvect.shape[1], Xvect.shape[2]),
                   return_sequences=True))
    model.add(LSTM(32, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(Yvect.shape[1]))

    rmsef = root_mean_squared_error_fraction
    model.compile(optimizer="adam", loss=rmsef)

    bmfile = 'bestmodel.h5'
    earlystop = EarlyStopping(monitor='val_loss', mode='auto',
                              verbose=0, patience=mpat, min_delta=0.005)
    chkpoint = ModelCheckpoint(bmfile, monitor='val_loss', mode='auto',
                               save_best_only=True)
    history = model.fit(Xvect, Yvect, epochs=mcycl, batch_size=32,
                        validation_split=dsplit, verbose=0,
                        callbacks=[earlystop, chkpoint])

    saved = load_model('bestmodel.h5')
    score = saved.evaluate(Xvect, Yvect, verbose=0)
    print("Epoch: %04d yielded best fit with overall loss of: %0.4f"
          % ((earlystop.stopped_epoch + 1), score))

    Yvect = descalevector(Svect, saved.predict(Xvect), dtrain)
    return Yvect, score
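The error message itself points at the missing piece: a custom loss is not stored inside the HDF5 file, so load_model has to be told about it through custom_objects. A minimal sketch using the function defined above (only the load_model call changes):

saved = load_model('bestmodel.h5',
                   custom_objects={'root_mean_squared_error_fraction':
                                   root_mean_squared_error_fraction})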
I built a model with a pre-trained BERT model, then I tried to save it using pickle and got this error:
Can't pickle <function LayerCall.call at 0x7f3f6a88a8c0>: it's not the same object as tensorflow.python.keras.saving.saved_model.save_impl.LayerCall.call
UPDATE:
I tried to save it with the model.save() method, but I got: IndexError: list index out of range
Here is my code to build the model:
MAX_LENGTH = 35
INPUT_IDs = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids")
INPUT_MASK = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_mask")
embeddings = bert_model([INPUT_IDs, INPUT_MASK])[1] # 0 is hidden state, 1 is pooler_output
out = tf.keras.layers.Dropout(0.1)(embeddings)
out = Dense(128, activation="relu")(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation="relu")(out)
y = Dense(1, activation='sigmoid')(out)
model = tf.keras.Model(inputs=[INPUT_IDs, INPUT_MASK], outputs=y)
model.layers[2].trainable = True
optimizer = Adam(learning_rate=5e-06,
                 epsilon=1e-08,
                 decay=0.01,
                 clipnorm=1.0)
loss = BinaryCrossentropy(from_logits=True)
metric = BinaryAccuracy('accuracy')
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)

train_history = model.fit(
    x={'input_ids': X_train["input_ids"], 'attention_mask': X_train["attention_mask"]},
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=24)
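A common workaround when both pickling and model.save() fail for a model that wraps a pre-trained BERT layer is to save only the weights and rebuild the architecture in code before loading them back. A minimal sketch under that assumption (build_model() is a hypothetical helper that wraps the model-construction code above, including creating bert_model):

# after training
model.save_weights("bert_classifier_weights.h5")

# later, e.g. in a fresh process: rebuild the same architecture, then load the weights
restored = build_model()
restored.load_weights("bert_classifier_weights.h5")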
I have some problems with Keras Tuner and a TPU. When I run the code below, everything works well and network training is fast.
vocab_size = 5000
embedding_dim = 64
max_length = 2000
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.LSTM(100, dropout=0.5, recurrent_dropout=0.5),
        tf.keras.layers.Dense(embedding_dim, activation='relu'),
        tf.keras.layers.Dense(4, activation='softmax')
    ])
    return model
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
with strategy.scope():
    model = create_model()
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['sparse_categorical_accuracy'])

model.fit(train_padded, y_train,
          epochs=10,
          validation_split=0.15,
          verbose=1, batch_size=128)
When I use Keras Tuner, the network trains slowly; I believe the TPU is not being used.
vocab_size = 5000
max_length = 2000
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
def build_model(hp):
    model = tf.keras.Sequential()
    activation_choice = hp.Choice('activation', values=['relu', 'sigmoid', 'tanh', 'elu', 'selu'])
    embedding_dim = hp.Int('units_hidden', min_value=128, max_value=24, step=8)
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
    model.add(tf.keras.layers.LSTM(hp.Int('LSTM_Units', min_value=50, max_value=500, step=10),
                                   dropout=hp.Float('dropout', 0, 0.5, step=0.1, default=0),
                                   recurrent_dropout=hp.Float('recurrent_dropout', 0, 0.5, step=0.1, default=0)))
    model.add(tf.keras.layers.Dense(embedding_dim, activation=activation_choice))
    model.add(tf.keras.layers.Dense(4, activation='softmax'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop', 'SGD']),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])
    return model
with strategy.scope():
    tuner = Hyperband(
        build_model,
        objective='val_accuracy',
        max_epochs=10,
        hyperband_iterations=2)

tuner.search(train_padded, y_train,
             batch_size=128,
             epochs=10,
             callbacks=[EarlyStopping(patience=1)],
             validation_split=0.15,
             verbose=1)

best_models = tuner.get_best_models(1)
best_model.save('/content/drive/My Drive/best_model.h5')
Notebook link
How can I make Keras Tuner work with a TPU?
You need to pass the strategy to the tuner:
tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=10,
    hyperband_iterations=2,
    distribution_strategy=strategy)
(and remove the strategy.scope() part)
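One more detail worth noting (an addition, not part of the original answer): tuner.get_best_models() returns a list, so the saving step from the question needs an index, for example:

best_model = tuner.get_best_models(num_models=1)[0]
best_model.save('/content/drive/My Drive/best_model.h5')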
To add ...
I don't use Google Colab, but Kaggle. Using a TPU, I get the same error, "File system scheme '[local]' not implemented", when the tuner tries to write checkpoints to Kaggle's working directory.
Since I don't have a gs:// location, I "modified" the function Keras Tuner calls to save checkpoints so that it can write to a local directory, i.e. the Kaggle working directory. I used patch() to mock the function.
The first important thing is that Keras Tuner must be version 1.1.2 or above.
Example:
from mock import patch

<your code>

# now the new function to "replace" the existing one
# (keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end)
def new_on_epoch_end(self, epoch, logs=None):
    if not self.objective.has_value(logs):
        # Save on every epoch if metric value is not in the logs. Either no
        # objective is specified, or objective is computed and returned
        # after `fit()`.
        # ***** the following are the lines I added *****
        # Save the model in TensorFlow's "SavedModel" format
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        # I then added ', options=save_locally' to the line below.
        # ***********************************************
        self.model.save_weights(self.filepath, options=save_locally)
        return
    current_value = self.objective.get_value(logs)
    if self.objective.better_than(current_value, self.best_value):
        self.best_value = current_value
        # ***** the following are the lines I added *****
        # Save the model in TensorFlow's "SavedModel" format
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        # I then added ', options=save_locally' to the line below.
        # ***********************************************
        self.model.save_weights(self.filepath, options=save_locally)

with patch('keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end', new_on_epoch_end):
    # Perform hypertuning. The parameters are exactly like those in the fit() method.
    tuner.search(
        X_train,
        y_train,
        epochs=num_of_epochs,
        validation_data=(X_valid, y_valid),
        callbacks=[early_stopping]
    )

<more of your code>
Since I used 'with patch', everything reverts to the original code automatically once the block exits.
I hope this is useful for those using Kaggle, or for anyone who wants to write to a local directory.
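A related note, and an assumption on my part rather than something from the workaround above: if you later reload those locally saved weights while still connected to the TPU, the matching LoadOptions may be needed as well, along the lines of (checkpoint_path is hypothetical):

load_locally = tf.saved_model.LoadOptions(experimental_io_device='/job:localhost')
model.load_weights(checkpoint_path, options=load_locally)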
Environment:
TF 2.0
Python 3.5
Ubuntu 16.04
Problem:
I'm trying to use the pre-trained MobileNetV2, but the accuracy doesn't increase:
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')
The script is copied from the TensorFlow 2.0 tutorial (https://www.tensorflow.org/tutorials/images/transfer_learning?hl=zh-cn).
The only change I made is the dataset fed into the network. The original code does binary classification between dogs and cats, and everything works. However, the accuracy never increases with multi-class datasets such as "mnist" or "tf_flowers". Please note that I used the correct loss function and metrics.
Naive model and results:
Keras MobileNetV2:
Here is the code:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, GlobalAveragePooling2D
from tensorflow.keras import Model
keras = tf.keras
import tensorflow_datasets as tfds
# tfds.disable_progress_bar()

IMG_SIZE = 224
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

def format_example(image, label):
    if image.shape[-1] == 1:
        image = tf.concat([image, image, image], 2)
    image = tf.cast(image, tf.float32)
    image = (image / 127.5) - 1
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label
## ---- functional model ---- ##
class TinyModel():
    def __init__(self, num_classes, hiddens=32, input_shape=IMG_SHAPE):
        import tensorflow as tf
        self.num_classes = num_classes
        self.input_shape = input_shape
        self.hiddens = hiddens

    def build(self):
        inputs = Input(shape=self.input_shape)
        x = Conv2D(16, 3, activation="relu", strides=2)(inputs)
        x = Conv2D(32, 3, activation="relu", strides=2)(x)
        x = Conv2D(32, 3, activation="relu", strides=2)(x)
        x = Conv2D(16, 3, activation="relu")(x)
        x = Flatten()(x)
        x = Dense(self.hiddens, activation="relu")(x)
        outputs = Dense(self.num_classes, activation="softmax")(x)
        model = Model(inputs=inputs, outputs=outputs, name='my_model')
        return model
def assemble_model(num_classes, model_name='MobileNetV2'):
    import tensorflow as tf
    base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                                   weights='imagenet',
                                                   include_top=False)
    model = tf.keras.Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(num_classes, activation='softmax')
    ])
    model.trainable = True
    return model
## ---- dataset preparation ---- ##
SPLIT_WEIGHTS = (8, 1, 1)
splits = tfds.Split.TRAIN.subsplit(weighted=SPLIT_WEIGHTS)
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'tf_flowers', split=list(splits),
    with_info=True, as_supervised=True)
get_label_name = metadata.features['label'].int2str
train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
test = raw_test.map(format_example)
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000
train_ds = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_ds = validation.batch(BATCH_SIZE)
test_ds = test.batch(BATCH_SIZE)
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
## ----- model config ---- ##
# Create an instance of the model
model = TinyModel(num_classes=5).build() # model 1
# model = assemble_model(num_classes=5) # model 2
model.summary()
## ----- training config -----##
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
## ---- training loop ---- ##
#tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

#tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)
EPOCHS = 5

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
----------------------SOLVED-----------------------
Solution: add the argument training=True when calling the keras.applications model during training. For example:
model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,weights="imagenet",include_top=False)
pred = model(inputs, training=True)
The reason is most likely the "batchnorm" layers. Models that contain BN layers work well in the Keras training loop, model.fit(), with nothing extra to take care of. However, they cannot learn anything in a custom training loop if you forget to set training=True when calling model().
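Applied to the custom training loop from the question, that means passing training=True on the forward pass. A minimal sketch of the adjusted train_step (same names as above):

def train_step(images, labels):
    with tf.GradientTape() as tape:
        # training=True makes BatchNormalization use batch statistics
        # (and enables dropout) during the forward pass
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)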
The problem is that you set all your parameters to be non-trainable. Check this in the summary of the model, and you will see that the number of trainable parameters is very small.
Change this line (or just delete it):
base_model.trainable = False
To
base_model.trainable = True
And everything will work fine
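If you train with compile()/fit() as in the linked tutorial, also keep in mind that changes to trainable only take effect when the model is compiled, so recompile after flipping the flag. A short sketch (the low learning rate is just an illustrative choice, not from the original answer):

base_model.trainable = True
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
model.summary()  # the trainable-parameter count should now include the base model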
I'm having some trouble converting a pure Keras model to the TensorFlow Estimator API on an unbalanced dataset.
When using the pure Keras API, the class_weight parameter is available in the model.fit method, but when converting a Keras model to a TensorFlow Estimator with tensorflow.python.keras.estimator.model_to_estimator, there is no place to pass class_weights.
How can I overcome this?
I'm using TF 1.12 on Ubuntu 18, CUDA 9, cuDNN 7.
Pure Keras model:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])
        return model

model = keras_model(5)
model.fit(train_X, train_Y_onehot,
          batch_size=128,
          epochs=10,
          validation_data=(eval_X, eval_Y_onehot),
          class_weight=class_weights,
          verbose=1)
Keras model to TensorFlow Estimator:
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])

        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
I've coded a workaround and it seems to work.
import tensorflow as tf
from tensorflow.python.keras import backend as K

def weighted_loss_fn(class_weights):
    def _loss_fn(y_true, y_pred):
        class_weights_tensor = K.variable(class_weights)
        y_true_labels = K.argmax(y_true, axis=1)
        weights = K.gather(class_weights_tensor, y_true_labels)
        return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
    return _loss_fn

def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        loss_fn = weighted_loss_fn(class_weights)
        model.compile(loss=loss_fn,
                      optimizer=optz, metrics=['categorical_accuracy'])

        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

estimator_model = keras_estimator_model(5)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
In my case class_weights = [ 0.17041813 42.00318471 35.26470588 29.70495495 42.00318471 44.55743243]
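For reference, one common way to derive such weights; this is an addition on my part, the original post does not say how these numbers were computed, and y_train_labels below is a hypothetical array of integer class labels:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(y_train_labels),
                                     y=y_train_labels)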
The class_weight param is used only to weight the loss function during training. So, to get the same effect you have to alter the loss function yourself. I did not find an existing TensorFlow loss function that supports this, only a feature request for cross-entropy loss with class weights; a workaround is mentioned there:
Here
By the way, the issue was closed because this is already possible in Keras :)