Train model after load in MLFlow - python

My goal is to store an empty model into MLFlow Registry and then load it for training.
I have a register_model.py which looks like this:
if __name__ == "__main__":
    # Send all MLflow tracking/registry calls to the local server.
    mlflow.set_tracking_uri("http://127.0.0.1:5000")

    # MobileNetV2 with no pretrained weights, 32x32x3 inputs and 10 classes.
    model = tf.keras.applications.MobileNetV2(
        input_shape=(32, 32, 3), classes=10, weights=None
    )
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # CIFAR-10; the test split is loaded but not used by this script.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

    # Autologging is switched on before fit() so the training run is recorded.
    mlflow.tensorflow.autolog()
    model.fit(x_train, y_train, epochs=1, batch_size=32)

    # Export the trained model to disk, then log that directory to MLflow and
    # register it under a fixed model name.
    saved_model_dir = "models/tensorflow"
    tf.keras.models.save_model(model, saved_model_dir)
    mlflow.tensorflow.log_model(
        tf_saved_model_dir=saved_model_dir,
        tf_meta_graph_tags=None,
        tf_signature_def_key="serving_default",
        artifact_path="saved/models/tensorflow",
        registered_model_name="tensorflow-MobileNetV2-32inputs",
    )
Then I'm trying to load it using:
if __name__ == "__main__":
    # Same local tracking server the model was registered on.
    mlflow.set_tracking_uri("http://127.0.0.1:5000")

    # Registry URIs have the form models:/<name>/<version>.
    model_name = "tensorflow-MobileNetV2-32inputs"
    model_version = 1
    registry_uri = "models:/{}/{}".format(model_name, model_version)

    model = mlflow.tensorflow.load_model(model_uri=registry_uri)
I would expect the loaded model to support calls like 'model.fit()' and 'model.predict()', but I'm always getting:
AttributeError: '_WrapperFunction' object has no attribute 'fit'
So my question is: is it possible to save a TensorFlow/Keras model and load its architecture so that it can be trained, retrained, or even modified through MLflow? For example: load the model, add a new layer, and store the result either as a new version or as a new model.
Thanks in advance!

Related

Tensorflow ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`

I'm trying to train U-Net on the BraTS dataset in Colab, but it shows the following error:
ValueError: Unexpected result of train_function (Empty logs). Please use Model.compile(..., run_eagerly=True), or tf.config.run_functions_eagerly(True) for more information of where went wrong, or file a issue/bug to tf.keras
the code for training:
import os
import tables
import numpy as np
from config import cfg
from model import unet_model
from data_generator import CustomDataGenerator
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, TensorBoard
def train_model(hdf5_dir, brains_idx_dir, view, modified_unet=True, batch_size=16, val_batch_size=32,
                lr=0.01, epochs=100, hor_flip=False, ver_flip=False, zoom_range=0.0, save_dir='./save/',
                start_chs=64, levels=3, multiprocessing=False, load_model_dir=None):
    """
    Build a UNet model, create the training and validation data generators, and train the model.

    The CSV log, the best-model checkpoint and the TensorBoard logs are written to a
    sub-directory of ``save_dir`` named ``<view>_<first five characters of the
    brains_idx_dir file name>``; the directory is created if it is missing.

    Args:
        hdf5_dir: Path of the HDF5 file, opened with PyTables in 'r+' mode.
        brains_idx_dir: Path of the ``.npy`` file passed to both generators as the brain index.
        view: View identifier forwarded to the generators and used in the output directory name.
        modified_unet, lr, start_chs, levels: Forwarded unchanged to ``unet_model``.
        batch_size: Batch size of the training generator.
        val_batch_size: Batch size of the validation generator.
        epochs: Number of training epochs.
        hor_flip, ver_flip, zoom_range: Augmentation options for the training generator only.
        save_dir: Parent directory for this run's outputs.
        multiprocessing: Forwarded as ``use_multiprocessing`` to the training call.
        load_model_dir: Accepted for interface compatibility; not used by this function.

    Returns:
        None.
    """
    hdf5_file = tables.open_file(hdf5_dir, mode='r+')
    try:
        # preparing generators
        brain_idx = np.load(brains_idx_dir)
        datagen_train = CustomDataGenerator(hdf5_file, brain_idx, batch_size, view, 'train',
                                            hor_flip, ver_flip, zoom_range, shuffle=True)
        datagen_val = CustomDataGenerator(hdf5_file, brain_idx, val_batch_size, view, 'validation', shuffle=False)

        # add callbacks
        save_dir = os.path.join(save_dir, '{}_{}'.format(view, os.path.basename(brains_idx_dir)[:5]))
        # makedirs also creates the parent (e.g. './save/') and tolerates an
        # existing directory, where a bare os.mkdir would raise.
        os.makedirs(save_dir, exist_ok=True)
        logger = CSVLogger(os.path.join(save_dir, 'log.txt'))
        checkpointer = ModelCheckpoint(filepath=os.path.join(save_dir, 'model.hdf5'), verbose=1,
                                       save_best_only=True)
        tensorboard = TensorBoard(os.path.join(save_dir, 'tensorboard'))
        callbacks = [logger, checkpointer, tensorboard]

        # building the model; the leading (batch) axis of the generator's data shape is dropped
        model_input_shape = datagen_train.data_shape[1:]
        model = unet_model(model_input_shape, modified_unet, lr, start_chs, levels)

        # training the model
        model.fit_generator(datagen_train, epochs=epochs, use_multiprocessing=multiprocessing,
                            callbacks=callbacks, validation_data=datagen_val)
    finally:
        # Always release the HDF5 handle, even when training raises.
        hdf5_file.close()
if __name__ == '__main__':
    # Config keys listed in the positional order train_model expects them.
    arg_keys = (
        'hdf5_dir', 'brains_idx_dir', 'view', 'modified_unet', 'batch_size',
        'val_batch_size', 'lr', 'epochs', 'hor_flip', 'ver_flip', 'zoom_range',
        'save_dir', 'start_chs', 'levels', 'multiprocessing', 'load_model_dir',
    )
    train_model(*[cfg[key] for key in arg_keys])
and the error msg is :
Why am I getting the above error and how can I fix it?

TypeError: ('Keyword argument not understood:', 'vocabulary_size')

Having an issue when loading my LSTM model using Keras when running my Streamlit application. The error says:
TypeError: ('Keyword argument not understood:', 'vocabulary_size')
I think that this issue is due to the TextVectorization layer in my tf.keras.Sequential(). The layer is called encoder in my code.
Training Code:
import numpy as np
import tensorflow_datasets as tfds
import tensorflow as tf
tfds.disable_progress_bar()
import matplotlib.pyplot as plt
def plot_graphs(history, metric):
    """Plot the training and validation curves of ``metric`` against epochs.

    ``history`` is the object returned by ``model.fit``; its ``history`` dict
    must contain both ``metric`` and ``'val_' + metric``.
    """
    val_metric = 'val_' + metric
    curves = history.history
    plt.plot(curves[metric])
    plt.plot(curves[val_metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, val_metric])
# IMDB reviews loaded as (text, label) pairs.
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset = dataset['train']
test_dataset = dataset['test']

BUFFER_SIZE = 1000
BATCH_SIZE = 64

# Only the training split is shuffled; both splits are batched and prefetched.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

VOCAB_SIZE = 1000

# Text vectorization layer; its vocabulary is adapted on the training texts only.
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

# The embedding table is sized from the vocabulary the encoder actually learned.
embedding = tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True,
)
model = tf.keras.Sequential([
    encoder,
    embedding,
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
])

# The final Dense(1) has no activation, so the loss is told to expect logits.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)

## I tried two different ways to save the model but both resulted in the same error
## Method 1:
model.save('model/')

## Method 2:
model_json = model.to_json()
with open('model.json', 'w') as js:
    js.write(model_json)
model.save_weights('model', save_format='tf')
Loading the model code:
## Loaded the model in two different ways based on the save methods
## Method 1:
lstm_model = tf.keras.models.load_model('model/')
## Method 2:
# Read the architecture JSON; the context manager closes the file even on error.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
lstm_model = model_from_json(loaded_model_json)
# The weights were written with save_weights('model', save_format='tf'), i.e. as a
# TensorFlow checkpoint with the prefix 'model' -- there is no 'model.h5' file.
lstm_model.load_weights("model")
[Additional Links]
My Github Repo contains the LSTM_model.ipynb notebook which I trained as well as the Streamlit App.py which loads in the Keras model.
Screenshot of issue

How I can save the model python3

**I am trying to save the model to use it in a web application, but I get this error.**
# Clean every review text; X keeps the cleaned strings in their original order.
sentences = list(review_df['text'])
X = [clean_text(sen) for sen in sentences]

# Binary target: 1 for rows labelled "fake", 0 for everything else.
y = np.array([1 if label == "fake" else 0 for label in review_df['Label']])
# Text classification with a recurrent neural network (LSTM)
from keras.layers.recurrent import LSTM

# Embedding layer initialised from embedding_matrix and frozen (trainable=False).
embedding_layer = Embedding(
    vocab_size,
    100,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)
model = Sequential([
    embedding_layer,
    LSTM(128),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.summary())

# Train the model; 20% of the training data is held out for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=6,
    verbose=1,
    validation_split=0.2,
)
score = model.evaluate(X_test, y_test, verbose=1)

# Print model results: evaluate() returns the loss first, then the 'acc' metric.
test_score, test_accuracy = score[0], score[1]
print("Test Score:", test_score)
print("Test Accuracy:", test_accuracy)
# Making predictions on a single instance
instance = X[57]
print(instance)

# Tokenise, then merge all returned sub-sequences into one flat sequence.
instance = tokenizer.texts_to_sequences(instance)
flat_list = [item for sublist in instance for item in sublist]

# pad_sequences expects a batch, so wrap the single sequence in a list.
flat_list = [flat_list]
instance = pad_sequences(flat_list, padding='post', maxlen=maxlen)
model.predict(instance)
# Save the model by pickling it to model.pkl.
# NOTE(review): this is the call that raises the reported
# "can't pickle _thread.RLock objects" TypeError for this Keras model.
import pickle

with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
When I try to save the model I get this error: TypeError: can't pickle _thread.RLock objects
Is there any way to solve this error?
Try doing this:
# Serialise only the architecture (not the weights) to a JSON file.
model_json = model.to_json()
with open("my_model.json", "w") as json_file:
    json_file.write(model_json)
You can save a keras model directly by:
# Save the whole model to a single file; the .h5 suffix selects the HDF5 format.
model.save('path/xyz.h5')
It is recommended to save the model in H5 format using model.save().
You can then load the saved file with keras.models.load_model('path/xyz.h5').

Keras tuner and TPU in Google Colab

I have some problems with keras tuner and tpu. When I run the code below, everything works well and network training is fast.
vocab_size = 5000
embedding_dim = 64
max_length = 2000


def create_model():
    """Return an uncompiled Embedding -> LSTM -> Dense -> Dense(4, softmax) classifier.

    Reads the module-level ``vocab_size`` and ``embedding_dim``.
    """
    layers = tf.keras.layers
    return tf.keras.Sequential([
        layers.Embedding(vocab_size, embedding_dim),
        layers.LSTM(100, dropout=0.5, recurrent_dropout=0.5),
        layers.Dense(embedding_dim, activation='relu'),
        layers.Dense(4, activation='softmax'),
    ])
# Connect to the Colab TPU and build a distribution strategy for it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

# The model is created and compiled inside the strategy scope.
with strategy.scope():
    model = create_model()
    # create_model() ends in a softmax layer, so the loss receives
    # probabilities rather than logits: from_logits must be False.
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['sparse_categorical_accuracy'])

model.fit(train_padded, y_train,
          epochs=10,
          validation_split=0.15,
          verbose=1, batch_size=128)
When I use Keras Tuner, the neural network trains slowly. I believe that the TPU is not being used.
# Vocabulary size passed to the Embedding layer in build_model.
vocab_size = 5000
# Not referenced by the code visible in this snippet.
max_length = 2000
# Connect to the Colab TPU (address taken from COLAB_TPU_ADDR), initialise it,
# and build the distribution strategy.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
def build_model(hp):
    """Build and compile one candidate model for Keras Tuner.

    Tuned hyperparameters: dense activation, embedding size (also used as the
    width of the hidden Dense layer), LSTM units, dropout, recurrent dropout
    and optimizer.

    Args:
        hp: The hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a 4-way softmax output.
    """
    model = tf.keras.Sequential()
    activation_choice = hp.Choice('activation', values=['relu', 'sigmoid', 'tanh', 'elu', 'selu'])
    # The range must satisfy min_value <= max_value (the bounds were swapped).
    embedding_dim = hp.Int('units_hidden', min_value=24, max_value=128, step=8)
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
    model.add(tf.keras.layers.LSTM(hp.Int('LSTM_Units', min_value=50, max_value=500, step=10),
                                   dropout=hp.Float('dropout', 0, 0.5, step=0.1, default=0),
                                   recurrent_dropout=hp.Float('recurrent_dropout', 0, 0.5, step=0.1, default=0)))
    model.add(tf.keras.layers.Dense(embedding_dim, activation=activation_choice))
    model.add(tf.keras.layers.Dense(4, activation='softmax'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop', 'SGD']),
        # The last layer already applies softmax, so the loss receives
        # probabilities, not logits.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['sparse_categorical_accuracy'])
    return model
with strategy.scope():
    tuner = Hyperband(
        build_model,
        # The model is compiled with the 'sparse_categorical_accuracy' metric,
        # so its validation value is logged under this name.
        objective='val_sparse_categorical_accuracy',
        max_epochs=10,
        hyperband_iterations=2)

tuner.search(train_padded, y_train,
             batch_size=128,
             epochs=10,
             callbacks=[EarlyStopping(patience=1)],
             validation_split=0.15,
             verbose=1)

# get_best_models returns a list; take its single element before saving.
best_models = tuner.get_best_models(1)
best_model = best_models[0]
best_model.save('/content/drive/My Drive/best_model.h5')
Notebook link
How to make a keras tuner work with TPU?
You need to pass it to the tuner:
# Hand the TPU strategy to the tuner through distribution_strategy.
tuner = Hyperband(
build_model,
objective='val_accuracy',
max_epochs=10,
hyperband_iterations=2,
distribution_strategy=strategy,)
(and remove the strategy.scope() part)
To add ...
I don't use Google Colab, but Kaggle. Using TPU, I get that same error "File system scheme '[local]' not implemented", when the tuner tries to write the checkpoints on Kaggle's working directory.
Since I don't have a gs://location, I just "modified" the function called by Keras Tuner to save checkpoints, to allow writing to local dir, which is the Kaggle working directory. I used patch() to mock the function.
First important thing is that Keras Tuner must be version 1.1.2 and above.
Example:
from mock import patch
<your code>
# now the new function to "replace" the existing one (keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end)
def new_on_epoch_end(self, epoch, logs=None):
    """Replacement for ``SaveBestEpoch.on_epoch_end`` that writes checkpoints via the local host.

    The only addition relative to the function it replaces is the ``options``
    argument passed to ``save_weights``.

    Args:
        self: The callback instance (provides ``objective``, ``best_value``,
            ``model`` and ``filepath``).
        epoch: Epoch index supplied by Keras; not used here.
        logs: Metric values for the finished epoch.
    """
    # ***** added: route checkpoint I/O through the local host *****
    # Model.save_weights() documents its `options` argument as a
    # tf.train.CheckpointOptions (not tf.saved_model.SaveOptions).
    save_locally = tf.train.CheckpointOptions(experimental_io_device='/job:localhost')

    if not self.objective.has_value(logs):
        # Save on every epoch if metric value is not in the logs. Either no
        # objective is specified, or objective is computed and returned
        # after `fit()`.
        self.model.save_weights(self.filepath, options=save_locally)
        return

    current_value = self.objective.get_value(logs)
    if self.objective.better_than(current_value, self.best_value):
        # New best epoch: remember its value and overwrite the checkpoint.
        self.best_value = current_value
        self.model.save_weights(self.filepath, options=save_locally)
# The replacement callback method is active only inside this with-block.
with patch('keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end', new_on_epoch_end):
    # Perform hypertuning. The parameters are exactly like those in the fit() method.
    search_options = dict(
        epochs=num_of_epochs,
        validation_data=(X_valid, y_valid),
        callbacks=[early_stopping],
    )
    tuner.search(X_train, y_train, **search_options)
<more of your code>
Since I used 'with patch', after all is done, it reverts back to the original code automatically.
I hope this will be useful for those using Kaggle, or those who want to write to a local dir.

Keras loading model issue

So I'm having an issue and I'm stuck on it for a while now
I'm trying to understand and repeat a jupyter tutorial about adversarial example.
However, when I load a model, I get big differences in accuracy depending on how it is loaded:
- when it is loaded inside a class, there are no issues
- when it is loaded directly in Jupyter, the accuracy decreases
The class use the following lines:
from keras.models import Sequential, load_model
class Lenet:
    """Wrapper holding the LeNet configuration and, optionally, the pre-trained model."""

    def __init__(self, epochs=200, batch_size=128, load_weights=True):
        """Store the configuration and optionally load the saved model.

        Args:
            epochs: Stored on the instance as ``epochs``.
            batch_size: Stored on the instance as ``batch_size``.
            load_weights: When true, try to load ``networks/models/lenet.h5``.
                A failed load is reported on stdout and does not raise.
        """
        self.name = 'lenet'
        self.model_filename = 'networks/models/lenet.h5'
        self.num_classes = 10
        self.input_shape = 32, 32, 3
        self.batch_size = batch_size
        self.epochs = epochs
        self.iterations = 391
        self.weight_decay = 0.0001
        self.log_filepath = r'networks/models/lenet/'

        # Always define the attribute so later access gives None instead of an
        # AttributeError when loading is skipped or fails.
        self._model = None
        if load_weights:
            try:
                self._model = load_model(self.model_filename)
                print('Successfully loaded', self.name)
            except (ImportError, ValueError, OSError):
                print('Failed to load', self.name)
what I am doing is:
# Load the saved LeNet model directly with Keras, without the wrapper class.
modelPath = "networks/models/lenet.h5"
lenet2 = keras.models.load_model(modelPath)
# Same for the saved ResNet model.
modelPath2 = "networks/models/resnet.h5"
resnet2 = keras.models.load_model(modelPath2)
However, when I'm testing the accuracy of the two models:
# NOTE(review): the class shown in this file is spelled `Lenet`; confirm that `LeNet` resolves to it.
lenet = LeNet()
resnet = ResNet() #the class is nearly the same as LeNet
# Evaluate the class-wrapped models next to the directly loaded ones.
models = [lenet, lenet2, resnet, resnet2]
network_stats, correct_imgs = helper.evaluate_models(models, x_test, y_test)
# Tabulate the per-model statistics with one row per model.
network_stats = pd.DataFrame(network_stats, columns=['name', 'accuracy', 'param_count'])
I'm getting this following result: (network_stats)
name accuracy param_count
0 lenet 0.7488 62006
1 sequential_1 0.4800 62006
2 resnet 0.9231 470218
3 model_1 0.1092 470218
link to the picture
To explain the picture: on the left is the lenet class, which gives good results. On the right is my failed attempt to load the same model directly.
Do you have any idea about why is that happening?
--edit--
can't post images yet on stackoverflow, so I explained a little bit more the issue
--edit2--
can reproduce with another network (resnet)
Can you provide the link to the tutorial you are following?
Are you training the model or just testing the accuracy?
My guess... the class has a color preprocessing function that is transforming the input in the "accuracy" method. If the non-class model is not using that same function then that's about the outcome discrepancy I would expect to see.
def accuracy(self):
    """Evaluate the wrapped model on the CIFAR-10 test split.

    Returns:
        The second value returned by ``evaluate`` (presumably the accuracy
        metric -- confirm against how the model was compiled).
    """
    # The training labels are not needed for evaluation, so they are discarded.
    (x_train, _), (x_test, y_test) = cifar10.load_data()
    y_test = keras.utils.to_categorical(y_test, self.num_classes)

    # color preprocessing
    x_train, x_test = self.color_preprocessing(x_train, x_test)  # <--- here

    return self._model.evaluate(x_test, y_test, verbose=0)[1]

Categories