Passing a trained model to another function in airflow - python

So I want to use airflow to display my model training. I created a model in a python function and now I want to pass it to another function which will train it. I am currently trying to pass it via xcom, but then I get the error:
INFO - Done. Returned value was: <keras.wrappers.scikit_learn.KerasClassifier object at 0x7fce52713940>
[2022-04-27, 10:18:37 CEST] {xcom.py:447}
ERROR - Could not serialize the XCom value into JSON.
How do I pass the model to the other function?
This is a part of my code:
def create_model():
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model = KerasClassifier(build_fn=create_model, verbose=0)
return model
def trainModel(ti):
model=ti.xcom_pull(task_ids='create_model')
X,Y= ti.xcom_pull(task_ids='loadData')
batch_size = 10
epochs = 10
model.fit(X,Y,
steps_per_epoch=batch_size,
epochs=epochs,
)
#model.save(model_file_name)
return model
create_model_task=PythonOperator(
task_id='create_model',
python_callable=create_model,
provide_context= True,
dag=MODEL_DAG
)
trainModel_Task=PythonOperator(
task_id='trainModel',
python_callable=trainModel,
provide_context= True,
dag=MODEL_DAG
)

Related

Bespoke loss routine for Keras Model

I'm trying to implement a bespoke loss calculation model but keep getting hit with recognition failures. eg: "ValueError: Unknown loss function: root_mean_squared_error_fraction. Please ensure this object is passed to the custom_objects argument."
The function "root_mean_squared_fraction" exists and is functioning, I know this because I call it elsewhere outside of the Keras model and it functions as expected. I'm clearly not understanding something about injecting this into the model definition and would appreciate any advice? Thanks.
from keras.models import load_model
from keras import backend as K
def root_mean_squared_error_fraction(y_true, y_pred):
return K.sqrt(K.mean(K.square((y_pred - y_true)/y_true)))
This is my model routine which does work when the =rmsef in the model.compile is replaced with ='mse':
def ResModel(Svect, Xvect, Yvect, dtrain):
model = Sequential()
model.add(LSTM(64, activation='relu',\
input_shape=(Xvect.shape[1], Xvect.shape[2]),\
return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(Yvect.shape[1]))
rmsef = root_mean_squared_error_fraction
model.compile(optimizer = "adam", loss = rmsef )
bmfile = 'bestmodel.h5'
earlystop = EarlyStopping(monitor='val_loss', mode='auto',\
verbose=0, patience=mpat, min_delta=0.005)
chkpoint = ModelCheckpoint(bmfile, monitor = 'val_loss', mode = 'auto',\
save_best_only = True )
history = model.fit(Xvect, Yvect, epochs=mcycl, batch_size=32,\
validation_split=dsplit, verbose=0,\
callbacks = [earlystop, chkpoint] )
saved = load_model('bestmodel.h5')
score = saved.evaluate(Xvect, Yvect, verbose=0)
print("Epoch: %04d yeilded best fit with overall loss of: %0.4f "\
% ((earlystop.stopped_epoch + 1), score ) )
Yvect = descalevector(Svect, saved.predict(Xvect),dtrain )
return Yvect, score

Getting Error "Can't pickle" while saving keras model

I build a model with BERT pre-trained model, then I tried to save it using pickle I got this error
Can't pickle <function LayerCall.call at 0x7f3f6a88a8c0>: it's not the same object as tensorflow.python.keras.saving.saved_model.save_impl.LayerCall.call
UPDATE:
I tried to save it with model.save() method but I got: IndexError: list index out of range
Here is my Code to build the model:
MAX_LENGTH = 35
INPUT_IDs = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids")
INPUT_MASK = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_mask")
embeddings = bert_model([INPUT_IDs, INPUT_MASK])[1] # 0 is hidden state, 1 is pooler_output
out = tf.keras.layers.Dropout(0.1)(embeddings)
out = Dense(128, activation="relu")(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation="relu")(out)
y = Dense(1, activation='sigmoid')(out)
model = tf.keras.Model(inputs=[INPUT_IDs, INPUT_MASK], outputs=y)
model.layers[2].trainable = True
optimizer = Adam(learning_rate=5e-06,
epsilon=1e-08,
decay=0.01,
clipnorm=1.0)
loss = BinaryCrossentropy(from_logits=True)
metric = BinaryAccuracy('accuracy')
model.compile(
optimizer=optimizer,
loss=loss,
metrics=metric)
train_history = model.fit(
x={'input_ids':X_train["input_ids"], 'attention_mask':X_train["attention_mask"]},
y=y_train,
validation_split=0.2,
epochs=10,
batch_size=24)

How I can save the model python3

**I am trying to save the model for use it in the web application but I get this error **
X = []
sentences = list(review_df['text'])
for sen in sentences:
X.append(clean_text(sen))
y = review_df['Label']
y = np.array(list(map(lambda x: 1 if x=="fake" else 0, y)))
#Text Classification with Recurrent Neural Network (LSTM)
from keras.layers.recurrent import LSTM
model = Sequential()
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=maxlen ,
trainable=False)
model.add(embedding_layer)
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.summary())
#train the model
history = model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)
score = model.evaluate(X_test, y_test, verbose=1)
#print model results
print("Test Score:", score[0])
print("Test Accuracy:", score[1])
#Making Predictions on Single Instance
instance = X[57]
print(instance)
instance = tokenizer.texts_to_sequences(instance)
flat_list = []
for sublist in instance:
for item in sublist:
flat_list.append(item)
flat_list = [flat_list]
instance = pad_sequences(flat_list, padding='post', maxlen=maxlen)
model.predict(instance)
#save the model
import pickle
with open('model.pkl', 'wb') as f:
pickle.dump(model, f)
when I try to save the model I get this error : TypeError: can't pickle _thread.RLock objects
Is there any idea for solving this error
Try doing this:
model_json = model.to_json()
with open("my_model.json", "w") as json_file:
json_file.write(model_json)
You can save a keras model directly by:
model.save('path/xyz.h5')
It is Recommended to save model in H5 format and use model.save()
You can also load the format with model.load()

Keras tuner and TPU in Google Colab

I have some problems with keras tuner and tpu. When I run the code below, everything works well and network training is fast.
vocab_size = 5000
embedding_dim = 64
max_length = 2000
def create_model():
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, embedding_dim),
tf.keras.layers.LSTM(100, dropout=0.5, recurrent_dropout=0.5),
tf.keras.layers.Dense(embedding_dim, activation='relu'),
tf.keras.layers.Dense(4, activation='softmax')
])
return model
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
with strategy.scope():
model = create_model()
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['sparse_categorical_accuracy'])
model.fit(train_padded, y_train,
epochs=10,
validation_split=0.15,
verbose=1, batch_size=128)
When I use a keras tuner, the neural network learns slowly. I believe that TPU is not used.
vocab_size = 5000
max_length = 2000
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
def build_model(hp):
model = tf.keras.Sequential()
activation_choice = hp.Choice('activation', values=['relu', 'sigmoid', 'tanh', 'elu', 'selu'])
embedding_dim = hp.Int('units_hidden', min_value=128, max_value=24, step=8)
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
model.add(tf.keras.layers.LSTM(hp.Int('LSTM_Units', min_value=50, max_value=500, step=10),
dropout=hp.Float('dropout', 0, 0.5, step=0.1, default=0),
recurrent_dropout=hp.Float('recurrent_dropout', 0, 0.5, step=0.1, default=0)))
model.add(tf.keras.layers.Dense(embedding_dim, activation=activation_choice))
model.add(tf.keras.layers.Dense(4, activation='softmax'))
model.compile(
optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop', 'SGD']),
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['sparse_categorical_accuracy'])
return model
with strategy.scope():
tuner = Hyperband(
build_model,
objective='val_accuracy',
max_epochs=10,
hyperband_iterations=2)
tuner.search(train_padded, y_train,
batch_size=128,
epochs=10,
callbacks=[EarlyStopping(patience=1)],
validation_split=0.15,
verbose=1)
best_models = tuner.get_best_models(1)
best_model.save('/content/drive/My Drive/best_model.h5')
Notebook link
How to make a keras tuner work with TPU?
You need to pass it to the tuner:
tuner = Hyperband(
build_model,
objective='val_accuracy',
max_epochs=10,
hyperband_iterations=2,
distribution_strategy=strategy,)
(and remove the strategy.scope() part)
To add ...
I don't use Google Colab, but Kaggle. Using TPU, I get that same error "File system scheme '[local]' not implemented", when the tuner tries to write the checkpoints on Kaggle's working directory.
Since I don't have a gs://location, I just "modified" the function called by Keras Tuner to save checkpoints, to allow writing to local dir, which is the Kaggle working directory. I used patch() to mock the function.
First important thing is that Keras Tuner must be version 1.1.2 and above.
Example:
from mock import patch
<your code>
# now the new function to "replace" the existing one (keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end)
def new_on_epoch_end(self, epoch, logs=None):
if not self.objective.has_value(logs):
# Save on every epoch if metric value is not in the logs. Either no
# objective is specified, or objective is computed and returned
# after `fit()`.
#***** the following are the lines I added ******************************************
# Save model in Tensorflow's "SavedModel" format
save_locally = tf.saved_model.SaveOptions(experimental_io_device = '/job:localhost')
# I then added ', options = save_locally' to the line below.
#************************************************************************************
self.model.save_weights(self.filepath, options = save_locally)
return
current_value = self.objective.get_value(logs)
if self.objective.better_than(current_value, self.best_value):
self.best_value = current_value
#***** the following are the lines I added ******************************************
# Save model in Tensorflow's "SavedModel" format
save_locally = tf.saved_model.SaveOptions(experimental_io_device = '/job:localhost')
# I then added ', options = save_locally' to the line below.
#************************************************************************************
self.model.save_weights(self.filepath, options = save_locally)
with patch('keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end', new_on_epoch_end):
# Perform hypertuning. The parameters are exactly like those in the fit() method.
tuner.search(
X_train,
y_train,
epochs=num_of_epochs,
validation_data = (X_valid, y_valid),
callbacks=[early_stopping]
)
<more of your code>
Since I used 'with patch', after all is done, it reverts back to the original code automatically.
I hope this will be useful for those using Kaggle, or those who want to write to a local dir.

how to switch to another optimizer in tensorflow?

I would like to change optimizer in the middle of training.
I use the code from this post:
Changing optimizer in keras during training
as follows:
model = Model(inputs=inputs, outputs=conv12)
def rmse(y_true, y_pred):
return backend.sqrt(backend.mean(backend.square(y_pred - y_true)))#, axis=-1))
class OptimizerChanger(EarlyStopping):
def __init__(self, on_train_end, **kwargs):
self.do_on_train_end = on_train_end
super(OptimizerChanger, self).__init__(**kwargs)
def on_train_end(self, logs=None):
super(OptimizerChanger, self).on_train_end(self, logs)
self.do_on_train_end()
def do_after_training():
model.compile(optimizer='sgd', loss='mean_squared_error', metrics=['mae', rmse])
model.fit(x_train_n, y_train_n, batch_size=10, epochs=200, validation_split=0.05, shuffle=True)
changer = OptimizerChanger(on_train_end= do_after_training,
monitor='val_rmse',
min_delta=5,
patience=10)
model.compile(loss='mean_squared_error',
optimizer='adam',
metrics=['mae', rmse])
history = model.fit(x_train_n, y_train_n, batch_size=10, epochs=200, validation_split=0.05, shuffle=True, callbacks=[changer])
I get the following error:
super(OptimizerChanger, self).on_train_end(self, logs)
TypeError: on_train_end() takes from 1 to 2 positional arguments but 3 were given
what is the third parameter that is being passed? is it implicit? how do I call it?
You're calling the method from an instance and you are passing self to it. So it's redundant.
super(OptimizerChanger, self).on_train_end(self, logs)
This line should be
super(OptimizerChanger, self).on_train_end(logs)

Categories