I am trying to save the model so I can use it in a web application, but I get this error:
X = []
sentences = list(review_df['text'])
for sen in sentences:
    X.append(clean_text(sen))
y = review_df['Label']
y = np.array(list(map(lambda x: 1 if x=="fake" else 0, y)))
#Text Classification with Recurrent Neural Network (LSTM)
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
model = Sequential()
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix],
                            input_length=maxlen, trainable=False)
model.add(embedding_layer)
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.summary())
#train the model
history = model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)
score = model.evaluate(X_test, y_test, verbose=1)
#print model results
print("Test Score:", score[0])
print("Test Accuracy:", score[1])
#Making Predictions on Single Instance
instance = X[57]
print(instance)
instance = tokenizer.texts_to_sequences(instance)
flat_list = []
for sublist in instance:
    for item in sublist:
        flat_list.append(item)
flat_list = [flat_list]
instance = pad_sequences(flat_list, padding='post', maxlen=maxlen)
model.predict(instance)
#save the model
import pickle
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
When I try to save the model I get this error: TypeError: can't pickle _thread.RLock objects
Does anyone have an idea how to solve this error?
Try doing this:
model_json = model.to_json()
with open("my_model.json", "w") as json_file:
    json_file.write(model_json)
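Note that to_json() only saves the architecture, not the trained weights, so also call save_weights(). A minimal sketch of restoring the model afterwards (the file names are just examples):
model.save_weights("my_model_weights.h5")

# later: rebuild the architecture from the JSON and load the weights back
from keras.models import model_from_json
with open("my_model.json", "r") as json_file:
    loaded_model = model_from_json(json_file.read())
loaded_model.load_weights("my_model_weights.h5")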
You can save a Keras model directly with:
model.save('path/xyz.h5')
It is recommended to save the model in H5 format using model.save().
You can load it back with keras.models.load_model().
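For example, a minimal save/load round trip (the path is only illustrative):
from keras.models import load_model

model.save('path/xyz.h5')
restored_model = load_model('path/xyz.h5')
restored_model.predict(instance)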
I built a model on top of a pre-trained BERT model, then tried to save it using pickle and got this error:
Can't pickle <function LayerCall.call at 0x7f3f6a88a8c0>: it's not the same object as tensorflow.python.keras.saving.saved_model.save_impl.LayerCall.call
UPDATE:
I tried to save it with the model.save() method, but I got: IndexError: list index out of range
Here is my code to build the model:
MAX_LENGTH = 35
INPUT_IDs = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="input_ids")
INPUT_MASK = Input(shape=(MAX_LENGTH,), dtype=tf.int32, name="attention_mask")
embeddings = bert_model([INPUT_IDs, INPUT_MASK])[1] # 0 is hidden state, 1 is pooler_output
out = tf.keras.layers.Dropout(0.1)(embeddings)
out = Dense(128, activation="relu")(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation="relu")(out)
y = Dense(1, activation='sigmoid')(out)
model = tf.keras.Model(inputs=[INPUT_IDs, INPUT_MASK], outputs=y)
model.layers[2].trainable = True
optimizer = Adam(learning_rate=5e-06,
                 epsilon=1e-08,
                 decay=0.01,
                 clipnorm=1.0)
loss = BinaryCrossentropy(from_logits=True)
metric = BinaryAccuracy('accuracy')
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)
train_history = model.fit(
    x={'input_ids': X_train["input_ids"], 'attention_mask': X_train["attention_mask"]},
    y=y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=24)
I am trying to save and load a model in the following ways, but nothing seems to work.
JSON:
model_json = model.to_json()
with open('C:/Users/path/model.json', "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("C:/Users/path/ΔΙΠΛΩΜΑΤΙΚΗ/model_json_1.h5")
print("Saved model to disk")
Load from JSON file:
json_file = open('C:/Users/path/model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json, {"tf":tf})
# load weights into new model
loaded_model.load_weights("C:/Users/path/model_json_1.h5")
print("Loaded model from disk")
YAML:
model_yaml = model.to_yaml()
with open("C:/Users/path/model.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)
# serialize weights to HDF5
model.save_weights("C:/Users/path/model_yam.h5")
print("Saved model to disk")
Load model from YAML file:
yaml_file = open('C:/Users/path/model.yaml', 'r')
loaded_model_yaml = yaml_file.read()
yaml_file.close()
loaded_model = model_from_yaml(loaded_model_yaml)
# load weights into new model
loaded_model.load_weights("C:/Users/path/model_yam.h5")
print("Loaded model from disk")
I also tried the following:
model.save("C:/Users/user/Desktop/ΔΙΠΛΩΜΑΤΙΚΗ/model_save.h5")
Load model:
model = load_model('C:/Users/path/model_save.h5', compile = False)
I get the same error with all of the above approaches.
ERROR:
AttributeError: 'str' object has no attribute 'decode'
I built my model in the following way.
MODEL:
print('Build model...')
main_input = Input(shape=(maxlen, num_features), name='main_input')
l1 = LSTM(100, implementation=2, kernel_initializer='glorot_uniform', return_sequences=True, dropout=0.2)(main_input) # the shared layer
b1 = BatchNormalization()(l1)
l2_1 = LSTM(100, implementation=2, kernel_initializer='glorot_uniform', return_sequences=False, dropout=0.2)(b1) # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)
l2_2 = LSTM(100, implementation=2, kernel_initializer='glorot_uniform', return_sequences=False, dropout=0.2)(b1) # the layer specialized in time prediction
b2_2 = BatchNormalization()(l2_2)
act_output = Dense(len(target_chars), activation='softmax', kernel_initializer='glorot_uniform', name='act_output')(b2_1)
time_output = Dense(1, kernel_initializer='glorot_uniform', name='time_output')(b2_2)
model = Model(inputs=[main_input], outputs=[act_output, time_output])
opt = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
model.fit(X, {'act_output':y_a, 'time_output':y_t}, validation_split=0.2, verbose=2, callbacks=[early_stopping, model_checkpoint, lr_reducer], batch_size=maxlen, epochs=500)
Does that error maybe mean that I am building my model incorrectly?
What can I do to fix that error?
Thank you.
I have some problems with Keras Tuner and TPU. When I run the code below, everything works well and network training is fast.
vocab_size = 5000
embedding_dim = 64
max_length = 2000
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.LSTM(100, dropout=0.5, recurrent_dropout=0.5),
        tf.keras.layers.Dense(embedding_dim, activation='relu'),
        tf.keras.layers.Dense(4, activation='softmax')
    ])
    return model
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
with strategy.scope():
    model = create_model()
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['sparse_categorical_accuracy'])

model.fit(train_padded, y_train,
          epochs=10,
          validation_split=0.15,
          verbose=1, batch_size=128)
When I use Keras Tuner, the neural network learns slowly; I believe the TPU is not being used.
vocab_size = 5000
max_length = 2000
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
def build_model(hp):
    model = tf.keras.Sequential()
    activation_choice = hp.Choice('activation', values=['relu', 'sigmoid', 'tanh', 'elu', 'selu'])
    embedding_dim = hp.Int('units_hidden', min_value=128, max_value=24, step=8)
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
    model.add(tf.keras.layers.LSTM(hp.Int('LSTM_Units', min_value=50, max_value=500, step=10),
                                   dropout=hp.Float('dropout', 0, 0.5, step=0.1, default=0),
                                   recurrent_dropout=hp.Float('recurrent_dropout', 0, 0.5, step=0.1, default=0)))
    model.add(tf.keras.layers.Dense(embedding_dim, activation=activation_choice))
    model.add(tf.keras.layers.Dense(4, activation='softmax'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop', 'SGD']),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])
    return model
with strategy.scope():
    tuner = Hyperband(
        build_model,
        objective='val_accuracy',
        max_epochs=10,
        hyperband_iterations=2)

tuner.search(train_padded, y_train,
             batch_size=128,
             epochs=10,
             callbacks=[EarlyStopping(patience=1)],
             validation_split=0.15,
             verbose=1)
best_model = tuner.get_best_models(1)[0]
best_model.save('/content/drive/My Drive/best_model.h5')
How can I make Keras Tuner work with a TPU?
You need to pass the strategy to the tuner:
tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=10,
    hyperband_iterations=2,
    distribution_strategy=strategy)
(and remove the strategy.scope() part)
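Putting it together, a minimal sketch based on the code in the question (the tuner is created at top level, not inside strategy.scope(), and is handed the strategy directly):
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

tuner = Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=10,
    hyperband_iterations=2,
    distribution_strategy=strategy)  # the tuner applies the TPU strategy itself

tuner.search(train_padded, y_train,
             batch_size=128,
             epochs=10,
             callbacks=[EarlyStopping(patience=1)],
             validation_split=0.15,
             verbose=1)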
To add ...
I don't use Google Colab, but Kaggle. Using a TPU, I get the same error, "File system scheme '[local]' not implemented", when the tuner tries to write the checkpoints to Kaggle's working directory.
Since I don't have a gs:// location, I just "modified" the function Keras Tuner calls to save checkpoints so that it can write to a local directory, which is the Kaggle working directory. I used patch() to mock the function.
The first important thing is that Keras Tuner must be version 1.1.2 or above.
Example:
from mock import patch
<your code>
# now the new function to "replace" the existing one (keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end)
def new_on_epoch_end(self, epoch, logs=None):
    if not self.objective.has_value(logs):
        # Save on every epoch if metric value is not in the logs. Either no
        # objective is specified, or objective is computed and returned
        # after `fit()`.
        # ***** the following are the lines I added *************************
        # Save model in TensorFlow's "SavedModel" format
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        # I then added ', options=save_locally' to the line below.
        # ********************************************************************
        self.model.save_weights(self.filepath, options=save_locally)
        return
    current_value = self.objective.get_value(logs)
    if self.objective.better_than(current_value, self.best_value):
        self.best_value = current_value
        # ***** the following are the lines I added *************************
        # Save model in TensorFlow's "SavedModel" format
        save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
        # I then added ', options=save_locally' to the line below.
        # ********************************************************************
        self.model.save_weights(self.filepath, options=save_locally)
with patch('keras_tuner.engine.tuner_utils.SaveBestEpoch.on_epoch_end', new_on_epoch_end):
    # Perform hypertuning. The parameters are exactly like those in the fit() method.
    tuner.search(
        X_train,
        y_train,
        epochs=num_of_epochs,
        validation_data=(X_valid, y_valid),
        callbacks=[early_stopping]
    )
<more of your code>
Since I used 'with patch', everything reverts back to the original code automatically once the block is done.
I hope this will be useful for those using Kaggle, or those who want to write to a local dir.
I'm starting out with Keras, creating a model to classify text labels by feeding in a couple of text features and producing a single output.
I'm still trying to fine-tune the model's predictions, but I'd like to understand why my test function gets different results every time the model is recreated. Is that usual? Also, I'd appreciate any tips to improve the model's accuracy.
def create_model(model_name, data, test_data):
    # lets take 80% data as training and remaining 20% for test.
    train_size = int(len(data) * .9)
    test_size = int(len(data) * .4)
    train_headlines = data['Subject']
    train_category = data['Category']
    train_activities = data['Activity']
    test_headlines = data['Subject'][:test_size]
    test_category = data['Category'][:test_size]
    test_activities = data['Activity'][:test_size]
    # define Tokenizer with Vocab Sizes
    vocab_size1 = 10000
    vocab_size2 = 5000
    batch_size = 100
    tokenizer = Tokenizer(num_words=vocab_size1)
    tokenizer2 = Tokenizer(num_words=vocab_size2)
    test_headlines = test_headlines.astype(str)
    train_headlines = train_headlines.astype(str)
    test_category = test_category.astype(str)
    train_category = train_category.astype(str)
    tokenizer.fit_on_texts(test_headlines)
    tokenizer2.fit_on_texts(test_category)
    x_train = tokenizer.texts_to_matrix(train_headlines, mode='tfidf')
    x_test = tokenizer.texts_to_matrix(test_headlines, mode='tfidf')
    y_train = tokenizer2.texts_to_matrix(train_category, mode='tfidf')
    y_test = tokenizer2.texts_to_matrix(test_category, mode='tfidf')
    # load classes
    labels = []
    encoder = LabelBinarizer()
    encoder.fit(train_activities)
    text_labels = encoder.classes_
    with open('outputs/classes.txt', 'w') as f:
        for item in text_labels:
            f.write("%s\n" % item)
    z_train = encoder.transform(train_activities)
    z_test = encoder.transform(test_activities)
    num_classes = len(text_labels)
    print("num_classes: " + str(num_classes))
    input1 = Input(shape=(vocab_size1,), name='main_input')
    x1 = Dense(512, activation='relu')(input1)
    x1 = Dense(64, activation='relu')(x1)
    x1 = Dense(64, activation='relu')(x1)
    input2 = Input(shape=(vocab_size2,), name='cat_input')
    main_output = Dense(num_classes, activation='softmax', name='main_output')(x1)
    model = Model(inputs=[input1, input2], outputs=[main_output])
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    history = model.fit([x_train, y_train], z_train,
                        batch_size=batch_size,
                        epochs=30,
                        verbose=1,
                        validation_split=0.1)
    score = model.evaluate([x_test, y_test], z_test,
                           batch_size=batch_size, verbose=1)
    print('Test accuracy:', score[1])
    # serialize model to JSON
    model_json = model.to_json()
    with open("./outputs/my_model_" + model_name + ".json", "w") as json_file:
        json_file.write(model_json)
    # creates a HDF5 file 'my_model.h5'
    model.save('./outputs/my_model_' + model_name + '.h5')
    # Save Tokenizer i.e. Vocabulary
    with open('./outputs/tokenizer' + model_name + '.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
def validate_model(model_name, test_data, labels):
    from keras.models import model_from_json
    test_data['Subject'] = test_data['Subject'] + " " + test_data['Description']
    headlines = test_data['Subject'].astype(str)
    categories = test_data['Category'].astype(str)
    # load json and create model
    json_file = open("./outputs/my_model_" + model_name + ".json", 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    # load weights into new model
    model.load_weights('./outputs/my_model_' + model_name + '.h5')
    print("Loaded model from disk")
    # loading
    import pickle
    with open('./outputs/tokenizer' + model_name + '.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    # Subjects
    x_pred = tokenizer.texts_to_matrix(headlines, mode='tfidf')
    # Categories
    y_pred = tokenizer.texts_to_matrix(categories, mode='tfidf')
    predictions = []
    scores = []
    predictions_vetor = model.predict({'main_input': x_pred, 'cat_input': y_pred})
I read your training code, shown below.
model.fit([x_train, y_train], z_train,
          batch_size=batch_size,
          epochs=30,
          verbose=1,
          validation_split=0.1)
You are using [x_train, y_train] as features and z_train as labels for your model, but y_train is a raw form of the label and z_train is its encoded form.
This way you are leaking label information into the training features, which results in an over-fitting situation. Your model is not generalized at all, and therefore predicts irrelevant results.
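A rough sketch of one way to avoid the leak, following this answer's reading that y_train is label-derived: use only the headline tf-idf matrix as input and the encoded activities as the target (names follow the code in the question; this is an illustrative single-input variant, not a drop-in fix).
# hypothetical single-input model: no label-derived data enters the feature set
input1 = Input(shape=(vocab_size1,), name='main_input')
x1 = Dense(512, activation='relu')(input1)
x1 = Dense(64, activation='relu')(x1)
main_output = Dense(num_classes, activation='softmax', name='main_output')(x1)

model = Model(inputs=input1, outputs=main_output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, z_train, batch_size=batch_size, epochs=30,
          verbose=1, validation_split=0.1)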
I am performing binary classification to classify reviews according to their polarity.
I have created an ensemble model in Keras using a CNN and a BiLSTM as base models and a neural network as the meta-learner, i.e. stacking. But I am getting a stacked ensemble accuracy of 0%. What changes should I make to my code so it runs correctly? Where am I going wrong?
text_input_layer = Input(shape=(length_trainshuffle,))
embedding_layer = Embedding(vocab_size, 100)(text_input_layer)
text_layer_cnn = Conv1D(128, 5, activation='relu')(embedding_layer)
text_layer_cnn = GlobalMaxPooling1D()(text_layer_cnn)
text_layer_cnn = Dropout(0.2)(text_layer_cnn)
text_layer_cnn = Dense(5,kernel_initializer='glorot_uniform', activation='tanh')(text_layer_cnn)
output_layer_cnn = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_cnn)
model_cnn = Model(text_input_layer, output_layer_cnn)
optimizer = Adamax(lr=0.001,decay=0.0001)
model_cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
filepath_cnn="cnnmodel.best.hdf5"
checkpoint_cnn = ModelCheckpoint(filepath_cnn, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_cnn = [checkpoint_cnn]
# Fit the model
model_cnn.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_cnn, verbose=1)
model_cnn.save(filepath_cnn)
print('>Saved %s' % filepath_cnn)
loss_cnn, acc_cnn = model_cnn.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy CNN: %f' % (acc_cnn*100))
print('Loss CNN: %f' %(loss_cnn))
text_layer_bilstm = Bidirectional(CuDNNLSTM(256))(embedding_layer)
output_layer_bilstm = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_bilstm)
model_bilstm = Model(text_input_layer, output_layer_bilstm)
optimizer_bilstm = Adamax(lr=0.001,decay=0.0001)
model_bilstm.compile(optimizer=optimizer_bilstm, loss='binary_crossentropy', metrics=['accuracy'])
filepath_bilstm="bilstm_model.best.hdf5"
checkpoint_bilstm = ModelCheckpoint(filepath_bilstm, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_bilstm = [checkpoint_bilstm]
# Fit the model
model_bilstm.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_bilstm, verbose=1)
model_bilstm.save(filepath_bilstm)
print('>Saved %s' % filepath_bilstm)
loss_bilstm, acc_bilstm = model_bilstm.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy bilstm: %f' % (acc_bilstm*100))
print('Loss bilstm: %f' %(loss_bilstm))
all_models = list()
cnnmodel = load_model(filepath_cnn)
# add to list of members
all_models.append(cnnmodel)
print('>loaded %s' % filepath_cnn)
bilstmmodel = load_model(filepath_bilstm)
# add to list of members
all_models.append(bilstmmodel)
print('>loaded %s' % filepath_bilstm)
def define_stacked_model(all_models):
    # update all layers in all models to not be trainable
    for i in range(len(all_models)):
        model = all_models[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer.name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in all_models]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in all_models]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(10, activation='relu')(merge)
    #hidden = Flatten()(hidden)
    output = Dense(2, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def fit_stacked_model(model, inputX, inputy):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # encode output data
    inputy_enc = to_categorical(inputy)
    # fit model
    model.fit(X, inputy_enc, epochs=10, verbose=1)
# make a prediction with a stacked model
def predict_stacked_model(model, inputX):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    # make prediction
    return model.predict(X, verbose=0)
stacked_model = define_stacked_model(all_models)
stacked_model.summary()
# fit stacked model on test dataset
fit_stacked_model(stacked_model,validateX,array(validatelabelshuffle))
#stacked_model.fit(X=testX,y=array(testlabelshuffle),epochs=10, verbose=1)
# make predictions and evaluate
yhat = predict_stacked_model(stacked_model, testX)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(array(testlabelshuffle), yhat)
print('Stacked Test Accuracy: %.3f' % acc)