I am a beginner in Python.
I am using Keras and TensorFlow to create an ML model. It works well with 4 classes, but when I add more classes I get this error. I have added 10 classes in total.
I use Python 3.7, Keras 2.3.1, and TensorFlow 1.14.
I'm following the tutorial linked here.
I have spent 3 days trying to solve this problem, but I could not.
Thanks
Classification.py File
import os as os
from keras.models import load_model
from keras.utils import np_utils
import model as md
import preparation as prep
import visualization as vis
from keras.utils import to_categorical
from keras import metrics
# ----------- dataset settings -----------
# number of instances per class used for train and test in total:
# should be smaller or equal than generated subset
INSTANCES_PER_CLASS = 5000
NUM_CLASS_LIMIT = 345 # limit of classes
# path of the dataset seperated in train and test
DATA_PATH = os.path.join(os.getcwd(), "../Draw/dataset/train_test_20k/")
# path for all created files
MODEL_PATH = os.path.join(os.getcwd(), "models/" + str(NUM_CLASS_LIMIT) + "/" + str(INSTANCES_PER_CLASS) + "/")
# ----------- model settings -----------
MODEL_NAME = 'model.h5' # name for the frozen model
# input size
IMG_WIDTH = 28
IMG_HEIGHT = 28
IMG_SIZE = IMG_WIDTH * IMG_HEIGHT
IMG_DIM = 1
# training settings
EPOCHS = 10
BATCH_SIZE = 256
if __name__ == "__main__":
    # create new directories if required
    if not os.path.isdir(MODEL_PATH):
        os.makedirs(MODEL_PATH)

    # get the dataset
    num_classes, x_train, x_test, y_train, y_test, classes_dict = prep.collect_data(NUM_CLASS_LIMIT)
    print("trainingsset instances {}".format(x_train.shape))
    print("trainingsset labels {}".format(y_train.shape))

    # plot first test images
    #vis.plot_first_n_images(x_test, y_test, classes_dict, 100)

    # class representation as "binary" vector
    y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

    # create or load keras model
    if not os.path.isfile(MODEL_PATH + MODEL_NAME):
        print("create model...")
        model = md.build_model(input_shape=x_train.shape[1:], num_classes=num_classes)
    else:
        print("load existing model...")
        model = load_model(MODEL_PATH + MODEL_NAME)

    # score trained model using validation set
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('test loss:', scores[0])
    print('test accuracy:', scores[1])

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])

    # print model information if desired
    print(model.summary())

    # model training from scratch or retrain by existing model
    hist = model.fit(x_train, y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     validation_data=[x_test, y_test],
                     shuffle=True)

    #from keras.utils import plot_model
    #plot_model(model, to_file=MODEL_PATH + 'model.png')

    # evaluation process
    print("evaluate model...")

    # summarize history during training phase
    # plot training and validation set accuracy
    vis.plot_training_history_accuracy(hist)

    # test set evaluation
    scores = model.evaluate(x_test, y_test, verbose=1)
    print(scores)
    print('test loss:', scores[0])
    print('test accuracy:', scores[1])

    # create and plot confusion matrix
    #y_pred = model.predict(x_test)
    #vis.plot_confusion_matrix(y_pred, y_test, classes=list(classes_dict.values()))

    # freeze the model (architecture and weights)
    model.save(os.path.join(MODEL_PATH, MODEL_NAME))
    print('saved trained model at {}'.format(os.path.join(MODEL_PATH, MODEL_NAME)))
The model code, model.py:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
def build_model(input_shape, num_classes):
    """
    Builds the model architecture based on the MNIST CNN:
    https://www.tensorflow.org/tutorials/estimators/cnn

    Args:
        input_shape: Input shape of the model
        num_classes: Number of classes

    Returns:
        keras.models.Model: The created model
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(32, (5, 5), activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.2)(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    print("SHOW DESEN", num_classes)
    return Model(inputs=inputs, outputs=predictions)
I believe the error is coming from this step:
hist = model.fit(x_train, y_train,
                 batch_size=BATCH_SIZE,
                 epochs=EPOCHS,
                 validation_data=[x_test, y_test],
                 shuffle=True)
The "target" in the error message refers to the y_train array. You might want to confirm that the y_train you are trying to fit matches the model shown by model.summary(), by checking y_train.shape just before the fit. It is possible that the old model designed for 4 classes is being loaded from disk, even though the new 10-class y_train is being passed to model.fit().
Diagnostic code example (model.fit is commented out):
# This outputs the model design. The final layer should be (None,num_classes)
print(model.summary())
# print shape of y_train/target data. The second dimension should match the number of classes from above (?, num_classes)
print(y_train.shape)
#model.fit( x_train, y_train, epochs=1)
I hope this helps.
This is the error message I got:
Traceback (most recent call last):
  File "/home/xxx/Documents/program/test.py", line 27, in <module>
    model.load_weights('models/model.h5')
  File "/home/xxx/Documents/program/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/network.py", line 1391, in load_weights
    saving.load_weights_from_hdf5_group(f, self.layers)
  File "/home/xxx/Documents/program/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/saving.py", line 732, in load_weights_from_hdf5_group
    ' layers.')
ValueError: You are trying to load a weight file containing 2 layers into a model with 0 layers.
The error comes from this minimal example:
from tensorflow import keras
from data import get_data
X_train, y_train, X_val, y_val = get_data() # get some train and val data
model = keras.Sequential()
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(7, activation='softmax'))
model.compile(
    optimizer=keras.optimizers.Adam(1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=500,
    epochs=200,
    verbose=2,
    validation_data=(X_val, y_val)
)
model.save_weights('models/model.h5')
model.load_weights('models/model.h5')
Running this directly does not produce the error. However, when I run the program a second time with the training part commented out (from line 10 to 25) and try only to load the weights, it gives me this error.
I am using Tensorflow 1.9.0 and the built-in Keras.
As mentioned above, there seems to be a bug in Keras Sequential mode: https://github.com/keras-team/keras/issues/10417.
However, you can get around this by using the Keras Functional API (you'll also find the Functional API much more useful when you're building trickier RNN models with complicated I/O and tensor concatenations).
The disadvantage of the model.save_weights() method is that you have to rebuild the model architecture before you can load the .h5 weights into the network. If you instead save the whole model (both parameters AND architecture), you'll find it much easier to load the trained model into a Python object. You can achieve this with the model.save() method.
### TRAINING CODE
import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# some data
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
y_train_oh = tf.keras.utils.to_categorical(y_train)
y_val_oh = tf.keras.utils.to_categorical(y_val)
# Keras Functional API
x = tf.keras.Input(shape=(4,))
dense = tf.keras.layers.Dense(64, activation='relu')(x)
dense = tf.keras.layers.Dense(3, activation='softmax')(dense)
model = tf.keras.Model(inputs=x, outputs=dense)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# training
model.fit(X_train, y_train_oh, 16, epochs=20, validation_data=(X_val, y_val_oh))
# save weights
model.save_weights('models/model_weights.h5')
# save weights AND architecture
model.save('models/model.h5')
### TESTING CODE
# Model loading using .h5 weights file
import tensorflow as tf
x = tf.keras.Input(shape=(4,))
dense = tf.keras.layers.Dense(64, activation='relu')(x)
dense = tf.keras.layers.Dense(3, activation='softmax')(dense)
model2 = tf.keras.Model(inputs=x, outputs=dense)
model2.load_weights('models/model_weights.h5')
# Model loading using .h5 model file
import tensorflow as tf
model3 = tf.keras.models.load_model('models/model.h5') # simpler API, but bigger filesize
I've written the following simple MLP network for the MNIST db.
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import callbacks
batch_size = 100
num_classes = 10
epochs = 20
tb = callbacks.TensorBoard(log_dir='/Users/shlomi.shwartz/tensorflow/notebooks/logs/minist',
                           histogram_freq=10, batch_size=32,
                           write_graph=True, write_grads=True, write_images=True,
                           embeddings_freq=10, embeddings_layer_names=None,
                           embeddings_metadata=None)

early_stop = callbacks.EarlyStopping(monitor='val_loss', min_delta=0,
                                     patience=3, verbose=1, mode='auto')
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Dense(200, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(60, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    callbacks=[tb, early_stop],
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
The model ran fine, and I could see the scalar info on TensorBoard. However, when I set embeddings_freq=10 to try to visualize the images (as seen here), I got the following error:
Traceback (most recent call last):
  File "/Users/shlomi.shwartz/IdeaProjects/TF/src/minist.py", line 65, in <module>
    validation_data=(x_test, y_test))
  File "/Users/shlomi.shwartz/tensorflow/lib/python3.6/site-packages/keras/models.py", line 870, in fit
    initial_epoch=initial_epoch)
  File "/Users/shlomi.shwartz/tensorflow/lib/python3.6/site-packages/keras/engine/training.py", line 1507, in fit
    initial_epoch=initial_epoch)
  File "/Users/shlomi.shwartz/tensorflow/lib/python3.6/site-packages/keras/engine/training.py", line 1117, in _fit_loop
    callbacks.set_model(callback_model)
  File "/Users/shlomi.shwartz/tensorflow/lib/python3.6/site-packages/keras/callbacks.py", line 52, in set_model
    callback.set_model(model)
  File "/Users/shlomi.shwartz/tensorflow/lib/python3.6/site-packages/keras/callbacks.py", line 719, in set_model
    self.saver = tf.train.Saver(list(embeddings.values()))
  File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1139, in __init__
    self.build()
  File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1161, in build
    raise ValueError("No variables to save")
ValueError: No variables to save
Q: What am I missing? Is that the right way of doing it in Keras?
Update: I understand there are some prerequisites for using the embedding projector; however, I haven't found a good tutorial for doing so in Keras. Any help would be appreciated.
What is called "embedding" here in callbacks.TensorBoard is, in a broad sense, any layer weight. According to Keras documentation:
embeddings_layer_names: a list of names of layers to keep eye on. If None or empty list all the embedding layer will be watched.
So by default it monitors the Embedding layers, but you don't actually need an Embedding layer to use this visualization tool.
In your MLP example, what's missing is the embeddings_layer_names argument. You have to specify which layers you want to visualize. Suppose you want to visualize the weights (or kernels, in Keras terms) of all Dense layers; you can specify embeddings_layer_names like this:
model = Sequential()
model.add(Dense(200, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(60, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
embedding_layer_names = set(layer.name
                            for layer in model.layers
                            if layer.name.startswith('dense_'))

tb = callbacks.TensorBoard(log_dir='temp', histogram_freq=10, batch_size=32,
                           write_graph=True, write_grads=True, write_images=True,
                           embeddings_freq=10, embeddings_metadata=None,
                           embeddings_layer_names=embedding_layer_names)

model.compile(...)
model.fit(...)
Then, you can see something like this in TensorBoard:
You can see the relevant lines in the Keras source if you want to figure out what's happening with embeddings_layer_names.
Edit:
So here's a dirty solution for visualizing layer outputs. Since the original TensorBoard callback does not support this, implementing a new callback seems inevitable.
Since it would take a lot of page space to rewrite the entire TensorBoard callback here, I'll just extend the original TensorBoard and write out the parts that are different (which is already quite lengthy). To avoid duplicated computation and model saving, rewriting the TensorBoard callback from scratch would be a better and cleaner option.
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from keras import backend as K
from keras.models import Model
from keras.callbacks import TensorBoard
class TensorResponseBoard(TensorBoard):
    def __init__(self, val_size, img_path, img_size, **kwargs):
        super(TensorResponseBoard, self).__init__(**kwargs)
        self.val_size = val_size
        self.img_path = img_path
        self.img_size = img_size

    def set_model(self, model):
        super(TensorResponseBoard, self).set_model(model)

        if self.embeddings_freq and self.embeddings_layer_names:
            embeddings = {}
            for layer_name in self.embeddings_layer_names:
                # initialize tensors which will later be used in `on_epoch_end()` to
                # store the response values by feeding the val data through the model
                layer = self.model.get_layer(layer_name)
                output_dim = layer.output.shape[-1]
                response_tensor = tf.Variable(tf.zeros([self.val_size, output_dim]),
                                              name=layer_name + '_response')
                embeddings[layer_name] = response_tensor

            self.embeddings = embeddings
            self.saver = tf.train.Saver(list(self.embeddings.values()))

            response_outputs = [self.model.get_layer(layer_name).output
                                for layer_name in self.embeddings_layer_names]
            self.response_model = Model(self.model.inputs, response_outputs)

            config = projector.ProjectorConfig()
            embeddings_metadata = {layer_name: self.embeddings_metadata
                                   for layer_name in embeddings.keys()}

            for layer_name, response_tensor in self.embeddings.items():
                embedding = config.embeddings.add()
                embedding.tensor_name = response_tensor.name

                # for coloring points by labels
                embedding.metadata_path = embeddings_metadata[layer_name]

                # for attaching images to the points
                embedding.sprite.image_path = self.img_path
                embedding.sprite.single_image_dim.extend(self.img_size)

            projector.visualize_embeddings(self.writer, config)

    def on_epoch_end(self, epoch, logs=None):
        super(TensorResponseBoard, self).on_epoch_end(epoch, logs)

        if self.embeddings_freq and self.embeddings_ckpt_path:
            if epoch % self.embeddings_freq == 0:
                # feeding the validation data through the model
                val_data = self.validation_data[0]
                response_values = self.response_model.predict(val_data)
                if len(self.embeddings_layer_names) == 1:
                    response_values = [response_values]

                # record the response at each layer we're monitoring
                response_tensors = []
                for layer_name in self.embeddings_layer_names:
                    response_tensors.append(self.embeddings[layer_name])
                K.batch_set_value(list(zip(response_tensors, response_values)))

                # finally, save all tensors holding the layer responses
                self.saver.save(self.sess, self.embeddings_ckpt_path, epoch)
To use it:
tb = TensorResponseBoard(log_dir=log_dir, histogram_freq=10, batch_size=10,
                         write_graph=True, write_grads=True, write_images=True,
                         embeddings_freq=10,
                         embeddings_layer_names=['dense_1'],
                         embeddings_metadata='metadata.tsv',
                         val_size=len(x_test), img_path='images.jpg', img_size=[28, 28])
Before launching TensorBoard, you'll need to save the labels and images to log_dir for visualization:
import os
import numpy as np
from PIL import Image

img_array = x_test.reshape(100, 100, 28, 28)
img_array_flat = np.concatenate([np.concatenate([x for x in row], axis=1) for row in img_array])
img = Image.fromarray(np.uint8(255 * (1. - img_array_flat)))
img.save(os.path.join(log_dir, 'images.jpg'))
np.savetxt(os.path.join(log_dir, 'metadata.tsv'), np.where(y_test)[1], fmt='%d')
Here's the result:
You need at least one Embedding layer in Keras. There is a good explanation of them on Stats SE; it is not specifically about Keras, but the concepts are roughly the same: What is an embedding layer in a neural network?
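For illustration only, here is a minimal sketch of a model containing an Embedding layer that the TensorBoard callback could watch; the vocabulary size, dimensions, and layer name are made up for this example:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

# hypothetical example: 1000-word vocabulary, 8-dimensional embeddings, sequences of length 10
model = Sequential()
model.add(Embedding(input_dim=1000, output_dim=8, input_length=10, name='word_embedding'))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')
# pass embeddings_layer_names=['word_embedding'] to the TensorBoard callback to monitor it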
So, I conclude that what you actually want (it's not completely clear from your post) is to visualize the predictions of your model, in a manner similar to this TensorBoard demo.
To start with, reproducing this is non-trivial even in TensorFlow, let alone Keras. The demo makes only brief, passing references to things like the metadata and sprite images that are necessary to obtain such visualizations.
Bottom line: although non-trivial, it is indeed possible to do it with Keras. You don't need the Keras callbacks; all you need is your model predictions, the necessary metadata and sprite image, and some pure TensorFlow code. So,
Step 1 - get your model predictions for the test set:
emb = model.predict(x_test) # 'emb' for embedding
Step 2a - build a metadata file with the real labels of the test set:
import os
import numpy as np

LOG_DIR = '/home/herc/SO/tmp'  # FULL PATH HERE!!!
metadata_file = os.path.join(LOG_DIR, 'metadata.tsv')
with open(metadata_file, 'w') as f:
    for i in range(len(y_test)):
        c = np.nonzero(y_test[i])[0][0]
        f.write('{}\n'.format(c))
Step 2b - get the sprite image mnist_10k_sprite.png as provided by the TensorFlow guys here, and place it in your LOG_DIR
Step 3 - write some Tensorflow code:
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
embedding_var = tf.Variable(emb, name='final_layer_embedding')
sess = tf.Session()
sess.run(embedding_var.initializer)
summary_writer = tf.summary.FileWriter(LOG_DIR)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
# Specify the metadata file:
embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
# Specify the sprite image:
embedding.sprite.image_path = os.path.join(LOG_DIR, 'mnist_10k_sprite.png')
embedding.sprite.single_image_dim.extend([28, 28]) # image size = 28x28
projector.visualize_embeddings(summary_writer, config)
saver = tf.train.Saver([embedding_var])
saver.save(sess, os.path.join(LOG_DIR, 'model2.ckpt'), 1)
Then, running TensorBoard on your LOG_DIR and selecting color by label, here is what you get:
Modifying this in order to get predictions for other layers is straightforward, although in this case the Keras Functional API may be a better choice.
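For example, here is a rough sketch of pulling an intermediate layer's activations with the Functional API; the layer name 'dense_1' is a placeholder, so check model.summary() for the real names in your model:
from keras.models import Model

# build a sub-model that stops at an intermediate layer
# 'dense_1' is a placeholder name - use the names printed by model.summary()
intermediate_model = Model(inputs=model.input,
                           outputs=model.get_layer('dense_1').output)
emb = intermediate_model.predict(x_test)  # feed these into the projector code above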
I was wondering if it was possible to save a partly trained Keras model and continue the training after loading the model again.
The reason for this is that I will have more training data in the future and I do not want to retrain the whole model again.
The functions which I am using are:
#Partly train model
model.fit(first_training, first_classes, batch_size=32, nb_epoch=20)
#Save partly trained model
model.save('partly_trained.h5')
#Load partly trained model
from keras.models import load_model
model = load_model('partly_trained.h5')
#Continue training
model.fit(second_training, second_classes, batch_size=32, nb_epoch=20)
Edit 1: added fully working example
With the first dataset after 10 epochs the loss of the last epoch will be 0.0748 and the accuracy 0.9863.
After saving, deleting and reloading the model the loss and accuracy of the model trained on the second dataset will be 0.1711 and 0.9504 respectively.
Is this caused by the new training data or by a completely re-trained model?
"""
Model by: http://machinelearningmastery.com/
"""
# load (downloaded if needed) the MNIST dataset
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.models import load_model
numpy.random.seed(7)
def baseline_model():
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu'))
    model.add(Dense(num_classes, init='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

if __name__ == '__main__':
    # load data
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # flatten 28*28 images to a 784 vector for each image
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

    # normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255

    # one hot encode outputs
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    num_classes = y_test.shape[1]

    # build the model
    model = baseline_model()

    # partly train model
    dataset1_x = X_train[:3000]
    dataset1_y = y_train[:3000]
    model.fit(dataset1_x, dataset1_y, nb_epoch=10, batch_size=200, verbose=2)

    # final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

    # save partly trained model
    model.save('partly_trained.h5')
    del model

    # reload model
    model = load_model('partly_trained.h5')

    # continue training
    dataset2_x = X_train[3000:]
    dataset2_y = y_train[3000:]
    model.fit(dataset2_x, dataset2_y, nb_epoch=10, batch_size=200, verbose=2)
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
Edit 2: tensorflow.keras remarks
For tensorflow.keras, change the parameter nb_epoch to epochs in model.fit. The imports and the baseline_model function are:
import numpy
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
numpy.random.seed(7)
def baseline_model():
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
Actually, model.save saves all the information needed for restarting training in your case. The only thing that could be spoiled by reloading the model is the optimizer state. To check that, try saving and reloading the model and then training it on the same training data.
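A quick sanity check, sketched below under the assumption that you already have a compiled model and (x_train, y_train) in scope; 'checkpoint.h5' is just an example filename:
from keras.models import load_model

# train a little, note the loss, then save
hist_before = model.fit(x_train, y_train, epochs=1, batch_size=32)
model.save('checkpoint.h5')

# reload and train one more epoch on the same data
model = load_model('checkpoint.h5')
hist_after = model.fit(x_train, y_train, epochs=1, batch_size=32)

# if the loss continues roughly where it left off, training resumed properly;
# a large jump would point at lost optimizer state
print(hist_before.history['loss'], hist_after.history['loss'])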
Most of the above answers covered important points. If you are using a recent TensorFlow (TF 2.1 or above), then the following example will help you. The model part of the code is from the TensorFlow website.
import tensorflow as tf
from tensorflow import keras
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
def create_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create a basic model instance
model = create_model()
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)
Please save the model in the *.tf format. In my experience, if you have a custom_loss defined, the *.h5 format will not save the optimizer status and hence will not serve your purpose if you want to retrain the model from where you left off.
# saving the model in tensorflow format
model.save('./MyModel_tf',save_format='tf')
# loading the saved model
loaded_model = tf.keras.models.load_model('./MyModel_tf')
# retraining the model
loaded_model.fit(x_train, y_train, epochs = 10, validation_data = (x_test,y_test),verbose=1)
This approach restarts the training where you left off before saving the model. As mentioned by others, if you want to save the weights of the best model, or to save weights every epoch, you need to use the Keras ModelCheckpoint callback with options such as save_weights_only=True, save_freq='epoch', and save_best_only.
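A minimal sketch of such a checkpoint callback, reusing the model and data from above; the file path and monitored metric are just example choices:
from tensorflow.keras.callbacks import ModelCheckpoint

# save only the weights of the best model seen so far, once per epoch
checkpoint_cb = ModelCheckpoint(filepath='best_weights.ckpt',
                                save_weights_only=True,
                                save_freq='epoch',
                                save_best_only=True,
                                monitor='val_accuracy',
                                verbose=1)
model.fit(x_train, y_train, epochs=10,
          validation_data=(x_test, y_test),
          callbacks=[checkpoint_cb])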
For more details, please check here and another example here.
The problem might be that you use a different optimizer - or different arguments to your optimizer. I just had the same issue with a custom pretrained model, using
from keras.callbacks import ReduceLROnPlateau  # import added for completeness

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=lr_reduction_factor,
                              patience=patience, min_lr=min_lr, verbose=1)
for the pretrained model, whereby the original learning rate starts at 0.0003 and during pre-training is reduced to the minimum learning rate of 0.000003.
I just copied that line over to the script that uses the pretrained model and got really bad accuracies, until I noticed that the last learning rate of the pretrained model was the minimum learning rate, i.e. 0.000003. If I start with that learning rate, I get exactly the same accuracies to start with as the output of the pretrained model - which makes sense: starting with a learning rate 100 times larger than the last learning rate used for the pretrained model results in a huge overshoot of gradient descent and hence in heavily decreased accuracies.
Notice that Keras sometimes has issues with loaded models, as in here.
This might explain cases in which you don't start from the same trained accuracy.
You might also be hitting concept drift; see Should you retrain a model when new observations are available. There's also the concept of catastrophic forgetting, which a number of academic papers discuss. Here's one with MNIST: Empirical investigation of catastrophic forgetting.
All of the above helps; you must resume from the same learning rate as when the model and weights were saved. Set it directly on the optimizer.
Note that improvement from there is not guaranteed, because the model may have reached a local minimum, which may be global. There is no point in resuming a model in order to search for another local minimum, unless you intend to increase the learning rate in a controlled fashion and nudge the model into a possibly better minimum not far away.
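For example, here is a rough sketch of inspecting and restoring the learning rate through the Keras backend, assuming a model reloaded with load_model; the value 3e-6 is just an example, use whatever your previous run ended with:
from keras import backend as K

# inspect the learning rate stored with the reloaded model
print('current lr:', K.get_value(model.optimizer.lr))

# set it explicitly to the last value used before saving (3e-6 here is just an example)
K.set_value(model.optimizer.lr, 3e-6)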
If you are using TF2, use the new saved_model method (pb format). More information is available here and here.
model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your first training
tf.saved_model.save(model, save_to_dir_path)  # save the model
del model  # delete the model
model = tf.keras.models.load_model(save_to_dir_path)
model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your second training
It is completely fine to continue training from a saved model. I trained the saved model with the same data and found that it was giving good accuracy, and the time taken per epoch was considerably less.
Here is the code; have a look:
from keras.models import load_model
model = load_model('/content/drive/MyDrive/CustomResNet/saved_models/model_1.h5')
history = model.fit(train_gen, validation_data=valid_gen, epochs=5)
I want to build and train a neural network using the Keras framework. I configured Keras to use TensorFlow as its backend. After training the model with Keras, I tried to use TensorFlow only. I can access the session and get the TensorFlow graph, but I do not know how to use that graph, for example, to make a prediction.
I built the network following this tutorial:
http://machinelearningmastery.com/tutorial-first-neural-network-python-keras/
In the train() method I build and train a model using Keras only and save both the Keras and the TensorFlow model. In the eval() method I load the saved models and want to make predictions using TensorFlow only.
Here is my code:
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
import keras.backend.tensorflow_backend as K
import tensorflow as tf
import numpy
sess = tf.Session()
K.set_session(sess)
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
def train():
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init='uniform', activation='relu'))
    model.add(Dense(8, init='uniform', activation='relu'))
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Fit the model
    model.fit(X, Y, nb_epoch=10, batch_size=10)
    # evaluate the model
    scores = model.evaluate(X, Y)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    # serialize model to JSON
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model.h5")

    # save tensorflow model
    saver = tf.train.Saver()
    save_path = saver.save(sess, "model")

def eval():
    # load json and create model
    json_file = open('model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights("model.h5")

    # evaluate loaded model on test data
    loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    score = loaded_model.evaluate(X, Y, verbose=0)
    loaded_model.predict(X)
    print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))

    # load tensorflow model
    sess = tf.Session()
    saver = tf.train.import_meta_graph('model.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))

    # TODO try to predict with the tensorflow model only
    # without using keras functions
I can access the TensorFlow graph (sess.graph) that the Keras framework built for me, but I do not know how to predict with it. I know how to build a TensorFlow graph and predict with it in general, but not with the model Keras built for me.
You need to get the input and output tensors from the Keras model definition and the current TensorFlow session; then you can evaluate the output using TensorFlow only. Assuming model is your loaded_model and x is your input data:
sess = K.get_session()
input_tensor = model.input
output_tensor = model.output
output_tensor.eval(feed_dict={input_tensor: x}, session=sess)
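Alternatively, if you want to bypass Keras entirely and use only the checkpoint saved with tf.train.Saver, you can look the tensors up by name in the restored graph. A rough sketch; the tensor names below are placeholders, since the actual names depend on how Keras named your layers, so print them first:
import tensorflow as tf

sess = tf.Session()
saver = tf.train.import_meta_graph('model.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
graph = sess.graph

# list operation names to find the real input/output tensor names
for op in graph.get_operations():
    print(op.name)

# 'dense_1_input:0' and 'dense_3/Sigmoid:0' are placeholders - substitute the names printed above
input_tensor = graph.get_tensor_by_name('dense_1_input:0')
output_tensor = graph.get_tensor_by_name('dense_3/Sigmoid:0')
predictions = sess.run(output_tensor, feed_dict={input_tensor: X})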