I optimized my Keras model using hyperopt. How do I save the best optimized Keras model and its weights to disk?
My code:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys
X = []
y = []
X_val = []
y_val = []
space = {'choice': hp.choice('num_layers',
                             [{'layers': 'two'},
                              {'layers': 'three',
                               'units3': hp.uniform('units3', 64, 1024),
                               'dropout3': hp.uniform('dropout3', .25, .75)}
                              ]),
         'units1': hp.choice('units1', [64, 1024]),
         'units2': hp.choice('units2', [64, 1024]),
         'dropout1': hp.uniform('dropout1', .25, .75),
         'dropout2': hp.uniform('dropout2', .25, .75),
         'batch_size': hp.uniform('batch_size', 20, 100),
         'nb_epochs': 100,
         'optimizer': hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
         'activation': 'relu'
         }
def f_nn(params):
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation
    from keras.optimizers import Adadelta, Adam, rmsprop
    print('Params testing: ', params)

    model = Sequential()
    model.add(Dense(output_dim=params['units1'], input_dim=X.shape[1]))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    model.add(Dense(output_dim=params['units2'], init="glorot_uniform"))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout2']))

    if params['choice']['layers'] == 'three':
        model.add(Dense(output_dim=params['choice']['units3'], init="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['choice']['dropout3']))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

    model.fit(X, y, nb_epoch=params['nb_epochs'], batch_size=params['batch_size'], verbose=0)

    pred_auc = model.predict_proba(X_val, batch_size=128, verbose=0)
    acc = roc_auc_score(y_val, pred_auc)
    print('AUC:', acc)
    sys.stdout.flush()
    return {'loss': -acc, 'status': STATUS_OK}
trials = Trials()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=100, trials=trials)
print('best: ')
print(best)
The Trials object stores a lot of relevant information about each iteration of hyperopt. We can also ask this object to store the trained model.
You have to make a few small changes in your code base to achieve this.
-- return {'loss': -acc, 'status': STATUS_OK}
++ return {'loss': -acc, 'status': STATUS_OK, 'Trained_Model': model}
Note: 'Trained_Model' is just a key; you can use any other string.
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=100, trials=trials)
model = getBestModelfromTrials(trials)
Retrieve the trained model from the trials object:
import numpy as np
from hyperopt import STATUS_OK
def getBestModelfromTrials(trials):
    valid_trial_list = [trial for trial in trials
                        if STATUS_OK == trial['result']['status']]
    losses = [float(trial['result']['loss']) for trial in valid_trial_list]
    index_having_minimum_loss = np.argmin(losses)
    best_trial_obj = valid_trial_list[index_having_minimum_loss]
    return best_trial_obj['result']['Trained_Model']
Note: I have used this approach with Scikit-Learn classes as well.
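Once the best model has been retrieved, saving it to disk is a one-liner. A minimal sketch for the Keras model from the question (the file name here is arbitrary; for a scikit-learn estimator you could use joblib.dump instead):
best_model = getBestModelfromTrials(trials)
best_model.save("best_model.h5")  # Keras: saves architecture and weights in one file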
Make f_nn return the model.
def f_nn(params):
    # ...
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}
The models will be available on the trials object under results. I put in some sample data, and print(trials.results) spat out
[{'loss': 2.8245880603790283, 'status': 'ok', 'model': <keras.engine.training.Model object at 0x000001D725F62B38>}, {'loss': 2.4592788219451904, 'status': 'ok', 'model': <keras.engine.training.Model object at 0x000001D70BC3ABA8>}]
Use np.argmin to find the smallest loss, then save using model.save
trials.results[np.argmin([r['loss'] for r in trials.results])]['model']
(Side note, in C# this would be trials.results.min(r => r.loss).model... if there's a better way to do this in Python please let me know!)
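Putting the pieces together, a minimal sketch (assuming the Keras model was stored under the 'model' key as above):
import numpy as np

best_model = trials.results[np.argmin([r['loss'] for r in trials.results])]['model']
best_model.save('best_model.h5')  # persist the winning model to disk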
You may wish to use attachments on the trial object if you're using MongoDB, as the model may be very large:
attachments - a dictionary of key-value pairs whose keys are short strings (like filenames) and whose values are potentially long strings (like file contents) that should not be loaded from a database every time we access the record. (Also, MongoDB limits the length of normal key-value pairs so once your value is in the megabytes, you may have to make it an attachment.) Source.
I don't know how to pass a variable to f_nn or another hyperopt objective explicitly, but I've used two approaches to do the same task.
The first approach is a global variable (I don't like it, because it's unclear), and the second is to save the metric value to a file, then read it and compare it with the current metric. The latter approach seems better to me.
def f_nn(params):
    ...
    # I omit a part of the code
    pred_auc = model.predict_proba(X_val, batch_size=128, verbose=0)
    acc = roc_auc_score(y_val, pred_auc)

    try:
        with open("metric.txt") as f:
            best_acc = float(f.read().strip())  # read the best metric so far
    except FileNotFoundError:
        best_acc = -1.0  # no previous run, so the current model is the best by definition
    if acc > best_acc:  # AUC: higher is better
        model.save("model.hd5")  # save the best model to disk and overwrite the metric
        with open("metric.txt", "w") as f:
            f.write(str(acc))

    print('AUC:', acc)
    sys.stdout.flush()
    return {'loss': -acc, 'status': STATUS_OK}
trials = Trials()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=100, trials=trials)
print('best: ')
print(best)
from keras.models import load_model
best_model = load_model("model.hd5")
This approach has several advantages: you keep the metric and the model together, and you can even put them under a version or data version control system, so you can restore the results of an experiment in the future.
Edit
It can cause unexpected behaviour if a metric file from a previous run is left over and you don't delete it. To handle this, you can adapt the code to remove the metric file after the optimization, or use a timestamp etc. to distinguish your experiments' data.
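For example, a minimal cleanup sketch along those lines, using the file name from above (run it after the optimization finishes):
import os

if os.path.exists("metric.txt"):
    os.remove("metric.txt")  # so the next experiment starts from a clean slate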
It is easy to use a global variable to save the model. I would recommend saving it as an attribute on the trials object for clarity. In my experience with hyperopt, unless you wrap ALL the remaining parameters (the ones that are not tuned) into a dict to feed into the objective function (e.g. objective_fn = partial(objective_fn_withParams, otherParams=otherParams)), it is very difficult to avoid global vars.
Example provided below:
trials = Trials()
trials.mybest = None  # initialize an attribute for saving the model later
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=100, trials=trials)
trials.mybest['model'].save("model.hd5")

## In your optimization objective function
def f_nn(params):
    global trials
    model = trainMyKerasModelWithParams(..., params)
    ...
    pred_auc = model.predict_proba(X_val, batch_size=128, verbose=0)
    acc = roc_auc_score(y_val, pred_auc)
    loss = -acc

    ## Track only the best model (for saving later)
    if ((trials.mybest is None)
            or (loss < trials.mybest['loss'])):
        trials.mybest = {'loss': loss, 'model': model}
    ...
##
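For completeness, here is a minimal sketch of the functools.partial approach mentioned above; objective_fn_withParams and otherParams are hypothetical names from that example, and the training code is elided:
from functools import partial

def objective_fn_withParams(params, otherParams):
    # ... build, train and evaluate the model using both dicts ...
    return {'loss': loss, 'status': STATUS_OK}

objective_fn = partial(objective_fn_withParams, otherParams=otherParams)
best = fmin(objective_fn, space, algo=tpe.suggest, max_evals=100, trials=trials)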
Related
I have a function that loads a pre-trained model from huggingface, fine-tunes it for sentiment analysis, then calculates the F1 score and returns the result.
The problem is that when I call this function multiple times with the exact same arguments, it gives the exact same metric score, which is expected, except for the first time, which is different. How is that possible?
This is my function, which is based on this tutorial on huggingface:
import uuid
import numpy as np
from datasets import (
    load_dataset,
    load_metric,
    DatasetDict,
    concatenate_datasets
)
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
CHECKPOINT = "distilbert-base-uncased"
SAVING_FOLDER = "sst2"
def custom_train(datasets, checkpoint=CHECKPOINT, saving_folder=SAVING_FOLDER):
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    def tokenize_function(example):
        return tokenizer(example["sentence"], truncation=True)

    tokenized_datasets = datasets.map(tokenize_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    saving_folder = f"{SAVING_FOLDER}_{str(uuid.uuid1())}"
    training_args = TrainingArguments(saving_folder)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
    )
    trainer.train()

    predictions = trainer.predict(tokenized_datasets["test"])
    print(predictions.predictions.shape, predictions.label_ids.shape)
    preds = np.argmax(predictions.predictions, axis=-1)

    metric_fun = load_metric("f1")
    metric_result = metric_fun.compute(predictions=preds, references=predictions.label_ids)
    return metric_result
And then I will run this function several times with the same datasets, and append the result of the returned F1 score each time:
raw_datasets = load_dataset("glue", "sst2")
small_datasets = DatasetDict({
    "train": raw_datasets["train"].select(range(100)).flatten_indices(),
    "validation": raw_datasets["validation"].select(range(100)).flatten_indices(),
    "test": raw_datasets["validation"].select(range(100, 200)).flatten_indices(),
})

results = []
for i in range(4):
    result = custom_train(small_datasets)
    results.append(result)
And then when I check the results list:
[{'f1': 0.7755102040816325}, {'f1': 0.5797101449275361}, {'f1': 0.5797101449275361}, {'f1': 0.5797101449275361}]
Something that may come to mind is that when I load a pre-trained model, the head is initialized with random weights, and that is why the results are different. If that is the case, why is only the first result different while the others are exactly the same?
Sylvain Gugger answered this question here: https://discuss.huggingface.co/t/multiple-training-will-give-exactly-the-same-result-except-for-the-first-time/8493
You need to set the seed before instantiating your model, otherwise the random head is not initialized the same way, that’s why the first run will always be different.
The subsequent runs are all the same because the seed has been set by the Trainer in the train method.
To set the seed:
from transformers import set_seed
set_seed(42)
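For example, to make every run of the loop from the question reproducible, you could re-seed before each call (a sketch based on the code above):
from transformers import set_seed

results = []
for i in range(4):
    set_seed(42)  # seed before the classification head is instantiated inside custom_train
    results.append(custom_train(small_datasets))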
I want to implement concordance_index_censored on my test set, using the model I trained.
I don't understand what I should pass as the estimate parameter in concordance_index_censored().
Is it somewhere in coxnet_pred? If not, where should I get it from?
I tried coxnet_pred['array'] and that doesn't work because it contains step functions.
Code as follows
import numpy as np
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.metrics import concordance_index_censored
from sksurv.util import Surv

y = Surv.from_arrays(np.array(survival_status_training), np.array(survival_time_training),
                     name_event="event", name_time="time")

cox_lasso_model = CoxnetSurvivalAnalysis(l1_ratio=1.0, fit_baseline_model=True)
cox_lasso_trained = cox_lasso_model.fit(training_data, y)

coxnet_pred = cox_lasso_trained.predict_survival_function(np.array(test_data))

training_cindex = concordance_index_censored(event_indicator=np.array(survival_status_training),
                                             event_time=np.array(survival_time_training),
                                             estimate=coxnet_pred['array'])
The estimate parameter for concordance_index_censored should be an array with a single risk score per instance in your test data:
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.metrics import concordance_index_censored
from sksurv.util import Surv
train_y = Surv.from_arrays(
    survival_status_training,
    survival_time_training
)
test_y = Surv.from_arrays(
    survival_status_test,
    survival_time_test
)

model = CoxnetSurvivalAnalysis()
model.fit(train_X, train_y)
test_risk_scores = model.predict(test_X)

cindex = concordance_index_censored(
    event_indicator=test_y["event"],
    event_time=test_y["time"],
    estimate=test_risk_scores)
Alternatively, you can use model.score(test_X, test_y) as explained in the
user guide.
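For reference, a one-line sketch of that alternative, using the same fitted model and test arrays as above (the default score of sksurv estimators is the concordance index):
cindex_alt = model.score(test_X, test_y)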
I would like to get the best model to use later in the notebook to predict using a different test batch.
Reproducible example (taken from the Optuna GitHub):
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
import optuna
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def objective(trial):
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.25)
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    # Add a callback for pruning.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
    gbm = lgb.train(
        param, dtrain, valid_sets=[dvalid], verbose_eval=False, callbacks=[pruning_callback]
    )

    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy
My understanding is that the study below will tune for accuracy. I would like to somehow retrieve the best model from the study (not just the parameters) without saving it as a pickle; I just want to use the model somewhere else in my notebook.
if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="maximize"
    )
    study.optimize(objective, n_trials=100)

    print("Best trial:")
    trial = study.best_trial

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
desired output would be
best_model = ~model from above~
new_target_pred = best_model.predict(new_data_test)
metrics.accuracy_score(new_target_test, new_target_pred)
Short addition to #Toshihiko Yanase's answer, because the condition study.best_trial==trial was never True for me. This was even the case when both (Frozen)Trial objects had the same content, so it is likely a bug in Optuna. Changing the condition to study.best_trial.number==trial.number solves the problem for me.
Also, if you prefer not to use globals in Python, you can use the study and trial user attributes:
def objective(trial):
    gbm = ...
    trial.set_user_attr(key="best_booster", value=gbm)
    # ... train, evaluate, and return the metric as before

def callback(study, trial):
    if study.best_trial.number == trial.number:
        study.set_user_attr(key="best_booster", value=trial.user_attrs["best_booster"])

if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="maximize"
    )
    study.optimize(objective, n_trials=100, callbacks=[callback])
    best_model = study.user_attrs["best_booster"]
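The retrieved booster can then be used directly, for example on the separate test batch mentioned in the question (new_data_test and new_target_test are assumed to exist):
import numpy as np
import sklearn.metrics

new_target_pred = np.rint(best_model.predict(new_data_test))  # the booster outputs probabilities
print(sklearn.metrics.accuracy_score(new_target_test, new_target_pred))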
I think you can use the callback argument of Study.optimize to save the best model. In the following code example, the callback checks whether a given trial corresponds to the best trial and saves the model as a global variable best_booster.
best_booster = None
gbm = None

def objective(trial):
    global gbm
    # ...

def callback(study, trial):
    global best_booster
    if study.best_trial == trial:
        best_booster = gbm

if __name__ == "__main__":
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="maximize"
    )
    study.optimize(objective, n_trials=100, callbacks=[callback])
If you define your objective function as a class, you can remove the global variables. I created a notebook as a code example. Please take a look at it:
https://colab.research.google.com/drive/1ssjXp74bJ8bCAbvXFOC4EIycBto_ONp_?usp=sharing
I would like to somehow retrieve the best model from the study (not just the parameters) without saving it as a pickle
FYI, if you can pickle the boosters, I think you can make the code simple by following this FAQ.
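A minimal sketch of that FAQ approach, assuming the booster can be pickled (the file names are illustrative):
import pickle

def objective(trial):
    # ... build and train gbm as in the original objective ...
    with open("booster_{}.pickle".format(trial.number), "wb") as fout:
        pickle.dump(gbm, fout)  # persist this trial's booster under its trial number
    return accuracy

# after study.optimize(...), load the booster of the best trial
with open("booster_{}.pickle".format(study.best_trial.number), "rb") as fin:
    best_booster = pickle.load(fin)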
I know this has already been answered, but there is a straightforward way of doing this with the optuna-LightGBM integration, LightGBMTuner, released in late 2020.
In short, you can do what you want (i.e. save the best booster) as follows:
import optuna.integration.lightgbm as lgb

dtrain = lgb.Dataset(X, Y, categorical_feature='auto')

params = {
    "objective": "binary",
    "metric": "auc",
    "verbosity": -1,
    "boosting_type": "gbdt",
}

tuner = lgb.LightGBMTuner(
    params, dtrain, verbose_eval=100, early_stopping_rounds=1000,
    model_dir='directory_to_save_boosters'
)

tuner.run()
tuner.run()
Please note that the main thing here is to specify a model_dir directory to save the models in each of the iterations.
There is usually no need for a pruning callback as the optimization is done using a combination of Bayesian methods and expert heuristics and the search is usually over in around 60-64 iterations.
Then you can get the best model from the model directory you specified above using the single line
tuner.get_best_booster()
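As a sketch, the returned object is a regular LightGBM booster, so you can use it directly (X_new stands in for whatever new data you want to score); the tuned parameters are also available as tuner.best_params:
best_booster = tuner.get_best_booster()
print(tuner.best_params)
new_preds = best_booster.predict(X_new)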
I have some fundamental questions about the algorithms I picked for my Tensorflow project. I fed in around 1 million sets of training data and still couldn't get accurate enough prediction results.
The code I am using is based on an old Tensorflow example (https://github.com/tensorflow/tensorflow/blob/r1.3/tensorflow/examples/tutorials/estimators/abalone.py). The goal of this example is to predict the age of an abalone based on the training features provided.
My purpose is very similar. The only difference is that I have more labels (6) than features (4). Since the predictions after training are way off, I am starting to question the feasibility of this project.
I am pretty new to Machine Learning and Tensorflow so I am not very sure if I have picked the proper methods for this project. I'd like to know if there are some ways to improve my current code to possibly improve the accuracy of the predictions, like more layers, different optimization methods, etc.
Here is the code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
LEARNING_RATE = 0.001
def model_fn(features, labels, mode, params):
    """Model function for Estimator."""
    first_hidden_layer = tf.layers.dense(features["x"], 10, activation=tf.nn.relu)

    # Connect the second hidden layer to first hidden layer with relu
    second_hidden_layer = tf.layers.dense(
        first_hidden_layer, 10, activation=tf.nn.relu)

    # Connect the output layer to second hidden layer (no activation fn)
    output_layer = tf.layers.dense(second_hidden_layer, 6)

    # Reshape output layer to 1-dim Tensor to return predictions
    predictions = tf.reshape(output_layer, [-1, 6])

    # Provide an estimator spec for `ModeKeys.PREDICT`.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"ages": predictions})

    # Calculate loss using mean squared error
    loss = tf.losses.mean_squared_error(labels, predictions)

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=params["learning_rate"])
    train_op = optimizer.minimize(
        loss=loss, global_step=tf.train.get_global_step())

    # Calculate root mean squared error as additional eval metric
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            tf.cast(labels, tf.float64), predictions)
    }

    # Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
def main(unused_argv):
    train_file = "training_data_mc1000.csv"
    test_file = "test_data_mc1000.csv"

    train_features_interim = pd.read_csv(train_file, usecols=['vgs', 'vbs', 'vds', 'current'])
    train_features_numpy = np.asarray(train_features_interim, dtype=np.float64)

    train_labels_interim = pd.read_csv(train_file, usecols=['plo_tox', 'plo_dxl', 'plo_dxw', 'parl1', 'parl2', 'random_fn'])
    train_labels_numpy = np.asarray(train_labels_interim, dtype=np.float64)

    # Set model params
    model_params = {"learning_rate": LEARNING_RATE}

    # Instantiate Estimator
    nn = tf.estimator.Estimator(model_fn=model_fn, params=model_params)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_features_numpy},
        y=train_labels_numpy,
        num_epochs=None,
        shuffle=True)

    # Train
    nn.train(input_fn=train_input_fn, max_steps=1048576)

    test_features_interim = pd.read_csv(test_file, usecols=['vgs', 'vbs', 'vds', 'current'])
    test_features_numpy = np.asarray(test_features_interim, dtype=np.float64)

    test_labels_interim = pd.read_csv(test_file, usecols=['plo_tox', 'plo_dxl', 'plo_dxw', 'parl1', 'parl2', 'random_fn'])
    test_labels_numpy = np.asarray(test_labels_interim, dtype=np.float64)

    # Score accuracy
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_features_numpy},
        y=test_labels_numpy,
        num_epochs=1,
        shuffle=False)
    ev = nn.evaluate(input_fn=test_input_fn)
    print("Loss: %s" % ev["loss"])
    print("Root Mean Squared Error: %s" % ev["rmse"])

    prediction_file = "Tensorflow_prediction_data.csv"
    predict_features_interim = pd.read_csv(prediction_file, usecols=['vgs', 'vbs', 'vds', 'current'])
    predict_features_numpy = np.asarray(predict_features_interim, dtype=np.float64)

    # Print out predictions
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": predict_features_numpy},
        num_epochs=1,
        shuffle=False)
    predictions = nn.predict(input_fn=predict_input_fn)
    for i, p in enumerate(predictions):
        print("Prediction %s: %s" % (i + 1, p["ages"]))

if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--train_data", type=str, default="", help="Path to the training data.")
    parser.add_argument(
        "--test_data", type=str, default="", help="Path to the test data.")
    parser.add_argument(
        "--predict_data",
        type=str,
        default="",
        help="Path to the prediction data.")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
A portion of the training and testing data is shown in a screenshot (not reproduced here).
The last four columns are the features and the first six columns are the labels. Again, you can see that I have more labels than features. My goal is to train a model so that, when I feed in new sets of features, it can predict labels accurately enough.
The following part is added to clarify my data sets. Thanks to the first commenters on my question for reminding me to add this.
The relation between my features and labels is: every 30 (vgs) x 10 (vbs) x 10 (vds) combination corresponds to 1 set of labels. Basically it is like a 3-D array, with the first three features acting as coordinates and the last feature (current) as the value stored in each cell. That's why the labels in the portion I showed are all the same.
Another question is that I expected the loss to get smaller and smaller as training progresses, but it does not. I think this is another reason why the output is not accurate, because the loss-minimization part isn't working. I don't really know why, though.
Thanks for taking time looking at this and I'd love to have a discussion down below.
From what I can see in your code, you are not normalizing your features. Try normalizing them, for example so that each feature has zero mean and a standard deviation of 1. Since your features are in completely different ranges, this normalization might help.
It would also be helpful to see the other labels. The ones in the provided picture are all the same.
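A minimal sketch of that kind of standardization, based on the numpy feature arrays from the question; note that the statistics must be computed on the training set only and then reused for the test and prediction sets:
# per-feature statistics from the training set only
feature_mean = train_features_numpy.mean(axis=0)
feature_std = train_features_numpy.std(axis=0) + 1e-8  # avoid division by zero

train_features_numpy = (train_features_numpy - feature_mean) / feature_std
test_features_numpy = (test_features_numpy - feature_mean) / feature_std
predict_features_numpy = (predict_features_numpy - feature_mean) / feature_std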
I have trouble recording 'val_loss' and 'val_acc' in Keras. 'loss' and 'acc' are easy because they are always recorded in the history of model.fit.
'val_loss' is recorded if validation is enabled in fit, and val_acc is recorded if validation and accuracy monitoring are enabled. But what does this mean?
My code is model.fit(train_data, train_labels, epochs = 64, batch_size = 10, shuffle = True, validation_split = 0.2, callbacks=[history]).
As you can see, I use 5-fold cross-validation and shuffle the data. In this case, how can I enable validation in fit to record 'val_loss' and 'val_acc'?
Thanks
From Keras documentation, we have for models.fit method:
fit(x=None, y=None,
    batch_size=None,
    epochs=1,
    verbose=1,
    callbacks=None,
    validation_split=0.0,
    validation_data=None,
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None)
'val_loss' is recorded if validation is enabled in fit, and val_acc is recorded if validation and accuracy monitoring are enabled. - This is from the keras.callbacks.Callback() object, if used for the callbacks parameter in the above fit method.
Instead of the history callback, which you've used, it can be passed as follows:
from keras.callbacks import Callback
logs = Callback()
model.fit(train_data,
          train_labels,
          epochs=64,
          batch_size=10,
          shuffle=True,
          validation_split=0.2,
          callbacks=[logs])
'val_loss' is recorded if validation is enabled in fit means: when using the model.fit method, you either use the validation_split parameter or the validation_data parameter to specify the tuple (x_val, y_val) or (x_val, y_val, val_sample_weights) on which to evaluate the loss and any model metrics at the end of each epoch.
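For instance, a minimal sketch with an explicit validation set, where x_val and y_val are assumed to be your held-out arrays:
model.fit(train_data, train_labels,
          epochs=64,
          batch_size=10,
          validation_data=(x_val, y_val))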
A History object. Its History.history attribute is a record of
training loss values and metrics values at successive epochs, as well
as validation loss values and validation metrics values (if
applicable). - Keras Documentation ( Return value for model.fit method)
You are using the History callback, in your model as follows:
model.fit(train_data,
          train_labels,
          epochs=64,
          batch_size=10,
          shuffle=True,
          validation_split=0.2,
          callbacks=[history])
history.history will output a dictionary with loss, acc, val_loss and val_acc, if you assign the return value of model.fit to a variable as below:
history = model.fit(train_data,
                    train_labels,
                    epochs=64,
                    batch_size=10,
                    shuffle=True,
                    validation_split=0.2,
                    callbacks=[history])

history.history
The output will be like the following:
{'val_loss': [14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849,
14.431451635814849],
'val_acc': [0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403,
0.1046428571712403],
'loss': [14.555215610322499,
14.555215534028553,
14.555215548560733,
14.555215588524229,
14.555215592157273,
14.555215581258137,
14.555215575808571,
14.55521561940511,
14.555215563092913,
14.555215624854679],
'acc': [0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571,
0.09696428571428571]}
You can save the data either by using CSVLogger as below (as given in the comments), or by the longer method of writing a dictionary to a CSV file, as given here: writing a dictionary to a csv
from keras.callbacks import CSVLogger

csv_logger = CSVLogger('training.log')
model.fit(X_train, Y_train, callbacks=[csv_logger])
UPDATE: The val_accuracy dictionary key seems to no longer work today. No idea why, but I removed that code from here, despite the OP asking how to log it (also, loss is what actually matters for comparing cross-validation results).
Using Python 3.7 and Tensorflow 2.0, the following worked for me after much searching, guessing, and failing repeatedly. I started with someone else's script to write what I needed to a .json file; it produces one such .json file per training run, showing the validation loss per epoch, so you can see how the model converged (or did not); accuracy is logged but not as a performance metric.
NOTE: You need to fill in yourTrainDir, yourTrainingData, yourValidationData, yourOptimizer, yourLossFunctionFromKerasOrElsewhere, yourNumberOfEpochs, etc. to enable this code to run:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LambdaCallback
import json

model.compile(
    optimizer=yourOptimizer,
    loss=yourLossFunctionFromKerasOrElsewhere()
)

# create a custom callback to enable future cross-validation efforts
yourTrainDir = os.getcwd() + '/yourOutputFolderName/'
uniqueID = np.random.randint(999999)  # To distinguish validation runs by saved JSON name
epochValidationLog = open(
    yourTrainDir +
    'val_log_per_epoch_' +
    '{}_'.format(uniqueID) +
    '.json',
    mode='wt',
    buffering=1
)
ValidationLogsCallback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: epochValidationLog.write(
        json.dumps(
            {
                'oneIndexedEpoch': epoch + 1,
                'Validationloss': logs['val_loss']
            }
        ) + '\n'
    ),
    on_train_end=lambda logs: epochValidationLog.close()
)

# set up the list of callbacks
callbacksList = [
    ValidationLogsCallback,
    EarlyStopping(patience=40, verbose=1),
]

results = model.fit(
    x=yourTrainingData,
    steps_per_epoch=len(yourTrainingData),
    validation_data=yourValidationData,
    validation_steps=len(yourValidationData),
    epochs=yourNumberOfEpochs,
    verbose=1,
    callbacks=callbacksList
)
This produces a JSON file in TrainDir folder recording validation loss and accuracy for each training epoch as its own dictionary-like item. Note that the epoch number is indexed to start at 1 so it matches the output of tensorflow, not the actual index in Python.
I am outputting to a .json file, but it could be anything. Here is my code for analyzing the JSON files produced; I could have put it all in one script but did not.
import os
from pathlib import Path
import json

currentDirectory = os.getcwd()
outFileName = 'CVResults.json'
outFile = open(outFileName, mode='wt')
validationLogPaths = Path().glob('val_log_per_epoch_*.json')

# Necessary list to detect short unique IDs for each training session
stringDecimalDigits = [
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
]
setStringDecimalDigits = set(stringDecimalDigits)
trainingSessionsList = []

# Load the JSON files into memory to allow reading.
for validationLogFile in validationLogPaths:
    trainingUniqueIDCandidate = str(validationLogFile)[18:21]

    # Pad unique IDs with fewer than three digits with zeros at front
    thirdPotentialDigitOfUniqueID = trainingUniqueIDCandidate[2]
    if setStringDecimalDigits.isdisjoint(thirdPotentialDigitOfUniqueID):
        secondPotentialDigitOfUniqueID = trainingUniqueIDCandidate[1]
        if setStringDecimalDigits.isdisjoint(secondPotentialDigitOfUniqueID):
            trainingUniqueID = '00' + trainingUniqueIDCandidate[:1]
        else:
            trainingUniqueID = '0' + trainingUniqueIDCandidate[:2]
    else:
        trainingUniqueID = trainingUniqueIDCandidate
    trainingSessionsList.append((trainingUniqueID, validationLogFile))
trainingSessionsList.sort(key=lambda x: x[0])

# Analyze and export cross-validation results
for replicate in range(len(dict(trainingSessionsList).keys())):
    validationLogFile = trainingSessionsList[replicate][1]
    fileOpenForReading = open(
        validationLogFile, mode='r', buffering=1
    )
    with fileOpenForReading as openedFile:
        jsonValidationData = [json.loads(line) for line in openedFile]

    bestEpochResultsDict = {}
    oneIndexedEpochsList = []
    validationLossesList = []
    for line in range(len(jsonValidationData)):
        tempDict = jsonValidationData[line]
        oneIndexedEpochsList.append(tempDict['oneIndexedEpoch'])
        validationLossesList.append(tempDict['Validationloss'])

    trainingStopIndex = min(
        range(len(validationLossesList)),
        key=validationLossesList.__getitem__
    )

    bestEpochResultsDict['Integer_unique_ID'] = trainingSessionsList[replicate][0]
    bestEpochResultsDict['Min_val_loss'] = validationLossesList[trainingStopIndex]
    bestEpochResultsDict['Last_train_epoch'] = oneIndexedEpochsList[trainingStopIndex]
    outFile.write(json.dumps(bestEpochResultsDict, sort_keys=True) + '\n')

outFile.close()
This last block of code creates a JSON summarizing what is in CVResults.json produced above:
from pathlib import Path
import json
import os
import statistics

outFile = open("CVAnalysis.json", mode='wt')
CVResultsPath = sorted(Path().glob('*CVResults.json'))
if len(CVResultsPath) > 1:
    print('\nPlease analyze only one CVResults.json file at a time.')
    userAnswer = input('\nI understand only one will be analyzed: y or n')
    if (userAnswer == 'y') or (userAnswer == 'Y'):
        print('\nAnalyzing results in file {}:'.format(str(CVResultsPath[0])))

# Load the first CVResults.json file into memory to allow reading.
CVResultsFile = CVResultsPath[0]
fileOpenForReading = open(
    CVResultsFile, mode='r', buffering=1
)

outFile.write(
    'Analysis of cross-validation results tabulated in file {}'.format(
        os.getcwd()
    ) +
    str(CVResultsFile) +
    ':\n\n'
)

with fileOpenForReading as openedFile:
    jsonCVResultsData = [json.loads(line) for line in openedFile]

minimumValidationLossesList = []
trainedOneIndexedEpochsList = []
for line in range(len(jsonCVResultsData)):
    tempDict = jsonCVResultsData[line]
    minimumValidationLossesList.append(tempDict['Min_val_loss'])
    trainedOneIndexedEpochsList.append(tempDict['Last_train_epoch'])

outFile.write(
    '\nTrained validation losses: ' +
    json.dumps(minimumValidationLossesList) +
    '\n'
)
outFile.write(
    '\nTraining epochs required: ' +
    json.dumps(trainedOneIndexedEpochsList) +
    '\n'
)
outFile.write(
    '\n\nMean trained validation loss: ' +
    str(round(statistics.mean(minimumValidationLossesList), 4)) +
    '\n'
)
outFile.write(
    'Median of mean trained validation losses per session: ' +
    str(round(statistics.median(minimumValidationLossesList), 4)) +
    '\n'
)
outFile.write(
    '\n\nMean training epochs required: ' +
    str(round(statistics.mean(trainedOneIndexedEpochsList), 1)) +
    '\n'
)
outFile.write(
    'Median of mean training epochs required per session: ' +
    str(round(statistics.median(trainedOneIndexedEpochsList), 1)) +
    '\n'
)
outFile.close()
It is possible to save the data of val_loss and val_acc using the ModelCheckpoint class of Keras.
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='yourmodelname.hdf5',
                               monitor='val_loss',
                               verbose=1,
                               save_best_only=False)

history = model.fit(X_train, y_train, epochs=100, validation_split=0.02, callbacks=[checkpointer])

history.history.keys()
# output
# dict_keys(['val_loss', 'val_mae', 'val_acc', 'loss', 'mae', 'acc'])
An important point: if you omit the validation_split argument, you will only get the values of loss, mae and acc.
Hope this helps!