LSTM val_loss did not improve from inf - python

I'm trying to train a model to predict stock prices. My dataframe has a lot of columns because of one-hot encoding. This is my code for the model:
seq_len = 128
opt = keras.optimizers.Adam(learning_rate=0.001)

def create_model():
    in_seq = Input(shape=(seq_len, 143))
    x = Bidirectional(LSTM(128, return_sequences=True))(in_seq)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    conc = concatenate([avg_pool, max_pool])
    conc = Dense(64, activation="relu")(conc)
    out = Dense(1, activation="linear")(conc)
    model = Model(inputs=in_seq, outputs=out)
    model.compile(loss="mse", optimizer=opt, metrics=['mae', 'mape'])
    return model

model = create_model()
#model.summary()

#callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
callback = tf.keras.callbacks.ModelCheckpoint('Bi-LSTM.hdf5', monitor='val_loss',
                                              save_best_only=True, verbose=1)

model.fit(X_train, y_train,
          batch_size=2048,
          verbose=2,
          callbacks=[callback],
          epochs=200,
          #shuffle=True,
          validation_data=(X_val, y_val))
model = tf.keras.models.load_model('/content/Bi-LSTM.hdf5')

###############################################################################
'''Calculate predictions and metrics'''

# Calculate predictions for training, validation and test data
train_pred = model.predict(X_train)
val_pred = model.predict(X_val)
test_pred = model.predict(X_test)

# Print evaluation metrics for all datasets
train_eval = model.evaluate(X_train, y_train, verbose=0)
val_eval = model.evaluate(X_val, y_val, verbose=0)
test_eval = model.evaluate(X_test, y_test, verbose=0)

print(' ')
print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval[0], train_eval[1], train_eval[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval[0], val_eval[1], val_eval[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval[0], test_eval[1], test_eval[2]))
But during training, the output is:
Epoch 000xx: val_loss did not improve from inf
Epoch x/xxx
x/x - 19s - loss: nan - mae: nan - mape: nan - val_loss: nan - val_mae: nan - val_mape: nan
So my questions are:
Will letting the model train longer improve the result?
Can I fix this model, or do I have to switch to another one?
Please let me know where I am wrong.
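A quick sanity check worth running before touching the architecture (a sketch, not from the original post): a NaN loss from the very first epoch usually traces back to NaN or inf values, or wildly unscaled columns, in the input arrays. This assumes X_train and y_train are the NumPy arrays passed to model.fit above.

import numpy as np

for name, arr in [('X_train', X_train), ('y_train', y_train)]:
    arr = np.asarray(arr, dtype='float64')
    print(name,
          '| NaNs:', np.isnan(arr).sum(),   # a single NaN makes the loss NaN immediately
          '| infs:', np.isinf(arr).sum(),   # so does +/-inf
          '| min:', np.nanmin(arr),
          '| max:', np.nanmax(arr))         # huge magnitudes suggest missing scaling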

Related

train and validation accuracy -- straight horizontal lines

After training the model below and plotting the training and validation accuracy, I get two straight horizontal lines (picture attached).
These are the parameters:
Params:
mid_units: 256.0
activation: relu
dropout: 0.34943936277356535
optimizer: adam
batch_size: 64.0
for cls in os.listdir(path):
    for sound in tqdm(os.listdir(os.path.join(path, cls))):
        wav = librosa.load(os.path.join(path, cls, sound), sr=16000)[0].astype(np.float32)
        tmp_samples.append(wav)
        tmp_labels.append(cls)

X_train, X_test, y_train, y_test = train_test_split(tmp_samples, tmp_labels, test_size=0.60, shuffle=True)
X_test, X_valid, y_test, y_valid = train_test_split(X_test, y_test, test_size=0.50, shuffle=True)

for x, y in zip(X_train, y_train):
    extract_features_with_aug(x, y, model, samples, labels)
for x, y in zip(X_test, y_test):
    extract_features(x, y, model, plain_samples, plain_labels)
for x, y in zip(X_valid, y_valid):
    extract_features(x, y, model, valid_sample, valid_label)

X_train = np.asarray(samples)
y_train = np.asarray(labels)
X_test = np.asarray(plain_samples)
y_test = np.asarray(plain_labels)
X_valid = np.asarray(valid_sample)
y_valid = np.asarray(valid_label)

X_train = shuffle(samples)
y_train = shuffle(labels)
X_test = shuffle(plain_samples)
y_test = shuffle(plain_labels)
X_valid = shuffle(valid_sample)
y_valid = shuffle(valid_label)

return X_train, y_train, X_test, y_test, X_valid, y_valid
Model:
input = layers.Input(batch_shape=(None, 1024, 1), dtype=tf.float32, name='audio')
drop = layers.Dropout(dropout_rate)(input)
fl = layers.Flatten()(drop)
l = layers.Dense(mid_units, activation=activation)(fl)
ba = layers.BatchNormalization()(l)
drop2 = layers.Dropout(dropout_rate)(ba)
net = layers.Dense(5, activation=activation)(drop2)

model = Model(inputs=input, outputs=net)
model.summary()
return model
def train_model(
        X_train, y_train, X_test, y_test, X_valid, y_valid,
        fname,  # Path where to save the model
        mid_units,
        activation,
        dropout,
        batch_size,
        optimizer):
    # Generate the model
    general_model = create_model(mid_units, activation, dropout)
    general_model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                          metrics=['accuracy'])
    # Create some callbacks
    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=fname, monitor='val_loss', save_best_only=True),
                 tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=5, verbose=1,
                                                      min_lr=0.000001)]
    ################
    history = general_model.fit(X_train, y_train, epochs=EPOCHS, validation_data=(X_valid, y_valid),
                                batch_size=batch_size, callbacks=callbacks, verbose=1)
For the training history I'm getting fixed values:
3027/3027 [==============================] - 29s 9ms/step - loss: nan - accuracy: 0.2150 - val_loss: nan - val_accuracy: 0.2266
Epoch 97/100
3027/3027 [==============================] - 31s 10ms/step - loss: nan - accuracy: 0.2150 - val_loss: nan - val_accuracy: 0.2266
Epoch 98/100
3027/3027 [==============================] - 41s 14ms/step - loss: nan - accuracy: 0.2150 - val_loss: nan - val_accuracy: 0.2266
Epoch 99/100
3027/3027 [==============================] - 32s 11ms/step - loss: nan - accuracy: 0.2150 - val_loss: nan - val_accuracy: 0.2266
Epoch 100/100
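One detail stands out in the model code above: the output layer Dense(5, activation=activation) applies the tuned activation (relu here), while categorical_crossentropy expects a probability distribution; a relu unit that outputs exactly 0 drives log(0) to -inf, which matches the NaN loss in the log. A sketch of the conventional classification head, reusing the names from the model code:

# Sketch: softmax turns the 5 outputs into a probability distribution,
# which is what categorical_crossentropy expects.
net = layers.Dense(5, activation='softmax')(drop2)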

0 accuracy with LSTM

I trained an LSTM classification model but got weird results (0 accuracy). Here is my dataset with preprocessing steps:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import numpy as np

url = 'https://raw.githubusercontent.com/MislavSag/trademl/master/trademl/modeling/random_forest/X_TEST.csv'
X_TEST = pd.read_csv(url, sep=',')
url = 'https://raw.githubusercontent.com/MislavSag/trademl/master/trademl/modeling/random_forest/labeling_info_TEST.csv'
labeling_info_TEST = pd.read_csv(url, sep=',')

# TRAIN TEST SPLIT
X_train, X_test, y_train, y_test = train_test_split(
    X_TEST.drop(columns=['close_orig']), labeling_info_TEST['bin'],
    test_size=0.10, shuffle=False, stratify=None)

### PREPARE LSTM
x = X_train['close'].values.reshape(-1, 1)
y = y_train.values.reshape(-1, 1)
x_test = X_test['close'].values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)
train_val_index_split = 0.75

train_generator = keras.preprocessing.sequence.TimeseriesGenerator(
    data=x,
    targets=y,
    length=30,
    sampling_rate=1,
    stride=1,
    start_index=0,
    end_index=int(train_val_index_split * X_TEST.shape[0]),
    shuffle=False,
    reverse=False,
    batch_size=128
)
validation_generator = keras.preprocessing.sequence.TimeseriesGenerator(
    data=x,
    targets=y,
    length=30,
    sampling_rate=1,
    stride=1,
    start_index=int(train_val_index_split * X_TEST.shape[0] + 1),
    end_index=None,  # int(train_test_index_split*X.shape[0])
    shuffle=False,
    reverse=False,
    batch_size=128
)
test_generator = keras.preprocessing.sequence.TimeseriesGenerator(
    data=x_test,
    targets=y_test,
    length=30,
    sampling_rate=1,
    stride=1,
    start_index=0,
    end_index=None,
    shuffle=False,
    reverse=False,
    batch_size=128
)
# convert generator to in-memory 3D series (if enough RAM)
def generator_to_obj(generator):
    xlist = []
    ylist = []
    for i in range(len(generator)):
        x, y = generator[i]
        xlist.append(x)
        ylist.append(y)
    X_train = np.concatenate(xlist, axis=0)
    y_train = np.concatenate(ylist, axis=0)
    return X_train, y_train

X_train_lstm, y_train_lstm = generator_to_obj(train_generator)
X_val_lstm, y_val_lstm = generator_to_obj(validation_generator)
X_test_lstm, y_test_lstm = generator_to_obj(test_generator)

# test for shapes
print('X and y shape train: ', X_train_lstm.shape, y_train_lstm.shape)
print('X and y shape validate: ', X_val_lstm.shape, y_val_lstm.shape)
print('X and y shape test: ', X_test_lstm.shape, y_test_lstm.shape)
and here is my model with results:
### MODEL
model = keras.models.Sequential([
    keras.layers.LSTM(124, return_sequences=True, input_shape=[None, 1]),
    keras.layers.LSTM(258),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train_lstm, y_train_lstm, epochs=10, batch_size=128,
                    validation_data=[X_val_lstm, y_val_lstm])
# history = model.fit_generator(train_generator, epochs=40, validation_data=validation_generator, verbose=1)
score, acc = model.evaluate(X_val_lstm, y_val_lstm, batch_size=128)

historydf = pd.DataFrame(history.history)
historydf.head(10)
Why do I get 0 accuracy?
You're using a sigmoid activation, which means your labels must be in the range [0, 1]. But in your case the labels are 1. and -1.
Just replace -1 with 0:
for i, y in enumerate(y_train_lstm):
    if y == -1.:
        y_train_lstm[i, :] = 0.

for i, y in enumerate(y_val_lstm):
    if y == -1.:
        y_val_lstm[i, :] = 0.

for i, y in enumerate(y_test_lstm):
    if y == -1.:
        y_test_lstm[i, :] = 0.
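A vectorized equivalent of those loops, as a sketch (np is NumPy, already imported in the question):

# Map every -1 label to 0 in one shot instead of looping row by row.
y_train_lstm = np.where(y_train_lstm == -1., 0., y_train_lstm)
y_val_lstm = np.where(y_val_lstm == -1., 0., y_val_lstm)
y_test_lstm = np.where(y_test_lstm == -1., 0., y_test_lstm)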
Sidenote: the signals are very close, so it will be hard to distinguish them, and accuracy probably won't be high with simple models.
After training with 0. and 1. labels:
model = keras.models.Sequential([
    keras.layers.LSTM(124, return_sequences=True, input_shape=(30, 1)),
    keras.layers.LSTM(258),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train_lstm, y_train_lstm, epochs=5, batch_size=128,
                    validation_data=(X_val_lstm, y_val_lstm))
# history = model.fit_generator(train_generator, epochs=40, validation_data=validation_generator, verbose=1)
score, acc = model.evaluate(X_val_lstm, y_val_lstm, batch_size=128)

historydf = pd.DataFrame(history.history)
historydf.head(10)
Epoch 1/5
12/12 [==============================] - 5s 378ms/step - loss: 0.7386 - accuracy: 0.4990 - val_loss: 0.6959 - val_accuracy: 0.4896
Epoch 2/5
12/12 [==============================] - 4s 318ms/step - loss: 0.6947 - accuracy: 0.5133 - val_loss: 0.6959 - val_accuracy: 0.5104
Epoch 3/5
12/12 [==============================] - 4s 318ms/step - loss: 0.6941 - accuracy: 0.4895 - val_loss: 0.6930 - val_accuracy: 0.5104
Epoch 4/5
12/12 [==============================] - 4s 332ms/step - loss: 0.6946 - accuracy: 0.5269 - val_loss: 0.6946 - val_accuracy: 0.5104
Epoch 5/5
12/12 [==============================] - 4s 334ms/step - loss: 0.6931 - accuracy: 0.4901 - val_loss: 0.6929 - val_accuracy: 0.5104
3/3 [==============================] - 0s 73ms/step - loss: 0.6929 - accuracy: 0.5104
loss accuracy val_loss val_accuracy
0 0.738649 0.498980 0.695888 0.489583
1 0.694708 0.513256 0.695942 0.510417
2 0.694117 0.489463 0.692987 0.510417
3 0.694554 0.526852 0.694613 0.510417
4 0.693118 0.490143 0.692936 0.510417
Source code in colab: https://colab.research.google.com/drive/10yRf4TfGDnp_4F2HYoxPyTlF18no-8Dr?usp=sharing

Two different styles of Tensorflow implementation for the same network architecture lead to two different results and behaviors?

OS Platform: Linux Centos 7.6
Distribution: Intel Xeon Gold 6152 (22x3.70 GHz);
GPU Model: NVIDIA Tesla V100 32 GB;
Number of nodes/CPU/Cores/GPU: 26/52/1144/104;
TensorFlow installed from (source or binary): official webpage
TensorFlow version (use command below): 2.1.0
Python version: 3.6.8
Description of issue:
While implementing my proposed method using the second style of implementation (see below), I realized that the algorithm's behavior was indeed strange: the accuracy decreases and the loss increases as the number of epochs grows.
So I narrowed the problem down and finally decided to modify some code from the TensorFlow official page to check what is happening. As explained on the TF v2 official webpage, there are two styles of implementation, which I have adopted as follows.
I have modified the code provided in the "getting started with TF v2" link below:
TensorFlow 2 quickstart for beginners
as follows:
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

learning_rate = 1e-4
batch_size = 100
n_classes = 2
n_units = 80

# Generate synthetic data / load data sets
x_in, y_in = make_classification(n_samples=1000, n_features=10, n_informative=4, n_redundant=2,
                                 n_repeated=2, n_classes=2, n_clusters_per_class=2, weights=[0.5, 0.5],
                                 flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
                                 shuffle=True, random_state=42)
x_in = x_in.astype('float32')
y_in = y_in.astype('float32').reshape(-1, 1)

one_hot_encoder = OneHotEncoder(sparse=False)
y_in = one_hot_encoder.fit_transform(y_in)
y_in = y_in.astype('float32')

x_train, x_test, y_train, y_test = train_test_split(x_in, y_in, test_size=0.4, random_state=42, shuffle=True)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, shuffle=True)
print("shapes:", x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)

V = x_train.shape[1]

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(n_units, activation='relu', input_shape=(V,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(n_classes)
])

loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test, verbose=2)
the output is as expected, as one can see below:
Epoch 1/5
600/600 [==============================] - 0s 419us/sample - loss: 0.7114 - accuracy: 0.5350
Epoch 2/5
600/600 [==============================] - 0s 42us/sample - loss: 0.6149 - accuracy: 0.6050
Epoch 3/5
600/600 [==============================] - 0s 39us/sample - loss: 0.5450 - accuracy: 0.6925
Epoch 4/5
600/600 [==============================] - 0s 46us/sample - loss: 0.4895 - accuracy: 0.7425
Epoch 5/5
600/600 [==============================] - 0s 40us/sample - loss: 0.4579 - accuracy: 0.7825
test: 200/200 - 0s - loss: 0.4110 - accuracy: 0.8350
To be more precise, the training accuracy increases and the loss decreases as the number of epochs increases (which is expected and normal).
HOWEVER, the following chunk of code, which is adapted from the link below:
TensorFlow 2 quickstart for experts
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

learning_rate = 1e-4
batch_size = 100
n_classes = 2
n_units = 80

# Generate synthetic data / load data sets
x_in, y_in = make_classification(n_samples=1000, n_features=10, n_informative=4, n_redundant=2,
                                 n_repeated=2, n_classes=2, n_clusters_per_class=2, weights=[0.5, 0.5],
                                 flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
                                 shuffle=True, random_state=42)
x_in = x_in.astype('float32')
y_in = y_in.astype('float32').reshape(-1, 1)

one_hot_encoder = OneHotEncoder(sparse=False)
y_in = one_hot_encoder.fit_transform(y_in)
y_in = y_in.astype('float32')

x_train, x_test, y_train, y_test = train_test_split(x_in, y_in, test_size=0.4, random_state=42, shuffle=True)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, shuffle=True)
print("shapes:", x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)

training_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
valid_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)
testing_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

V = x_train.shape[1]

class MyModel(tf.keras.models.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d1 = tf.keras.layers.Dense(n_units, activation='relu', input_shape=(V,))
        self.d2 = tf.keras.layers.Dropout(0.2)
        self.d3 = tf.keras.layers.Dense(n_classes)

    def call(self, x):
        x = self.d1(x)
        x = self.d2(x)
        return self.d3(x)

# Create an instance of the model
model = MyModel()

loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryCrossentropy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryCrossentropy(name='test_accuracy')

@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images)  # training=True
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images)  # training=False
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

EPOCHS = 5

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in training_dataset:
        train_step(images, labels)

    for test_images, test_labels in testing_dataset:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1, train_loss.result(), train_accuracy.result(),
                          test_loss.result(), test_accuracy.result()))
behaves strangely indeed. Here is the output of this piece of code:
Epoch 1, Loss: 0.7299721837043762, Accuracy: 3.8341376781463623, Test Loss: 0.7290592193603516, Test Accuracy: 3.6925911903381348
Epoch 2, Loss: 0.6725851893424988, Accuracy: 3.1141700744628906, Test Loss: 0.6695905923843384, Test Accuracy: 3.2315549850463867
Epoch 3, Loss: 0.6256862878799438, Accuracy: 2.75959849357605, Test Loss: 0.6216427087783813, Test Accuracy: 2.920461416244507
Epoch 4, Loss: 0.5873140096664429, Accuracy: 2.4249706268310547, Test Loss: 0.5828182101249695, Test Accuracy: 2.575272560119629
Epoch 5, Loss: 0.555053174495697, Accuracy: 2.2128372192382812, Test Loss: 0.5501811504364014, Test Accuracy: 2.264410972595215
As one can see, not only are the accuracy values strange, but instead of increasing as the number of epochs grows, they decrease. Could you please explain what is happening here?
As pointed out in the comments, I made a mistake with the evaluation metrics: I should have used BinaryAccuracy.
Moreover, it is better to amend call in the advanced version as follows:
def call(self, x, training=False):
    x = self.d1(x)
    if training:
        x = self.d2(x, training=training)
    return self.d3(x)
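For completeness, a minimal sketch of the metric fix described above (variable names follow the question's code; threshold=0.0 is used because the model outputs logits, as signalled by from_logits=True):

# BinaryAccuracy compares thresholded predictions to the labels;
# BinaryCrossentropy, used above by mistake, accumulates a loss value,
# which is why the "accuracy" column showed values greater than 1.
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy', threshold=0.0)
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy', threshold=0.0)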

Why does Keras give me different results between model.evaluate, model.predict & model.fit?

I'm working on a project with a ResNet50-based dual-output model. One output is for a regression task and the second is for a classification task.
My main question is about model evaluation. During training, I achieve pretty good results on both outputs on the validation set:
- Combined loss = 0.32507268732786176
- Val accuracy = 0.97375
- Val MSE: 4.1454763
The model.evaluate gives me the following results on the same set:
- Combined loss = 0.33064378452301024
- Val accuracy = 0.976
- Val MSE = 1.2375486
The model.predict gives me totally different results (I use scikit-learn to compute the metrics):
- Val accuracy = 0.45875
- Val MSE: 43.555958365743805
These last values change at each predict execution.
I work on TF2.0.
Here is my code:
valid_generator = datagen.flow_from_dataframe(dataframe=df,
                                              directory=PATH,
                                              x_col="X",
                                              y_col=["yReg", "yCls"],
                                              class_mode="multi_output",
                                              target_size=(IMG_SIZE, IMG_SIZE),
                                              batch_size=batch_size,
                                              subset="validation",
                                              shuffle=False,
                                              workers=0)

def generate_data_generator(generator, train=True):
    while True:
        Xi, yi = generator.next()
        y2 = []
        for e in yi[1]:
            y2.append(to_categorical(e, 7))
        y2 = np.array(y2)
        if train:  # Augmentation for training only
            Xi = Xi.astype('uint8')
            Xi_aug = seq(images=Xi)  # imgaug lib needs uint8
            Xi_aug = Xi_aug.astype('float32')
            Xi_aug = preprocess_input(Xi_aug)  # resnet50 preprocessing
            yield Xi_aug, [yi[0], y2]
        else:  # Validation
            yield preprocess_input(Xi), [yi[0], y2]

model.fit_generator(generator=generate_data_generator(train_generator, True),
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=generate_data_generator(valid_generator, False),
                    validation_steps=STEP_SIZE_VALID,
                    verbose=1,
                    epochs=50,
                    callbacks=[checkpoint, tfBoard])

evalu = model.evaluate_generator(generate_data_generator(valid_generator, False), steps=STEP_SIZE_VALID)
print(model.metrics_names)
print(evalu)

preds = model.predict_generator(generate_data_generator(valid_generator, False), steps=STEP_SIZE_VALID, workers=0)
labels = valid_generator.labels
print("MSE error:", me.mean_squared_error(labels[0], preds[0]))
print("Accuracy:", me.accuracy_score(labels[1], preds[1].argmax(axis=1)))
What am I doing wrong?
Thanks for the help!
You are calculating accuracy using just one data point labels[1], preds[1] instead of all data points. You need to compute accuracy over all data points to compare the result with model.evaluate_generator. Also, you have computed MSE on the labels[0], preds[0] data points but accuracy on the labels[1], preds[1] data points; consider all the data points in both cases.
Below is an example of binary classification where I am not doing any data augmentation on the validation data. You can build the validation generator without augmentation and set shuffle=False to generate the same batches of data every time; that way you will get the same result from model.evaluate_generator and model.predict_generator.
Validation Generator -
validation_image_generator = ImageDataGenerator(rescale=1./255)  # Generator for our validation data

val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=validation_dir,
                                                              shuffle=False,
                                                              seed=10,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary')
Below are the results for accuracy, which all match -
model.fit_generator
history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=5,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size)
Output -
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Epoch 1/5
20/20 [==============================] - 27s 1s/step - loss: 0.8691 - accuracy: 0.4995 - val_loss: 0.6850 - val_accuracy: 0.5000
Epoch 2/5
20/20 [==============================] - 26s 1s/step - loss: 0.6909 - accuracy: 0.5145 - val_loss: 0.6880 - val_accuracy: 0.5000
Epoch 3/5
20/20 [==============================] - 26s 1s/step - loss: 0.6682 - accuracy: 0.5345 - val_loss: 0.6446 - val_accuracy: 0.6320
Epoch 4/5
20/20 [==============================] - 26s 1s/step - loss: 0.6245 - accuracy: 0.6180 - val_loss: 0.6214 - val_accuracy: 0.5920
Epoch 5/5
20/20 [==============================] - 27s 1s/step - loss: 0.5696 - accuracy: 0.6795 - val_loss: 0.6468 - val_accuracy: 0.6270
model.evaluate_generator
evalu = model.evaluate_generator(val_data_gen)
print(model.metrics_names)
print(evalu)
Output -
['loss', 'accuracy']
[0.646793782711029, 0.6269999742507935]
model.predict_generator
from sklearn.metrics import mean_squared_error, accuracy_score

preds = model.predict_generator(val_data_gen)
y_pred = tf.where(preds <= 0.5, 0, 1)

labels = val_data_gen.labels
y_true = labels

# confusion_matrix(y_true, y_pred)
print("Accuracy:", accuracy_score(y_true, y_pred))
Output -
Accuracy: 0.627
Complete Code for your reference -
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
import os
import numpy as np
import matplotlib.pyplot as plt

_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')

train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

train_cats_dir = os.path.join(train_dir, 'cats')  # directory with our training cat pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')  # directory with our training dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')  # directory with our validation cat pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')  # directory with our validation dog pictures

num_cats_tr = len(os.listdir(train_cats_dir))
num_dogs_tr = len(os.listdir(train_dogs_dir))
num_cats_val = len(os.listdir(validation_cats_dir))
num_dogs_val = len(os.listdir(validation_dogs_dir))

total_train = num_cats_tr + num_dogs_tr
total_val = num_cats_val + num_dogs_val

batch_size = 100
epochs = 5
IMG_HEIGHT = 150
IMG_WIDTH = 150

train_image_generator = ImageDataGenerator(rescale=1./255, brightness_range=[0.5, 1.5])  # Generator for our training data
validation_image_generator = ImageDataGenerator(rescale=1./255)  # Generator for our validation data

train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary')
val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=validation_dir,
                                                              shuffle=False,
                                                              seed=10,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary')

model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling2D(),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1)
])

model.compile(optimizer="adam",
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size)

evalu = model.evaluate_generator(val_data_gen, steps=total_val // batch_size)
print(model.metrics_names)
print(evalu)

from sklearn.metrics import mean_squared_error, accuracy_score

#val_data_gen.reset()
preds = model.predict_generator(val_data_gen, steps=total_val // batch_size)
y_pred = tf.where(preds <= 0.5, 0, 1)

labels = val_data_gen.labels
y_true = labels

test_labels = []
for i in range(0, 10):
    test_labels.extend(np.array(val_data_gen[i][1]))

# confusion_matrix(y_true, y_pred)
print("Accuracy:", accuracy_score(test_labels, y_pred))
Also keep in mind that fit_generator, evaluate_generator and predict_generator are DEPRECATED and will be removed in a future version. Instructions for updating: please use Model.fit, Model.evaluate and Model.predict respectively, which support generators.
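For reference, a sketch of the same calls through the non-deprecated API (same variables as the complete code above):

# Model.fit / Model.evaluate / Model.predict accept generators and
# Sequence objects directly in TF 2.1+.
history = model.fit(train_data_gen,
                    steps_per_epoch=total_train // batch_size,
                    epochs=epochs,
                    validation_data=val_data_gen,
                    validation_steps=total_val // batch_size)
evalu = model.evaluate(val_data_gen, steps=total_val // batch_size)
preds = model.predict(val_data_gen, steps=total_val // batch_size)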
Hope this answers your question. Happy Learning.

Printing out the validation accuracy to the console for every batch or epoch (Keras)

I'm using ImageDataGenerator and flow_from_directory to generate my data, and model.fit_generator to fit it.
This defaults to outputting the accuracy for the training data set only; there doesn't seem to be an option to output validation accuracy to the terminal.
Here is the relevant portion of my code:
#train data generator
print('Starting Preprocessing')

train_datagen = ImageDataGenerator(preprocessing_function=preprocess)
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

#same for validation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess)
validation_generator = val_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

########################Model Creation###################################
#create the base pre-trained model
print('Finished Preprocessing, starting model creation \n')

base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(12, activation='softmax')(x)
model = Model(input=base_model.input, output=predictions)

for layer in model.layers[:-34]:
    layer.trainable = False
for layer in model.layers[-34:]:
    layer.trainable = True

from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.001, momentum=0.92),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

#############SAVE Model #######################################
file_name = str(datetime.datetime.now()).split(' ')[0] + '_{epoch:02d}.hdf5'
filepath = os.path.join(save_dir, file_name)
checkpoints = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                              save_best_only=False, save_weights_only=False,
                              mode='auto', period=2)

###############Fit Model #############################
model.fit_generator(
    train_generator,
    steps_per_epoch=total_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validation // batch_size,
    callbacks=[checkpoints],
    shuffle=True)
UPDATE OUTPUT:
Throughout training I'm only getting the training accuracy in the output, but at the end of training I'm getting both training and validation accuracy.
Epoch 1/10
1/363 [..............................] - ETA: 1:05:58 - loss: 2.4976 - acc: 0.0640
2/363 [..............................] - ETA: 51:33 - loss: 2.4927 - acc: 0.0760
3/363 [..............................] - ETA: 48:55 - loss: 2.5067 - acc: 0.0787
4/363 [..............................] - ETA: 47:26 - loss: 2.5110 - acc: 0.0770
5/363 [..............................] - ETA: 46:30 - loss: 2.5021 - acc: 0.0824
6/363 [..............................] - ETA: 45:56 - loss: 2.5063 - acc: 0.0820
The idea is that you go through your validation set after each epoch, not after each batch.
If you had to evaluate the model's performance on the whole validation set after every batch, you would lose a lot of time.
After each epoch, you will have the corresponding losses and accuracies for both training and validation. But during an epoch, you only have access to the training loss and accuracy.
Validation loss and validation accuracy get printed for every epoch once you specify validation_split:
model.fit(X, Y, epochs=1000, batch_size=10, validation_split=0.2)
I have used the above in my code, and val_loss and val_acc are printed for every epoch, but not after every batch.
Hope that answers your question.
Epoch 1/500
1267/1267 [==============================] - 0s 376us/step - loss: 0.6428 - acc: 0.6409 - val_loss: 0.5963 - val_acc: 0.6656
In fit_generator,
fit_generator(generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, validation_freq=1, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0)
there is no validation_split parameter, so you can create two different ImageDataGenerator flows, one for training and one for validation, and then pass that validation generator as validation_data. Then it will print the validation loss and accuracy, as sketched below.
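A minimal sketch of that setup (the directory paths and image size here are placeholders, and model is assumed to be compiled already):

from keras.preprocessing.image import ImageDataGenerator

# One flow for training, a separate one for validation.
train_gen = ImageDataGenerator(rescale=1./255).flow_from_directory(
    'data/train', target_size=(224, 224), class_mode='categorical')
val_gen = ImageDataGenerator(rescale=1./255).flow_from_directory(
    'data/validation', target_size=(224, 224), class_mode='categorical')

# With validation_data set, Keras prints val_loss and val_acc after every epoch.
model.fit_generator(train_gen,
                    steps_per_epoch=len(train_gen),
                    epochs=10,
                    validation_data=val_gen,
                    validation_steps=len(val_gen))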
