I'm trying to build a handwritten word recognition system using the IAM dataset, and while training I'm facing an overfitting problem. Would you please help me figure out what mistake I have made in the code below?
I have tried every solution I could find, but the same overfitting problem still persists.
import os
import fnmatch
import cv2
import numpy as np
import string
import time
import random
from keras import regularizers, optimizers
from keras.regularizers import l2
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
import matplotlib.pyplot as plt
imgSize = (128,32)
def preprocess(img, imgSize, dataAugmentation=False):
    "put img into target img of size imgSize, transpose for TF and normalize gray-values"
    # there are damaged files in IAM dataset - just use black image instead
    if img is None:
        img = np.zeros([imgSize[1], imgSize[0]])
    # increase dataset size by applying random stretches to the images
    if dataAugmentation:
        stretch = (random.random() - 0.5)  # -0.5 .. +0.5
        wStretched = max(int(img.shape[1] * (1 + stretch)), 1)  # random width, but at least 1
        img = cv2.resize(img, (wStretched, img.shape[0]))  # stretch horizontally by factor 0.5 .. 1.5
        img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # print('Data Augmented')
    # create target image and copy sample image into it
    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1), max(min(ht, int(h / f)), 1))  # scale according to f (result at least 1 and at most wt or ht)
    img = cv2.resize(img, newSize)
    target = np.ones([ht, wt]) * 255
    target[0:newSize[1], 0:newSize[0]] = img
    # transpose for TF
    img = cv2.transpose(target)
    # normalize
    (m, s) = cv2.meanStdDev(img)
    m = m[0][0]
    s = s[0][0]
    img = img - m
    img = img / s if s > 0 else img
    img = np.expand_dims(img, axis=2)
    return img
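# quick shape check with a synthetic 150x50 "word" image (illustrative only, not in the original script):
# after the transpose, preprocess() returns width-major arrays, matching the model's Input(shape=(128, 32, 1))
_dummy = (np.random.rand(50, 150) * 255).astype(np.uint8)
print(preprocess(_dummy, imgSize).shape)  # (128, 32, 1)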
def truncateLabel(text, maxTextLen):  # e.g. ('A', 32)
    # CTC inserts a blank between repeated characters, so a repeated char costs 2 time steps
    cost = 0
    for i in range(len(text)):
        if i != 0 and text[i] == text[i-1]:
            cost += 2
        else:
            cost += 1
        if cost > maxTextLen:
            return text[:i]  # truncate label once it no longer fits into maxTextLen time steps
    return text
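# illustrative check (not in the original script): repeated characters cost double,
# so a long repetitive label is truncated well before 32 raw characters
print(truncateLabel('aabbccddeeffgghhiijjkkll', 32))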
path = 'iam_dataset_words/'
maxTextLen = 32
samples = []
bad_samples = []
fileName = ''
dataAugmentation = False
chars = set()
f = open(path + 'words.txt', "r")
cou = 0
bad_samples = []
bad_samples_reference = ['a01-117-05-02.png',
                         'r06-022-03-05.png']
for line in f:
    cou += 1
    # ignore comment line
    if not line or line[0] == '#':
        continue
    lineSplit = line.strip().split(' ')
    assert len(lineSplit) >= 9
    fileNameSplit = lineSplit[0].split('-')  # a01-000u-00-00 splits
    # ../data/words/a01/a01-000u/a01-000u-00-00.png
    fileName = path + 'words/' \
        + fileNameSplit[0] + '/' \
        + fileNameSplit[0] + '-' \
        + fileNameSplit[1] \
        + '/' + lineSplit[0] + '.png'
    # GT text are columns starting at 9
    gtText = truncateLabel(' '.join(lineSplit[8:]), maxTextLen)  # ('A', 32)
    # chars = chars.union(gtText)  # unique chars only
    chars = chars.union(set(list(gtText)))
    # check if image is not empty
    if not os.path.getsize(fileName):
        bad_samples.append(lineSplit[0] + '.png')
        continue
    # put sample into list, e.g. ['A', '../data/words/a01/a01-000u/a01-000u-00-00.png']
    samples.append([gtText, fileName])
print(cou)
print(len(samples))
print(samples[:2])
if set(bad_samples) != set(bad_samples_reference):
    print("Warning, damaged images found:", bad_samples)
    print("Damaged images expected:", bad_samples_reference)
trainSamples = []
validationSamples = []
testSamples = []
valid_testSamples = []
# split into training and validation/test sets: 75% - 25%
# dataAugmentation = True
random.shuffle(samples)
splitIdx = int(0.75 * len(samples))
train_samples = samples[:splitIdx]
valid_testSamples = samples[splitIdx:]
print('vv:', len(valid_testSamples))
validationSamples = valid_testSamples[:15000]
testSamples = valid_testSamples[15000:]
print('valid: ',len(validationSamples))
print('test: ',len(testSamples))
print('train_before: ',len(train_samples))
# start with train set
trainSamples = train_samples[:25000]  # train data: 25000 samples
print('train_ after: ',len(trainSamples))
# list of all unique chars in dataset
charList = sorted(list(chars))
char_list = str().join(charList)
# print('test samples: ', testSamples)
print('char list : ', char_list)
# save characters of model for inference mode
# open(FilePaths.fnCharList, 'w').write(str().join(charList))
# save words contained in dataset into file
# open(FilePaths.fnCorpus, 'w').write(str(' ').join(loader.trainWords + validationWords))
def encode_to_labels(txt):
    # encode each character of the word as its index in char_list
    chars = []
    for index, char in enumerate(txt):
        try:
            chars.append(char_list.index(char))
        except ValueError:
            print(char)  # character missing from char_list
    return chars
print(trainSamples[:2])
# lists for training dataset
train_img = []
train_txt = []
train_input_length = []
train_label_length = []
train_orig_txt = []
max_label_len = 0
b = 0
for words, imgPath in trainSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=True)
    # compute maximum length of the text
    if len(words) > max_label_len:
        max_label_len = len(words)
    train_orig_txt.append(words)
    train_label_length.append(len(words))
    train_input_length.append(31)  # 31 = number of time steps the CNN-BLSTM below outputs
    train_img.append(img)
    train_txt.append(encode_to_labels(words))
    b += 1
# print(train_img[1])
print(len(train_txt))
train_txt[:5]
a = 0
#lists for validation dataset
valid_img = []
valid_txt = []
valid_input_length = []
valid_label_length = []
valid_orig_txt = []
for words, imgPath in validationSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=False)
    valid_orig_txt.append(words)
    valid_label_length.append(len(words))
    valid_input_length.append(31)
    valid_img.append(img)
    valid_txt.append(encode_to_labels(words))
    a += 1
print(len(valid_txt))
valid_txt[:5]
# lists for test dataset
test_img = []
test_txt = []
test_input_length = []
test_label_length = []
test_orig_txt = []
c = 0
for words, imgPath in testSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=False)
    test_orig_txt.append(words)
    test_label_length.append(len(words))
    test_input_length.append(31)
    test_img.append(img)
    test_txt.append(encode_to_labels(words))
    c += 1
# print(c)
print(test_img[0].shape)
print('Train: {}\nValid: {}\nTest: {}'.format(b,a,c))
print(max_label_len)
# pad each output label to maximum text length, using len(char_list) (the CTC blank index) as padding value
train_padded_txt = pad_sequences(train_txt, maxlen=max_label_len, padding='post', value = len(char_list))
valid_padded_txt = pad_sequences(valid_txt, maxlen=max_label_len, padding='post', value = len(char_list))
test_padded_txt = pad_sequences(test_txt, maxlen=max_label_len, padding='post', value = len(char_list))
print(len(train_padded_txt))
print(len(test_padded_txt))
print(valid_padded_txt[1])
# input images are transposed in preprocess(), so the input shape is (width=128, height=32, 1)
inputs = Input(shape=(128,32,1))
print(inputs.shape)
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(32, (3,3), activation = 'relu', padding='same')(inputs)
batch_norm_1 = BatchNormalization()(conv_1)
# strided 1x1 convolution used as a learnable downsampling ("pooling") layer
pool_1 = Conv2D(32, kernel_size=(1, 1), strides=2, padding='valid')(batch_norm_1)
conv_2 = Conv2D(64, (3,3), activation = 'relu', padding='same')(pool_1)
batch_norm_2 = BatchNormalization()(conv_2)
pool_2 = Conv2D(64, kernel_size=(1, 1), strides=2, padding='valid')(batch_norm_2)
conv_3 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_2)
batch_norm_3 = BatchNormalization()(conv_3)
conv_4 = Conv2D(128, (3,3), activation = 'relu', padding='same')(batch_norm_3)
batch_norm_4 = BatchNormalization()(conv_4)
# pooling layer with kernel size (1,2)
pool_4 = MaxPool2D(pool_size=(1,2))(batch_norm_4)
conv_5 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(256, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(1,2))(batch_norm_6)
conv_7 = Conv2D(256, (2,2), activation = 'relu')(pool_6)
batch_norm_7 = BatchNormalization()(conv_7)
# print(conv_7.shape)
# map-to-sequence-- dropping 1 dimension
squeezed = Lambda(lambda x: K.squeeze(x, 2))(batch_norm_7)
# print('squeezed',squeezed.shape)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.3))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.3))(blstm_1)
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# model to be used at test time
word_model = Model(inputs, outputs)
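# NOTE: the lines below compile and fit `model`, which this snippet never defines.
# A minimal sketch of the standard Keras CTC training setup that would make them run
# (assuming the usual ctc_batch_cost pattern; the author's exact code may differ):
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [outputs, labels, input_length, label_length])
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)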
adam = optimizers.Adamax(lr=0.01, decay = 1e-5)
model.compile(loss= {'ctc': lambda y_true, y_pred: y_pred}, optimizer = adam, metrics = ['accuracy'])
filepath="best_model.hdf5"
checkpoint1 = ReduceLROnPlateau(monitor='val_loss', verbose=1,
                                mode='auto', factor=0.2, patience=4, min_lr=0.0001)
checkpoint2 = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint1, checkpoint2]
train_img = np.array(train_img)
train_input_length = np.array(train_input_length)
train_label_length = np.array(train_label_length)
valid_img = np.array(valid_img)
valid_input_length = np.array(valid_input_length)
valid_label_length = np.array(valid_label_length)
test_img = np.array(test_img)
test_input_length = np.array(test_input_length)
test_label_length = np.array(test_label_length)
test_img.shape
batch_size = 50
epochs = 30
train_history = model.fit(x=[train_img, train_padded_txt, train_input_length, train_label_length],
                          y=np.zeros(len(train_img)), batch_size=batch_size, epochs=epochs,
                          validation_data=([valid_img, valid_padded_txt, valid_input_length,
                                            valid_label_length], [np.zeros(len(valid_img))]),
                          verbose=1, callbacks=callbacks_list)
Train on 25000 samples, validate on 15000 samples
Epoch 1/30
25000/25000 [==============================] - 159s 6ms/step - loss: 13.6510 - acc: 0.0199 - val_loss: 11.4910 - val_acc: 0.0651
Epoch 00001: val_loss improved from inf to 11.49100, saving model to best_model.hdf5
Epoch 2/30
25000/25000 [==============================] - 146s 6ms/step - loss: 10.9559 - acc: 0.0603 - val_loss: 9.7359 - val_acc: 0.0904
Epoch 00002: val_loss improved from 11.49100 to 9.73587, saving model to best_model.hdf5
Epoch 3/30
25000/25000 [==============================] - 146s 6ms/step - loss: 9.0720 - acc: 0.0943 - val_loss: 7.3571 - val_acc: 0.1565
Epoch 00003: val_loss improved from 9.73587 to 7.35715, saving model to best_model.hdf5
Epoch 4/30
25000/25000 [==============================] - 145s 6ms/step - loss: 6.9501 - acc: 0.1520 - val_loss: 5.5228 - val_acc: 0.2303
Epoch 00004: val_loss improved from 7.35715 to 5.52277, saving model to best_model.hdf5
Epoch 5/30
25000/25000 [==============================] - 144s 6ms/step - loss: 5.4893 - acc: 0.2129 - val_loss: 4.3179 - val_acc: 0.2895
Epoch 00005: val_loss improved from 5.52277 to 4.31793, saving model to best_model.hdf5
Epoch 6/30
25000/25000 [==============================] - 143s 6ms/step - loss: 4.7053 - acc: 0.2612 - val_loss: 3.7490 - val_acc: 0.3449
Epoch 00006: val_loss improved from 4.31793 to 3.74896, saving model to best_model.hdf5
Epoch 7/30
25000/25000 [==============================] - 143s 6ms/step - loss: 4.1183 - acc: 0.3096 - val_loss: 3.5902 - val_acc: 0.3805
Epoch 00007: val_loss improved from 3.74896 to 3.59015, saving model to best_model.hdf5
Epoch 8/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.6662 - acc: 0.3462 - val_loss: 3.7923 - val_acc: 0.3350
Epoch 00008: val_loss did not improve from 3.59015
Epoch 9/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.3398 - acc: 0.3809 - val_loss: 3.1352 - val_acc: 0.4344
Epoch 00009: val_loss improved from 3.59015 to 3.13516, saving model to best_model.hdf5
Epoch 10/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.0199 - acc: 0.4129 - val_loss: 2.9798 - val_acc: 0.4541
Epoch 00010: val_loss improved from 3.13516 to 2.97978, saving model to best_model.hdf5
Epoch 11/30
25000/25000 [==============================] - 143s 6ms/step - loss: 2.7361 - acc: 0.4447 - val_loss: 3.3836 - val_acc: 0.3780
Epoch 00011: val_loss did not improve from 2.97978
Epoch 12/30
25000/25000 [==============================] - 143s 6ms/step - loss: 2.5127 - acc: 0.4695 - val_loss: 2.9266 - val_acc: 0.5041
Epoch 00012: val_loss improved from 2.97978 to 2.92656, saving model to best_model.hdf5
Epoch 13/30
25000/25000 [==============================] - 142s 6ms/step - loss: 2.3045 - acc: 0.4974 - val_loss: 2.7329 - val_acc: 0.5174
Epoch 00013: val_loss improved from 2.92656 to 2.73294, saving model to best_model.hdf5
Epoch 14/30
25000/25000 [==============================] - 141s 6ms/step - loss: 2.1245 - acc: 0.5237 - val_loss: 2.8624 - val_acc: 0.5339
Epoch 00014: val_loss did not improve from 2.73294
Epoch 15/30
25000/25000 [==============================] - 142s 6ms/step - loss: 1.9091 - acc: 0.5524 - val_loss: 2.6933 - val_acc: 0.5506
Epoch 00015: val_loss improved from 2.73294 to 2.69333, saving model to best_model.hdf5
Epoch 16/30
25000/25000 [==============================] - 141s 6ms/step - loss: 1.7565 - acc: 0.5705 - val_loss: 2.7697 - val_acc: 0.5461
Epoch 00016: val_loss did not improve from 2.69333
Epoch 17/30
25000/25000 [==============================] - 145s 6ms/step - loss: 1.6273 - acc: 0.5892 - val_loss: 2.8992 - val_acc: 0.5361
Epoch 00017: val_loss did not improve from 2.69333
Epoch 18/30
25000/25000 [==============================] - 145s 6ms/step - loss: 1.5007 - acc: 0.6182 - val_loss: 2.9558 - val_acc: 0.5345
Epoch 00018: val_loss did not improve from 2.69333
Epoch 19/30
25000/25000 [==============================] - 143s 6ms/step - loss: 1.3775 - acc: 0.6311 - val_loss: 2.8437 - val_acc: 0.5744
Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0019999999552965165.
Epoch 00019: val_loss did not improve from 2.69333
Epoch 20/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.9636 - acc: 0.7115 - val_loss: 2.6072 - val_acc: 0.6083
Epoch 00020: val_loss improved from 2.69333 to 2.60724, saving model to best_model.hdf5
Epoch 21/30
25000/25000 [==============================] - 146s 6ms/step - loss: 0.7940 - acc: 0.7583 - val_loss: 2.6613 - val_acc: 0.6167
Epoch 00021: val_loss did not improve from 2.60724
Epoch 22/30
25000/25000 [==============================] - 146s 6ms/step - loss: 0.6995 - acc: 0.7797 - val_loss: 2.7180 - val_acc: 0.6220
Epoch 00022: val_loss did not improve from 2.60724
Epoch 23/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.6197 - acc: 0.8046 - val_loss: 2.7504 - val_acc: 0.6226
Epoch 00023: val_loss did not improve from 2.60724
Epoch 24/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.5668 - acc: 0.8167 - val_loss: 2.8238 - val_acc: 0.6255
Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0003999999724328518.
Epoch 00024: val_loss did not improve from 2.60724
Epoch 25/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.5136 - acc: 0.8316 - val_loss: 2.8167 - val_acc: 0.6283
Epoch 00025: val_loss did not improve from 2.60724
Epoch 26/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.5012 - acc: 0.8370 - val_loss: 2.8244 - val_acc: 0.6299
Epoch 00026: val_loss did not improve from 2.60724
Epoch 27/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.4886 - acc: 0.8425 - val_loss: 2.8366 - val_acc: 0.6282
Epoch 00027: val_loss did not improve from 2.60724
Epoch 28/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.4820 - acc: 0.8432 - val_loss: 2.8447 - val_acc: 0.6271
Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 00028: val_loss did not improve from 2.60724
Epoch 29/30
25000/25000 [==============================] - 141s 6ms/step - loss: 0.4643 - acc: 0.8452 - val_loss: 2.8538 - val_acc: 0.6278
Epoch 00029: val_loss did not improve from 2.60724
Epoch 30/30
25000/25000 [==============================] - 141s 6ms/step - loss: 0.4576 - acc: 0.8496 - val_loss: 2.8555 - val_acc: 0.6277
Epoch 00030: val_loss did not improve from 2.60724
Evaluation of the model
test_history = model.evaluate([test_img, test_padded_txt,
                               test_input_length, test_label_length],
                              y=np.zeros(len(test_img)), verbose=1)
test_history
Output
13830/13830 [==============================] - 42s 3ms/step
[2.855567638786134, 0.6288503253882292]
Some predicted outputs: (screenshot of predictions omitted)
Not sure what you have already tried, but did you check if your training and validation samples are balanced? That is, whether they have roughly the same percentages of examples in each category.
You could shuffle samples using random.shuffle(samples) before executing this part of your code:
splitIdx = int(0.75 * len(samples))
train_samples = samples[:splitIdx]
That way, you can be more certain that your training and validation sets are balanced.
There is a lot you can do:
- Add batch normalization after every Conv2D layer.
- Replace max pooling with a strided Conv2D (valid padding) so the downsampling becomes a learnable layer,
  from: pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
  to: pool_1 = Conv2D(filters, kernel_size=(1, 1), strides=2, padding='valid')(conv_1)
- Add l2 regularization to your layers.
- Try weight decay.
- Increase the dropout values you already have.
- Tune your learning rate; too small and it might get stuck in a poor local minimum.
And there is a lot more; the only way to know is to try them out. A quick sketch of the first three suggestions follows.
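For example, applied to the question's first conv block (the l2 factor of 1e-4 is just an illustrative starting value):

from keras.layers import Input, Conv2D, BatchNormalization
from keras.regularizers import l2

inputs = Input(shape=(128, 32, 1))
# conv with an l2 weight penalty, then batch norm, then a strided 1x1 conv as learnable "pooling"
conv_1 = Conv2D(32, (3, 3), activation='relu', padding='same',
                kernel_regularizer=l2(1e-4))(inputs)
batch_norm_1 = BatchNormalization()(conv_1)
pool_1 = Conv2D(32, kernel_size=(1, 1), strides=2, padding='valid')(batch_norm_1)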
Related
I'm trying to create a small transformer model with Keras to model stock prices, based on this tutorial from the Keras docs. The problem is, my test loss is massive and barely changes between epochs, unsurprisingly resulting in severe underfitting, with my outputs all being the same arbitrary value.
My code is below:
def transformer_encoder_block(inputs, head_size, num_heads, filters, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs
    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=filters, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res
data = ...
input = np.array(
    keras.preprocessing.sequence.pad_sequences(data["input"], padding="pre", dtype="float32"))
output = np.array(
    keras.preprocessing.sequence.pad_sequences(data["output"], padding="pre", dtype="float32"))
# Input shape: (723, 36, 22)
# Output shape: (723, 36, 1)
# Train data
train_features = input[100:]
train_labels = output[100:]
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=3)
# Test data
test_features = input[:100]
test_labels = output[:100]
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=3)
inputs = keras.Input(shape=(None,22), dtype="float32", name="inputs")
# Ignore padding in inputs
x = layers.Masking(mask_value=0)(inputs)
x = transformer_encoder_block(x, head_size=64, num_heads=16, filters=3, dropout=0.2)
# Multiclass = Softmax (decrease, no change, increase)
outputs = layers.TimeDistributed(layers.Dense(3, activation="softmax", name="outputs"))(x)
# Create model
model = keras.Model(inputs=inputs, outputs=outputs)
# Compile model
model.compile(loss="categorical_crossentropy", optimizer=(tf.keras.optimizers.Adam(learning_rate=0.005)), metrics=['accuracy'])
# Train model
history = model.fit(train_features, train_labels, epochs=10, batch_size=32)
# Evaluate on the test data
test_loss = model.evaluate(test_features, test_labels, verbose=0)
print("Test loss:", test_loss)
out = model.predict(test_features)
After padding, input is of shape (723, 36, 22), and output is of shape (723, 36, 1) (before converting the output to one-hot, after which there are 3 output classes).
Here's an example output for ten epochs (trust me, more than ten doesn't make it better):
Epoch 1/10
20/20 [==============================] - 2s 62ms/step - loss: 10.7436 - accuracy: 0.3335
Epoch 2/10
20/20 [==============================] - 1s 62ms/step - loss: 10.7083 - accuracy: 0.3354
Epoch 3/10
20/20 [==============================] - 1s 60ms/step - loss: 10.6555 - accuracy: 0.3392
Epoch 4/10
20/20 [==============================] - 1s 62ms/step - loss: 10.7846 - accuracy: 0.3306
Epoch 5/10
20/20 [==============================] - 1s 60ms/step - loss: 10.7600 - accuracy: 0.3322
Epoch 6/10
20/20 [==============================] - 1s 59ms/step - loss: 10.7074 - accuracy: 0.3358
Epoch 7/10
20/20 [==============================] - 1s 59ms/step - loss: 10.6569 - accuracy: 0.3385
Epoch 8/10
20/20 [==============================] - 1s 60ms/step - loss: 10.7767 - accuracy: 0.3314
Epoch 9/10
20/20 [==============================] - 1s 61ms/step - loss: 10.7346 - accuracy: 0.3341
Epoch 10/10
20/20 [==============================] - 1s 62ms/step - loss: 10.7093 - accuracy: 0.3354
Test loss: [10.073813438415527, 0.375]
4/4 [==============================] - 0s 22ms/step
Using the same data on a simple LSTM model with the same shape yielded a desirable prediction with a constantly decreasing loss.
Tweaking the learning rate appears to have no effect, nor does stacking more transformer_encoder_block()s.
If anyone has any suggestions for how I can solve this, please let me know.
I have made a model that tries to predict the chance of every piano key playing in a time step, given all time steps before it. I tried making a GRU network with 88 outputs (one for every piano key).
input shape = (600, 88)
desired output/label shape = (88,)
import numpy as np
import midi_processer
from keras import models
from keras import layers
x_train, x_test = np.load("samples.npy", mmap_mode='r'), np.load("test_samples.npy", mmap_mode='r')
y_train, y_test = np.load("labels.npy", mmap_mode='r'), np.load("test_labels.npy", mmap_mode='r')
def build_model():
    model = models.Sequential()
    model.add(layers.Input(shape=(600, 88)))
    model.add(layers.GRU(512, activation='tanh', recurrent_activation='hard_sigmoid'))
    model.add(layers.RepeatVector(600))
    model.add(layers.GRU(512, activation='tanh', recurrent_activation='hard_sigmoid'))
    model.add(layers.Dense(88, activation='sigmoid'))
    return model
x_partial, x_val = x_train[:13000], x_train[13000:]
y_partial, y_val = y_train[:13000], y_train[13000:]
model = build_model()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(x_partial, y_partial, batch_size=50, epochs=15, validation_data=(x_val, y_val))
Instead of learning normally, my model stayed at a roughly constant accuracy throughout the epochs:
Epoch 1/15
260/260 [==============================] - 998s 4s/step - loss: -0.1851 - accuracy: 0.0298 - val_loss: -8.8735 - val_accuracy: 0.0310
Epoch 2/15
260/260 [==============================] - 827s 3s/step - loss: -33.6520 - accuracy: 0.0382 - val_loss: -56.0122 - val_accuracy: 0.0310
Epoch 3/15
260/260 [==============================] - 844s 3s/step - loss: -78.6130 - accuracy: 0.0382 - val_loss: -98.2798 - val_accuracy: 0.0310
Epoch 4/15
260/260 [==============================] - 906s 3s/step - loss: -121.0963 - accuracy: 0.0382 - val_loss: -139.3440 - val_accuracy: 0.0310
Epoch 5/15
I tried to basically copy this tutorial: https://keras.io/examples/vision/image_classification_from_scratch/
But I can't seem to improve my val_accuracy score. I have two kinds of images, dogs (Hunde) and cats (Katzen), but only 95 samples of each. I have an "upper" folder "Hund und Katze" that contains the folders of these samples. I probably have to tune some parameters because my sample size is so low, but I have already tried that in some parts of the code.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
num_skipped = 0
for folder_name in ("Hund", "Katze"):
    folder_path = os.path.join("Hund und Katze", folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        try:
            fobj = open(fpath, "rb")
            is_jfif = tf.compat.as_bytes("JFIF") in fobj.peek(10)
        finally:
            fobj.close()
        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)
print("Deleted %d images" % num_skipped)
image_size = (180, 180)
batch_size = 16
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "Hund und Katze",
    validation_split=0.5,
    subset="training",
    seed=9,
    image_size=image_size,
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "Hund und Katze",
    validation_split=0.5,
    subset="validation",
    seed=9,
    image_size=image_size,
    batch_size=batch_size,
)
#Found 190 files belonging to 2 classes.
#Using 95 files for training.
#Found 190 files belonging to 2 classes.
#Using 95 files for validation.
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ]
)
train_ds = train_ds.prefetch(buffer_size=8)
val_ds = val_ds.prefetch(buffer_size=8)
def make_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)
    # Image augmentation block
    x = data_augmentation(inputs)
    # Entry block
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(16, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(32, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    previous_block_activation = x  # Set aside residual
    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual
    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)
model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)
#('You must install pydot (`pip install pydot`) and install graphviz (see instructions at
#https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
epochs = 10
callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
)
Output: Epoch 1/10
6/6 [==============================] - 8s 1s/step - loss: 0.7691 - accuracy: 0.6421 - val_loss: 0.6935 - val_accuracy: 0.4632
E:\anacondaBI\lib\site-packages\keras\engine\functional.py:1410: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
layer_config = serialize_layer_fn(layer)
Epoch 2/10
6/6 [==============================] - 6s 995ms/step - loss: 0.7747 - accuracy: 0.6526 - val_loss: 0.6917 - val_accuracy: 0.5368
Epoch 3/10
6/6 [==============================] - 6s 1s/step - loss: 0.6991 - accuracy: 0.7053 - val_loss: 0.6905 - val_accuracy: 0.5368
Epoch 4/10
6/6 [==============================] - 6s 1s/step - loss: 0.5411 - accuracy: 0.7368 - val_loss: 0.6935 - val_accuracy: 0.5368
Epoch 5/10
6/6 [==============================] - 6s 1s/step - loss: 0.3949 - accuracy: 0.8316 - val_loss: 0.7023 - val_accuracy: 0.5368
Epoch 6/10
6/6 [==============================] - 6s 1s/step - loss: 0.4440 - accuracy: 0.8526 - val_loss: 0.7199 - val_accuracy: 0.5368
Epoch 7/10
6/6 [==============================] - 6s 1s/step - loss: 0.3515 - accuracy: 0.8842 - val_loss: 0.7470 - val_accuracy: 0.5368
Epoch 8/10
6/6 [==============================] - 6s 1s/step - loss: 0.3249 - accuracy: 0.8526 - val_loss: 0.7955 - val_accuracy: 0.5368
Epoch 9/10
6/6 [==============================] - 6s 994ms/step - loss: 0.3953 - accuracy: 0.8421 - val_loss: 0.8570 - val_accuracy: 0.5368
Epoch 10/10
6/6 [==============================] - 6s 989ms/step - loss: 0.4363 - accuracy: 0.7789 - val_loss: 0.9189 - val_accuracy: 0.5368
<keras.callbacks.History at 0x2176ec764c0>
95 samples per class is too few to achieve decent accuracy. A few things to try, with a sketch after this list:
1. Decrease your validation_split to 0.05 (5% for validation), since you have very few data points.
2. If the first step does not help, use transfer learning, i.e. architectures with good ImageNet accuracy, e.g. MobileNets, ResNets, and EfficientNets.
3. If the above two steps still don't give you good accuracy, increase your dataset size and tune your hyperparameters.
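For step 2, a minimal transfer-learning sketch (MobileNetV2 and the 180x180 input size are illustrative choices, not requirements):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# frozen ImageNet backbone plus a small trainable head for the 2-class problem
base = keras.applications.MobileNetV2(input_shape=(180, 180, 3),
                                      include_top=False, weights="imagenet")
base.trainable = False

inputs = keras.Input(shape=(180, 180, 3))
x = keras.applications.mobilenet_v2.preprocess_input(inputs)
x = base(x, training=False)  # keep the frozen BatchNorm layers in inference mode
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])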
I have 0.3 million images in my train set (male/female) and around 50K images in the test set (male/female). I am using the code below; I have also tried adding a few more layers and more units. I am doing data augmentation and the other things suggested in the Keras docs as well.
targetSize =64
classifier.add(Conv2D(filters = 32,kernel_size =(3,3),input_shape=(targetSize,targetSize,3),activation ='relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Conv2D(filters = 32,kernel_size =(3,3),activation ='relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Conv2D(filters = 32,kernel_size =(3,3),activation ='relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Conv2D(filters = 32,kernel_size =(3,3),activation ='relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Flatten())
classifier.add(Dropout(rate = 0.6))
classifier.add(Dense(units = 64, activation='relu'))
classifier.add(Dropout(rate = 0.5))
classifier.add(Dense(units = 64, activation='relu'))
classifier.add(Dropout(rate = 0.2))
classifier.add(Dense(units = 1, activation='sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Part 2 - Fitting the CNN to the images
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   height_shift_range = 0.2,
                                   width_shift_range = 0.2,
                                   horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
training_set = train_datagen.flow_from_directory('<train_folder_loc>',
                                                 target_size = (img_size, img_size),
                                                 batch_size = batch_size_train,
                                                 class_mode = 'binary')
test_set = test_datagen.flow_from_directory('<test_folder_loc>',
                                            target_size = (img_size, img_size),
                                            batch_size = batch_size_test,
                                            class_mode = 'binary')
classifier.fit_generator(training_set,
                         steps_per_epoch = <train_image_count>/batch_size_train,
                         epochs = n_epoch,
                         validation_data = test_set,
                         validation_steps = <test_image_count>/batch_size_test,
                         use_multiprocessing = True,
                         workers = <mycpu>)
But with the many combinations I have tried, I get results like the ones below; train acc and val acc are not moving ahead. I trained for up to 100 epochs and it stays almost the same.
11112/11111 [==============================] - 156s 14ms/step - loss: 0.5628 - acc: 0.7403 - val_loss: 0.6001 - val_acc: 0.6967
Epoch 2/25
11112/11111 [==============================] - 156s 14ms/step - loss: 0.5516 - acc: 0.7403 - val_loss: 0.6096 - val_acc: 0.6968
Epoch 3/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5472 - acc: 0.7404 - val_loss: 0.5837 - val_acc: 0.6967
Epoch 4/25
11112/11111 [==============================] - 155s 14ms/step - loss: 0.5437 - acc: 0.7408 - val_loss: 0.5850 - val_acc: 0.6978
Epoch 5/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5409 - acc: 0.7417 - val_loss: 0.5844 - val_acc: 0.6991
Epoch 6/25
11112/11111 [==============================] - 155s 14ms/step - loss: 0.5386 - acc: 0.7420 - val_loss: 0.5828 - val_acc: 0.7011
Epoch 7/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5372 - acc: 0.7427 - val_loss: 0.5856 - val_acc: 0.6984
Epoch 8/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5347 - acc: 0.7437 - val_loss: 0.5847 - val_acc: 0.7017
Epoch 9/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5331 - acc: 0.7444 - val_loss: 0.5770 - val_acc: 0.7017
Epoch 10/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5323 - acc: 0.7443 - val_loss: 0.5803 - val_acc: 0.7037
Epoch 11/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5309 - acc: 0.7453 - val_loss: 0.5877 - val_acc: 0.7018
Epoch 12/25
11112/11111 [==============================] - 155s 14ms/step - loss: 0.5294 - acc: 0.7454 - val_loss: 0.5774 - val_acc: 0.7037
Epoch 13/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5282 - acc: 0.7464 - val_loss: 0.5807 - val_acc: 0.7024
Epoch 14/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5276 - acc: 0.7467 - val_loss: 0.5815 - val_acc: 0.7033
Epoch 15/25
11112/11111 [==============================] - 156s 14ms/step - loss: 0.5269 - acc: 0.7474 - val_loss: 0.5753 - val_acc: 0.7038
Epoch 16/25
11112/11111 [==============================] - 154s 14ms/step - loss: 0.5263 - acc: 0.7477 - val_loss: 0.5825 - val_acc: 0.7039
Epoch 17/25
11112/11111 [==============================] - 155s 14ms/step - loss: 0.5249 - acc: 0.7485 - val_loss: 0.5821 - val_acc: 0.7037
I need your suggestions on this, or any snippet to try.
Make sure you are overfitting on a small sample before trying to extend the network.
I would remove some/all of the Dropout layers and see if it improves performance. I think 3 Dropout layers is quite high.
Try reducing the learning rate.
Try and understand some of the basic principles of CNNs and how they are constructed; implement a simple one which works before arbitrarily putting in your own parameters.
For example, typically the number of filters in successive convolutions increases in powers of two (e.g. 32, 64, 128, etc.). Your use of dropout is also questionable: 0.6 is very high, not to mention that stacking three dropouts like you have doesn't make any sense. See the sketch below.
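A sketch of what such a restructuring might look like (the exact filter counts and dropout rate are illustrative, not prescriptive):

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense

# filters double per block (32 -> 64 -> 128); one moderate dropout before the head
classifier = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])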
Hmm, if you look at it closely, it's not that it's not moving; it is moving a bit. There are times when models only get better up to a certain point, no matter how long you train them or how many more layers you add. When that happens, it all boils down to the data. I think it would be best to determine what is hindering your model from improving. Also, my friend, training a good model doesn't happen overnight, especially with real-world data, and even more so with complex data such as images of humans.
I guess, if you are just following a tutorial that achieved a better score than yours, you could check the versions of the packages they're using, the data that you have, the steps they took, and, much more importantly, re-run the model. There are instances where models get different scores on different training runs.
I suggest you try playing with the layers more, or even use a different type of neural network. Failing that, try playing with your data more. 300k images is a lot, but image classification can still be really hard.
Finally, you could look into transfer learning with TensorFlow; it works by retraining pre-made image recognition models. Keras has a tutorial on transfer learning too.
I am trying to reproduce the face shape classification (https://github.com/adonistio/inception-face-shape-classifier/blob/master/Paper%20v3.pdf) in Keras using the same dataset. The dataset consists of 500 images in 5 classes (100 each): heart, oblong, oval, round, and square. The images come in different sizes, but the faces are aligned.
Sample image: heart_agreene_003.jpg. Class: heart shaped.
Unfortunately I am not able to train a good model; my accuracy is stuck at 20% (roc_auc_score = 0.5). Some feedback at this point would be really appreciated.
I have tried various CNN architectures, batch sizes, learning rates, optimizers, and data augmentations.
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics
import keras
from keras.models import Model
from keras.optimizers import SGD
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, Input, Flatten, Dropout, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
path = '...'
folders = next(os.walk(path))[1]
filepaths = []
labels = []
for i, cls in enumerate(folders):
    files = next(os.walk(path + cls + '/'))[2]
    for file in files:
        filepaths.append(path + cls + '/' + file)
        labels.append(i)
# convert to dummies
labels = np_utils.to_categorical(labels)
fps_train, fps_test, labels_train, labels_test = train_test_split(filepaths, labels, test_size=0.2, random_state=2018)
img_height = 299
img_width = 299
img_channels = 3
img_dim = (img_height, img_width, img_channels)
img_size = (img_height, img_width)
datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True)
def arc(img_dim=img_dim):
    base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=img_dim)
    for l in base_model.layers:
        l.trainable = False
    x = base_model.output
    x = Flatten()(x)
    x = Dense(2048, activation="relu")(x)
    x = Dropout(0.5)(x)
    predictions = Dense(len(folders), activation="softmax")(x)
    # creating the final model
    model = Model(inputs=base_model.input, outputs=predictions)
    return model
model = arc()
batch_size = 16
epochs = 100
n_folds = 3
preds_test = 0
train_scores = []; valid_scores = []
kf = KFold(n_splits=n_folds, random_state=10, shuffle=True)
history = []
for i, (train_index, test_index) in enumerate(kf.split(fps_train)):
    x_train = [fps_train[j] for j in train_index]; x_valid = [fps_train[j] for j in test_index]
    y_train = labels_train[train_index]; y_valid = labels_train[test_index]

    def train_generator():
        while 1:
            for start in range(0, len(x_train), batch_size):
                x_batch = []
                end = min(start + batch_size, len(x_train))
                train_batch = x_train[start:end]
                for filepath in train_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    # data augmentation
                    img = np.expand_dims(img, axis=0)
                    img = next(datagen.flow(img, batch_size=1))[0]
                    x_batch.append(img)
                y_batch = y_train[start:end]
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch

    def valid_generator():
        while 1:
            for start in range(0, len(x_valid), batch_size):
                x_batch = []
                end = min(start + batch_size, len(x_valid))
                valid_batch = x_valid[start:end]
                for filepath in valid_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    # data augmentation
                    img = np.expand_dims(img, axis=0)
                    img = next(datagen.flow(img, batch_size=1))[0]
                    x_batch.append(img)
                y_batch = y_valid[start:end]  # labels for the validation batch
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch

    def test_generator():
        while 1:
            for start in range(0, len(labels_test), batch_size):
                x_batch = []
                end = min(start + batch_size, len(labels_test))
                test_batch = fps_test[start:end]
                for filepath in test_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    x_batch.append(img)
                x_batch = np.array(x_batch, np.float32)
                yield x_batch

    train_steps = len(x_train) / batch_size
    valid_steps = len(x_valid) / batch_size
    test_steps = len(labels_test) / batch_size
    model.compile(optimizer=SGD(lr=0.00001, momentum=0.9), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # sparse_categorical_crossentropy
    history.append(model.fit_generator(train_generator(),
                                       train_steps,
                                       epochs=epochs,
                                       verbose=1,
                                       validation_data=valid_generator(),
                                       validation_steps=valid_steps,
                                       shuffle=True))
    preds_valid = model.predict_generator(generator=valid_generator(),
                                          steps=valid_steps, verbose=1)
    preds_train = model.predict_generator(generator=train_generator(),
                                          steps=train_steps, verbose=1)
    preds_test_fold = model.predict_generator(generator=test_generator(),
                                              steps=test_steps, verbose=1)
    valid_score = metrics.roc_auc_score(y_valid, preds_valid)
    train_score = metrics.roc_auc_score(y_train, preds_train)
    print('valid score:{} for fold {}'.format(valid_score, i))
    print('train score: {} for fold {}'.format(train_score, i))
    valid_scores.append(valid_score)
    train_scores.append(train_score)
    print('avg valid score:{0:0.5f} after {1:0.5f} folds'.format(np.mean(valid_scores), i))
    print('avg train score:{0:0.5f} after {1:0.5f} folds'.format(np.mean(train_scores), i))
    preds_test += preds_test_fold
preds_test /= n_folds
test_score = metrics.roc_auc_score(labels_test, preds_test)
print(test_score)
Epoch 1/100
13/12 [===============================] - 114s 9s/step - loss: 1.7377 - acc: 0.2696 - val_loss: 1.7597 - val_acc: 0.2300
Epoch 2/100
13/12 [===============================] - 110s 8s/step - loss: 1.6934 - acc: 0.3023 - val_loss: 1.8588 - val_acc: 0.1550
Epoch 3/100
13/12 [===============================] - 111s 9s/step - loss: 1.7105 - acc: 0.3643 - val_loss: 1.7486 - val_acc: 0.2500
Epoch 4/100
13/12 [===============================] - 111s 9s/step - loss: 1.6023 - acc: 0.3164 - val_loss: 1.8622 - val_acc: 0.1950
Epoch 5/100
13/12 [===============================] - 119s 9s/step - loss: 1.6091 - acc: 0.3316 - val_loss: 1.7544 - val_acc: 0.2100
Epoch 6/100
13/12 [===============================] - 119s 9s/step - loss: 1.6923 - acc: 0.2882 - val_loss: 1.7124 - val_acc: 0.2600
Epoch 7/100
13/12 [===============================] - 118s 9s/step - loss: 1.6071 - acc: 0.3602 - val_loss: 1.8326 - val_acc: 0.2100
Epoch 8/100
13/12 [===============================] - 110s 8s/step - loss: 1.6666 - acc: 0.2882 - val_loss: 1.7460 - val_acc: 0.2250
Epoch 9/100
13/12 [===============================] - 107s 8s/step - loss: 1.5762 - acc: 0.3684 - val_loss: 1.7904 - val_acc: 0.1850
Epoch 10/100
13/12 [===============================] - 106s 8s/step - loss: 1.6371 - acc: 0.3550 - val_loss: 1.8316 - val_acc: 0.2000
Epoch 11/100
13/12 [===============================] - 105s 8s/step - loss: 1.5918 - acc: 0.3602 - val_loss: 1.6973 - val_acc: 0.2650
Epoch 12/100
13/12 [===============================] - 106s 8s/step - loss: 1.5225 - acc: 0.3987 - val_loss: 1.7811 - val_acc: 0.2100
Epoch 13/100
13/12 [===============================] - 105s 8s/step - loss: 1.6055 - acc: 0.3561 - val_loss: 1.8557 - val_acc: 0.1450
Epoch 14/100
13/12 [===============================] - 108s 8s/step - loss: 1.5136 - acc: 0.3368 - val_loss: 1.8463 - val_acc: 0.1650
Epoch 15/100
13/12 [===============================] - 108s 8s/step - loss: 1.5672 - acc: 0.3654 - val_loss: 1.7392 - val_acc: 0.2050
Epoch 16/100
13/12 [===============================] - 109s 8s/step - loss: 1.5112 - acc: 0.3939 - val_loss: 1.7721 - val_acc: 0.2200
Epoch 17/100
13/12 [===============================] - 106s 8s/step - loss: 1.3944 - acc: 0.4614 - val_loss: 1.7274 - val_acc: 0.2200
Epoch 18/100
13/12 [===============================] - 106s 8s/step - loss: 1.3603 - acc: 0.4559 - val_loss: 1.7671 - val_acc: 0.2050
Epoch 19/100
13/12 [===============================] - 107s 8s/step - loss: 1.4985 - acc: 0.3843 - val_loss: 1.7367 - val_acc: 0.2150
Epoch 20/100
13/12 [===============================] - 107s 8s/step - loss: 1.4073 - acc: 0.4180 - val_loss: 1.8292 - val_acc: 0.1700
Epoch 21/100
13/12 [===============================] - 111s 9s/step - loss: 1.4495 - acc: 0.3743 - val_loss: 1.8178 - val_acc: 0.2050
Epoch 22/100
13/12 [===============================] - 108s 8s/step - loss: 1.3810 - acc: 0.4704 - val_loss: 1.8049 - val_acc: 0.2000
Epoch 23/100
13/12 [===============================] - 105s 8s/step - loss: 1.3556 - acc: 0.4366 - val_loss: 1.7813 - val_acc: 0.2050
Epoch 24/100
13/12 [===============================] - 111s 9s/step - loss: 1.3139 - acc: 0.4614 - val_loss: 1.8184 - val_acc: 0.2000
Epoch 25/100
13/12 [===============================] - 111s 9s/step - loss: 1.4152 - acc: 0.4421 - val_loss: 1.8859 - val_acc: 0.1300
Epoch 26/100
13/12 [===============================] - 110s 8s/step - loss: 1.3630 - acc: 0.4577 - val_loss: 1.7652 - val_acc: 0.2250
Epoch 27/100
13/12 [===============================] - 110s 8s/step - loss: 1.3347 - acc: 0.4521 - val_loss: 1.7304 - val_acc: 0.2200
Epoch 28/100
13/12 [===============================] - 107s 8s/step - loss: 1.2981 - acc: 0.5048 - val_loss: 1.8108 - val_acc: 0.2200
Epoch 29/100
2/12 [===>..........................] - ETA: 52s - loss: 1.5823 - acc: 0.3125