Keras: validation accuracy is not improving - python

I am trying to reproduce the face shape classification (https://github.com/adonistio/inception-face-shape-classifier/blob/master/Paper%20v3.pdf) in Keras using the same dataset. The dataset consists of 500 images of 5 classes (100 each): heart, oblong, oval, round, and square. The images come in different sizes, but the faces are aligned.
Sample image: heart_agreene_003.jpg. Class: heart shaped.
Unfortunately I am not able to train a good model; my accuracy is stuck at 20% (roc_auc_score = 0.5). Any feedback at this point would be much appreciated.
I have tried various CNN architectures, batch sizes, learning rates, optimizers, and data augmentations.
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics
import keras
from keras.models import Model
from keras.optimizers import SGD
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, Input, Flatten, Dropout, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
path = '...'
folders = next(os.walk(path))[1]
filepaths = []
labels = []
for i, cls in enumerate(folders):
    files = next(os.walk(path + cls + '/'))[2]
    for file in files:
        filepaths.append(path + cls + '/' + file)
        labels.append(i)
# convert to dummies
labels = np_utils.to_categorical(labels)
fps_train, fps_test, labels_train, labels_test = train_test_split(filepaths, labels, test_size=0.2, random_state=2018)
img_height = 299
img_width = 299
img_channels = 3
img_dim = (img_height, img_width, img_channels)
img_size = (img_height, img_width)
datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True)
def arc(img_dim=img_dim):
    base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=img_dim)
    for l in base_model.layers:
        l.trainable = False
    x = base_model.output
    x = Flatten()(x)
    x = Dense(2048, activation="relu")(x)
    x = Dropout(0.5)(x)
    predictions = Dense(len(folders), activation="softmax")(x)
    # creating the final model
    model = Model(inputs=base_model.input, outputs=predictions)
    return model
model = arc()
batch_size = 16
epochs = 100
n_folds = 3
preds_test = 0
train_scores = []; valid_scores = []
kf = KFold(n_splits=n_folds, random_state=10, shuffle=True)
history = []
for i, (train_index, test_index) in enumerate(kf.split(fps_train)):
    x_train = [fps_train[j] for j in train_index]; x_valid = [fps_train[j] for j in test_index]
    y_train = labels_train[train_index]; y_valid = labels_train[test_index]

    def train_generator():
        while 1:
            for start in range(0, len(x_train), batch_size):
                x_batch = []
                end = min(start + batch_size, len(x_train))
                train_batch = x_train[start:end]
                for filepath in train_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    # data augmentation
                    img = np.expand_dims(img, axis=0)
                    img = next(datagen.flow(img, batch_size=1))[0]
                    x_batch.append(img)
                y_batch = y_train[start:end]
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch

    def valid_generator():
        while 1:
            for start in range(0, len(x_valid), batch_size):
                x_batch = []
                end = min(start + batch_size, len(x_valid))
                valid_batch = x_valid[start:end]
                for filepath in valid_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    # data augmentation
                    img = np.expand_dims(img, axis=0)
                    img = next(datagen.flow(img, batch_size=1))[0]
                    x_batch.append(img)
                # fixed: validation labels must come from y_valid, not y_train
                y_batch = y_valid[start:end]
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch
    def test_generator():
        while 1:
            for start in range(0, len(labels_test), batch_size):
                x_batch = []
                end = min(start + batch_size, len(labels_test))
                test_batch = fps_test[start:end]
                for filepath in test_batch:
                    img = cv2.imread(filepath)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, img_size)
                    x_batch.append(img)
                x_batch = np.array(x_batch, np.float32)
                yield x_batch

    train_steps = len(x_train) / batch_size
    valid_steps = len(x_valid) / batch_size
    test_steps = len(labels_test) / batch_size

    # note: `model` is created once before the loop, so weights carry over between folds
    model = model
    model.compile(optimizer=SGD(lr=0.00001, momentum=0.9), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # sparse_categorical_crossentropy

    history.append(model.fit_generator(train_generator(),
                                       train_steps,
                                       epochs=epochs,
                                       verbose=1,
                                       validation_data=valid_generator(),
                                       validation_steps=valid_steps,
                                       shuffle=True))
    preds_valid = model.predict_generator(generator=valid_generator(),
                                          steps=valid_steps, verbose=1)
    preds_train = model.predict_generator(generator=train_generator(),
                                          steps=train_steps, verbose=1)
    preds_test_fold = model.predict_generator(generator=test_generator(),
                                              steps=test_steps, verbose=1)

    valid_score = metrics.roc_auc_score(y_valid, preds_valid)
    train_score = metrics.roc_auc_score(y_train, preds_train)
    print('valid score:{} for fold {}'.format(valid_score, i))
    print('train score: {} for fold {}'.format(train_score, i))
    valid_scores.append(valid_score)
    train_scores.append(train_score)
    print('avg valid score:{0:0.5f} after {1} folds'.format(np.mean(valid_scores), i))
    print('avg train score:{0:0.5f} after {1} folds'.format(np.mean(train_scores), i))

    preds_test += preds_test_fold

preds_test /= n_folds
test_score = metrics.roc_auc_score(labels_test, preds_test)
print(test_score)
Epoch 1/100
13/12 [===============================] - 114s 9s/step - loss: 1.7377 - acc: 0.2696 - val_loss: 1.7597 - val_acc: 0.2300
Epoch 2/100
13/12 [===============================] - 110s 8s/step - loss: 1.6934 - acc: 0.3023 - val_loss: 1.8588 - val_acc: 0.1550
Epoch 3/100
13/12 [===============================] - 111s 9s/step - loss: 1.7105 - acc: 0.3643 - val_loss: 1.7486 - val_acc: 0.2500
Epoch 4/100
13/12 [===============================] - 111s 9s/step - loss: 1.6023 - acc: 0.3164 - val_loss: 1.8622 - val_acc: 0.1950
Epoch 5/100
13/12 [===============================] - 119s 9s/step - loss: 1.6091 - acc: 0.3316 - val_loss: 1.7544 - val_acc: 0.2100
Epoch 6/100
13/12 [===============================] - 119s 9s/step - loss: 1.6923 - acc: 0.2882 - val_loss: 1.7124 - val_acc: 0.2600
Epoch 7/100
13/12 [===============================] - 118s 9s/step - loss: 1.6071 - acc: 0.3602 - val_loss: 1.8326 - val_acc: 0.2100
Epoch 8/100
13/12 [===============================] - 110s 8s/step - loss: 1.6666 - acc: 0.2882 - val_loss: 1.7460 - val_acc: 0.2250
Epoch 9/100
13/12 [===============================] - 107s 8s/step - loss: 1.5762 - acc: 0.3684 - val_loss: 1.7904 - val_acc: 0.1850
Epoch 10/100
13/12 [===============================] - 106s 8s/step - loss: 1.6371 - acc: 0.3550 - val_loss: 1.8316 - val_acc: 0.2000
Epoch 11/100
13/12 [===============================] - 105s 8s/step - loss: 1.5918 - acc: 0.3602 - val_loss: 1.6973 - val_acc: 0.2650
Epoch 12/100
13/12 [===============================] - 106s 8s/step - loss: 1.5225 - acc: 0.3987 - val_loss: 1.7811 - val_acc: 0.2100
Epoch 13/100
13/12 [===============================] - 105s 8s/step - loss: 1.6055 - acc: 0.3561 - val_loss: 1.8557 - val_acc: 0.1450
Epoch 14/100
13/12 [===============================] - 108s 8s/step - loss: 1.5136 - acc: 0.3368 - val_loss: 1.8463 - val_acc: 0.1650
Epoch 15/100
13/12 [===============================] - 108s 8s/step - loss: 1.5672 - acc: 0.3654 - val_loss: 1.7392 - val_acc: 0.2050
Epoch 16/100
13/12 [===============================] - 109s 8s/step - loss: 1.5112 - acc: 0.3939 - val_loss: 1.7721 - val_acc: 0.2200
Epoch 17/100
13/12 [===============================] - 106s 8s/step - loss: 1.3944 - acc: 0.4614 - val_loss: 1.7274 - val_acc: 0.2200
Epoch 18/100
13/12 [===============================] - 106s 8s/step - loss: 1.3603 - acc: 0.4559 - val_loss: 1.7671 - val_acc: 0.2050
Epoch 19/100
13/12 [===============================] - 107s 8s/step - loss: 1.4985 - acc: 0.3843 - val_loss: 1.7367 - val_acc: 0.2150
Epoch 20/100
13/12 [===============================] - 107s 8s/step - loss: 1.4073 - acc: 0.4180 - val_loss: 1.8292 - val_acc: 0.1700
Epoch 21/100
13/12 [===============================] - 111s 9s/step - loss: 1.4495 - acc: 0.3743 - val_loss: 1.8178 - val_acc: 0.2050
Epoch 22/100
13/12 [===============================] - 108s 8s/step - loss: 1.3810 - acc: 0.4704 - val_loss: 1.8049 - val_acc: 0.2000
Epoch 23/100
13/12 [===============================] - 105s 8s/step - loss: 1.3556 - acc: 0.4366 - val_loss: 1.7813 - val_acc: 0.2050
Epoch 24/100
13/12 [===============================] - 111s 9s/step - loss: 1.3139 - acc: 0.4614 - val_loss: 1.8184 - val_acc: 0.2000
Epoch 25/100
13/12 [===============================] - 111s 9s/step - loss: 1.4152 - acc: 0.4421 - val_loss: 1.8859 - val_acc: 0.1300
Epoch 26/100
13/12 [===============================] - 110s 8s/step - loss: 1.3630 - acc: 0.4577 - val_loss: 1.7652 - val_acc: 0.2250
Epoch 27/100
13/12 [===============================] - 110s 8s/step - loss: 1.3347 - acc: 0.4521 - val_loss: 1.7304 - val_acc: 0.2200
Epoch 28/100
13/12 [===============================] - 107s 8s/step - loss: 1.2981 - acc: 0.5048 - val_loss: 1.8108 - val_acc: 0.2200
Epoch 29/100
2/12 [===>..........................] - ETA: 52s - loss: 1.5823 - acc: 0.3125
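One variant worth trying (a sketch only, not a confirmed fix): reuse the GlobalAveragePooling2D layer that is already imported above instead of Flatten, and use a larger learning rate for the newly initialized head. Both changes are assumptions for experimentation.
# Sketch only: same data pipeline as above, different classification head.
def arc_gap(img_dim=img_dim):
    base_model = InceptionV3(weights="imagenet", include_top=False, input_shape=img_dim)
    for l in base_model.layers:
        l.trainable = False
    x = GlobalAveragePooling2D()(base_model.output)  # instead of Flatten over the 8x8x2048 feature map
    x = Dropout(0.5)(x)
    predictions = Dense(len(folders), activation="softmax")(x)
    return Model(inputs=base_model.input, outputs=predictions)

model = arc_gap()
# lr=0.00001 may be too small to move the new head; 0.001 is an assumed starting point
model.compile(optimizer=SGD(lr=0.001, momentum=0.9),
              loss='categorical_crossentropy', metrics=['accuracy'])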

Related

Transfer learning model running on inference does not learn

I'm doing a species classification task from kaggle (https://www.kaggle.com/competitions/yum-or-yuck-butterfly-mimics-2022/overview). I decided to use transfer learning to tackle this problem since there aren't that many images. The model is as follows:
inputs = tf.keras.layers.Input(shape=(224, 224, 3))
base_model = tf.keras.applications.resnet50.ResNet50(
    input_shape=(224, 224, 3),
    include_top=False,
    weights="imagenet")
for layer in base_model.layers:
    layer.trainable = False
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(1024, activation="relu")(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(512, activation="relu")(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(64, activation="relu")(x)
output = tf.keras.layers.Dense(6, activation="softmax")(x)
model = tf.keras.Model(inputs=inputs, outputs=output)
As per the guidelines for transfer learning (https://keras.io/guides/transfer_learning/), I'm freezing the ResNet layers and calling the base model in inference mode (training=False). However, the results show that the model is not learning properly. Convergence doesn't seem possible even after nearly 200 epochs:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics="accuracy",
)
stop_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=20,
    restore_best_weights=True
)
history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=200,
                    callbacks=[stop_early])
22/22 [==============================] - 19s 442ms/step - loss: 1.9317 - accuracy: 0.1794 - val_loss: 1.8272 - val_accuracy: 0.1618
Epoch 2/200
22/22 [==============================] - 9s 398ms/step - loss: 1.8250 - accuracy: 0.1882 - val_loss: 1.7681 - val_accuracy: 0.2197
Epoch 3/200
22/22 [==============================] - 9s 402ms/step - loss: 1.7927 - accuracy: 0.2294 - val_loss: 1.7612 - val_accuracy: 0.2139
Epoch 4/200
22/22 [==============================] - 9s 424ms/step - loss: 1.7930 - accuracy: 0.2000 - val_loss: 1.7640 - val_accuracy: 0.2139
Epoch 5/200
22/22 [==============================] - 9s 391ms/step - loss: 1.7872 - accuracy: 0.2132 - val_loss: 1.7489 - val_accuracy: 0.3121
Epoch 6/200
22/22 [==============================] - 9s 389ms/step - loss: 1.7700 - accuracy: 0.2574 - val_loss: 1.7378 - val_accuracy: 0.2543
Epoch 7/200
22/22 [==============================] - 9s 396ms/step - loss: 1.7676 - accuracy: 0.2353 - val_loss: 1.7229 - val_accuracy: 0.3064
Epoch 8/200
22/22 [==============================] - 9s 427ms/step - loss: 1.7721 - accuracy: 0.2353 - val_loss: 1.7225 - val_accuracy: 0.2948
Epoch 9/200
22/22 [==============================] - 9s 399ms/step - loss: 1.7522 - accuracy: 0.2588 - val_loss: 1.7267 - val_accuracy: 0.2948
Epoch 10/200
22/22 [==============================] - 9s 395ms/step - loss: 1.7434 - accuracy: 0.2735 - val_loss: 1.7151 - val_accuracy: 0.2948
Epoch 11/200
22/22 [==============================] - 9s 391ms/step - loss: 1.7500 - accuracy: 0.2632 - val_loss: 1.7083 - val_accuracy: 0.3064
Epoch 12/200
22/22 [==============================] - 9s 425ms/step - loss: 1.7307 - accuracy: 0.2721 - val_loss: 1.6899 - val_accuracy: 0.3179
Epoch 13/200
22/22 [==============================] - 9s 407ms/step - loss: 1.7439 - accuracy: 0.2794 - val_loss: 1.7045 - val_accuracy: 0.2948
Epoch 14/200
22/22 [==============================] - 9s 404ms/step - loss: 1.7376 - accuracy: 0.2706 - val_loss: 1.7118 - val_accuracy: 0.2659
Epoch 15/200
22/22 [==============================] - 9s 419ms/step - loss: 1.7588 - accuracy: 0.2647 - val_loss: 1.6684 - val_accuracy: 0.3237
Epoch 16/200
22/22 [==============================] - 9s 394ms/step - loss: 1.7289 - accuracy: 0.2824 - val_loss: 1.6733 - val_accuracy: 0.3064
Epoch 17/200
22/22 [==============================] - 9s 387ms/step - loss: 1.7184 - accuracy: 0.2809 - val_loss: 1.7185 - val_accuracy: 0.2659
Epoch 18/200
22/22 [==============================] - 9s 408ms/step - loss: 1.7242 - accuracy: 0.2765 - val_loss: 1.6961 - val_accuracy: 0.2717
Epoch 19/200
22/22 [==============================] - 9s 424ms/step - loss: 1.7218 - accuracy: 0.2853 - val_loss: 1.6757 - val_accuracy: 0.3006
Epoch 20/200
22/22 [==============================] - 9s 396ms/step - loss: 1.7248 - accuracy: 0.2882 - val_loss: 1.6716 - val_accuracy: 0.3064
Epoch 21/200
22/22 [==============================] - 9s 401ms/step - loss: 1.7134 - accuracy: 0.2838 - val_loss: 1.6666 - val_accuracy: 0.2948
Epoch 22/200
22/22 [==============================] - 9s 393ms/step - loss: 1.7140 - accuracy: 0.2941 - val_loss: 1.6427 - val_accuracy: 0.3064
I need to unfreeze the layers and turn off inference mode in order for the model to learn. I tested the same scenario with EfficientNet and the same thing happened. Finally, I also used Xception, and freezing the layers and running in inference mode was fine. So it seems they behave differently, even though they all have BatchNorm layers.
I'm not understanding what is going on here. Why would I need to turn inference mode off? Does anyone have a clue about this?
EDIT:
results from Resnet50: (image not included)
results from Xception: (image not included)
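For what it's worth, the fine-tuning phase from that guide would look roughly like the sketch below (assuming the model, base_model, train_generator and val_generator defined above; the epoch count and learning rate are placeholders):
import tensorflow as tf

# Phase 2 of the guide: unfreeze the base after the new head has been trained.
base_model.trainable = True

# Recompile with a very low learning rate so the pretrained weights are not destroyed.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# The base model was called with training=False, so its BatchNormalization layers
# keep running in inference mode (frozen moving statistics) even during fine-tuning.
history_finetune = model.fit(train_generator,
                             validation_data=val_generator,
                             epochs=20)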

TensorFlow with same accuracy in Python

I was just following a TensorFlow example from the book Hands-On Machine Learning with Scikit-Learn and TensorFlow but got weird results.
The example:
import tensorflow as tf
from tensorflow import keras
tf.__version__
keras.__version__
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000] / 255.0, y_train_full[5000:] / 255.0
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
"Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
              optimizer='sgd',
              metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=50, validation_data=(X_valid, y_valid))
As the epochs go by, we should see the accuracy improve, as indicated in the book:
Train on 55000 samples, validate on 5000 samples
Epoch 1/30
55000/55000 [==========] - 3s 55us/sample - loss: 1.4948 - acc: 0.5757 - val_loss: 1.0042 - val_acc: 0.7166
Epoch 2/30
55000/55000 [==========] - 3s 55us/sample - loss: 0.8690 - acc: 0.7318 - val_loss: 0.7549 - val_acc: 0.7616
[...]
Epoch 50/50
55000/55000 [==========] - 4s 72us/sample - loss: 0.3607 - acc: 0.8752 - val_loss: 0.3706 - val_acc: 0.8728
But when I ran it, I got the following:
Epoch 1/30
1719/1719 [==============================] - 3s 2ms/step - loss: 0.0623 - accuracy: 0.1005 - val_loss: 0.0011 - val_accuracy: 0.0914
Epoch 2/30
1719/1719 [==============================] - 3s 2ms/step - loss: 8.7637e-04 - accuracy: 0.1011 - val_loss: 5.2079e-04 - val_accuracy: 0.0914
Epoch 3/30
1719/1719 [==============================] - 3s 2ms/step - loss: 4.9200e-04 - accuracy: 0.1019 - val_loss: 3.4211e-04 - val_accuracy: 0.0914
[...]
Epoch 49/50
1719/1719 [==============================] - 3s 2ms/step - loss: 3.1710e-05 - accuracy: 0.0992 - val_loss: 3.2966e-05 - val_accuracy: 0.0914
Epoch 50/50
1719/1719 [==============================] - 3s 2ms/step - loss: 2.7711e-05 - accuracy: 0.1022 - val_loss: 3.1833e-05 - val_accuracy: 0.0914
So, as you can see, my run got a much lower accuracy that did not improve: it stayed at 0.0914 instead of 0.8728.
Is there something wrong in my TensorFlow installation, setup or even in the code?
You cannot divide the labels by 255, as in y_valid, y_train = y_train_full[:5000] / 255.0, y_train_full[5000:] / 255.0. The corrected code follows:
import tensorflow as tf
from tensorflow import keras
tf.__version__
keras.__version__
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
"Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
              optimizer='sgd',
              metrics=['accuracy'])
history = model.fit(X_train_full, y_train_full, epochs=5, validation_data=(X_test, y_test))
It gives accuracy like this:
Epoch 1/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.9880 - accuracy: 0.6923 - val_loss: 0.5710 - val_accuracy: 0.8054
Epoch 2/5
1875/1875 [==============================] - 2s 944us/step - loss: 0.5281 - accuracy: 0.8227 - val_loss: 0.5112 - val_accuracy: 0.8228
Epoch 3/5
1875/1875 [==============================] - 2s 913us/step - loss: 0.4720 - accuracy: 0.8391 - val_loss: 0.4782 - val_accuracy: 0.8345
Epoch 4/5
1875/1875 [==============================] - 2s 915us/step - loss: 0.4492 - accuracy: 0.8462 - val_loss: 0.4568 - val_accuracy: 0.8410
Epoch 5/5
1875/1875 [==============================] - 2s 935us/step - loss: 0.4212 - accuracy: 0.8550 - val_loss: 0.4469 - val_accuracy: 0.8444
Also, the Adam optimizer may give better results than SGD.
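For example, a minimal variation of the compile step above with Adam (the learning rate shown is just the Keras default made explicit):
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])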

Inception Resnet v2 bad test accuracy with transfer learning

I want to binary-classify breast cancer histopathological images from the BreakHis dataset (https://www.kaggle.com/ambarish/breakhis) using transfer learning and Inception ResNet v2. The goal is to freeze all layers and train only the fully connected layer, adding two neurons to the model. In particular, initially I want to consider the images with magnification factor 40X (Benign: 625, Malignant: 1370). Here is a summary of what I do:
I read the images and resize them to 150x150
I partition the dataset into training, validation and test set
I load the pre-trained network Inception Resnet v2
I freeze all the layers and add the two neurons for binary classification (1 = "benign", 0 = "malignant")
I compile the model using the Adam optimizer
I carry out the training
I make the prediction
I calculate the accuracy
This is the code:
data = dataset[dataset["Magnificant"]=="40X"]
def preprocessing(dataset, img_size):
    # images
    X = []
    # labels
    y = []
    i = 0
    for image in list(dataset["Path"]):
        # resize and read the images
        X.append(cv2.resize(cv2.imread(image, cv2.IMREAD_COLOR),
                            (img_size, img_size), interpolation=cv2.INTER_CUBIC))
        basename = os.path.basename(image)
        # get labels
        if dataset.loc[i][2] == "benign":
            y.append(1)
        else:
            y.append(0)
        i = i + 1
    return X, y
X, y = preprocessing(data, 150)
X = np.array(X)
y = np.array(y)
# Splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify = y_40, shuffle=True, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
conv_base = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=[150, 150, 3])
# Freezing
for layer in conv_base.layers:
    layer.trainable = False
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(1, activation='sigmoid'))
opt = tf.keras.optimizers.Adam(learning_rate=0.0002)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer=opt, metrics = ["accuracy", tf.metrics.AUC()])
batch_size = 32
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow(X_train, y_train, batch_size=batch_size)
val_generator = val_datagen.flow(X_val, y_val, batch_size=batch_size)
ntrain =len(X_train)
nval = len(X_val)
len(y_train)
epochs = 70
history = model.fit_generator(train_generator,
                              steps_per_epoch=ntrain // batch_size,
                              epochs=epochs,
                              validation_data=val_generator,
                              validation_steps=nval // batch_size)
This is the output of the training at the last epoch:
Epoch 70/70
32/32 [==============================] - 3s 84ms/step - loss: 0.0499 - accuracy: 0.9903 - auc_5: 0.9996 - val_loss: 0.5661 - val_accuracy: 0.8250 - val_auc_5: 0.8521
I make the prediction:
test_datagen = ImageDataGenerator(rescale=1./255)
x = X_test
y_pred = model.predict(test_datagen.flow(x))
y_p = []
for i in range(len(y_pred)):
    if y_pred[i] > 0.5:
        y_p.append(1)
    else:
        y_p.append(0)
I calculate the accuracy:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_p)
print(accuracy)
This is the accuracy value I get: 0.5459098497495827
Why do I get such low accuracy? I have done several tests, but I always get similar results.
Update
I have made the following changes, but I always get the same results (only the modified parts of the code are shown):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify = y, shuffle=True, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, stratify = y_train, shuffle=True, random_state=1)
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
ntrain =len(X_train)
nval = len(X_val)
len(y_train)
epochs = 70
history = model.fit_generator(train_generator,
                              steps_per_epoch=ntrain // batch_size,
                              epochs=epochs,
                              validation_data=val_generator,
                              validation_steps=nval // batch_size, callbacks=[callback])
Update 2
I also changed from_logits from True to False, but that alone does not solve the problem; I still get 57% accuracy.
This is the model.fit output over 30 epochs:
Epoch 1/30
32/32 [==============================] - 23s 202ms/step - loss: 0.7994 - accuracy: 0.6010 - auc: 0.5272 - val_loss: 0.5338 - val_accuracy: 0.7688 - val_auc: 0.7943
Epoch 2/30
32/32 [==============================] - 3s 87ms/step - loss: 0.5778 - accuracy: 0.7206 - auc: 0.7521 - val_loss: 0.4763 - val_accuracy: 0.7781 - val_auc: 0.8155
Epoch 3/30
32/32 [==============================] - 3s 85ms/step - loss: 0.5311 - accuracy: 0.7581 - auc: 0.7710 - val_loss: 0.4740 - val_accuracy: 0.7719 - val_auc: 0.8212
Epoch 4/30
32/32 [==============================] - 3s 85ms/step - loss: 0.4684 - accuracy: 0.7718 - auc: 0.8219 - val_loss: 0.4270 - val_accuracy: 0.8031 - val_auc: 0.8611
Epoch 5/30
32/32 [==============================] - 3s 83ms/step - loss: 0.4280 - accuracy: 0.7943 - auc: 0.8617 - val_loss: 0.4496 - val_accuracy: 0.7969 - val_auc: 0.8468
Epoch 6/30
32/32 [==============================] - 3s 88ms/step - loss: 0.4237 - accuracy: 0.8250 - auc: 0.8673 - val_loss: 0.3993 - val_accuracy: 0.7937 - val_auc: 0.8840
Epoch 7/30
32/32 [==============================] - 3s 85ms/step - loss: 0.4130 - accuracy: 0.8513 - auc: 0.8767 - val_loss: 0.4207 - val_accuracy: 0.7781 - val_auc: 0.8692
Epoch 8/30
32/32 [==============================] - 3s 85ms/step - loss: 0.3446 - accuracy: 0.8485 - auc: 0.9077 - val_loss: 0.4229 - val_accuracy: 0.7937 - val_auc: 0.8730
Epoch 9/30
32/32 [==============================] - 3s 85ms/step - loss: 0.3690 - accuracy: 0.8514 - auc: 0.9003 - val_loss: 0.4300 - val_accuracy: 0.8062 - val_auc: 0.8696
Epoch 10/30
32/32 [==============================] - 3s 100ms/step - loss: 0.3204 - accuracy: 0.8533 - auc: 0.9270 - val_loss: 0.4235 - val_accuracy: 0.7969 - val_auc: 0.8731
Epoch 11/30
32/32 [==============================] - 3s 86ms/step - loss: 0.3555 - accuracy: 0.8508 - auc: 0.9124 - val_loss: 0.4124 - val_accuracy: 0.8000 - val_auc: 0.8797
Epoch 12/30
32/32 [==============================] - 3s 85ms/step - loss: 0.3243 - accuracy: 0.8481 - auc: 0.9308 - val_loss: 0.3979 - val_accuracy: 0.7969 - val_auc: 0.8908
Epoch 13/30
32/32 [==============================] - 3s 85ms/step - loss: 0.3017 - accuracy: 0.8744 - auc: 0.9348 - val_loss: 0.4239 - val_accuracy: 0.8094 - val_auc: 0.8758
Epoch 14/30
32/32 [==============================] - 3s 89ms/step - loss: 0.3317 - accuracy: 0.8521 - auc: 0.9221 - val_loss: 0.4238 - val_accuracy: 0.8094 - val_auc: 0.8704
Epoch 15/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2840 - accuracy: 0.8908 - auc: 0.9490 - val_loss: 0.4131 - val_accuracy: 0.8281 - val_auc: 0.8858
Epoch 16/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2583 - accuracy: 0.8905 - auc: 0.9511 - val_loss: 0.3841 - val_accuracy: 0.8375 - val_auc: 0.9007
Epoch 17/30
32/32 [==============================] - 3s 87ms/step - loss: 0.2810 - accuracy: 0.8648 - auc: 0.9470 - val_loss: 0.3928 - val_accuracy: 0.8438 - val_auc: 0.8972
Epoch 18/30
32/32 [==============================] - 3s 89ms/step - loss: 0.2622 - accuracy: 0.8923 - auc: 0.9550 - val_loss: 0.3732 - val_accuracy: 0.8438 - val_auc: 0.9089
Epoch 19/30
32/32 [==============================] - 3s 84ms/step - loss: 0.2486 - accuracy: 0.8990 - auc: 0.9579 - val_loss: 0.4077 - val_accuracy: 0.8250 - val_auc: 0.8924
Epoch 20/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2412 - accuracy: 0.9074 - auc: 0.9635 - val_loss: 0.4249 - val_accuracy: 0.8219 - val_auc: 0.8787
Epoch 21/30
32/32 [==============================] - 3s 84ms/step - loss: 0.2386 - accuracy: 0.9095 - auc: 0.9657 - val_loss: 0.4177 - val_accuracy: 0.8094 - val_auc: 0.8904
Epoch 22/30
32/32 [==============================] - 3s 99ms/step - loss: 0.2313 - accuracy: 0.8996 - auc: 0.9668 - val_loss: 0.4089 - val_accuracy: 0.8406 - val_auc: 0.8890
Epoch 23/30
32/32 [==============================] - 3s 86ms/step - loss: 0.2424 - accuracy: 0.9067 - auc: 0.9654 - val_loss: 0.4033 - val_accuracy: 0.8500 - val_auc: 0.8953
Epoch 24/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2315 - accuracy: 0.9045 - auc: 0.9626 - val_loss: 0.3903 - val_accuracy: 0.8250 - val_auc: 0.9030
Epoch 25/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2001 - accuracy: 0.9321 - auc: 0.9788 - val_loss: 0.4276 - val_accuracy: 0.8000 - val_auc: 0.8855
Epoch 26/30
32/32 [==============================] - 3s 87ms/step - loss: 0.2118 - accuracy: 0.9212 - auc: 0.9695 - val_loss: 0.4335 - val_accuracy: 0.8125 - val_auc: 0.8897
Epoch 27/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2463 - accuracy: 0.8941 - auc: 0.9665 - val_loss: 0.4112 - val_accuracy: 0.8438 - val_auc: 0.8882
Epoch 28/30
32/32 [==============================] - 3s 85ms/step - loss: 0.2130 - accuracy: 0.9033 - auc: 0.9771 - val_loss: 0.3834 - val_accuracy: 0.8406 - val_auc: 0.9021
Epoch 29/30
32/32 [==============================] - 3s 86ms/step - loss: 0.2021 - accuracy: 0.9229 - auc: 0.9754 - val_loss: 0.3855 - val_accuracy: 0.8469 - val_auc: 0.9008
Epoch 30/30
32/32 [==============================] - 3s 88ms/step - loss: 0.1859 - accuracy: 0.9314 - auc: 0.9824 - val_loss: 0.4018 - val_accuracy: 0.8375 - val_auc: 0.8928
You have to change from_logits=True to from_logits=False in your loss function. Once again, credits to @Frightera.
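For reference, a minimal sketch of the corrected compile step, assuming the same model and opt as in the question; with a sigmoid output layer the loss must be told it receives probabilities rather than logits:
import tensorflow as tf

# sigmoid output -> the model already produces probabilities, so from_logits must be False
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(loss=loss, optimizer=opt, metrics=["accuracy", tf.metrics.AUC()])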
It seems like your model is over-fitting somewhere; it would be best to check for that.
Run the K-Fold test with 10 folds; it will show more reliable results.
In your metrics, also add the F1 score. The F1 value gives you a realistic view of the true positives in terms of both false positives and false negatives.
Add some augmentations (apart from the rescaling one) to make the model robust to changes in the dataset; see the sketch after this list.
Tweak the training parameters if you feel it is needed.
If these changes fail, then there is a possibility that the model fails to learn the artifacts of the image, and you should go ahead with a different model.
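As a sketch of the augmentation point above (the specific ranges are illustrative assumptions, not values from the original post):
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augment the training set only; the validation set keeps plain rescaling.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)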

What can I do to solve the over-fitting problem in the following code?

I'm trying to build handwritten word recognition using the IAM dataset, and while training I'm facing an over-fitting problem. Would you please help me figure out what mistake I have made in the code below?
I have tried all the solutions I could find, but the same over-fitting problem persists.
import os
import fnmatch
import cv2
import numpy as np
import string
import time
import random
from keras import regularizers, optimizers
from keras.regularizers import l2
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
import matplotlib.pyplot as plt
imgSize = (128,32)
def preprocess(img, imgSize, dataAugmentation=False):
    "put img into target img of size imgSize, transpose for TF and normalize gray-values"
    # there are damaged files in IAM dataset - just use black image instead
    if img is None:
        img = np.zeros([imgSize[1], imgSize[0]])
    # increase dataset size by applying random stretches to the images
    if dataAugmentation:
        stretch = (random.random() - 0.5)  # -0.5 .. +0.5
        wStretched = max(int(img.shape[1] * (1 + stretch)), 1)  # random width, but at least 1
        img = cv2.resize(img, (wStretched, img.shape[0]))  # stretch horizontally by factor 0.5 .. 1.5
        img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # print('Data Augmented')
    # create target image and copy sample image into it
    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1), max(min(ht, int(h / f)), 1))  # scale according to f (result at least 1 and at most wt or ht)
    img = cv2.resize(img, newSize)
    target = np.ones([ht, wt]) * 255
    target[0:newSize[1], 0:newSize[0]] = img
    # transpose for TF
    img = cv2.transpose(target)
    # normalize
    (m, s) = cv2.meanStdDev(img)
    m = m[0][0]
    s = s[0][0]
    img = img - m
    img = img / s if s > 0 else img
    img = np.expand_dims(img, axis=2)
    return img
def truncateLabel(text, maxTextLen):  # A, 32
    cost = 0
    for i in range(len(text)):
        if i != 0 and text[i] == text[i-1]:
            cost += 2
        else:
            cost += 1
        if cost > maxTextLen:
            return text[:i]  # returns words with repeated chars
    return text
path = 'iam_dataset_words/'
maxTextLen = 32
samples = []
bad_samples = []
fileName = ''
dataAugmentation = False
chars = set()
f=open(path+ 'words.txt', "r")
cou = 0
bad_samples = []
bad_samples_reference = ['a01-117-05-02.png',
'r06-022-03-05.png']
for line in f:
    cou += 1
    # ignore comment line
    if not line or line[0] == '#':
        continue
    lineSplit = line.strip().split(' ')
    assert len(lineSplit) >= 9
    fileNameSplit = lineSplit[0].split('-')  # a01-000u-00-00 splits
    # ../data/words/a01/a01-000u/a01-000u-00-00.png
    fileName = path + 'words/' \
        + fileNameSplit[0] + '/' \
        + fileNameSplit[0] + '-' \
        + fileNameSplit[1] \
        + '/' + lineSplit[0] + '.png'
    # GT text are columns starting at 9
    gtText = truncateLabel(' '.join(lineSplit[8:]), maxTextLen)  # A, 32
    # chars = chars.union(gtText)  # unique chars only
    chars = chars.union(set(list(gtText)))
    # check if image is not empty
    if not os.path.getsize(fileName):
        bad_samples.append(lineSplit[0] + '.png')
        continue
    # put sample into list
    # 'A', '../data/words/a01/a01-000u/a01-000u-00-00.png'
    samples.append([gtText, fileName])
print(cou)
print(len(samples))
print(samples[:2])
if set(bad_samples) != set(bad_samples_reference):
    print("Warning, damaged images found:", bad_samples)
    print("Damaged images expected:", bad_samples_reference)
trainSamples = []
validationSamples = []
testSamples = []
valid_testSamples = []
# split into training and validation set: 90% - 10%
# dataAugmentation = True
random.shuffle(samples)
splitIdx = int(0.75 * len(samples))
train_samples = samples[:splitIdx]
valid_testSamples = samples[splitIdx:]
print('vv:', len(valid_testSamples))
validationSamples = valid_testSamples[:15000]
testSamples = valid_testSamples[15000:]
print('valid: ',len(validationSamples))
print('test: ',len(testSamples))
print('train_before: ',len(train_samples))
# # start with train set
trainSamples = train_samples[:25000] #tran data 25000
print('train_ after: ',len(trainSamples))
# # list of all unique chars in dataset
charList = sorted(list(chars))
char_list = str().join(charList)
# print('test samples: ',testSamples)
print('char list : ',char_list)
# # save characters of model for inference mode
# open(FilePaths.fnCharList, 'w').write(str().join(charList))
# # save words contained in dataset into file
# open(FilePaths.fnCorpus, 'w').write(str(' ').join(loader.trainWords + validationWords))
def encode_to_labels(txt):
    # encoding each output word into digits
    chars = []
    for index, char in enumerate(txt):
        try:
            chars.append(char_list.index(char))
        except:
            print(char)
    return chars
print(trainSamples[:2])
# lists for training dataset
train_img = []
train_txt = []
train_input_length = []
train_label_length = []
train_orig_txt = []
max_label_len = 0
b = 0
for words, imgPath in trainSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=True)
    # compute maximum length of the text
    if len(words) > max_label_len:
        max_label_len = len(words)
    train_orig_txt.append(words)
    train_label_length.append(len(words))
    train_input_length.append(31)
    train_img.append(img)
    train_txt.append(encode_to_labels(words))
    b += 1
# print(train_img[1])
print(len(train_txt))
train_txt[:5]
a = 0
#lists for validation dataset
valid_img = []
valid_txt = []
valid_input_length = []
valid_label_length = []
valid_orig_txt = []
for words, imgPath in validationSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=False)
    valid_orig_txt.append(words)
    valid_label_length.append(len(words))
    valid_input_length.append(31)
    valid_img.append(img)
    valid_txt.append(encode_to_labels(words))
    a += 1
print(len(valid_txt))
valid_txt[:5]
# lists for training dataset
test_img = []
test_txt = []
test_input_length = []
test_label_length = []
test_orig_txt = []
c = 0
for words, imgPath in testSamples:
    img = preprocess(cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE), imgSize, dataAugmentation=False)
    test_orig_txt.append(words)
    test_label_length.append(len(words))
    test_input_length.append(31)
    test_img.append(img)
    test_txt.append(encode_to_labels(words))
    c += 1
# print(c)
print(test_img[0].shape)
print('Train: {}\nValid: {}\nTest: {}'.format(b,a,c))
print(max_label_len)
# pad each output label to maximum text length
train_padded_txt = pad_sequences(train_txt, maxlen=max_label_len, padding='post', value = len(char_list))
valid_padded_txt = pad_sequences(valid_txt, maxlen=max_label_len, padding='post', value = len(char_list))
test_padded_txt = pad_sequences(test_txt, maxlen=max_label_len, padding='post', value = len(char_list))
print(len(train_padded_txt))
print(len(test_padded_txt))
print(valid_padded_txt[1])
# input with shape of height=32 and width=128
inputs = Input(shape=(128,32,1))
print(inputs.shape)
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(32, (3,3), activation = 'relu', padding='same')(inputs)
batch_norm_1 = BatchNormalization()(conv_1)
# poolig layer with kernel size (2,2)
pool_1 = Conv2D(32, kernel_size=(1, 1), strides=2, padding='valid')(batch_norm_1)
conv_2 = Conv2D(64, (3,3), activation = 'relu', padding='same')(pool_1)
batch_norm_2 = BatchNormalization()(conv_2)
pool_2 = Conv2D(64, kernel_size=(1, 1), strides=2, padding='valid')(batch_norm_2)
conv_3 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_2)
batch_norm_3 = BatchNormalization()(conv_3)
conv_4 = Conv2D(128, (3,3), activation = 'relu', padding='same')(batch_norm_3)
batch_norm_4 = BatchNormalization()(conv_4)
# poolig layer with kernel size (1,2)
pool_4 = MaxPool2D(pool_size=(1,2))(batch_norm_4)
conv_5 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(256, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(1,2))(batch_norm_6)
conv_7 = Conv2D(256, (2,2), activation = 'relu')(pool_6)
batch_norm_7 = BatchNormalization()(conv_7)
# print(conv_7.shape)
# map-to-sequence-- dropping 1 dimension
squeezed = Lambda(lambda x: K.squeeze(x, 2))(batch_norm_7)
# print('squeezed',squeezed.shape)
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.3))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.3))(blstm_1)
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# model to be used at test time
word_model = Model(inputs, outputs)
adam = optimizers.Adamax(lr=0.01, decay=1e-5)
# NOTE: the CTC-loss wrapper model assigned to `model` (taking [inputs, labels,
# input_length, label_length] and outputting a CTC Lambda loss) is not shown in
# this snippet; it is what gets compiled and trained below, while word_model is
# the model used at test time.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam, metrics=['accuracy'])
filepath="best_model.hdf5"
checkpoint1 = ReduceLROnPlateau(monitor='val_loss', verbose=1,
mode='auto',factor=0.2,patience=4, min_lr=0.0001)
checkpoint2 = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint1, checkpoint2]
train_img = np.array(train_img)
train_input_length = np.array(train_input_length)
train_label_length = np.array(train_label_length)
valid_img = np.array(valid_img)
valid_input_length = np.array(valid_input_length)
valid_label_length = np.array(valid_label_length)
test_img = np.array(test_img)
test_input_length = np.array(test_input_length)
test_label_length = np.array(test_label_length)
test_img.shape
batch_size = 50
epochs = 30
train_history = model.fit(x=[train_img, train_padded_txt, train_input_length, train_label_length],
y=np.zeros(len(train_img)), batch_size=batch_size, epochs = epochs,
validation_data = ([valid_img, valid_padded_txt, valid_input_length,
valid_label_length], [np.zeros(len(valid_img))]),
verbose = 1, callbacks = callbacks_list)
Train on 25000 samples, validate on 15000 samples
Epoch 1/30
25000/25000 [==============================] - 159s 6ms/step - loss: 13.6510 - acc: 0.0199 - val_loss: 11.4910 - val_acc: 0.0651
Epoch 00001: val_loss improved from inf to 11.49100, saving model to best_model.hdf5
Epoch 2/30
25000/25000 [==============================] - 146s 6ms/step - loss: 10.9559 - acc: 0.0603 - val_loss: 9.7359 - val_acc: 0.0904
Epoch 00002: val_loss improved from 11.49100 to 9.73587, saving model to best_model.hdf5
Epoch 3/30
25000/25000 [==============================] - 146s 6ms/step - loss: 9.0720 - acc: 0.0943 - val_loss: 7.3571 - val_acc: 0.1565
Epoch 00003: val_loss improved from 9.73587 to 7.35715, saving model to best_model.hdf5
Epoch 4/30
25000/25000 [==============================] - 145s 6ms/step - loss: 6.9501 - acc: 0.1520 - val_loss: 5.5228 - val_acc: 0.2303
Epoch 00004: val_loss improved from 7.35715 to 5.52277, saving model to best_model.hdf5
Epoch 5/30
25000/25000 [==============================] - 144s 6ms/step - loss: 5.4893 - acc: 0.2129 - val_loss: 4.3179 - val_acc: 0.2895
Epoch 00005: val_loss improved from 5.52277 to 4.31793, saving model to best_model.hdf5
Epoch 6/30
25000/25000 [==============================] - 143s 6ms/step - loss: 4.7053 - acc: 0.2612 - val_loss: 3.7490 - val_acc: 0.3449
Epoch 00006: val_loss improved from 4.31793 to 3.74896, saving model to best_model.hdf5
Epoch 7/30
25000/25000 [==============================] - 143s 6ms/step - loss: 4.1183 - acc: 0.3096 - val_loss: 3.5902 - val_acc: 0.3805
Epoch 00007: val_loss improved from 3.74896 to 3.59015, saving model to best_model.hdf5
Epoch 8/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.6662 - acc: 0.3462 - val_loss: 3.7923 - val_acc: 0.3350
Epoch 00008: val_loss did not improve from 3.59015
Epoch 9/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.3398 - acc: 0.3809 - val_loss: 3.1352 - val_acc: 0.4344
Epoch 00009: val_loss improved from 3.59015 to 3.13516, saving model to best_model.hdf5
Epoch 10/30
25000/25000 [==============================] - 143s 6ms/step - loss: 3.0199 - acc: 0.4129 - val_loss: 2.9798 - val_acc: 0.4541
Epoch 00010: val_loss improved from 3.13516 to 2.97978, saving model to best_model.hdf5
Epoch 11/30
25000/25000 [==============================] - 143s 6ms/step - loss: 2.7361 - acc: 0.4447 - val_loss: 3.3836 - val_acc: 0.3780
Epoch 00011: val_loss did not improve from 2.97978
Epoch 12/30
25000/25000 [==============================] - 143s 6ms/step - loss: 2.5127 - acc: 0.4695 - val_loss: 2.9266 - val_acc: 0.5041
Epoch 00012: val_loss improved from 2.97978 to 2.92656, saving model to best_model.hdf5
Epoch 13/30
25000/25000 [==============================] - 142s 6ms/step - loss: 2.3045 - acc: 0.4974 - val_loss: 2.7329 - val_acc: 0.5174
Epoch 00013: val_loss improved from 2.92656 to 2.73294, saving model to best_model.hdf5
Epoch 14/30
25000/25000 [==============================] - 141s 6ms/step - loss: 2.1245 - acc: 0.5237 - val_loss: 2.8624 - val_acc: 0.5339
Epoch 00014: val_loss did not improve from 2.73294
Epoch 15/30
25000/25000 [==============================] - 142s 6ms/step - loss: 1.9091 - acc: 0.5524 - val_loss: 2.6933 - val_acc: 0.5506
Epoch 00015: val_loss improved from 2.73294 to 2.69333, saving model to best_model.hdf5
Epoch 16/30
25000/25000 [==============================] - 141s 6ms/step - loss: 1.7565 - acc: 0.5705 - val_loss: 2.7697 - val_acc: 0.5461
Epoch 00016: val_loss did not improve from 2.69333
Epoch 17/30
25000/25000 [==============================] - 145s 6ms/step - loss: 1.6273 - acc: 0.5892 - val_loss: 2.8992 - val_acc: 0.5361
Epoch 00017: val_loss did not improve from 2.69333
Epoch 18/30
25000/25000 [==============================] - 145s 6ms/step - loss: 1.5007 - acc: 0.6182 - val_loss: 2.9558 - val_acc: 0.5345
Epoch 00018: val_loss did not improve from 2.69333
Epoch 19/30
25000/25000 [==============================] - 143s 6ms/step - loss: 1.3775 - acc: 0.6311 - val_loss: 2.8437 - val_acc: 0.5744
Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0019999999552965165.
Epoch 00019: val_loss did not improve from 2.69333
Epoch 20/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.9636 - acc: 0.7115 - val_loss: 2.6072 - val_acc: 0.6083
Epoch 00020: val_loss improved from 2.69333 to 2.60724, saving model to best_model.hdf5
Epoch 21/30
25000/25000 [==============================] - 146s 6ms/step - loss: 0.7940 - acc: 0.7583 - val_loss: 2.6613 - val_acc: 0.6167
Epoch 00021: val_loss did not improve from 2.60724
Epoch 22/30
25000/25000 [==============================] - 146s 6ms/step - loss: 0.6995 - acc: 0.7797 - val_loss: 2.7180 - val_acc: 0.6220
Epoch 00022: val_loss did not improve from 2.60724
Epoch 23/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.6197 - acc: 0.8046 - val_loss: 2.7504 - val_acc: 0.6226
Epoch 00023: val_loss did not improve from 2.60724
Epoch 24/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.5668 - acc: 0.8167 - val_loss: 2.8238 - val_acc: 0.6255
Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0003999999724328518.
Epoch 00024: val_loss did not improve from 2.60724
Epoch 25/30
25000/25000 [==============================] - 144s 6ms/step - loss: 0.5136 - acc: 0.8316 - val_loss: 2.8167 - val_acc: 0.6283
Epoch 00025: val_loss did not improve from 2.60724
Epoch 26/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.5012 - acc: 0.8370 - val_loss: 2.8244 - val_acc: 0.6299
Epoch 00026: val_loss did not improve from 2.60724
Epoch 27/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.4886 - acc: 0.8425 - val_loss: 2.8366 - val_acc: 0.6282
Epoch 00027: val_loss did not improve from 2.60724
Epoch 28/30
25000/25000 [==============================] - 143s 6ms/step - loss: 0.4820 - acc: 0.8432 - val_loss: 2.8447 - val_acc: 0.6271
Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 00028: val_loss did not improve from 2.60724
Epoch 29/30
25000/25000 [==============================] - 141s 6ms/step - loss: 0.4643 - acc: 0.8452 - val_loss: 2.8538 - val_acc: 0.6278
Epoch 00029: val_loss did not improve from 2.60724
Epoch 30/30
25000/25000 [==============================] - 141s 6ms/step - loss: 0.4576 - acc: 0.8496 - val_loss: 2.8555 - val_acc: 0.6277
Epoch 00030: val_loss did not improve from 2.60724
Evaluation of the model
test_history = model.evaluate([test_img, test_padded_txt,
test_input_length, test_label_length],
y=np.zeros(len(test_img)), verbose = 1)
test_history
Output
13830/13830 [==============================] - 42s 3ms/step
[2.855567638786134, 0.6288503253882292]
Some predicted output (images not included here):
Not sure what you have already tried, but did you check if your training and validation samples are balanced? That is, whether they have roughly the same percentages of examples in each category.
You could shuffle 'samples' using 'random.shuffle(samples)' before executing your following code:
splitIdx = int(0.75 * len(samples))
train_samples = samples[:splitIdx]
That way, you can be more certain that your training and validation sets are balanced.
There is a lot you can do.
Add batch normalization after every conv2d layer
Replace maxpooling with conv2d valid padding so it becomes a learnable layer
from: pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
to: pool_1 = Conv2D(filters, kernel_size=(1, 1), strides=2, padding='valid')(conv_1)
Add L2 regularization to your layers (see the sketch after this list for one way to implement it)
Try weight decay
Increase the dropout values you already have
Modify your learning rate, too small and it might fall into a local minimum
There is a lot more you could try; the only way to know what works is to experiment.
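As a sketch of the L2 regularization point above (the 0.001 factor is an assumed example value, and only the first conv layer from the question is shown):
from keras import regularizers
from keras.layers import Input, Conv2D

inputs = Input(shape=(128, 32, 1))  # same input shape as in the question
# convolution layer with an L2 penalty on its kernel weights
conv_1 = Conv2D(32, (3, 3), activation='relu', padding='same',
                kernel_regularizer=regularizers.l2(0.001))(inputs)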

My CNN Keras doesn't predict properly and I don't know what to do

I have to build a CNN to diagnose 4th-stage diabetic retinopathy (binary classification: 0 = non-4th-stage, nonPdr; 1 = 4th stage, pdr). I'm using VGG16 and a Gaussian blur for better classification. I have 1400 test images (700 per class), and this is my train.py:
#import tensorflow as tf
import cv2
import os
import numpy as np
from keras.layers.core import Flatten, Dense, Dropout, Reshape
from keras.models import Model
from keras.layers import Input, ZeroPadding2D, Dropout
from keras import optimizers
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.applications.vgg16 import VGG16
TRAIN_DIR = 'train/'
TEST_DIR = 'test/'
v = 'v/'
BATCH_SIZE = 32
NUM_EPOCHS = 5
def ReadImages(Path):
    LabelList = list()
    ImageCV = list()
    classes = ["nonPdr", "pdr"]
    # Get all subdirectories
    FolderList = [f for f in os.listdir(Path) if not f.startswith('.')]
    # Loop over each directory
    for File in FolderList:
        for index, Image in enumerate(os.listdir(os.path.join(Path, File))):
            # Convert the path into a file
            ImageCV.append(cv2.resize(cv2.imread(os.path.join(Path, File) + os.path.sep + Image), (224, 224)))
            LabelList.append(classes.index(os.path.splitext(File)[0]))
            ImageCV[index] = cv2.addWeighted(ImageCV[index], 4, cv2.GaussianBlur(ImageCV[index], (0, 0), 10), -4, 128)
    return ImageCV, LabelList
data, labels = ReadImages(TRAIN_DIR)
valid, vlabels = ReadImages(TEST_DIR)
vgg16_model = VGG16(weights="imagenet", include_top=True)
# (2) remove the top layer
base_model = Model(input=vgg16_model.input,
output=vgg16_model.get_layer("block5_pool").output)
# (3) attach a new top layer
base_out = base_model.output
base_out = Reshape((25088,))(base_out)
top_fc1 = Dense(64, activation="relu")(base_out)
top_fc1 = Dropout(0.50)(base_out)
# output layer: (None, 5)
top_preds = Dense(1, activation="sigmoid")(top_fc1)
# (4) freeze weights until the last but one convolution layer (block4_pool)
for layer in base_model.layers[0:14]:
    layer.trainable = False
# (5) create new hybrid model
model = Model(input=base_model.input, output=top_preds)
# (6) compile and train the model
sgd = SGD(lr=0.000001, momentum=0.9)
model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=["accuracy"])
data = np.asarray(data)
valid = np.asarray(valid)
data = data.astype('float32')
valid = valid.astype('float32')
data /= 255
valid /= 255
labels = np.array(labels)
datagen = ImageDataGenerator(
featurewise_center=True,
featurewise_std_normalization=True,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True)
# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(data)
mean = datagen.mean
std = datagen.std
print(mean, "mean")
print(std, "std")
es = EarlyStopping(monitor='val_loss', verbose=1)
# fits the model on batches with real-time data augmentation:
model.fit_generator(datagen.flow(data, np.array(labels), batch_size=32),
steps_per_epoch=len(data) / 32, epochs=50,
validation_data=(valid, np.array(vlabels)),
nb_val_samples=72, callbacks=[es])
model.save('model.h5')
and this returns the follow:
Epoch 1/50
44/43 [==============================] - 475s 11s/step - loss: 0.9671 - acc: 0.4789 - val_loss: 0.6808 - val_acc: 0.6389
Epoch 2/50
44/43 [==============================] - 467s 11s/step - loss: 0.8427 - acc: 0.5007 - val_loss: 0.6364 - val_acc: 0.6389
Epoch 3/50
44/43 [==============================] - 468s 11s/step - loss: 0.7703 - acc: 0.5204 - val_loss: 0.6136 - val_acc: 0.6806
Epoch 4/50
44/43 [==============================] - 466s 11s/step - loss: 0.7324 - acc: 0.5512 - val_loss: 0.5941 - val_acc: 0.7500
Epoch 5/50
44/43 [==============================] - 466s 11s/step - loss: 0.7074 - acc: 0.5679 - val_loss: 0.5758 - val_acc: 0.7639
Epoch 6/50
44/43 [==============================] - 461s 10s/step - loss: 0.6640 - acc: 0.6146 - val_loss: 0.5584 - val_acc: 0.8194
Epoch 7/50
44/43 [==============================] - 455s 10s/step - loss: 0.6562 - acc: 0.6077 - val_loss: 0.5418 - val_acc: 0.8333
Epoch 8/50
44/43 [==============================] - 458s 10s/step - loss: 0.6076 - acc: 0.6700 - val_loss: 0.5263 - val_acc: 0.8889
Epoch 9/50
44/43 [==============================] - 456s 10s/step - loss: 0.5743 - acc: 0.7005 - val_loss: 0.5119 - val_acc: 0.9167
Epoch 10/50
44/43 [==============================] - 457s 10s/step - loss: 0.5649 - acc: 0.7041 - val_loss: 0.4981 - val_acc: 0.9306
Epoch 11/50
44/43 [==============================] - 452s 10s/step - loss: 0.5654 - acc: 0.7088 - val_loss: 0.4855 - val_acc: 0.9444
Epoch 12/50
44/43 [==============================] - 458s 10s/step - loss: 0.5046 - acc: 0.7616 - val_loss: 0.4740 - val_acc: 0.9444
Epoch 13/50
44/43 [==============================] - 465s 11s/step - loss: 0.5002 - acc: 0.7808 - val_loss: 0.4633 - val_acc: 0.9444
Epoch 14/50
44/43 [==============================] - 459s 10s/step - loss: 0.4694 - acc: 0.7924 - val_loss: 0.4514 - val_acc: 0.9583
Epoch 15/50
44/43 [==============================] - 463s 11s/step - loss: 0.4482 - acc: 0.8184 - val_loss: 0.4432 - val_acc: 0.9444
Epoch 16/50
44/43 [==============================] - 456s 10s/step - loss: 0.4326 - acc: 0.8343 - val_loss: 0.4330 - val_acc: 0.9583
Epoch 17/50
44/43 [==============================] - 454s 10s/step - loss: 0.4291 - acc: 0.8303 - val_loss: 0.4233 - val_acc: 0.9583
Epoch 18/50
44/43 [==============================] - 457s 10s/step - loss: 0.4060 - acc: 0.8376 - val_loss: 0.4145 - val_acc: 0.9583
Epoch 19/50
44/43 [==============================] - 457s 10s/step - loss: 0.3933 - acc: 0.8686 - val_loss: 0.4069 - val_acc: 0.9583
Epoch 20/50
44/43 [==============================] - 455s 10s/step - loss: 0.3786 - acc: 0.8684 - val_loss: 0.3985 - val_acc: 0.9583
Epoch 21/50
44/43 [==============================] - 456s 10s/step - loss: 0.3661 - acc: 0.8774 - val_loss: 0.3902 - val_acc: 0.9583
Epoch 22/50
44/43 [==============================] - 454s 10s/step - loss: 0.3493 - acc: 0.8956 - val_loss: 0.3833 - val_acc: 0.9583
Epoch 23/50
44/43 [==============================] - 456s 10s/step - loss: 0.3355 - acc: 0.9065 - val_loss: 0.3765 - val_acc: 0.9444
Epoch 24/50
44/43 [==============================] - 456s 10s/step - loss: 0.3332 - acc: 0.9053 - val_loss: 0.3680 - val_acc: 0.9583
Epoch 25/50
44/43 [==============================] - 457s 10s/step - loss: 0.3236 - acc: 0.9160 - val_loss: 0.3625 - val_acc: 0.9444
Epoch 26/50
44/43 [==============================] - 458s 10s/step - loss: 0.3097 - acc: 0.9181 - val_loss: 0.3559 - val_acc: 0.9583
Epoch 27/50
44/43 [==============================] - 469s 11s/step - loss: 0.2915 - acc: 0.9242 - val_loss: 0.3517 - val_acc: 0.9444
Epoch 28/50
44/43 [==============================] - 473s 11s/step - loss: 0.2832 - acc: 0.9368 - val_loss: 0.3454 - val_acc: 0.9583
Epoch 29/50
44/43 [==============================] - 468s 11s/step - loss: 0.2747 - acc: 0.9418 - val_loss: 0.3416 - val_acc: 0.9583
Epoch 30/50
44/43 [==============================] - 470s 11s/step - loss: 0.2627 - acc: 0.9508 - val_loss: 0.3350 - val_acc: 0.9722
Epoch 31/50
44/43 [==============================] - 469s 11s/step - loss: 0.2517 - acc: 0.9638 - val_loss: 0.3311 - val_acc: 0.9722
Epoch 32/50
44/43 [==============================] - 470s 11s/step - loss: 0.2517 - acc: 0.9484 - val_loss: 0.3266 - val_acc: 0.9722
Epoch 33/50
44/43 [==============================] - 490s 11s/step - loss: 0.2348 - acc: 0.9560 - val_loss: 0.3211 - val_acc: 0.9722
Epoch 34/50
44/43 [==============================] - 461s 10s/step - loss: 0.2427 - acc: 0.9517 - val_loss: 0.3158 - val_acc: 0.9722
Epoch 35/50
44/43 [==============================] - 467s 11s/step - loss: 0.2260 - acc: 0.9616 - val_loss: 0.3109 - val_acc: 0.9722
Epoch 36/50
44/43 [==============================] - 459s 10s/step - loss: 0.2243 - acc: 0.9706 - val_loss: 0.3064 - val_acc: 0.9722
Epoch 37/50
44/43 [==============================] - 456s 10s/step - loss: 0.2099 - acc: 0.9687 - val_loss: 0.3029 - val_acc: 0.9722
Epoch 38/50
44/43 [==============================] - 457s 10s/step - loss: 0.2094 - acc: 0.9733 - val_loss: 0.2994 - val_acc: 0.9722
Epoch 39/50
44/43 [==============================] - 465s 11s/step - loss: 0.2014 - acc: 0.9744 - val_loss: 0.2941 - val_acc: 0.9722
Epoch 40/50
44/43 [==============================] - 465s 11s/step - loss: 0.1924 - acc: 0.9709 - val_loss: 0.2915 - val_acc: 0.9722
Epoch 41/50
44/43 [==============================] - 457s 10s/step - loss: 0.1908 - acc: 0.9735 - val_loss: 0.2897 - val_acc: 0.9722
Epoch 42/50
44/43 [==============================] - 463s 11s/step - loss: 0.1864 - acc: 0.9709 - val_loss: 0.2861 - val_acc: 0.9722
Epoch 43/50
44/43 [==============================] - 464s 11s/step - loss: 0.1787 - acc: 0.9773 - val_loss: 0.2822 - val_acc: 0.9722
Epoch 44/50
44/43 [==============================] - 468s 11s/step - loss: 0.1820 - acc: 0.9744 - val_loss: 0.2794 - val_acc: 0.9722
Epoch 45/50
44/43 [==============================] - 469s 11s/step - loss: 0.1646 - acc: 0.9818 - val_loss: 0.2763 - val_acc: 0.9722
Epoch 46/50
44/43 [==============================] - 469s 11s/step - loss: 0.1689 - acc: 0.9820 - val_loss: 0.2730 - val_acc: 0.9722
Epoch 47/50
44/43 [==============================] - 471s 11s/step - loss: 0.1495 - acc: 0.9879 - val_loss: 0.2711 - val_acc: 0.9722
Epoch 48/50
44/43 [==============================] - 469s 11s/step - loss: 0.1578 - acc: 0.9858 - val_loss: 0.2676 - val_acc: 0.9722
Epoch 49/50
44/43 [==============================] - 462s 10s/step - loss: 0.1557 - acc: 0.9858 - val_loss: 0.2643 - val_acc: 0.9722
Epoch 50/50
44/43 [==============================] - 454s 10s/step - loss: 0.1501 - acc: 0.9794 - val_loss: 0.2612 - val_acc: 0.9722
This is my predict.py:
from keras.models import load_model
import cv2
import os
import numpy as np
from keras.preprocessing import image
TEST_DIR = 'v/'
pdr = 0
nonPdr = 0
model = load_model('model.h5')
def normalize(x, mean, std):
    x[..., 0] -= mean[0]
    x[..., 1] -= mean[1]
    x[..., 2] -= mean[2]
    x[..., 0] /= std[0]
    x[..., 1] /= std[1]
    x[..., 2] /= std[2]
    return x
for filename in os.listdir(r'v/'):
    if filename.endswith(".jpg") or filename.endswith(".ppm") or filename.endswith(".jpeg") or filename.endswith(".png"):
        ImageCV = cv2.resize(cv2.imread(os.path.join(TEST_DIR) + filename), (224, 224))
        ImageCV = cv2.addWeighted(ImageCV, 4, cv2.GaussianBlur(ImageCV, (0, 0), 10), -4, 128)
        ImageCV = np.asarray(ImageCV)
        ImageCV = ImageCV.astype('float32')
        ImageCV /= 255
        x = ImageCV
        x = np.expand_dims(x, axis=0)
        x = normalize(x, [0.23883381, 0.23883381, 0.23883381], [0.24483591, 0.24579705, 0.2510857])
        prob = model.predict(x)
        if prob <= 0.75:  # .75 = 80% | .70 = 79% >>>> .70 = 82% | .75 = 79%
            print("nonPDR >>>", filename)
            nonPdr += 1
        else:
            print("PDR >>>", filename)
            pdr += 1
        print(prob)
print("Number of retinas with PDR: ", pdr)
print("Number of retinas without PDR: ", nonPdr)
The problem is: after training reaches about 97% accuracy, all my predictions are wrong. For example, these 3 images should be PDR (class 1):
nonPDR >>> 16_left.jpeg
[[0.07062916]]
nonPDR >>> 16_right.jpeg
[[0.09434311]]
nonPDR >>> 217_left.jpeg
[[0.14126943]]
Even if I test on the same images I used for training, the model doesn't predict properly either.
I had already tried training without the Gaussian blur, but the accuracy was very poor.
What am I doing wrong? I appreciate your help!
A couple of things to try: I would suggest not using data augmentation until you have some confidence that your training process is working, even if the performance is not good initially. As a double check, you might want to directly do prediction right after the model.fit, using the training data, just to verify that the resulting accuracy is the same as what you got in training. You might have some small differences in the processing of the test data that is causing the network to behave poorly, so it would be a good first step to convince yourself that the training part is okay then you can focus on the test part. I hope this helps.
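As a concrete version of that sanity check, a small sketch that could be appended to train.py above (it reuses the model, data and labels variables from there): evaluate on the training data right after fitting and compare against the training accuracy Keras reported.
# Sanity check: accuracy on the raw (non-augmented) training data right after training.
# If this number is far below the training accuracy printed by fit_generator, the
# preprocessing applied at prediction time differs from what the model saw in training.
loss, acc = model.evaluate(data, np.array(labels), batch_size=32, verbose=0)
print("accuracy on training data:", acc)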
