TensorFlow image classification loss doesn't decrease - python

import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.models import load_model
# Both generators only rescale pixel values by 1/255; neither applies augmentation.
train = ImageDataGenerator(rescale=1 / 255)
validation = ImageDataGenerator(rescale=1 / 255)

# class_mode='categorical' makes the generators yield one-hot label vectors.
train_dataset = train.flow_from_directory(
    'raw-img/training',
    target_size=(200, 200),
    batch_size=3,
    class_mode='categorical',
)
# Build the validation set from the dedicated `validation` generator (it was
# previously built from `train`, leaving `validation` unused), so augmentation
# later added to `train` can never leak into the validation data.
validation_dataset = validation.flow_from_directory(
    'raw-img/validation',
    target_size=(200, 200),
    batch_size=3,
    class_mode='categorical',
)
# CNN classifier: six Conv2D + MaxPool2D stages on 200x200 RGB input, followed by
# a dense head with 10 outputs.
# Stage 1: 16 filters.
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(200, 200, 3),padding='same'),
tf.keras.layers.MaxPool2D(2, 2,padding='same'),
# Stage 2: 32 filters, then dropout with rate 0.6.
tf.keras.layers.Conv2D(32, (3, 3), activation='relu',padding='same'),
tf.keras.layers.MaxPool2D(2, 2),
tf.keras.layers.Dropout(rate=0.6),
# Stage 3: 64 filters, then dropout with rate 0.6.
tf.keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'),
tf.keras.layers.MaxPool2D(2, 2),
tf.keras.layers.Dropout(rate=0.6),
# Stage 4: 128 filters.
tf.keras.layers.Conv2D(128, (3, 3), activation='relu',padding='same'),
tf.keras.layers.MaxPool2D(2, 2),
# Stage 5: 128 filters.
tf.keras.layers.Conv2D(128, (3, 3), activation='relu',padding='same'),
tf.keras.layers.MaxPool2D(2, 2),
# Stage 6: 256 filters.
tf.keras.layers.Conv2D(256, (3, 3), activation='relu',padding='same'),
tf.keras.layers.MaxPool2D(2, 2,),
# Collapse the feature maps into one vector per image.
tf.keras.layers.Flatten(),
# Dense head.
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dense(256, activation='relu'),
# NOTE(review): sigmoid scores each of the 10 outputs independently. For
# single-label data with one-hot targets, softmax is the usual output
# activation; kept as posted because it is the subject of the question.
tf.keras.layers.Dense(10, activation='sigmoid'),
])
# model.summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
model.summary()
# NOTE(review): binary_crossentropy is kept as posted; with one-hot single-label
# targets, categorical_crossentropy (with a softmax output) is the usual pairing.
model.compile(
    loss='binary_crossentropy',
    optimizer=Nadam(learning_rate=0.003),
    # Was `metrics['accuracy']`: a positional argument after keyword arguments,
    # which is a SyntaxError.
    metrics=['accuracy'],
)
# The generators already yield batches of their own size, so batch_size is not
# passed to fit().
model_fit = model.fit(
    train_dataset,
    epochs=70,
    validation_data=validation_dataset,
    steps_per_epoch=len(train_dataset),
    validation_steps=len(validation_dataset),
)
# Final metrics are computed on the training generator, as in the original post.
loss, accuracy = model.evaluate(train_dataset)
print("Loss: ", loss)
print("Accuracy: ", accuracy)
Found 26179 images belonging to 10 classes.
Found 8196 images belonging to 10 classes.
Epoch 1/70
2909/2909 [==============================] - 1005s 345ms/step - loss: 0.3292 - accuracy: 0.1805 - val_loss: 0.3533 - val_accuracy: 0.0000e+00
Epoch 2/70
2909/2909 [==============================] - 645s 222ms/step - loss: 0.3167 - accuracy: 0.1758 - val_loss: 0.3654 - val_accuracy: 0.0000e+00
...
Epoch 8/70
2909/2909 [==============================] - 445s 153ms/step - loss: 0.3160 - accuracy: 0.1835 - val_loss: 0.3666 - val_accuracy: 0.0000e+00
Epoch 9/70
2909/2909 [==============================] - ETA: 0s - loss: 0.3146 - accuracy: 0.1867
What is the problem with this code? Accuracy is stuck between 0.1800 and 0.1900 and the loss doesn't decrease.

A couple of issues:
Since your images are 200 x 200 you could use a larger batch size, say 32.
In train = ImageDataGenerator(rescale=1 / 255) you might want to consider
adding some image augmentation such as horizontal_flip=True. If you do add
augmentation, do not use train in your code for the validation data set; use
validation instead.
In your model you have the code tf.keras.layers.Dense(10, activation='sigmoid').
Unless you are doing multi-label classification, change the activation to 'softmax'
(and use loss='categorical_crossentropy' to match the one-hot labels).
Your model will likely over-fit. I would add at least one dropout layer to
your model, like tf.keras.layers.Dropout(.3).
First make the above changes and look at the performance. If it starts to over-fit,
increase the dropout rate or reduce the number of nodes in the hidden layers.
If you are not satisfied with the accuracy you might get an improvement by
using an adjustable learning rate. You can do this with the Keras callback
ReduceLROnPlateau. Documentation is here. My suggested code is
rlronp=tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=.5,patience=1,verbose=1)
I also recommend you use the Keras callback EarlyStopping. It monitors
the validation loss and stops training if it fails to decrease after 'patience' epochs.
Documentation is here. Set restore_best_weights=True so that if this
callback activates it will load your model with the weights from the epoch with
the lowest validation loss. My suggested code is
es=tf.keras.callbacks.EarlyStopping(monitor="val_loss",patience=4,verbose=1,
restore_best_weights=True)
if you use the callbacks then in model.fit set callbacks=[rlronp,es]

Related

Kaggle problem for dogs and cats with kernal using ANN is not working for me

I have written artificial neural network code to solve the Kaggle Dogs vs. Cats kernel problem, but during training it shows loss=nan and poor accuracy. My code can be found at https://www.kaggle.com/dilipkumar2k6/dogs-vs-cats-with-new-kernel/notebook
Following are details on error
from tensorflow import keras
# Fully connected network (ANN) applied directly to the raw image pixels.
ann = keras.Sequential([
keras.layers.Flatten(input_shape=(IMG_SIZE, IMG_SIZE, 3)), # flatten each 3-D image to a 1-D vector
keras.layers.Dense(3000, activation='relu'), # hidden layer, 3000 units
keras.layers.Dense(1000, activation='relu'), # hidden layer, 1000 units
keras.layers.Dense(100, activation='relu'), # hidden layer, 100 units
# NOTE(review): two sigmoid outputs are paired with sparse_categorical_crossentropy
# below; softmax over 2 units, or a single sigmoid unit with binary cross-entropy,
# is the conventional pairing.
keras.layers.Dense(2, activation='sigmoid')
])
# NOTE(review): how train_X is scaled is not shown here; unscaled 0-255 inputs
# with plain SGD are a common source of loss=nan — confirm.
ann.compile(optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
ann.fit(train_X, train_y, epochs=10)
Error
Epoch 1/10
438/438 [==============================] - 2s 2ms/step - loss: nan - accuracy: 5.0000e-04
Epoch 2/10
438/438 [==============================] - 1s 2ms/step - loss: nan - accuracy: 0.0000e+00
Using a sigmoid activation function in your output layer seems a bit strange to me when using sparse_categorical_crossentropy (although it could also work). Anyway, I think you should consider changing this line:
keras.layers.Dense(2, activation='sigmoid')
to
keras.layers.Dense(1, activation='sigmoid')
and use tf.keras.losses.BinaryCrossentropy().
Or change your activation function to softmax and leave the rest as it is.
You should also consider redesigning your model and using at least one tf.keras.layers.Conv2D layer before flattening the data. Here is a working example:
import tensorflow_datasets as tfds
import tensorflow as tf
ds, ds_info = tfds.load('cats_vs_dogs', split='train', with_info=True)
normalization_layer = tf.keras.layers.Rescaling(1./255)


def resize_inputs(data):
    """Turn one `cats_vs_dogs` example dict into an `(image, label)` pair.

    The image is rescaled by 1/255 and then resized to 64x64 using
    nearest-neighbour interpolation; the label is returned unchanged.

    Args:
        data: mapping with the keys 'image' and 'label'.

    Returns:
        Tuple `(images, labels)`.
    """
    images, labels = data['image'], data['label']
    images = tf.image.resize(
        normalization_layer(images),
        [64, 64],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )
    return images, labels


# Preprocess every example, then group the results into batches of 64.
ds = ds.map(resize_inputs).batch(64)
# Small binary classifier: one convolution followed by a dense head.
ann = tf.keras.Sequential([
# 64 filters of size 3x3; no activation argument is passed to this layer.
tf.keras.layers.Conv2D(64, kernel_size=3, input_shape=(64, 64, 3)),
tf.keras.layers.Flatten(), # flatten the 3-D feature maps to a 1-D vector
tf.keras.layers.Dense(200, activation='relu'), # hidden layer, 200 units
tf.keras.layers.Dense(100, activation='relu'), # hidden layer, 100 units
tf.keras.layers.Dense(50, activation='relu'), # hidden layer, 50 units
# A single sigmoid unit, paired with BinaryCrossentropy below.
tf.keras.layers.Dense(1, activation='sigmoid')
])
ann.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
ann.fit(ds, epochs=10)
Epoch 1/10
364/364 [==============================] - 58s 140ms/step - loss: 0.8692 - accuracy: 0.5902
Epoch 2/10
364/364 [==============================] - 51s 141ms/step - loss: 0.6155 - accuracy: 0.6559
Epoch 3/10
364/364 [==============================] - 51s 141ms/step - loss: 0.5708 - accuracy: 0.7009
Epoch 4/10
364/364 [==============================] - 51s 140ms/step - loss: 0.5447 - accuracy: 0.7262
...
You can experiment with this example and find out which combination of activation function, loss function, and number of output nodes works best for you.

Why tf.keras loss becomes NaN when number of train images increases from 100 to 9000?

I am following a CNN example in here.
Here are my code to prepare the CNN model:
# CNN for 100x100 RGB images: three Conv2D layers with two pooling steps,
# followed by a dense head that outputs 7 raw scores (no activation).
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# NOTE(review): sparse categorical cross-entropy expects integer labels in
# [0, 7) for 7 outputs. If the labels really are 1..7, label 7 is out of range,
# which can produce a NaN loss; remapping to 0..6 (or using 8 outputs) is worth
# checking — TODO confirm.
model.add(layers.Dense(7)) # 7 outputs for 7 classes which are 1, 2, 3, ..., 7
And this is how I train the model:
# from_logits=True tells the loss to expect raw, un-normalised scores.
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# Train for 10 epochs, validating on validationGenerator.
history = model.fit(trainGenerator, epochs=10,
validation_data=validationGenerator)
When trainGenerator has 80 images & validationGenerator has 20 images, everything is ok for the val_loss & loss like below from one of the epoch
Epoch 1/10
3/3 [==============================] - 1s 736ms/step - loss: 1.8475 - accuracy: 0.2500 - val_loss: 2.4287 - val_accuracy: 0.5500
When the trainGenerator got 9817 images & validationGenerator got 2454 images, the val_loss & loss become NaN
Epoch 1/10
307/307 [==============================] - 20s 63ms/step - loss: nan - accuracy: 0.0090 - val_loss: nan - val_accuracy: 0.0000e+00
The batch size in trainGenerator & validationGenertor is 32 (default value) in both scenarios above.
I have rescaled the images when I import the images to trainGenerator & validationGenertor using the
# One generator serves both subsets: pixels are rescaled by 1/255 and
# validation_split=0.2 reserves a fraction of the data for the 'validation' subset.
trainDataGen=ImageDataGenerator(
rescale=1./255,
validation_split=0.2
)
After that I create trainGenerator using flow_from_dataframe like below:
# Training subset: file names come from train_df["filename"] (relative to
# trainingFilepath) and targets from train_df["label"].
trainGenerator = trainDataGen.flow_from_dataframe(
dataframe=train_df,
directory=trainingFilepath,
x_col="filename",
y_col="label",
target_size=(100,100),
# NOTE(review): with class_mode="raw" the label column is presumably passed
# through without re-indexing, so its values must already suit the loss — verify.
class_mode="raw",
subset="training"
)
validationGenerator is created using the code above by replacing subset with validation
A similar question has been asked, but it does not apply to my case: the problem persists when the number of training images increases, and I use sparse_categorical_crossentropy.
Why do I get NaN in val_loss & val_accuracy is 0?
How do I fix it so it can work with more images in the train set?
What I would do is use categorical cross-entropy. In your generators change class_mode to 'categorical'. In model.compile set loss='categorical_crossentropy'. I am not sure this will fix it, but it can't hurt. It could be that when you use more images there are some NaN
labels. Check your dataframe for NaNs.

Neural network errors don't change

I am training a model using TensorFlow. I was getting weird results when looking at my model performance. I built two models to classify images, one using a CNN and the other using a traditional ANN. Below is the code setup for each of them.
# CNN model
# NOTE(review): as posted, every layer after Reshape is commented out, so this
# model contains only the Reshape layer — presumably not what was actually
# trained; confirm against the original code.
model = Sequential()
model.add(Reshape((20, 60, 3)))
#model.add(Conv2D(128, (5, 5), (2, 2), activation='elu'))
#model.add(Conv2D(64, (4, 4), (2, 2), activation='elu'))
#model.add(Flatten())
#model.add(Dense(1, activation = 'elu'))
#model.add(Dense(25, activation = 'elu'))
#model.add(Dense(10, activation = 'elu'))
#model.add(Dense(1))
# NOTE(review): `opt` is created but not passed to compile(); a new Adam
# optimizer is used instead.
opt = keras.optimizers.RMSprop(lr=0.0009, decay=1e-6)
# Regression setup: mean squared error loss, mean absolute error as the metric.
model.compile(Adam(lr = 0.0001), loss='mse', metrics = ['mae'])
history = model.fit(X_train, y_train, epochs = 20, validation_data=(X_val, y_val), batch_size= 32)
# ANN model
# NOTE(review): as posted, every layer after Reshape is commented out here too,
# so this model contains only the Reshape layer — confirm against the original code.
model = Sequential()
model.add(Reshape((20, 60, 3)))
#model.add(Flatten())
#model.add(Dense(10, activation = 'elu'))
#model.add(Dense(1))
# NOTE(review): `opt` is created but not passed to compile(); a new Adam
# optimizer is used instead.
opt = keras.optimizers.RMSprop(lr=0.0009, decay=1e-6)
# Regression setup: mean squared error loss, mean absolute error as the metric.
model.compile(Adam(lr = 0.0001), loss='mse', metrics = ['mae'])
history = model.fit(X_train, y_train, epochs = 20, validation_data=(X_val, y_val), batch_size= 32)
However, the problem is that I am getting nearly identical loss, and mean absolute error metrics using both of these models, when I am expecting the mae to be MUCH higher for the 2nd model. Does anyone know why this is happening? Could it be something wrong with my input data?
P.S. This network is trying to do regression to predict the steering angle for a self-driving RC car from an image.
EDIT:
Here is the ending error with the CNN:
Epoch 20/20 113/113 [==============================] - 1s 5ms/step - loss: 0.0382 - mae: 0.1582 - val_loss: 0.0454 - val_mae: 0.1727 dict_keys(['loss', 'mae', 'val_loss', 'val_mae'])
Here is the ending error with the ANN:
Epoch 20/20 113/113 [==============================] - 0s 3ms/step - loss: 0.0789 - mae: 0.2187 - val_loss: 0.0854 - val_mae: 0.2300 dict_keys(['loss', 'mae', 'val_loss', 'val_mae'])
I think the issue comes from your training data; try using a different dataset and check the results again.

keras classifier wrong evaluation while learning is great

have small dataset
Found 1836 images belonging to 2 classes.
Found 986 images belonging to 2 classes.
standard architecture of model
# One generator serves both subsets: pixels are rescaled by 1/255 and
# validation_split=0.35 reserves a fraction of the images for the 'validation' subset.
image_generator = ImageDataGenerator(
rescale=1./255,
validation_split=0.35
)
# NOTE(review): no batch_size is passed to either call, so the generator's
# default applies (32 according to the Keras docs), while the step counts
# computed further down in the post use BATCH_SIZE = 128 — confirm these agree.
train_data_gen = image_generator.flow_from_directory(
directory=directory,
target_size=(IMG_SHAPE, IMG_SHAPE),
subset='training',
)
val_data_gen = image_generator.flow_from_directory(
directory=directory,
target_size=(IMG_SHAPE, IMG_SHAPE),
subset='validation',
)
---
# CNN: four Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters), then a
# 512-unit dense layer and a 2-way softmax output.
model = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SHAPE, IMG_SHAPE, 3)),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D(2, 2),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dense(2, activation='softmax'),
])
# Softmax output paired with categorical cross-entropy.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy']
)
BATCH_SIZE = 128
EPOCHS = 7
# Sample counts reported by the two generators.
total_train, total_val = train_data_gen.samples, val_data_gen.samples
# Number of batches of BATCH_SIZE needed to cover each subset, rounded up.
steps_per_epoch = int(np.ceil(total_train / float(BATCH_SIZE)))
validation_freq = int(np.ceil(total_val / float(BATCH_SIZE)))
# NOTE(review): the value above is a batch count, but fit()'s validation_freq
# means "run validation every N epochs". With the 986 validation images reported
# in the post it evaluates to 8, which exceeds EPOCHS, so validation would never
# run. validation_steps looks like the intended argument — TODO confirm.
history = model.fit(
train_data_gen,
epochs=EPOCHS,
steps_per_epoch=steps_per_epoch,
validation_data=val_data_gen,
validation_freq=validation_freq
)
getting perfect metrics
Epoch 1/7
15/15 [==============================] - 66s 4s/step - loss: 1.0809 - accuracy: 0.4917
Epoch 2/7
15/15 [==============================] - 56s 4s/step - loss: 0.3475 - accuracy: 0.8729
Epoch 3/7
15/15 [==============================] - 60s 4s/step - loss: 0.1113 - accuracy: 0.9583
Epoch 4/7
15/15 [==============================] - 58s 4s/step - loss: 0.1987 - accuracy: 0.9109
Epoch 5/7
15/15 [==============================] - 59s 4s/step - loss: 0.1127 - accuracy: 0.9438
Epoch 6/7
15/15 [==============================] - 60s 4s/step - loss: 0.0429 - accuracy: 0.9854
Epoch 7/7
15/15 [==============================] - 49s 3s/step - loss: 0.0542 - accuracy: 0.9812
But when I evaluate it, the predictions are completely biased toward the first class.
It only works when I train for 1 epoch, but then the accuracy is poor.
eval code
def make_pred(model, labled_dataset, IMG_SHAPE, img_path) -> LabelName:
    """Return the class name `model` predicts for the image at `img_path`.

    Args:
        model: object whose `predict` method accepts a batch of images.
        labled_dataset: object exposing `class_indices`, a mapping whose keys
            are the class names (e.g. the training generator).
        IMG_SHAPE: side length, in pixels, that the image is resized to.
        img_path: path of the image file to classify.

    Returns:
        The key of `labled_dataset.class_indices` at the position of the
        highest-scoring model output.
    """
    def make_image(img_path):
        # Load at the model's input size and add a leading batch axis.
        # NOTE(review): the pixel values stay on the 0-255 scale, whereas the
        # generators shown with this post rescale by 1/255. The scaling at
        # prediction time should match training; left as posted because it is
        # the subject of the question.
        img = image.load_img(img_path, target_size=(IMG_SHAPE, IMG_SHAPE))
        img = image.img_to_array(img)
        return np.expand_dims(img, axis=0)

    # argmax over the class axis yields a one-element array for a one-image batch.
    pred_id = np.argmax(model.predict(make_image(img_path)), axis=1)
    # Relies on the key order of class_indices matching the class index order.
    all_labels = list(labled_dataset.class_indices.keys())
    return all_labels[int(pred_id[0])]
What is wrong with it?
Should I downsize the source image before evaluating it?
I believe you need to do two things: resize the images you wish to predict, then rescale them as you did for the training images. I also recommend that you set validation_freq=1 so that you can see how the validation loss and accuracy are trending. This allows you to see how your model is performing relative to over-fitting, etc. You can detect over-fitting if the training loss continues to decline but in later epochs your validation loss begins to increase. If you see over-fitting, add a Dropout layer after your 512-node dense layer. Documentation is here. Prediction accuracy should be close to the validation accuracy for the last epoch. I also recommend you consider using the Keras callback ModelCheckpoint. Documentation is here. Set it up to monitor validation loss and save the model with the lowest validation loss. Then load the saved model to do predictions. Finally, I find it effective to use an adjustable learning rate. The Keras callback ReduceLROnPlateau makes this easy to do. Documentation is here. Set it up to monitor validation loss. The callback will automatically reduce the learning rate by a factor (parameter factor) if the validation loss fails to decrease after a given number of epochs (parameter patience). I use factor=.5 and patience=1. This allows you to use a larger learning rate initially and have it decrease as needed, so convergence will be faster. One more thing: in your val_data_gen set shuffle=False so the validation images are processed in the same order each time.
The problem was validation_freq, which should have been
validation_steps. After that change we finally get val_accuracy, so
training validates properly.
On top of that, IMG_SHAPE wasn't the same for ImageDataGenerator and
the model's input_shape=(IMG_SHAPE, IMG_SHAPE, 3).
Using PIL for prediction may have helped too; it gives slightly different
results from keras.preprocessing.image.
def make_pred_PIL(model, labled_dataset, IMG_SHAPE, img_path) -> LabelName:
    """Return the class name `model` predicts for the image at `img_path`.

    Despite the name, the image is read with OpenCV (`cv2`), not PIL. It is
    converted to RGB, resized to `IMG_SHAPE` x `IMG_SHAPE`, and rescaled by
    1/255 so that it matches what the training generator
    (`ImageDataGenerator(rescale=1./255)`) fed to the model.

    Args:
        model: object whose `predict` method accepts a batch of images.
        labled_dataset: object exposing `class_indices`, a mapping whose keys
            are the class names (e.g. the training generator).
        IMG_SHAPE: side length, in pixels, that the image is resized to.
        img_path: path of the image file to classify.

    Returns:
        The key of `labled_dataset.class_indices` at the position of the
        highest-scoring model output.
    """
    img = cv2.imread(img_path)
    # OpenCV decodes to BGR channel order; the Keras loaders used for training
    # produce RGB, so the channels are swapped before prediction.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SHAPE, IMG_SHAPE))
    # Same 1/255 rescaling as the training generator.
    img = img.astype(np.float32) / 255.0
    # Add the leading batch axis expected by model.predict.
    batch = np.expand_dims(img, axis=0)
    # argmax over the class axis yields a one-element array for a one-image batch.
    pred_id = np.argmax(model.predict(batch), axis=1)
    # Relies on the key order of class_indices matching the class index order.
    all_labels = list(labled_dataset.class_indices.keys())
    return all_labels[int(pred_id[0])]

CNN Acc and Loss constant with custom image data

I am trying to train a simple CNN model for a binary classification task in Keras with a dataset of images I mined. The problem is that I am getting constant accuracy, val_accuracy and loss after a couple of epochs. Am I processing the data the wrong way? Or is it something in the model settings?
At the beginning I was using softmax as the final activation function and categorical crossentropy, and I was also using the to_categorical function on the labels.
After reading up on what usually causes this to happen, I decided to use sigmoid and binary_crossentropy instead and not use to_categorical. Still the problem persists, and I am starting to wonder whether the problem is my data (the two classes are too similar) or the way I am feeding the image arrays.
conkeras1 = []
# NOTE(review): the directory is listed as "/Matrices/" (absolute) but files are
# loaded from "Matrices/" (relative). Both are kept as posted — confirm they
# refer to the same directory.
pics = os.listdir("/Matrices/")
# I do this for the images of both classes, just keeping it short.
for pic_name in pics:
    img = image.load_img("Matrices/" + pic_name)
    conkeras1.append(img)
# conkeras2 holds the second class's images, built the same way (not shown in the post).
conkeras = conkeras1 + conkeras2
# Convert every loaded image to an array and stack them into one float32 array.
conkeras = np.array([image.img_to_array(x) for x in conkeras]).astype("float32")
conkeras = conkeras / 255  # I also tried normalizing with a z-score with no success
# Labels follow the concatenation order: 1 for the first class, 0 for the second.
yecs1 = [1] * len(conkeras1)
yecs2 = [0] * len(conkeras2)
y_train = yecs1 + yecs2
y_train = np.array(y_train).astype("float32")
# Binary classifier: two 'same'-padded convolutions with no pooling, then a
# dense head ending in a single sigmoid unit.
model = Sequential([
Conv2D(64, (3, 3), input_shape=conkeras.shape[1:], padding="same", activation="relu"),
Conv2D(32, (3, 3), activation="relu", padding="same"),
# NOTE(review): with the (975, 100, 100, 3) input reported in the post and no
# pooling, Flatten yields 100 * 100 * 32 = 320,000 features, so the Dense(500)
# layer below is very large — worth reviewing.
Flatten(),
Dense(500, activation="relu"),
#Dense(4096, activation="relu"),
Dense(1, activation="sigmoid")
])
# A single sigmoid output paired with binary cross-entropy.
model.compile(loss=keras.losses.binary_crossentropy,
optimizer=keras.optimizers.Adam(lr=0.001),
metrics=['accuracy'])
# No validation data is passed; `tensorboard` is a callback defined elsewhere.
history = model.fit(conkeras, y_train,
batch_size=32,
epochs=32, shuffle=True,
verbose=1,
callbacks=[tensorboard])
The output I get is this:
975/975 [==============================] - 107s 110ms/step - loss: 8.0022 - acc: 0.4800
Epoch 2/32
975/975 [==============================] - 99s 101ms/step - loss: 8.1756 - acc: 0.4872
Epoch 3/32
975/975 [==============================] - 97s 100ms/step - loss: 8.1756 - acc: 0.4872
Epoch 4/32
975/975 [==============================] - 97s 99ms/step - loss: 8.1756 - acc: 0.4872
and these are the shapes of the training set and labels
>>> conkeras.shape
(975, 100, 100, 3)
>>> y_train.shape
(975,)

Categories