Loss is NaN on image classification task - python

I'm trying to train a basic CNN on an image dataset that contains faces of celebrities, with a class assigned to each person. Given that there are about 10,000 classes, I used sparse_categorical_crossentropy rather than one-hot encoding the classes. As soon as the network starts training, the loss gets stuck at one number, and after several batches it goes to NaN. I tried different scalings of the images and a smaller network, but with no luck. Any clues on what might be causing the NaN?
Function that generates batches:
import cv2
import numpy as np
from sklearn.utils import shuffle  # assuming sklearn's two-array shuffle

def Generator(data, label, batch_size):
    url = "../input/celeba-dataset/img_align_celeba/img_align_celeba/"
    INPUT_SHAPE = (109, 109)
    i = 0
    while True:
        image_batch = []
        label_batch = []
        for b in range(batch_size):
            # Wrap around and reshuffle once every sample has been used
            if i == len(data):
                i = 0
                data, label = shuffle(data, label)
            sample = data[i]
            label_batch.append(label[i])
            i += 1
            image = cv2.resize(cv2.imread(url + sample), INPUT_SHAPE)
            image_batch.append(image.astype(float) / 255)
        yield (np.array(image_batch), np.array(label_batch))
The model:
import tensorflow as tf
from tensorflow.keras import layers, models

class CNN():
    def __init__(self, train, val, y_train, y_val, batch_size):
        ## Load the batch generators
        self.train_batch_gen = Generator(train, y_train, batch_size)
        self.val_batch_gen = Generator(val, y_val, batch_size)
        self.input_shape = (109, 109, 3)
        self.num_classes = len(np.unique(y_train))
        self.len_train = len(train)
        self.len_val = len(val)
        self.batch_size = batch_size
        self.model = self.buildModel()

    def buildModel(self):
        model = models.Sequential()
        model.add(layers.Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=self.input_shape))
        model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(96, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(192, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(256, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(160, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(320, (3, 3), activation='relu', padding="same"))
        model.add(layers.AveragePooling2D(pool_size=(4, 4)))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation='tanh'))
        model.add(layers.Dropout(rate=0.1))
        model.add(layers.Dense(self.num_classes, activation="softmax"))  # classification (output) layer
        opt = tf.keras.optimizers.Adam(learning_rate=0.00001)
        model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model

    def trainModel(self, epochs):
        self.model.fit_generator(generator=self.train_batch_gen,
                                 steps_per_epoch=int(self.len_train // self.batch_size),
                                 epochs=epochs,
                                 validation_data=self.val_batch_gen,
                                 validation_steps=int(self.len_val // self.batch_size))

In my case, I used sparse_categorical_crossentropy with labels numbered [1, 2, 3] (3 classes), and it produced NaNs from the start.
When I changed the labels from [1, 2, 3] to [0, 1, 2], the problem disappeared. sparse_categorical_crossentropy expects labels in the range [0, num_classes), so a label equal to num_classes falls outside the softmax output and can yield a NaN loss.
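A minimal sketch of that remapping, assuming the labels live in a NumPy array (the variable names here are illustrative):

import numpy as np

labels = np.array([1, 2, 3, 1, 2])  # 1-based labels, as in my case
labels_zero_based = labels - 1      # shift into the [0, num_classes) range

# For arbitrary, non-contiguous label values, build an explicit mapping:
lookup = {v: i for i, v in enumerate(np.unique(labels))}
labels_mapped = np.array([lookup[v] for v in labels])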

Not sure why you are seeing those NaNs. I suspect it has something to do with the tanh activation on your dense layer; I would replace it with relu. I also suggest using more neurons on that dense layer, because 128 is probably too small ahead of a 10,000-way output.
If I were you, I would also try a pre-trained model and/or a Siamese network.
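A rough sketch of the pre-trained route using tf.keras.applications (the backbone choice and head sizes are illustrative, not tuned):

import tensorflow as tf

base = tf.keras.applications.ResNet50(include_top=False, weights='imagenet',
                                      input_shape=(109, 109, 3), pooling='avg')
base.trainable = False  # first train only the new classification head

model = tf.keras.Sequential([
    base,
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(10000, activation='softmax'),
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])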

This looks like an exploding-gradients problem. I would recommend checking how the weights and gradients vary during training. See this: https://github.com/keras-team/keras/issues/2226
Check https://www.dlology.com/blog/how-to-deal-with-vanishingexploding-gradients-in-keras/ for how to spot the exploding-gradient problem and for solutions to it. Also try Xavier (Glorot) initialization in your dense layers to prevent exploding gradients.
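Two common mitigations, sketched here for illustration against the model from the question: Glorot (Xavier) initialization on the dense layer (note it is already the Keras default for Dense) and gradient clipping on the optimizer (the clip threshold below is a typical starting point, not a tuned value):

dense = layers.Dense(128, activation='relu',
                     kernel_initializer=tf.keras.initializers.GlorotUniform())

# clipnorm rescales any gradient whose L2 norm exceeds 1.0
opt = tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=1.0)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])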

Related

Get softmax output and raw output of the last layer of a model

When creating a neural network for image classification, I want to get both the classification and the raw output of the last layer, to determine whether the image really contains one of the objects I want to classify. If it does not, the raw output should contain very low values for all classes; but if the image really contains one of the objects, the raw output should have a high value for one of the neurons.
Assuming I have the following code:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(80, 80, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(4, activation='softmax'))
How would I get the raw output of the last dense layer?
You can use the functional API and implement your model in the following way:
inputs = tf.keras.Input(shape=(80, 80, 3))
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Flatten()(x)
# here you can get raw output
logits = tf.keras.layers.Dense(4)(x)
model = tf.keras.Model(
    inputs=inputs,
    outputs={
        'logits': logits,
        'predictions': tf.nn.softmax(logits)
    }
)
model.summary()
After that, your model will have two outputs in dictionary format. Beware that you can't pass a single loss function like categorical_crossentropy, because it would be applied to both outputs. You need to pass a dictionary to the loss argument of compile to specify the loss for each output. For example:
model.compile(
    optimizer='adam',
    loss={
        # ignore the loss on the logits output
        'logits': lambda y_true, y_pred: 0.0,
        'predictions': tf.keras.losses.CategoricalCrossentropy()
    })
And your fit would look like this:
model.fit(
    x_train,
    {
        'logits': y_train,
        'predictions': y_train
    },
    epochs=10
)
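At inference time the model then returns both tensors; a small usage sketch (x_test is a placeholder name for your input batch):

outputs = model.predict(x_test)
raw = outputs['logits']         # unbounded scores from the last Dense layer
probs = outputs['predictions']  # softmax-normalised class probabilities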

Tensorflow/Keras model output is constant

I am trying to train a CNN using Keras. The input is a 128x128x3 RGB image and the output is a single value between 0 and 1 (this is not a classifier model). I have normalised the input. Initially, my model was achieving some reasonable results, getting the mean absolute error below 0.1. As I tried to tweak the model slightly, I found the loss would plateau very quickly at around 0.23. I investigated further and found that it was outputting the same value for every input.
So I reverted my code back to when it was working, but it was no longer working. I eventually found that about 90% of the time it gets stuck at this local minimum, outputting a constant value (which I suspect is the mean of the training reference values, 0.39). The other 10% of the time it behaves nicely and regresses down to an error below 0.1. So it randomly gives qualitatively different behaviour, and the desired result only rarely. The strange thing is that I swear it was consistently working before.
I have tried:
Changing the input size
Increasing/decreasing the learning rate by factor of 10
Removing a couple of dense layers
Changing 'relu' to 'leaky relu'
Increasing/removing dropout
def load_data(dir):
    csv_data = get_csv_data()
    xs = []
    ys = []
    for (name, y) in csv_data:
        path = DIR + dir + "/" + name
        img = tf.keras.preprocessing.image.load_img(path)
        xs.append(tf.keras.preprocessing.image.img_to_array(img) * (1 / 255.0))
        ys.append(normalize_output(float(y)))
    return np.array(xs).reshape(len(csv_data), IMAGE_DIM, IMAGE_DIM, 3), np.array(ys).reshape(len(csv_data), 1)

def gen_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(16, activation='sigmoid'))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss=keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[keras.metrics.MeanAbsoluteError()])
    return model

def run():
    model = gen_model()
    xs, ys = load_data("output")
    generator = tf.keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
                                                                samplewise_center=False,
                                                                featurewise_std_normalization=False,
                                                                samplewise_std_normalization=False,
                                                                validation_split=0.1,
                                                                rotation_range=12,
                                                                horizontal_flip=True,
                                                                vertical_flip=True)
    model.fit(generator.flow(xs, ys, batch_size=32, shuffle=True),
              steps_per_epoch=len(xs) / 32,
              epochs=10,
              use_multiprocessing=False)
I rearranged the activations on the layers (the intermediate Dense(16) with a sigmoid, followed by another LeakyReLU, saturates easily; keeping relu inside the network and sigmoid only on the output avoids that). Please give it a try:
def gen_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss=keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[keras.metrics.MeanAbsoluteError()])
    return model

I have 10,000 images in vector form; how do I convert them for my convolutional neural network?

I am new to convolutional neural networks. Instead of getting my data in image format, I have been given a flattened image matrix of shape [10000 x 784], meaning 10,000 images of size 28x28.
Given that one image is 28x28, how should I feed the data matrix to the input of my CNN?
My model is:
model = models.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28,28,1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
#model.add(layers.Flatten())
model.add(layers.Dense(2500, activation='relu'))
model.add(layers.Dense(2500, activation='relu'))
model.add(layers.Dense(1, activation='relu'))
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae', 'mse'])
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
# Fit the model
history = model.fit(x_trained, y_train, epochs=7000, validation_split=0.2, shuffle=True, verbose=1, callbacks=[callback])
I get an error at model.fit.
P.S.: I am doing regression, and for every image I have one value as output.
Begin with a Reshape layer:
model = models.Sequential()
model.add(layers.Reshape((28, 28, 1), input_shape=(784,)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# ...
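Alternatively, reshape the data matrix itself before calling fit; a minimal sketch, assuming x_trained is the [10000 x 784] NumPy matrix from the question:

# (10000, 784) -> (10000, 28, 28, 1): height, width, single channel
x_trained = x_trained.reshape(-1, 28, 28, 1)
# model.fit(x_trained, y_train, ...) then matches input_shape=(28, 28, 1)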

Managing Reshape correctly to build RNN from a previous CNN

I am currently trying to build a CRNN with Keras, and when reshaping between the CNN and the LSTM I had some trouble finding the correct input dimensions for the LSTM. After some debugging, I found a field in my model object called output_shape whose value was (3, 1, 224), and I passed it to the Reshape layer as the 2D shape (3, 224). Everything worked fine, but did I do this correctly? What is the math behind this, and what can I do next time to discover this size without debugging?
def CRNN(blockSize, blockCount, inputShape, trainGen, testGen, epochs):
    model = Sequential()

    # Conv layers
    channels = 32
    for i in range(blockCount):
        for j in range(blockSize):
            if (i, j) == (0, 0):
                conv = Conv2D(channels, kernel_size=(5, 5),
                              input_shape=inputShape, padding='same')
            else:
                conv = Conv2D(channels, kernel_size=(5, 5), padding='same')
            model.add(conv)
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(0.15))
            if j == blockSize - 2:
                channels += 32
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(Dropout(0.15))

    # Feature aggregation across time
    model.add(Reshape((3, 224)))

    # LSTM layer
    model.add(Bidirectional(LSTM(200), merge_mode='ave'))
    model.add(Dropout(0.5))

    # Linear classifier
    model.add(Dense(4, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])  # F1?
    model.fit_generator(trainGen,
                        validation_data=testGen,
                        steps_per_epoch=trainGen.x.size // 20,
                        validation_steps=testGen.x.size // 20,
                        epochs=epochs, verbose=1)
    return model

# Function call
model = CRNN(4, 6, (140, 33, 1), trainGen, testGen, 1)
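To discover that size without debugging, you can query the partially built model: a Sequential model whose input shape is known exposes model.output_shape after every add, reflecting all layers so far. A small sketch (hypothetical placement inside CRNN, right after the conv blocks and before the Reshape):

# e.g. prints (None, 3, 1, 224): batch axis, height, width, channels
print(model.output_shape)

# Reshape must preserve the element count (batch axis excluded),
# so (3, 1, 224) may legally become (3, 224) or (672,)
_, h, w, c = model.output_shape
model.add(Reshape((h, w * c)))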

Keras Convolutional Autoencoder blank output

Quick disclaimer: I'm pretty new to Keras, machine learning, and programming in general.
I'm trying to create a basic autoencoder for (currently) a single image. While it seems to run just fine, the output is just a white image. Here's what I've got:
img_height, img_width = 128, 128
input_img = '4.jpg'
output_img = '5.jpg'
# load image
x = load_img(input_img)
x = img_to_array(x) # array with shape (128, 128, 3)
x = x.reshape((1,) + x.shape) # array with shape (1, 128, 128, 3)
# define input shape
input_shape = (img_height, img_width, 3)
model = Sequential()
# encoding
model.add(Conv2D(128, (3, 3), activation='relu', input_shape=input_shape,
                 padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
# decoding
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D(size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D(size=(2,2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(3, (3, 3), activation='sigmoid', padding='same'))
model.compile(loss='binary_crossentropy', optimizer='adam')
print(model.summary())
checkpoint = ModelCheckpoint("autoencoder-loss-{loss:.4f}.hdf5", monitor='loss', verbose=0, save_best_only=True, mode='min')
model.fit(x, x, epochs=10, batch_size=1, verbose=1, callbacks=[checkpoint])
y = model.predict(x)
y = y[0, :, :, :]
y = array_to_img(y)
save_img(output_img, y)
I've looked at a handful of tutorials for reference, but I still can't figure out what my issue is.
Any guidance/suggestions/help would be greatly appreciated.
Thanks!
This solved the problem. The code was just missing:
x = x.astype('float32') / 255.
astype('float32') is a NumPy array method that converts the values in the array to floats; the result is then divided by 255.
RGB values are stored as 8-bit integers, so dividing by 255 (2^8 - 1) represents each colour channel as a decimal value between 0.0 and 1.0, matching the [0, 1] range of the sigmoid output layer. Without the scaling, the network is asked to reproduce values up to 255 with an output capped at 1, and the reconstruction saturates to white.
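In context, the scaling belongs right after the image is loaded; a minimal sketch of the corrected preprocessing from the question:

x = img_to_array(load_img(input_img))  # uint8 values in [0, 255]
x = x.reshape((1,) + x.shape)          # add the batch axis
x = x.astype('float32') / 255.         # scale to [0.0, 1.0] for the sigmoid output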
