I'm trying to train a basic CNN on an image dataset containing faces of celebrities, with a class assigned to each person. Given that there are about 10,000 classes, I used sparse_categorical_crossentropy rather than one-hot encoding the classes. However, as soon as the network starts training, the loss gets stuck at one number, and after several batches it goes to NaN. I tried different scalings of the images and a smaller network, but with no luck. Any clues on what might be causing the NaN?
Function that generates batches:
def Generator(data, label, batch_size):
    url = "../input/celeba-dataset/img_align_celeba/img_align_celeba/"
    INPUT_SHAPE = (109, 109)
    i = 0
    while True:
        image_batch = []
        label_batch = []
        for b in range(batch_size):
            if i == len(data):
                i = 0
                data, label = shuffle(data, label)
            sample = data[i]
            label_batch.append(label[i])
            i += 1
            image = cv2.resize(cv2.imread(url + sample), INPUT_SHAPE)
            image_batch.append((image.astype(float)) / 255)
        yield (np.array(image_batch), np.array(label_batch))
The model:
class CNN():
    def __init__(self, train, val, y_train, y_val, batch_size):
        ## Load the batch generators
        self.train_batch_gen = Generator(train, y_train, batch_size)
        self.val_batch_gen = Generator(val, y_val, batch_size)
        self.input_shape = (109, 109, 3)
        self.num_classes = len(np.unique(y_train))
        self.len_train = len(train)
        self.len_val = len(val)
        self.batch_size = batch_size
        self.model = self.buildModel()

    def buildModel(self):
        model = models.Sequential()
        model.add(layers.Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=self.input_shape))
        model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(96, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(192, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(256, (3, 3), activation='relu', padding="same"))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(160, (3, 3), activation='relu', padding="same"))
        model.add(layers.Conv2D(320, (3, 3), activation='relu', padding="same"))
        model.add(layers.AveragePooling2D(pool_size=(4, 4)))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation='tanh'))
        model.add(layers.Dropout(rate=0.1))
        model.add(layers.Dense(self.num_classes, activation="softmax"))  # classification (output) layer
        opt = tf.keras.optimizers.Adam(learning_rate=0.00001)
        model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model

    def trainModel(self, epochs):
        self.model.fit_generator(generator=self.train_batch_gen,
                                 steps_per_epoch=int(self.len_train // self.batch_size),
                                 epochs=epochs,
                                 validation_data=self.val_batch_gen,
                                 validation_steps=int(self.len_val // self.batch_size))
In my case, I used sparse_categorical_crossentropy with labels numbered [1, 2, 3] (3 classes), and it produced NaNs from the start.
When I changed the labels from [1, 2, 3] to [0, 1, 2], the problem disappeared: sparse_categorical_crossentropy expects labels in the range [0, num_classes), so a label equal to num_classes indexes out of range.
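If your labels start at 1 (or are arbitrary identity IDs, as in CelebA), a minimal remapping sketch, assuming a NumPy array y holding the raw labels:
unique_ids = np.unique(y)  # sorted unique raw label values
id_to_index = {v: k for k, v in enumerate(unique_ids)}  # raw label -> 0..num_classes-1
y = np.array([id_to_index[v] for v in y])  # 0-based labels, as the loss expects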
Not sure why you are seeing those NaNs. I suspect it has something to do with the tanh activation on your dense layer; I would replace it with relu. I also suggest using more neurons in that dense layer, because 128 is probably too small for a 10,000-class output.
If I were you, I would also try a pre-trained model and/or Siamese networks.
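As a rough sketch of that change to the head of your model (the 1024 width is an illustrative guess, not a tuned value):
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))  # wider relu layer instead of Dense(128, activation='tanh')
model.add(layers.Dropout(rate=0.1))
model.add(layers.Dense(self.num_classes, activation="softmax"))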
This looks like an exploding-gradients problem. I would recommend checking how the weights and gradients vary during training. See this: https://github.com/keras-team/keras/issues/2226
Check https://www.dlology.com/blog/how-to-deal-with-vanishingexploding-gradients-in-keras/ for how to spot the exploding-gradient problem and for solutions to it. Also try Xavier initialization in your dense layers to prevent exploding gradients.
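A minimal sketch of both mitigations in Keras (note glorot_uniform is already the Keras default for Dense; the clipnorm value of 1.0 is an assumption to tune):
# Xavier (Glorot) initialization on the dense layer
model.add(layers.Dense(128, activation='relu', kernel_initializer='glorot_uniform'))
# Clip the global gradient norm so a single bad batch cannot blow up the weights
opt = tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=1.0)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])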
Related
When creating a neural network for image classification, I want to get the classification on one hand and the raw output on the other, to determine whether the image really contains one of the objects I want to classify or not. If it does not, the raw output should contain very low values for all classes; but if the image really does contain one of the objects, the raw output should have a high value for one of the neurons.
Assuming I have the following code:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(80, 80, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(4, activation='softmax'))
How would I get the raw output of the last dense layer?
You can use the functional API and implement your model as follows:
inputs = tf.keras.Input(shape=(80, 80, 3))
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Conv2D(16, (3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Flatten()(x)
# here you can get raw output
logits = tf.keras.layers.Dense(4)(x)
model = tf.keras.Model(
    inputs=inputs,
    outputs={
        'logits': logits,
        'predictions': tf.nn.softmax(logits)
    }
)
model.summary()
After that, your model will have two outputs in dictionary format. Beware that you can't pass a single loss function like categorical_crossentropy, because Keras would try to minimize it for both outputs. You need to use the loss argument of the compile method to specify a loss per output. For example:
model.compile(
    optimizer='adam',
    loss={
        # ignore the logits output when computing the loss
        'logits': lambda y_true, y_pred: 0.0,
        'predictions': tf.keras.losses.CategoricalCrossentropy()
    })
And your fit call would look like this:
model.fit(
    x_train,
    {
        'logits': y_train,
        'predictions': y_train
    },
    epochs=10
)
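Assuming TF 2.x, predict on a model with dict outputs returns a dict keyed by output name, so you can read both heads (x_test here stands for your batch of images):
out = model.predict(x_test)
raw = out['logits']          # unnormalized scores
probs = out['predictions']   # softmax probabilities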
I am trying to train a CNN using Keras. The input is a 128x128x3 RGB image and the output is a single value between 0 and 1 (this is not a classifier model). I have normalised the input. Initially, my model was achieving reasonable results, getting the mean absolute error to < 0.1. As I tweaked the model slightly, I found the loss would plateau very quickly at around 0.23. I investigated further and found that it was outputting the same value for every input.
So I reverted my code back to when it was working, but it was no longer working. I eventually found that about 90% of the time it gets stuck at this local minimum, outputting a constant value (which I suspect is the mean of the training reference values, 0.39). The other 10% of the time it behaves nicely and regresses down to an error of < 0.1. So it is basically giving qualitatively different behaviour at random, and the desired results only rarely. The strange thing is that I swear it was consistently working before.
I have tried:
Changing the input size
Increasing/decreasing the learning rate by factor of 10
Removing a couple of dense layers
Changing 'relu' to 'leaky relu'
Increasing/removing dropout
def load_data(dir):
    csv_data = get_csv_data()
    xs = []
    ys = []
    for (name, y) in csv_data:
        path = DIR + dir + "/" + name
        img = tf.keras.preprocessing.image.load_img(path)
        xs.append(tf.keras.preprocessing.image.img_to_array(img) * (1 / 255.0))
        ys.append(normalize_output(float(y)))
    return np.array(xs).reshape(len(csv_data), IMAGE_DIM, IMAGE_DIM, 3), np.array(ys).reshape(len(csv_data), 1)
def gen_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(16, activation='sigmoid'))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss=keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[keras.metrics.MeanAbsoluteError()])
    return model
def run():
    model = gen_model()
    xs, ys = load_data("output")
    generator = tf.keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
                                                                samplewise_center=False,
                                                                featurewise_std_normalization=False,
                                                                samplewise_std_normalization=False,
                                                                validation_split=0.1,
                                                                rotation_range=12,
                                                                horizontal_flip=True,
                                                                vertical_flip=True)
    model.fit(generator.flow(xs, ys, batch_size=32, shuffle=True),
              steps_per_epoch=len(xs) // 32,  # integer number of steps per epoch
              epochs=10,
              use_multiprocessing=False)
I rearranged the activations on the layers. In particular, the original stacked LeakyReLU layers directly after Dense layers that already had relu or sigmoid activations, which applies two activations in a row; keeping relu on the hidden layers and sigmoid only on the output avoids that. Please give it a try:
def gen_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(loss=keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[keras.metrics.MeanAbsoluteError()])
    return model
I am new to convolutional neural networks. Instead of getting my data in image format, I have been given a flattened image matrix of shape [10000 x 784], i.e. 10,000 images of size 28x28.
Given that one image is 28x28, how should I feed the data matrix to the input of my CNN?
My model is:
model = models.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28,28,1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
#model.add(layers.Flatten())
model.add(layers.Dense(2500, activation='relu'))
model.add(layers.Dense(2500, activation='relu'))
model.add(layers.Dense(1, activation='relu'))
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae','mse'])
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
#Fits model
history = model.fit(x_trained, y_train, epochs=7000, validation_split=0.2, shuffle=True, verbose=1, callbacks=[callback])
I get an error at model.fit.
P.S.: I am doing regression, and for every image I have one value as output.
Begin with a Reshape layer:
model = models.Sequential()
model.add(layers.Reshape((28, 28, 1), input_shape=(784,)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# ...
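For reference, a fuller sketch under the question's setup, restoring the commented-out Flatten (needed before the Dense head) and using a linear output, which is the usual choice for regression (an assumption about your targets):
model = models.Sequential()
model.add(layers.Reshape((28, 28, 1), input_shape=(784,)))  # each 784-vector becomes a 28x28x1 image
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())  # back to a vector for the Dense layers
model.add(layers.Dense(2500, activation='relu'))
model.add(layers.Dense(1))  # linear output for regression
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])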
I am currently trying to build a CRNN with Keras. When I tried to reshape the input for the LSTM, I had trouble finding the correct dimensions. After some debugging, I found a field in my model object called output_shape whose value was (3, 1, 224), and I passed it to a Reshape layer as the 2D shape (3, 224). Everything worked fine, but did I do this correctly? What is the math behind this, and what can I do next time to discover this size without debugging?
def CRNN(blockSize, blockCount, inputShape, trainGen, testGen, epochs):
    model = Sequential()
    # Conv Layer
    channels = 32
    for i in range(blockCount):
        for j in range(blockSize):
            if (i, j) == (0, 0):
                conv = Conv2D(channels, kernel_size=(5, 5),
                              input_shape=inputShape, padding='same')
            else:
                conv = Conv2D(channels, kernel_size=(5, 5), padding='same')
            model.add(conv)
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(0.15))
            if j == blockSize - 2:
                channels += 32
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(Dropout(0.15))
    # Feature aggregation across time
    model.add(Reshape((3, 224)))
    # LSTM layer
    model.add(Bidirectional(LSTM(200), merge_mode='ave'))
    model.add(Dropout(0.5))
    # Linear classifier
    model.add(Dense(4, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])  # F1?
    model.fit_generator(trainGen,
                        validation_data=testGen, steps_per_epoch=trainGen.x.size // 20,
                        validation_steps=testGen.x.size // 20,
                        epochs=epochs, verbose=1)
    return model
# Function call
model = CRNN(4, 6, (140, 33, 1), trainGen, testGen, 1)
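On the last part of the question: rather than inspecting output_shape in a debugger, you can print every layer's computed shape. A quick sketch, assuming the model above has been built:
model.summary()  # prints each layer's output shape and parameter count
print(model.layers[-1].output_shape)  # or query a specific layer directly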
Quick disclaimer: I'm pretty new to Keras, machine learning, and programming in general.
I'm trying to create a basic autoencoder for (currently) a single image. While it seems to run just fine, the output is just a white image. Here's what I've got:
img_height, img_width = 128, 128
input_img = '4.jpg'
output_img = '5.jpg'
# load image
x = load_img(input_img)
x = img_to_array(x) # array with shape (128, 128, 3)
x = x.reshape((1,) + x.shape) # array with shape (1, 128, 128, 3)
# define input shape
input_shape = (img_height, img_width, 3)
model = Sequential()
# encoding
model.add(Conv2D(128, (3, 3), activation='relu', input_shape=input_shape,
                 padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
# decoding
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D(size=(2,2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D(size=(2,2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(3, (3, 3), activation='sigmoid', padding='same'))
model.compile(loss='binary_crossentropy', optimizer='adam')
print(model.summary())
checkpoint = ModelCheckpoint("autoencoder-loss-{loss:.4f}.hdf5", monitor='loss', verbose=0, save_best_only=True, mode='min')
model.fit(x, x, epochs=10, batch_size=1, verbose=1, callbacks=[checkpoint])
y = model.predict(x)
y = y[0, :, :, :]
y = array_to_img(y)
save_img(output_img, y)
I've looked at a handful of tutorials for reference, but I still can't figure out what my issue is.
Any guidance/suggestions/help would be greatly appreciated.
Thanks!
This solved the problem. The code was just missing:
x = x.astype('float32') / 255.
astype is a built-in NumPy method that converts the values in the array to floats, and dividing by 255 then rescales them: RGB values are stored as 8-bit integers, so dividing by 255 (2^8 - 1) represents each colour channel as a decimal value between 0.0 and 1.0.
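In context, the fix goes right after img_to_array in the loading code above:
x = load_img(input_img)
x = img_to_array(x)  # array with shape (128, 128, 3), values in 0-255
x = x.astype('float32') / 255.  # scale to [0.0, 1.0] to match the sigmoid output
x = x.reshape((1,) + x.shape)  # array with shape (1, 128, 128, 3)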