Tensorflow/Keras model output is constant - python

I am trying to train a CNN using Keras. The input is a 128x128x3 RGB image and the output is a single value between 0 and 1 (this is not a classifier model). I have normalised the input. Initially, my model was achieving some reasonable results, getting the mean absolute error to < 0.1. As I tried to tweak the model slightly, I found the loss would plateau very quickly at around 0.23. I investigated further and found that it was outputting the same value for every input.
So I reverted my code back to when it was working, but it was no longer working. I eventually found that about 90% of the time it gets stuck at this local minimum, outputting a constant value (which I suspect is the mean of the training reference values, 0.39). The other 10% of the time it behaves nicely and regresses down to an error of < 0.1. So it is basically giving qualitatively different behaviour at random, and the desired results only rarely. The strange thing is that I swear it was consistently working before.
I have tried:
Changing the input size
Increasing/decreasing the learning rate by factor of 10
Removing a couple of dense layers
Changing 'relu' to 'leaky relu'
Increasing/removing dropout
def load_data(dir):
    """Load every image listed in the CSV data together with its target value.

    ``dir`` is the sub-directory, appended to the module-level ``DIR``, that
    holds the image files named by the rows of ``get_csv_data()``.

    Returns a tuple ``(xs, ys)``: the pixel arrays scaled by 1/255 and
    reshaped to ``(n, IMAGE_DIM, IMAGE_DIM, 3)``, and the targets passed
    through ``normalize_output`` and reshaped to ``(n, 1)``.
    """
    rows = get_csv_data()
    images = []
    targets = []
    for name, y in rows:
        picture = tf.keras.preprocessing.image.load_img(DIR + dir + "/" + name)
        pixels = tf.keras.preprocessing.image.img_to_array(picture)
        # Scale the pixel values by 1/255.
        images.append(pixels * (1 / 255.0))
        targets.append(normalize_output(float(y)))
    xs = np.array(images).reshape(len(rows), IMAGE_DIM, IMAGE_DIM, 3)
    ys = np.array(targets).reshape(len(rows), 1)
    return xs, ys
def gen_model():
    """Build and compile the CNN that maps an image to one sigmoid output.

    The network is three 5x5 convolution + max-pooling stages, a flatten,
    three relu dense layers each followed by 10% dropout, then a LeakyReLU,
    a 16-unit sigmoid dense layer, another LeakyReLU and a single-unit
    sigmoid output. It is compiled with mean squared error loss, the Adam
    optimizer with default settings, and mean absolute error as a metric.
    """
    layers = tf.keras.layers
    stack = [
        # Convolutional feature extractor.
        layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu',
                      input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)),
        layers.MaxPool2D(),
        layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu'),
        layers.MaxPool2D(),
        layers.Conv2D(filters=128, kernel_size=(5, 5), activation='relu'),
        layers.MaxPool2D(),
        layers.Flatten(),
        # Fully connected head.
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.1),
        layers.LeakyReLU(),
        layers.Dense(16, activation='sigmoid'),
        layers.LeakyReLU(),
        layers.Dense(1, activation='sigmoid'),
    ]
    model = tf.keras.Sequential(stack)
    model.compile(
        loss=keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[keras.metrics.MeanAbsoluteError()],
    )
    return model
def run():
    """Build the model, load the "output" image set and train for 10 epochs.

    Training batches come from an ``ImageDataGenerator`` that applies random
    rotations of up to 12 degrees and random horizontal/vertical flips.
    """
    batch_size = 32
    model = gen_model()
    xs, ys = load_data("output")
    # NOTE(review): validation_split is configured here, but flow() below is
    # called without subset=, so no validation subset appears to be used -
    # confirm whether that is intended.
    generator = tf.keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        validation_split=0.1,
        rotation_range=12,
        horizontal_flip=True,
        vertical_flip=True,
    )
    # steps_per_epoch must be a whole number of batches. Ceiling division
    # counts the final partial batch instead of passing a float.
    steps_per_epoch = (len(xs) + batch_size - 1) // batch_size
    model.fit(
        generator.flow(xs, ys, batch_size=batch_size, shuffle=True),
        steps_per_epoch=steps_per_epoch,
        epochs=10,
        use_multiprocessing=False,
    )

I rearranged activation on the layers. Please give it a try :
def gen_model():
    """Build and compile the CNN with the rearranged activations.

    The network is three 5x5 relu convolutions, each followed by max
    pooling, then a flatten, three relu dense layers (256, 128, 64 units)
    each followed by 10% dropout, a 16-unit relu dense layer and a
    single-unit sigmoid output. It is compiled with mean squared error loss,
    the default Adam optimizer and mean absolute error as a metric.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    # Only the first convolution declares the input shape.
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), activation='relu',
                            input_shape=(IMAGE_DIM, IMAGE_DIM, CHAN_COUNT)))
    model.add(layers.MaxPool2D())
    for filter_count in (64, 128):
        model.add(layers.Conv2D(filters=filter_count, kernel_size=(5, 5),
                                activation='relu'))
        model.add(layers.MaxPool2D())
    model.add(layers.Flatten())

    # Dense head: each of the wide layers is followed by dropout.
    for units in (256, 128, 64):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(0.1))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        loss=keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[keras.metrics.MeanAbsoluteError()],
    )
    return model

Related

tensorflow.python.framework.errors_impl.InvalidArgumentError: filter must be 4-dimensional: [4,24]

I got the error tensorflow.python.framework.errors_impl.InvalidArgumentError: filter must be 4-dimensional: [4,24] when I tried to run my code. This is an A2C algorithm in which the states are images of an area containing several square blocks. I have attached the code.
# Reshape the observation with two extra leading axes, divide by 255, then
# take the first entry along the leading axis before predicting.
scaled = np.array(observation).reshape(1, -1, *observation.shape) / 255
observation_reshaped = scaled[0]
raw_prediction = actor.predict(observation_reshaped)
action_probs = raw_prediction.flatten()
And my NN models are:
def create_actor(state_shape, action_shape):
    """Build and compile the actor network.

    Args:
        state_shape: input shape handed to the first ``Conv2D`` layer.
        action_shape: number of units in the softmax output layer.

    Returns:
        A compiled ``keras.Sequential`` model using categorical
        cross-entropy loss and the Adam optimizer (learning rate 0.001).
    """
    learning_rate = 0.001
    init = tf.keras.initializers.he_uniform()
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(256, (3, 3), input_shape=state_shape,
                                  activation=tf.keras.layers.LeakyReLU(),
                                  kernel_initializer=init))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Conv2D(256, (3, 3),
                                  activation=tf.keras.layers.LeakyReLU(),
                                  kernel_initializer=init))
    model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='linear',
                                 kernel_initializer=init))
    model.add(keras.layers.Dense(action_shape, activation='softmax',
                                 kernel_initializer=init))
    # `lr` is a deprecated alias; `learning_rate` is the supported argument.
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])
    return model

Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch

When I train my self-driving car model, it gives me this error in the first epoch. When I reduce the batch_size it works fine, but then it does not give me the accuracy I want.
I am training my model in Google Colab.
tensorflow version 2.3.1
Error:
WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 20000 batches). You may need to use the repeat() function when building your dataset.
My code:
def modified_model():
    """Build and compile the 43-class CNN for 32x32 single-channel images.

    The network is two 5x5 convolutions (60 filters), max pooling, two 3x3
    convolutions (30 filters), max pooling, a flatten, a 500-unit relu dense
    layer with 50% dropout and a 43-unit softmax output. It is compiled with
    Adam (learning rate 0.001), categorical cross-entropy and accuracy.
    """
    model = Sequential()
    model.add(Conv2D(60, (5, 5), input_shape=(32, 32, 1), activation='relu'))
    model.add(Conv2D(60, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(30, (3, 3), activation='relu'))
    model.add(Conv2D(30, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(43, activation='softmax'))
    # `lr` is a deprecated alias; `learning_rate` is the supported argument.
    model.compile(Adam(learning_rate=0.001), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
# Build the network and print its layer summary.
model = modified_model()
print(model.summary())

# Train from the augmenting generator in batches of 50, requesting 2000
# steps per epoch for 10 epochs and validating on the held-out arrays.
train_batches = datagen.flow(X_train, y_train, batch_size=50)
history = model.fit_generator(
    train_batches,
    steps_per_epoch=2000,
    epochs=10,
    validation_data=(X_val, y_val),
    shuffle=1,
)
When using generators, let the model figure out how many steps are needed to cover an epoch; otherwise you'll have to calculate it yourself as steps_per_epoch = data_samples / batch_size. Try running without the steps_per_epoch parameter.

i have 10000 images in a vector form how do i convert it for my Convolution neural network?

I am new to convolutional neural networks. Instead of getting my data in image format, I have been given a matrix of flattened images with shape [10000x784].
Means 10000 images of size 28x28
Considering one image size is 28x28, how should i give the data matrix to my input for CNN?
My model is:
# Regression network: two 3x3 convolutions with max pooling in between,
# followed by three relu dense layers ending in a single output unit.
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    # layers.Flatten() was commented out in the original at this position.
    layers.Dense(2500, activation='relu'),
    layers.Dense(2500, activation='relu'),
    layers.Dense(1, activation='relu'),
])
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae', 'mse'],
)

# Stop once val_loss has not improved for 15 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)

# Fit the model, holding out 20% of the data for validation.
history = model.fit(
    x_trained,
    y_train,
    epochs=7000,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
    callbacks=[callback],
)
I get error at model.fit.
P.S: I am doing regression and for every image i have one value as output
Begin with a Reshape layer:
# Turn each flat 784-value row back into a 28x28 single-channel image
# before it reaches the first convolution.
model = models.Sequential([
    layers.Reshape((28, 28, 1), input_shape=(784,)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    # ...
])

Loss is NaN on image classification task

I'm trying to train a basic CNN on an image dataset that contains faces of celebrities, with a class assigned to each person. Given that there are about 10,000 classes, I used sparse_categorical_crossentropy rather than one-hot encoding the classes. However, as soon as the network starts training, the loss is stuck at one number, and after several batches it goes to NaN. I tried different scalings of the images and a smaller network, but with no luck. Any clues on what might be causing the NaN?
Function that generates batches:
def Generator(data, label, batch_size):
    """Yield ``(images, labels)`` batches forever.

    ``data`` holds image file names relative to the dataset folder and
    ``label`` the matching labels. Each image is read with OpenCV, resized
    to 109x109 and divided by 255. When every sample has been served, the
    position resets to zero and ``data``/``label`` are reshuffled together.
    """
    url = "../input/celeba-dataset/img_align_celeba/img_align_celeba/"
    target_size = (109, 109)
    cursor = 0
    while True:
        images, labels = [], []
        for _ in range(batch_size):
            # Wrap around and reshuffle after a full pass over the data.
            if cursor == len(data):
                cursor = 0
                data, label = shuffle(data, label)
            file_name = data[cursor]
            labels.append(label[cursor])
            cursor += 1
            raw = cv2.imread(url + file_name)
            resized = cv2.resize(raw, target_size)
            images.append(resized.astype(float) / 255)
        yield (np.array(images), np.array(labels))
The model:
class CNN:
    """Convolutional classifier trained from the batch generators."""

    def __init__(self, train, val, y_train, y_val, batch_size):
        """Create the batch generators, record dataset sizes and build the model.

        The number of output classes is the count of distinct values in
        ``y_train``.
        """
        # Batch generators for the training and validation data.
        self.train_batch_gen = Generator(train, y_train, batch_size)
        self.val_batch_gen = Generator(val, y_val, batch_size)
        self.input_shape = (109, 109, 3)
        self.num_classes = len(np.unique(y_train))
        self.len_train = len(train)
        self.len_val = len(val)
        self.batch_size = batch_size
        self.model = self.buildModel()

    def buildModel(self):
        """Return the compiled network.

        Five pairs of 3x3 'same'-padded relu convolutions: the first four
        pairs are followed by 2x2 max pooling and the last by 4x4 average
        pooling. Then a flatten, a 128-unit tanh dense layer, 10% dropout
        and a softmax over ``self.num_classes``. Compiled with Adam
        (learning rate 0.00001) and sparse categorical cross-entropy.
        """
        stack = [
            layers.Conv2D(32, (3, 3), activation='relu', padding="same",
                          input_shape=self.input_shape),
            layers.Conv2D(64, (3, 3), activation='relu', padding="same",
                          input_shape=self.input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu', padding="same"),
            layers.Conv2D(128, (3, 3), activation='relu', padding="same"),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(96, (3, 3), activation='relu', padding="same"),
            layers.Conv2D(192, (3, 3), activation='relu', padding="same"),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(128, (3, 3), activation='relu', padding="same"),
            layers.Conv2D(256, (3, 3), activation='relu', padding="same"),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(160, (3, 3), activation='relu', padding="same"),
            layers.Conv2D(320, (3, 3), activation='relu', padding="same"),
            layers.AveragePooling2D(pool_size=(4, 4)),
            layers.Flatten(),
            layers.Dense(128, activation='tanh'),
            layers.Dropout(rate=0.1),
            # Classification (output) layer.
            layers.Dense(self.num_classes, activation="softmax"),
        ]
        model = models.Sequential(stack)
        opt = tf.keras.optimizers.Adam(learning_rate=0.00001)
        model.compile(optimizer=opt,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def trainModel(self, epochs):
        """Train for ``epochs`` epochs using whole batches only.

        Step counts are the floor of dataset size divided by batch size, for
        both the training and the validation generator.
        """
        train_steps = int(self.len_train // self.batch_size)
        val_steps = int(self.len_val // self.batch_size)
        self.model.fit_generator(
            generator=self.train_batch_gen,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=self.val_batch_gen,
            validation_steps=val_steps,
        )
In my case, I used sparse_categorical_crossentropy with labels numbered from [1,2,3] (3 classes). In this case it produced NaNs from the start.
When I changed the labels from [1,2,3] to [0,1,2] the problem has disappeared.
Not sure why you are seeing those nans. I suspect it has something to do with your tanh activation on your dense layer. I would replace it with relu. I also suggest using more neurons on this dense layer cause 128 is probably small for a 10000 output.
If i were you, i would also try a pre-trained model and/or Siamese networks.
This looks like Exploding Gradients problem. I would recommend you to check how the weights and gradients are varying. See this: https://github.com/keras-team/keras/issues/2226
Check https://www.dlology.com/blog/how-to-deal-with-vanishingexploding-gradients-in-keras/ on how to spot exploding gradient problem and solutions to it. Also try out Xavier initialization in your dense layers to prevent exploding gradients.

Managing Reshape correctly to build RNN from a previous CNN

I am currently trying to build a CRNN with Keras. When I tried to reshape the CNN output, I had some trouble finding the correct dimensions for my LSTM. After some debugging, I found a field in my model object called output_shape whose value was (3,1,224), and I tried passing it as a 2D shape, (3,224). Everything worked fine, but did I do it correctly? What is the math behind this, and what can I do next time to find this size without debugging?
def CRNN(blockSize, blockCount, inputShape, trainGen, testGen, epochs):
    """Build, compile and train a convolutional-recurrent classifier.

    Args:
        blockSize: number of conv/batch-norm/relu/dropout stages per block.
        blockCount: number of blocks; each block ends with 2x2 max pooling
            and dropout.
        inputShape: input shape given to the very first convolution.
        trainGen: generator used for training.
        testGen: generator used for validation.
        epochs: number of training epochs.

    Returns:
        The trained ``Sequential`` model.
    """
    model = Sequential()

    # Conv layers
    channels = 32
    for block_idx in range(blockCount):
        for stage_idx in range(blockSize):
            conv_options = {'kernel_size': (5, 5), 'padding': 'same'}
            # Only the first convolution of the network declares the input.
            if block_idx == 0 and stage_idx == 0:
                conv_options['input_shape'] = inputShape
            model.add(Conv2D(channels, **conv_options))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(0.15))
            # Widen by 32 channels after the second-to-last stage of a block.
            if stage_idx + 2 == blockSize:
                channels += 32
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(Dropout(0.15))

    # Feature aggregation across time
    model.add(Reshape((3, 224)))

    # LSTM layer
    model.add(Bidirectional(LSTM(200), merge_mode='ave'))
    model.add(Dropout(0.5))

    # Linear classifier
    model.add(Dense(4, activation='softmax'))
    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=['accuracy'],  # F1?
    )

    train_steps = trainGen.x.size // 20
    val_steps = testGen.x.size // 20
    model.fit_generator(
        trainGen,
        validation_data=testGen,
        steps_per_epoch=train_steps,
        validation_steps=val_steps,
        epochs=epochs,
        verbose=1,
    )
    return model
# Build and train: 6 blocks of 4 conv stages on (140, 33, 1) input, 1 epoch.
model = CRNN(
    blockSize=4,
    blockCount=6,
    inputShape=(140, 33, 1),
    trainGen=trainGen,
    testGen=testGen,
    epochs=1,
)

Categories