I've been training a CNN with keras. A binary classificator where it says if a depth image has a manhole or not. I've manually labeled the datasets with 0 (no manhole) and 1 (it has a manhole). I have 2 datasets 1 with 45k images to train the CNN and one with 26k images to test the CNN.
Both datasets are unbalanced double of negatives images than positives.
This is the code:
# dimensions of our images.
img_width, img_height = 80, 60
n_positives_img, n_negatives_img = 17874, 26308
n_total_img = 44182
#Labeled arrays for datasets
arrayceros = np.zeros(n_negatives_img)
arrayunos = np.ones(n_positives_img)
#Reshaping of datasets to convert separate them
arraynegativos= ds_negatives.reshape(( n_negatives_img, img_height, img_width,1))
arraypositivos= ds_positives.reshape((n_positives_img, img_height, img_width,1))
#Labeling datasets with the arrays
ds_negatives_target = tf.data.Dataset.from_tensor_slices((arraynegativos, arrayceros))
ds_positives_target = tf.data.Dataset.from_tensor_slices((arraypositivos, arrayunos))
#Concatenate 2 datasets and shuffle them
ds_concatenate = ds_negatives_target.concatenate(ds_positives_target)
datasetfinal = ds_concatenate.shuffle(n_total_img)
Then I have the same for the second dataset for testing.
#Adding batch dimension to datasets 4dim
valid_ds = datasetfinal2.batch(12)
train_ds = datasetfinal.batch(12)
#Defining model
model = Sequential()
model.add(Conv2D(5, kernel_size=(5, 5),activation='relu',input_shape=(60,80,1),padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D((5, 5),padding='same'))
model.add(Dropout(0.3))
model.add(Conv2D(5, (5, 5), activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Dropout(0.3))
model.add(Conv2D(5, (5, 5), activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
model.add(Dropout(0.3))
model.add(Conv2D(5, (5, 5), activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
#Compiling model
model.summary()
initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)
model.compile(
loss="binary_crossentropy",
optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
metrics=["acc"],
)
# Define callbacks.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
"2d_image_classification.h5", save_best_only=True
)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)
#Fitting the model
history= model.fit(train_ds, validation_data=valid_ds, batch_size=100, epochs=5,callbacks=[checkpoint_cb, early_stopping_cb])
This gives me 99% of acc in train dataset and 95% in test dataset.
But when i do this it gives me 60% precision for negatives images and 45% for positives:
#Get the real labels of valid dataset
valid_labels = list(valid_ds.flat_map(lambda x, y: tf.data.Dataset.from_tensor_slices((x, y))).as_numpy_iterator())
valid_labels = [y for x, y in valid_labels]
y_pred = model.predict(valid_ds)
y_pred = (y_pred > 0.5).astype(float)
from sklearn.metrics import classification_report
print(classification_report(valid_labels, y_pred))
Why this? I have printed both predicted labels and true labels and it look likes its random. It has no sense.
https://colab.research.google.com/drive/1bhrntDItqoeT0KLb-aKp0W8cV6LOQOtP?usp=sharing
If u need more information, just ask me.
Thanks!!!!
Related
I had tried model.fit() and model.fit_generator() but the result show that the model.fit() has better result compared to model.fit_generator(). I would like to augment the training set thus I used ImageDataGenerator() and model.fit_generator(). Below is the graph with model.fit() and model.fit_generator().
As you can see, the model.fit() has a better validation accuracy and validation loss compared to model.fit_generator(). Below is my CNN code.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2)
model = Sequential()
# filters, kernel size, input size
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:], padding='Same'))
model.add(Conv2D(32, (3, 3), activation='relu', padding='Same'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu', padding='Same'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
tensorboard = TensorBoard(log_dir="CNN_Model_Rebuilt/logs/{}".format(NAME))
augmented_checkpoint = ModelCheckpoint(
'CNN_Model_Rebuilt/best model/' + NAME + '-best.h5',
monitor='val_loss', verbose=0,
save_best_only=True, mode='auto')
es = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=20,
verbose=0, mode='auto')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
datagen = ImageDataGenerator(
featurewise_center=False, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
zca_whitening=False, # apply ZCA whitening
rotation_range=10, # randomly rotate images in the range (degrees, 0 to 180)
zoom_range=0.1, # Randomly zoom image
width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
height_shift_range=0.1, # randomly shift images vertically (fraction of total height)
horizontal_flip=False, # randomly flip images
vertical_flip=False) # randomly flip images
datagen.fit(X_train)
epochs = 50
batchsize = 16
history = model.fit_generator(datagen.flow(X_train, to_categorical(y_train), batch_size=batchsize),
epochs=epochs, validation_data=(X_test, to_categorical(y_test)),
verbose=2, steps_per_epoch=X-train.shape[0], callbacks=
[augmented_checkpoint, tensorboard, es])
Does the code any problem? Any suggestion will be grateful. Thanks.
They should give the same results on the same data. I notice in your code you have
datagen.fit(X_train). You don't need this since you have featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, featureewise_std_normalization=False,and zca_whitening=False. You show your code for model.fit_generator but not for model.fit. Did you use the same data generator to provide the input to model.fit? Your generator is augmenting the data so the training data gets modified by the generator. If you do not have the same data for both model.fit and model.fit_generator you will get different results. Also note that the parameters in model.fit and model.fit_generator are not identical. Make sure you are selecting the training and validation data in the same manner for both cases. Also did you use flow from directory? Did you set the batch size in flow from directory with the fit generator to be the same as the batch size for model.fit.
I am learning Keras using audio classification, Actually, I am implementing the code with modification from https://github.com/deepsound-project/genre-recognition/blob/master/train_model.py using Keras.
The shape of the dataset is
X_train shape = (800, 32, 1)
y_train shape = (800, 10)
X_test shape = (200, 32, 1)
y_test shape = (200, 10)
The model
model = Sequential()
model.add(Conv1D(filters=256, kernel_size=5, input_shape=(32,1), activation="relu"))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling1D(2))
model.add(Dropout(0.5))
model.add(Conv1D(filters=256, kernel_size=5, activation="relu"))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling1D(2))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation="relu", ))
model.add(Dense(10, activation='softmax'))
model.compile(
loss='categorical_crossentropy',
optimizer = Adam(lr=0.001),
metrics = ['accuracy'],
)
model.summary()
red_lr= ReduceLROnPlateau(monitor='val_loss',patience=2,verbose=2,factor=0.5,min_delta=0.01)
check=ModelCheckpoint(filepath=r'/content/drive/My Drive/Colab Notebooks/gen/cnn.hdf5', verbose=1, save_best_only = True)
History = model.fit(X_train,
y_train,
epochs=100,
#batch_size=512,
validation_data = (X_test, y_test),
verbose = 2,
callbacks=[check, red_lr],
shuffle=True )
The accuracy graph
Loss graph
I do not understand, Why the val_acc is in the range of 70%. I tried to modify the model architecture including optimizer, but no improvement.
And, Is it good to have a lot of difference between loss and val_loss.
how to improve the accuracy above 80... any help...
Thank you
I found it, I use concatenate function from Keras to concatenate all convolution layers and, it gives the best performance.
I'm experimenting with geometric shape classification. My datasets are 100x100 px thresholded black and white images of squares, circles and triangles in total 3000 and 1000 for each shape. They look like these:
But I got them as a csv file, where each row is the one dimensional representation of the image and last column is label.
I used MLP from sklearn to make a classifier. It performed well. Almost 99%.
df = pd.read_csv("img_data.csv", sep=";")
df = df.sample(frac=1) # shuffling the whole dataset
X = df.drop('label', axis=1) # Because 'label' is the column of label
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
clf = MLPClassifier(solver='adam', activation="relu",alpha=1e- 5,hidden_layer_sizes=(1000,), random_state=1, verbose=True)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print('accuracy',accuracy_score(y_test, y_pred))
Then I wanted to try with CNN. For that I used keras with tensorflow backend. But accuracy here couldn't cross above 92% even after 20 epochs. Here's my code:
df = pd.read_csv("img_data.csv", sep=";")
df = df.sample(frac=1) # shuffling the whole dataset
X = df.drop('label', axis=1) # Because 'label' is the column of label
y = df['label']
X=X.as_matrix()
X = np.reshape(X, (-1, 100, 100, 1)) #made 1d to 2d
a = list(y)
label_binarizer = sklearn.preprocessing.LabelBinarizer()
label_binarizer.fit(range(max(a)))
y = label_binarizer.transform(a) # encoding one hot for labels
X_train, X_test, y_train, y_test = train_test_split(all_images, y, test_size=0.20)
model = Sequential()
model.add(Conv2D(32, 3, activation='relu', input_shape=[100, 100, 1]))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(128, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
epochs = 20
model.fit(X_train, y_train,
validation_data=(X_test, y_test),
epochs=epochs, batch_size=64, verbose=1)
This seems to be a very simple problem. There is very little structure inside the data, so I think you could try to reduce the depth of the neural network by removing the last two convolution and max pooling layers. Instead increase the number of nodes in the fully-connected layer, like this:
model = Sequential()
model.add(Conv2D(32, 3, activation='relu', input_shape=[100, 100, 1]))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPool2D())
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(3, activation='softmax'))
You could also try to use some image augmentation techniques like shifting and rotating to increase your dataset. Then I expect the convnet to outperform the standard mlp.
Best
I am generating at most 4 digits captchas with the following method:
def genData(n=30000, max_digs=4, width=150):
capgen = ImageCaptcha()
data = []
target = []
for i in range(n):
x = np.random.randint(0, 10 ** max_digs)
img = misc.imread(capgen.generate(str(x)))
img = np.mean(img, axis=2)[:, :width]
data.append(img.flatten())
target.append(x)
return np.array(data), np.array(target)
Then I am processing data like following
train_data, train_target = genData()
test_data, test_target = genData(1000)
train_data = train_data.reshape(train_data.shape[0], 1, 150, 60)
test_data = test_data.reshape(test_data.shape[0], 1, 150, 60)
train_data = train_data.astype('float32')
test_data = test_data.astype('float32')
train_data /= 255
test_data /= 255
My model structure is as follows:
def get_model():
# create model
model = Sequential()
model.add(Conv2D(30, (5, 5), input_shape=(1, 150, 60), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(15, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(10 ** 4, activation='softmax'))
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
Then I am training the model
model = get_model()
# Fit the model
model.fit(train_data, train_target, validation_data=(test_data, test_target), epochs=10, batch_size=200)
# Final evaluation of the model
scores = model.evaluate(test_data, test_target, verbose=0)
print("Large CNN Error: %.2f%%" % (100 - scores[1] * 100))
I don't know which part that I am doing wrong but my accuracy cannot reach even %1.
You have 10000(!) classes. How long do you train? How much training data do you have per class?
Your approach is almost certainly the problem. While you can solve problems "brute force" like this, it is a very bad way to do so. You should first try to detect single digits and then classify each digit with a 10-class classifier.
What I want to do:
I want to train a convolutional neural network on the cifar10 dataset on just two classes. Then once I get my fitted model, I want to take all of the layers and reproduce the input image. So I want to get an image back from the network instead of a classification.
What I have done so far:
def copy_freeze_model(model, nlayers = 1):
new_model = Sequential()
for l in model.layers[:nlayers]:
l.trainable = False
new_model.add(l)
return new_model
numClasses = 2
(X_train, Y_train, X_test, Y_test) = load_data(numClasses)
#Part 1
rms = RMSprop()
model = Sequential()
#input shape: channels, rows, columns
model.add(Convolution2D(32, 3, 3, border_mode='same',
input_shape=(3, 32, 32)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dropout(0.5))
#output layer
model.add(Dense(numClasses))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=rms,metrics=["accuracy"])
model.fit(X_train,Y_train, batch_size=32, nb_epoch=25,
verbose=1, validation_split=0.2,
callbacks=[EarlyStopping(monitor='val_loss', patience=2)])
print('Classifcation rate %02.3f' % model.evaluate(X_test, Y_test)[1])
##pull the layers and try to get an output from the network that is image.
newModel = copy_freeze_model(model, nlayers = 8)
newModel.add(Dense(1024))
newModel.compile(loss='mean_squared_error', optimizer=rms,metrics=["accuracy"])
newModel.fit(X_train,X_train, batch_size=32, nb_epoch=25,
verbose=1, validation_split=0.2,
callbacks=[EarlyStopping(monitor='val_loss', patience=2)])
preds = newModel.predict(X_test)
Also when I do:
input_shape=(3, 32, 32)
Does this means a 3 channel (RGB) 32 x 32 image?
What I suggest you is a stacked convolutional autoencoder. This makes unpooling layers and deconvolution compulsory. Here you can find the general idea and code in Theano (on which Keras is built):
https://swarbrickjones.wordpress.com/2015/04/29/convolutional-autoencoders-in-pythontheanolasagne/
An example definition of layers needed can be found here :
https://github.com/fchollet/keras/issues/378