Improving a bad CNN- Detecting Image Orientation - python

just starting out in ML and finally got my first CNN up and running :) except its accuracy is only slightly better than a random guess (~27%). I give the model a set of 2000 pictures of faces sorted into either 0 degrees, 90 degrees, 180 degrees, or 270 degrees rotated. Below is my code:
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
#import matplotlib.pyplot as plt
datagen = ImageDataGenerator()
train_it = datagen.flow_from_directory('firstThousandTransformed/', class_mode='categorical', batch_size=64, color_mode="grayscale", target_size=(64,64))
val_it = datagen.flow_from_directory('validation/', class_mode='categorical', batch_size=64, color_mode="grayscale", target_size=(64,64))
test_it = datagen.flow_from_directory('test/', class_mode='categorical', batch_size=64, color_mode='grayscale', target_size=(64,64))
imageInput = Input(shape=(64,64,1))
conv1 = Conv2D(128, kernel_size=8, activation='relu')(imageInput)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, kernel_size=4, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(64, kernel_size=4, activation='relu')(pool2)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
flat = Flatten()(pool3)
hidden1 = Dense(10, activation='relu')(flat)
output = Dense(4, activation='softmax')(hidden1)
model = Model(inputs=imageInput, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(train_it, steps_per_epoch=16, validation_data=val_it, validation_steps=8)
loss = model.evaluate(test_it, steps=16)
_, accuracy = model.evaluate(train_it)
print('Accuracy: %.2f' % (accuracy*100))
print(model.summary())
The way I envisioned this network working was that the convolution layers might detect some hair or a chin in a certain place and be able to distinguish that hair or chin placement from another image. This clearly is not working. Could you give a noobie some advice? How can I make this better? How can I think about this problem? Am I using the wrong kind of layers? Do I need more pictures?
EDIT:
So I have been playing around with it a little bit by messing with the kernel_sizes (now they are 12, 8, and 4) and changing the number of epochs to 20, and something crazy happened. When I ran the program, I got an accuracy of 99%!! (see screenshot below)
HOWEVER, when I ran it again to double check, it went back to ~27%. What does this mean?

Related

Matrix size incompatible: ln[0] : [1:43264], ln[16,512]

I want to build a model that classifies and predicts words from the users lips. With adverb a total of 142657 images that have been preproccessed using the dataset of videos of individual speakers but I get this error when running the model and doesnt even get past the first epoch
heres my code
import os
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Conv2D, \
MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel('ERROR')
class AdverbNet(object):
def __init__(self):
self.Model = Sequential()
self.build()
def build(self):
self.Model.add(Input(name='the_input', shape=(224, 224, 1), batch_size=16, dtype='float32'))
self.Model.add(Conv2D(32, (3, 3), activation='sigmoid', name='convo2'))
self.Model.add(MaxPooling2D(pool_size=(2, 2)))
self.Model.add(Conv2D(32, (3, 3), activation='sigmoid', name='convo3'))
self.Model.add(MaxPooling2D(pool_size=(2, 2)))
self.Model.add(Conv2D(64, (3, 3), activation='relu', name='convo4'))
self.Model.add(MaxPooling2D(pool_size=(2, 2)))
self.Model.add(Flatten())
self.Model.add(Dense(512))
self.Model.add(Dropout(0.5))
self.Model.add(BatchNormalization(scale=False))
self.Model.add(Activation('relu'))
self.Model.add(Dropout(0.5))
self.Model.add(Dense(4, activation='softmax'))
def summary(self):
self.Model.summary()
if __name__ == "__main__":
common_path = 'C:/Users/Loide/Desktop/Liphy/'
C = AdverbNet()
C.Model.compile(optimizer="Adam", loss='categorical_crossentropy', metrics=['accuracy'])
C.Model.summary()
with tf.device('/device:GPU:0')
batch_size = 16
epochs = 32
train_dir = common_path + 'Images/Adverb/'
test_dir = common_path + 'Images/Adverb/'
checkpoint_path = common_path + 'SavedModels/Adverb/'
train_image_generator = ImageDataGenerator(rescale=1. / 255) # Generator for training data generate training anD test set
train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
directory=train_dir,
shuffle=True,
target_size=(224, 224),
class_mode='categorical',
color_mode='grayscale')
test_image_generator = ImageDataGenerator(rescale=1. / 255) # Generator for test data
test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
directory=test_dir,
shuffle=False,
target_size=(224, 224),
class_mode='categorical',
color_mode='grayscale')
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
patience=10,
restore_best_weights=True,
baseline=0.45)
history = C.Model.fit(train_data_gen,
steps_per_epoch=8916, # Number of images // Batch size
epochs=epochs,
verbose=1,
validation_data=test_data_gen,
validation_steps=187,
callbacks=[callback])
C.Model.save(checkpoint_path, save_format='tf')
**and I get the following error **
[Matrix size-incompatible: In[0]: [1,43264], In[1]: [16,512]
[[{{node gradient_tape/sequential/dense/MatMul/MatMul_1}}]] [Op:__inference_train_function_1179]
Seems like you have two matrices (one 1x43265 and one 16x512). You try to multiply them but its product is mathematically not defined. You need one matrix to be a (a x b) matrix and the other to be a (b x c) matrix. Thats why your program can't run. If your images are a test dataset try to follow the instructions step by step. If not, your preprocessing is probably bad.

Any Advice on how to make this CNN training faster?

I have been training a Neural Network for recognizing the differences between a paper with handwriting and a paper with Drawings, My images are all in (3508, 2480) size and I'm using a CNN for the task, the problem is that it is taking ages to train, I have 30,000 data belonging to 2 classes which are separated into validation and training, so I have:
13650 Images of Handwritten Paragraphs for training
13650 Images of Drawings for training
1350 Images of Drawings for validation
1250 Images of Drawings for validation
If you want to see my architecture here it is my
And here is my code:
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from google.colab import drive
drive.mount('/content/drive')
l0 = tf.keras.layers.Conv2D(32, (60,60), activation='relu', input_shape=(438, 310, 1), name='input')
l1 = tf.keras.layers.Dropout(.3)
l2 = tf.keras.layers.BatchNormalization()
l3 = tf.keras.layers.MaxPool2D(pool_size=(2,2),padding='same')
l12 = tf.keras.layers.Flatten()
l16 = tf.keras.layers.Dense(32, activation='relu')
l17 = tf.keras.layers.Dropout(.5)
l18 = tf.keras.layers.BatchNormalization()
l22 = tf.keras.layers.Dense(1, activation='sigmoid', name='output')
from keras.preprocessing.image import ImageDataGenerator
trdata = ImageDataGenerator(rescale=1/255)
traindata = trdata.flow_from_directory("/content/drive/MyDrive/Sae/TesisProgra/DataSets/ParagraphsVsDrawings/Paste/0_Final/Training",target_size=(438, 310), color_mode="grayscale", batch_size=250)
valdata = ImageDataGenerator(rescale=1/255)
validationdata = valdata.flow_from_directory("/content/drive/MyDrive/Sae/TesisProgra/DataSets/ParagraphsVsDrawings/Paste/0_Final/Validation",target_size=(438, 310), color_mode="grayscale", batch_size=250)
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpoint = ModelCheckpoint("ParagraphsVsDrawings.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, save_freq='epoch', mode='auto')
history = model.fit(traindata, validation_data=validationdata, validation_steps=10,epochs=20, verbose=True, callbacks=[checkpoint])
I´m using Google Colab PRO for the training with TPU and Big RAM options activated
I have trained CNN before, but they trained really fast, I don´t know if it's for my images being to big maybe I could try resizing them with pillow, but I'm really lost at this point, I have been waiting 12 hours and It's still on first epoch
Your kernel size, 60 by 60, is quite big. Try 3 by 3 kernel or 5 by 5 kernel. It doesn't seem that image size is the problem since you are resizing from (3508, 2480) to (438, 310).
Also notice that the number of weights you have is very, very large. It is around 24 million. This is because you are flattening a (189, 125, 32) shape array and then your next layer (Dense) has 32 units, so 189 * 125 * 32 * 32 weights for that layer. That will take very, very long to train.
Try to add one or two more conv layers + pooling layers so that the number of weights when flattened is manageable.
For anyone who has my same problem here it is my final code of the CNN architecture, which runs much faster than the one I had previusly, credits to users Afif Al Mamun and ntlarry
ModelSummary()
Code
lc0 = tf.keras.layers.Conv2D(64, (5,5), activation='relu', input_shape=(438, 310, 1), name='input')
lc1 = tf.keras.layers.Dropout(.3)
lc2 = tf.keras.layers.BatchNormalization()
lc3 = tf.keras.layers.MaxPool2D(pool_size=(2,2),padding='same')
lc4 = tf.keras.layers.Conv2D(32, (3,3), activation='relu')
lc5 = tf.keras.layers.Dropout(.3)
lc6 = tf.keras.layers.BatchNormalization()
lc7 = tf.keras.layers.MaxPool2D(pool_size=(2,2),padding='same')
lc8 = tf.keras.layers.Conv2D(16, (3,3), activation='relu')
lc9 = tf.keras.layers.Dropout(.3)
lc10 = tf.keras.layers.BatchNormalization()
lc11 = tf.keras.layers.MaxPool2D(pool_size=(4,80),padding='same')
lc8 = tf.keras.layers.Conv2D(8, (3,3), activation='relu')
lc9 = tf.keras.layers.Dropout(.3)
lc10 = tf.keras.layers.BatchNormalization()
lc11 = tf.keras.layers.MaxPool2D(pool_size=(4,80),padding='same')
lf = tf.keras.layers.Flatten()
ld1 = tf.keras.layers.Dense(32, activation='relu')
ld2 = tf.keras.layers.Dropout(.5)
ld3 = tf.keras.layers.BatchNormalization()
lfinal = tf.keras.layers.Dense(1, activation='sigmoid', name='output')
model = tf.keras.Sequential([lc0,lc1,lc2,lc3,lc8,lc9,lc10,lc11,lf,ld1,ld2,ld3,lfinal], name="ParagraphIdentification")
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.RMSprop(lr = 0.001), metrics=['accuracy'])
model.summary()

Using Keras to design a CNN: Understanding Tensor Shape

just starting out with ML, and wanted to create my own CNN to detect orientation of images with faces. I followed a tutorial to accept input images of 64x64x1, and here is my code:
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator()
train_it = datagen.flow_from_directory('firstThousandTransformed/', class_mode='categorical', batch_size=64, color_mode="grayscale")
val_it = datagen.flow_from_directory('validation/', class_mode='categorical', batch_size=64, color_mode="grayscale")
imageInput = Input(shape=(64,64,1))
conv1 = Conv2D(32, kernel_size=4, activation='relu')(imageInput)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(16, kernel_size=4, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
flat = Flatten()(pool2)
hidden1 = Dense(10, activation='relu')(flat)
output = Dense(4, activation='sigmoid')(hidden1)
model = Model(inputs=imageInput, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(train_it, steps_per_epoch=16, validation_data=val_it, validation_steps=8)
However, I get this error when I try to run:
Input to reshape is a tensor with 3810304 values, but the requested
shape requires a multiple of 2704 [[node model/flatten/Reshape
(defined at c:\Users\cdues\Desktop\kerasTutorial\orentationTry.py:33)
]] [Op:__inference_train_function_836]
Below is my model summary:
I need some help understanding what a Tensor shape is and where my code has gone wrong here. Just working through the tutorial with Keras, I didn't encounter Tensor shape and now I am sort of lost. Sorry for the basic question, can yall help a noobie out? Thanks!
Try using the target_size argument while calling flow_from_directory.
train_it = datagen.flow_from_directory('firstThousandTransformed/',
class_mode='categorical',
batch_size=64,
color_mode='grayscale',
target_size=(64,64))
val_it = datagen.flow_from_directory('validation/',
class_mode='categorical',
batch_size=64,
color_mode='grayscale',
target_size=(64,64))
This way you can reshape the images from the directories before feeding to the model.
First in ImageDataGenerator there is a parameter called rescale. Typically with pixel values in the range 0 to 255 rescale is set to 1/255 so pixel value fall in the range from 0 to 1. I recommend you use that. Documentation for ImageDataGenerator is here.. In flow from directory you can specify the image size with parameter target_size: Tuple of integers (height, width), default: (256, 256). Documentation is at location specified earlier. In your model you have 4 nodes in your output layer. This implies you are classifying images into one of 4 classes. If that is the case in model.compile you should use categorical cross entropy as the loss. Change the activation function in your output layer to softmax.

Python Keras - CNN stuck on epoch 1

from keras import *
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras import optimizers
#from parser import load_data # data loading
# Collecting data:
img_width, img_height = 150, 150
training_data_dir = "train"
testing_data_dir = "test"
# used to rescale the pixel values from [0, 255] to [0, 1] interval
datagen = ImageDataGenerator(rescale=1./255)
# automagically retrieve images and their classes for train and validation sets
train_generator = datagen.flow_from_directory(
training_data_dir,
target_size=(img_width, img_height),
batch_size=16,
class_mode='binary')
test_generator = datagen.flow_from_directory(
testing_data_dir,
target_size=(img_width, img_height),
batch_size=32,
class_mode='binary')
# Building model:
model = Sequential()
model.add(Convolution2D(32, 3, 3, input_shape=(img_width, img_height,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss="binary_crossentropy",
optimizer="rmsprop",
metrics=["accuracy"])
# Training model:
nb_epoch = 30
nb_train_samples = 2048
nb_validation_samples = 832
model.fit_generator(
train_generator,
samples_per_epoch=nb_train_samples,
nb_epoch=nb_epoch,
validation_data=test_generator,
nb_val_samples=nb_validation_samples)
This is my code for a CNN which is trained using images from the folders train and test. But whenever I try training it, the program seems to get stuck at epoch 1/30 all the time, i left it on overnight for 8 hours and it hasn't moved along at all, any fixes I could try?
Update:
The output of my code currently is:
Using TensorFlow backend.
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
image_classifiy.py:78: UserWarning: Update your fit_generator call to the Keras 2 API: fit_generator(<keras_pre..., epochs=30, validation_data=<keras_pre..., validation_steps=832, steps_per_epoch=128)
steps_per_epoch=128)
Epoch 1/30
By decoding "Found 0 images belonging to 0 classes", one can conclude that the subdirectories for each class are not created. In keras, there must be a folder for every class and in that folder it must contain the images. So, make sure you make subdirectories for each class inside the train and test folders.

What does the prediction say? CNN Keras

I've created a CNN model to try to predict if the image is either a dog or a cat, but on the output I don't know what it predicted. See below:
import pandas as pd
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D
from scipy import misc
import numpy as np
def build_classifier():
# Model based on 'https://www.researchgate.net/profile/Le_Lu/publication/277335071/figure/fig8/AS:294249976352779#1447166069905/Figure-8-The-proposed-CNN-model-architecture-is-composed-of-five-convolutional-layers.png'
#It's smarter to add layer without creating variables because of the processing, but as a small dataset it doesn't matter a lot.
classifier = Sequential()
conv1 = Conv2D(filters=64, kernel_size=(2,2), activation='relu', input_shape=(64,64,3))
conv2 = Conv2D(filters=192, kernel_size=(2,2), activation='relu')
conv3 = Conv2D(filters=384, kernel_size=(2,2), activation='relu')
conv4 = Conv2D(filters=256, kernel_size=(2,2), activation='relu')
conv5 = Conv2D(filters=256, kernel_size=(2,2), activation='relu')
pooling1 = MaxPooling2D(pool_size=(2,2))
pooling2 = MaxPooling2D(pool_size=(2,2))
pooling3 = MaxPooling2D(pool_size=(2,2))
fcl1 = Dense(1024, activation='relu')
fcl2 = Dense(1024, activation='relu')
fcl3 = Dense(2, activation='softmax')
dropout1= Dropout(0.5)
dropout2 = Dropout(0.5)
flatten = Flatten()
layers = [conv1, pooling1, conv2, pooling2, conv3, conv4, conv5,
pooling3, flatten, fcl1, dropout1, fcl2, dropout2, fcl3]
for l in layers:
classifier.add(l)
return classifier
model = build_classifier()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
'dataset/training_set',
target_size=(64, 64),
batch_size=32,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
'dataset/test_set',
target_size=(64, 64),
batch_size=32,
class_mode='categorical')
model.fit_generator(
train_generator,
steps_per_epoch=200,
epochs=32,
validation_data=validation_generator,
validation_steps=100)
model.save('model.h5')
model.save_weights('model_weights.h5')
I opened my saved model in another file:
from keras.models import load_model
from scipy import misc
import numpy as np
def single_pred(filepath, model):
classifier = load_model(model)
img = misc.imread(filepath)
img = misc.imresize(img, (64,64,3))
img = np.expand_dims(img, 0)
print(classifier.predict(img))
if __name__ == '__main__':
single_pred('/home/leonardo/Desktop/Help/dataset/single_prediction/cat_or_dog_2.jpg', 'model.h5')
As output I get this:
Using TensorFlow backend.
2017-10-09 14:06:25.520018: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-09 14:06:25.520054: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
[[ 0. 1.]]
But how can I know if the prediction says that it is a dog or a cat? Having this result in hands I still don't know if the image is a dog or a cat.
Unless you specify the labels, your generator will automatically create the categorical labels for you. You can inspect those using train_generator.class_indices
The order of the class labels is alphanumeric, so cats=0 dogs=1

Categories