How to predict input probability of image using trained model in Keras? - python

I'm beginning ML and have come to a huge road bump, looked at this thing for hours. I want to get the predict() probability as output but the predictions are only outputting [[1.]] for every test image. Using larger numbers of training data and more epochs, the acc and validation acc are up to about 90% each. This is only binary classification but I don't want predict_classes. I have no idea why it's printing [[1.]]
Here is the code I'm using:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
img_width, img_height = 150, 150
train_data_dir = 'D:\Machine_Learning\\train'
validation_data_dir = 'D:\Machine_Learning\\test'
nb_train_samples = 20000
nb_validation_samples = 7000
epochs = 50
batch_size = 40
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
# Build model structure
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
# Image augmentation
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
model.save('first_try.h5')
model.save_weights('my_weights.model')
And for predictions:
from keras.models import load_model
from keras.preprocessing import image
model = load_model('first_try.h5')
def prepare(image):
imageSize = 150
#imageArray = cv2.imread(filePath)
newImageArray = cv2.resize(image, (imageSize, imageSize))
return newImageArray.reshape(-1, imageSize, imageSize, 3)
for i in range(len(test_images)):
print(test_images[i])
im = prepare(test_images[i])
prediction = model.predict([im])
print(prediction)

Related

How process image on Convolutional Neural network [duplicate]

In ImageDataGenerator of Keras the flow method has argument x which takes data with rank 4. Why?
I have a test image which has RGB (150, 150, 3).
Data has been trained on the images of type (150,150,3) where 150,150 are width and height, and 3 is for RGB. But I am getting error
ValueError: ('Input data in NumpyArrayIterator should have rank 4. You passed an array with shape', (3, 150, 150))
how can an image have shape with rank 4?
and how to make test input image type with rank 4?
For the reference, my code is as per follow:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
# dimensions of our images.
img_width, img_height = 150, 150
train_data_dir = 'C:/Users/imageNetToyDataset/train'
validation_data_dir = 'C:/Users/imageNetToyDataset/validation'
epochs = 5
nb_train_samples = 2000
nb_validation_samples = 50
batch_size = 16
input_shape = (img_width, img_height, 3)
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
import numpy as np
import cv2
import csv
import os
from keras.preprocessing.image import ImageDataGenerator, array_to_img,
img_to_array, load_img
from scipy.misc import imresize
import scipy
def predict_labels(model):
"""writes test image labels and predictions to csv"""
test_data_dir = "C:/Users/imageNetToyDataset/test"
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
test_data_dir,
target_size=(img_width, img_height),
batch_size=32,
shuffle=False,
class_mode="binary")
with open("prediction.csv", "w") as f:
p_writer = csv.writer(f, delimiter=',', lineterminator='\n')
for _, _, imgs in os.walk(test_data_dir):
print ("number of images: {}".format(len(imgs)))
for im in imgs:
print ("image:\n{}".format(im))
pic_id = im.split(".")[0]
imgPath = os.path.join(test_data_dir,im)
print (imgPath)
img = load_img(imgPath)
img = imresize(img, size=(img_width, img_height))
print ("img shape = {}".format(img.shape))
test_x = img_to_array(img).reshape(3, img_width, img_height)
print ("test_x shape = {}".format(test_x.shape))
test_generator = test_datagen.flow(test_x,
batch_size=1,
shuffle=False)
prediction = model.predict_generator(test_generator,1,epochs)
p_writer.writerow([pic_id, prediction])
prediction=predict_labels(model)
The forth dimension is the number of samples in a batch.
Look at https://keras.io/preprocessing/image/ at the data_format explanation

image classifer with multiple categories

hiya i followed a guide on how to make an image classifier from here https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html and this categorizes it into only 2 categories these code gives me the F1 score and confusion matrix is there a way to make a multiple category image classifier using these codes my data sets right now are types of mushroom
import numpy
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import matplotlib.pyplot as plt
# dimensions of our images.
img_width, img_height = 150, 150
train_data_dir = r'C:\Users\Acer\imagerec\Mushrooms\TRAIN'
validation_data_dir = r'C:\Users\Acer\imagerec\Mushrooms\VAL'
nb_train_samples = 7025
nb_validation_samples = 6262
epochs = 50
batch_size = 16
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
model.save_weights('first_try.h5')
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns
test_steps_per_epoch = numpy.math.ceil(validation_generator.samples / validation_generator.batch_size)
predictions = model.predict_generator(validation_generator, steps=test_steps_per_epoch)
# Get most likely class
predicted_classes = numpy.argmax(predictions, axis=1)
true_classes = validation_generator.classes
class_labels = list(validation_generator.class_indices.keys())
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)
cm=confusion_matrix(true_classes,predicted_classes)
sns.heatmap(cm, annot=True)
print(cm)
plt.show()
You clearly defined a binary classification in your code. To turn it to a multi-class task, let's say to N class, you need to change your last layer from 1 Dense to N Dense, and for the activation, you should change it from sigmoid to softmax.
Last but not least, you should change your loss function from binary_crossentropy to categorical_crossentropy if your classes are already hot-encoded. Otherwise, you might want to go with sparse_categorical_crossentropy
That part of your code should seems something like this after applying the changes:
model.add(Dense(N))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
Where N is the number of different classes you have.
Edit: you also need to go from "binary" to "categorical" for class_mode in your generators. You should also check how to generate the labels (one-hot-encoded)

Image Classification using keras predict same output for different classes

I have problem with my image classification model using keras.
This is the code which have binary class.
tried to set the number of images in datasets as equal.
this is a code for Keras model
train_data_dir = 'path'
validation_data_dir = 'path'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 10
batch_size = 16
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='binary')
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
model.load_weights('second_try.h5')
and the model saved well.
so I run the test code
from keras.models import load_model
from keras.preprocessing import image
import numpy as np
# dimensions of our images
img_width, img_height = 150, 150
# load the model we saved
model = load_model('modelpath')
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
#predicting multiple images at once
img = image.load_img('imgpath', target_size=(img_width, img_height))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
y = image.img_to_array(img)
y = np.expand_dims(y, axis=0)
images = np.vstack([x, y])
classes = model.predict_classes(images, batch_size=10)
print(classes)
and both images from different classes are printed as 1.
Why does this happening?
You should be saving the model or model weights after your training, rather than doing load_weights after fit(). So, 2 ways to do this
After model.fit(), do model.save_weights('second_try.hdf5') which only saves weights. To load the weights, you should first compile your model, and then call load_weights on the model as model.load_weights('second_try.hdf5')
After model.fit(), do model.save('model.hdf5') which saves the weights and the model structure to a single HDF5 file. Then you can use that HDF5 file with load() to reconstruct the whole model, including weights.
model = load_model('model.hdf5')
Also, do check if your test data is being prepared correctly. As your training loop uses a generator to prepare the data, you should use a similar generator for preparing the test data as well.

How to get y_true, y_pred when using fit_generator()?

I am using the fit_generator() method, to fit my data in batches.
I want to get the list of label values (predicted and actual/y_pred, y_true) to generate a confusion matrix etc.
The Keras metrics documentation has no information on this and any examples I have found only refer to the fit() method.
How can I get y_pred and y_true at the end of each epoch?
My code:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping, Callback
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
img_width, img_height = 200, 200
train_data_dir = 'augmentedImg/200/training_data'
validation_data_dir = 'augmentedImg/200/validation_data'
nb_train_samples = 9008
nb_validation_samples = 2251
epochs = 100
batch_size = 32
layer_size = 64
if K.image_data_format() == 'channels_first':
input_shape = (1, img_width, img_height)
else:
input_shape = (img_width, img_height, 1)
model = Sequential()
model.add(Conv2D(layer_size, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(layer_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(layer_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(layer_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('sigmoid'))
NAME="Phase10-Tryingauc_roc-%dSize-Grayscale-%depoch"% (img_width, epochs)
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
train_datagen = ImageDataGenerator(
rescale=1. / 255,
rotation_range=90,
width_shift_range=0.1,
height_shift_range=0.1,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
color_mode='grayscale',
shuffle = True,
batch_size=batch_size,
class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
color_mode='grayscale',
batch_size=batch_size,
class_mode='binary')
class_weights = class_weight.compute_class_weight(
'balanced',
np.unique(train_generator.classes),
train_generator.classes)
my_callbacks = [tensorboard, reduce_lr]
model.fit_generator(
train_generator,
class_weight=class_weights,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples,# // batch_size,
callbacks=my_callbacks
)
print("End of program")
To get label values you can use validation_generator.classes. It gives all the labels that are used for the validation. For more information you can have look on this code. It shows an example to do confusion matrix evaluation with keras data flow_from_directory.

Keras model overfiting

im working on a multi class image classification problem in keras. Using the dog-breeds dataset on kaggle. My accuracy for 12 breeds is 95% yet, my validation accuracy is only 50%. It looks like the model is overfitting, but im not sure what i would need to do to prevent overfitting
Here's my basic training setup
from keras.utils.np_utils import to_categorical
from keras.layers import Conv2D, Activation, MaxPooling2D
from keras import optimizers
from keras.layers.normalization import BatchNormalization
img_width, img_height = 224, 224
datagen_top = ImageDataGenerator(
rotation_range=180,
width_shift_range=0.2,
height_shift_range=0.2,
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
generator_top = datagen_top.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
shuffle=False)
nb_train_samples = len(generator_top.filenames)
num_classes = len(generator_top.class_indices)
train_data = bottleneck_features_train
# get the class lebels for the training data, in the original order
train_labels = generator_top.classes
# https://github.com/fchollet/keras/issues/3467
# convert the training labels to categorical vectors
train_labels = to_categorical(train_labels, num_classes=num_classes)
generator_top = datagen_top.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode=None,
shuffle=False)
nb_validation_samples = len(generator_top.filenames)
validation_data = bottleneck_features_validation
validation_labels = generator_top.classes
validation_labels = to_categorical(
validation_labels, num_classes=num_classes)
input_shape = train_data.shape[1:]
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer=optimizers.RMSprop(lr=2e-4),
loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels,
epochs=epochs,
batch_size=batch_size,
callbacks=[],
validation_data=(validation_data, validation_labels))
model.save_weights(top_model_weights_path)
(eval_loss, eval_accuracy) = model.evaluate(
validation_data, validation_labels, batch_size=batch_size, verbose=1)
notebook is on colab.
https://colab.research.google.com/drive/13RzXpxE-yMEuMFPHnmBpzD1gFXWxVyXK
A single layer network isn't gonna fly with an image classification problem. The network will never be able to generalize because there is no opportunity to. Try expanding the network with a few more layers and maybe try a CNN.
Example:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
activation='relu',
input_shape=input_shape))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer=optimizers.RMSprop(),
metrics=['accuracy'])
This usually happens when you have too many layers and the resulting dimensionality (after striding and pooling) is lower than the minimum input size (convolutional kernel) of a subsequent layer.
Which is the image size of the dog-breeds data?
Have you made sure that the reshaping works correctly?

Categories