Please help me understand why predict doesn't work correctly when the train-test accuracy is 0.97.
Is the problem in the data, or should the network be changed?
The input data are 32,500 RGB images (5 gestures with 6,500 images each) at 640*480 pixels.
Images are loaded and resized to IMG_WIDTH = 100, IMG_HEIGHT = 77.
Here's the function that loads and resizes the images and returns them (later converted to np.array).
def load_data(data_dir):
    """
    Load image data from directory `data_dir`.

    Assume `data_dir` has one directory named after each category, numbered
    0 through NUM_CATEGORIES - 1. Inside each category directory will be some
    number of image files.

    Return tuple `(images, labels)`. `images` should be a list of all
    of the images in the data directory, where each image is formatted as a
    numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
    be a list of integer labels, representing the categories for each of the
    corresponding `images`.
    """
    images = []
    labels = []
    for dir in range(0, NUM_CATEGORIES):
        # get path for each gesture
        d = os.path.join(data_dir, f"{str(dir)}")
        # os.listdir(d) returns the list of all names of images in that folder
        for image_path in os.listdir(d):
            # get the full path of the specific image
            full_path = os.path.join(data_dir, f"{str(dir)}", image_path)
            # returns an image that is loaded from the specified file
            image = cv2.imread(full_path)
            # target dimensions for each image
            dim = (100, 77)
            # resize the image
            image_resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
            # add the image and its directory name (label) to the images and labels lists
            images.append(image_resized)
            labels.append(dir)
    return images, labels
Here's my model.
def get_model():
    """
    Returns a compiled convolutional neural network model. Assume that the
    `input_shape` of the first layer is `(IMG_WIDTH=100, IMG_HEIGHT=77, 3)`.
    The output layer should have `NUM_GESTURE = 5` units, one for each category.
    """
    # Create a convolutional neural network
    model = tf.keras.models.Sequential(
        [
            # Convolutional layer. Learn 32 filters using a 5x5 kernel
            tf.keras.layers.Conv2D(
                32, (5, 5), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
            ),
            # Max-pooling layer, using 2x2 pool size
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            # Max-pooling layer, using 2x2 pool size
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Flatten(),
            # Add a hidden layer with dropout
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            # Add an output layer with one unit for each of the 5 gestures
            tf.keras.layers.Dense(5, activation='softmax')
        ])
    # Compile the neural network
    model.compile(
        optimizer='adam',
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
labels = tf.keras.utils.to_categorical(labels)
x_train, x_test, y_train, y_test = train_test_split(
    np.array(images), np.array(labels), test_size=0.4)
model = get_model()
model.fit(x_train, y_train, batch_size=64, epochs=10)
model.evaluate(x_test, y_test, verbose=2)
The result is 0.97.
From the video I save a frame and want to predict the hand gesture in real time.
GESTURE = {0:"ok", 1:"down", 2:"up", 3:"palm", 4:"l"}
video = cv2.VideoCapture(0)
while True:
    # Capture the video frame
    ret, img = video.read()
    # mirror the frame horizontally
    image = cv2.flip(img, 1)
    # save the image for prediction
    cv2.imwrite('Frame' + str(0) + '.jpg', image)
    image_addr = "Frame0.jpg"
    image = cv2.imread(image_addr)
    dim = (100, 77)
    image = tf.keras.preprocessing.image.load_img(image_addr, target_size=dim)
    # Converts a PIL Image instance to a Numpy array. Returns a 3D Numpy array.
    input_arr = tf.keras.preprocessing.image.img_to_array(image)
    # Convert the single image to a batch.
    input_arr = np.array([input_arr])
    input_arr = input_arr.astype('float32') / 255
    # Generates output predictions for the input samples. Returns Numpy array(s) of predictions.
    predictions = model.predict(input_arr)
    # Return the index array of the maximum values along an axis.
    pre_class = np.argmax(predictions, axis=-1)
    # text to write on the video
    text = GESTURE[pre_class[0]]
    font = cv2.FONT_HERSHEY_SIMPLEX
    image = cv2.flip(img, 1)
    cv2.putText(image,
                text,
                (50, 50),
                font, 2,
                (0, 0, 0),
                2,
                cv2.LINE_4)
    # Display the resulting frame
    cv2.imshow('video', image)
    # the 'q' button is set as the quitting button;
    # you may use any desired button of your choice
    k = cv2.waitKey(1)
    if k == ord('q'):
        break
video.release()
cv2.destroyAllWindows()
I am no expert, but typically when you perform well on the training and testing data (your 0.97 result) yet perform poorly on new end-user data, it is because of a data mismatch (although it could also be overfitting).
That is, the data you trained and tested on is so different from the real-world data (in pixel values, the probability distribution of pixels, or differences that are only noticeable to the model) that the model could not generalize to it and performed badly.
It is good practice to use the same kind of data you would see in production/the final product as a test set. Andrew Ng recommends this dataset split (applicable if you have enough data):
From the training data:
Training Set
Train-Dev (same as Validation, I think) Set
From the end-product data:
Development Set
Test Set
You can check this post for more information regarding why: https://cs230.stanford.edu/blog/split/
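For illustration, such a split could be sketched with scikit-learn like this (a rough sketch; dataset_images/dataset_labels and webcam_images/webcam_labels are placeholder names for the collected gesture dataset and for frames captured the same way as in production):
from sklearn.model_selection import train_test_split

# From the training data: Training set and Train-Dev set
x_train, x_train_dev, y_train, y_train_dev = train_test_split(
    dataset_images, dataset_labels, test_size=0.1)

# From the end-product data (e.g. saved webcam frames): Dev set and Test set
x_dev, x_test, y_dev, y_test = train_test_split(
    webcam_images, webcam_labels, test_size=0.5)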
The preprocessing in your training step does not match the preprocessing in your prediction step:
input_arr = input_arr.astype('float32')/255
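For example, the training and test data could be normalized the same way before fit/evaluate (a minimal sketch reusing the variable names from the question):
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
model.fit(x_train, y_train, batch_size=64, epochs=10)
model.evaluate(x_test, y_test, verbose=2)
Alternatively, in recent TensorFlow versions a tf.keras.layers.Rescaling(1./255) layer at the start of the model applies the same scaling everywhere automatically.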
Related
I am using ResNet50 as a base model to predict multiple labels in an image and sum up the respective values of the labels.
reading the data:
#read the data
data_path = '/content/drive/MyDrive/Notifyer-dataset/dataset'

def load_dataset(folder):
    X = []  # create an empty list to store the images
    y = []  # create an empty list to store the labels
    # get a list of all the files in the folder
    filenames = os.listdir(folder)
    # iterate over the files
    for filename in filenames:
        # get the label from the filename
        label = filename.split('_')[0]
        # open the image file and convert it to a NumPy array
        image = Image.open(os.path.join(folder, filename))
        image = image.resize((200, 200))  # resize the image to 200x200
        image = image.convert('RGB')  # convert the image to RGB
        image = np.array(image) / 255  # normalize the pixel values
        image = image.reshape(-1, 200, 200, 3)  # reshape to (batch_size, height, width, channels)
        # append the image and label to the list
        X.append(image)
        y.append(label)
    # convert the lists to NumPy arrays
    X = np.array(X)
    y = np.array(y)
    #preprocessing
    X = X.reshape(-1, 200, 200, 3)  # reshape arrays to 200x200 images with 3 channels
    X = X / 255.0  # normalize pixel values
    #one hot encoding
    num_classes = len(np.unique(y))
    y = to_categorical(y, num_classes)
    return X, y, num_classes

X, y, num_classes = load_dataset(data_path)
building the model:
def build_r_cnn_model(num_classes):
    """
    Build a region-based CNN model.

    Parameters:
        num_classes (int): number of classes to classify

    Returns:
        Model: the R-CNN model
    """
    # load the ResNet50 model pre-trained on ImageNet
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(200, 200, 3))
    # freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False
    # add a global average pooling layer
    x = base_model.output
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    # add a fully-connected layer
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    # add a dropout layer
    x = tf.keras.layers.Dropout(0.5)(x)
    # add a classification layer
    predictions = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
    #build the model
    model = Model(inputs=base_model.input, outputs=predictions)
    return model
compiling the model:
# build and compile the model
model = build_r_cnn_model(num_classes)
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
training the model:
#train
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val))
function to sum up all label values in the image:
#function to calculate total sum of value of predicted labels
def predict_total_sum(model, image):
    y_pred = model.predict(image)  # classify the image
    # define a lookup table to map class indices to values
    value_lookup = {
        0: 1,  # class 0 corresponds to value 1
        1: 2,  # class 1 corresponds to value 2
    }
    total_sum = 0
    for prediction in y_pred:
        # get the class index with the highest predicted probability
        class_index = np.argmax(prediction)
        print(class_index)
        # add the value of the detected denomination to the total sum
        total_sum += value_lookup[class_index]
    return total_sum
It gives a value of 1 or 2 for every image on each model compilation, which means it is predicting only one label even if the image contains objects of both labels.
My dataset is small and every image in it contains an object of only one of the labels. Do I need to diversify my dataset so the model can identify both labels in an image, or is there something wrong with the model architecture? I have also tried building a CNN model from scratch, but it gives the same result...
I think the output of model.predict has shape [1, num_of_classes] (you can verify this by printing its shape once). Hence, when you loop over y_pred you iterate only once and add a single class index to total_sum. Even if the shape were [num_of_classes], this is still not how you should approach predicting multiple labels per image; I would suggest reading more about how multi-class and multi-label classification are done.
You can take help from this link: https://www.kaggle.com/code/prateek0x/multiclass-image-classification-using-keras
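If the actual goal is to detect several labels in a single image (multi-label rather than single-label classification), one common setup is a sigmoid output per class plus a probability threshold. A rough sketch under that assumption, reusing the value_lookup idea from the question (the 0.5 threshold is an arbitrary choice):
import numpy as np

def predict_total_sum(model, image, threshold=0.5):
    # assumes the model ends in Dense(num_classes, activation='sigmoid')
    # and was trained with binary_crossentropy on multi-hot labels
    probs = model.predict(image)[0]  # shape: (num_classes,)
    value_lookup = {0: 1, 1: 2}
    # every class whose probability exceeds the threshold counts as detected
    detected_classes = np.where(probs > threshold)[0]
    return sum(value_lookup[int(c)] for c in detected_classes)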
I am trying to implement the shap.image_plot() function as described in the shap library documentation (link: https://shap.readthedocs.io/en/latest/example_notebooks/image_examples/image_classification/Multi-class%20ResNet50%20on%20ImageNet%20%28TensorFlow%29-checkpoint.html). However, when running it on my own CNN model I get an AssertionError: Labels must have same row count as shap values arrays!
Does somebody know how to fix this error?
The data used is found under this link: https://www.kaggle.com/ahmedhamada0/brain-tumor-detection
import numpy as np
import shap
import tensorflow as tf
import glob
import cv2
from tensorflow.keras import layers, models, losses
from sklearn.model_selection import train_test_split  # needed for the split below
class Data:
    def __init__(self, dataset_file_name, tumor_filename, no_tumor_filename, image_format):
        # first set of data for analysis
        self.images = []
        self.processed_images = []
        self.labels = []
        self.class_names = ["no tumor", "tumor"]
        # second set of data for feeding to the model
        self.img_shape = (225, 225, 3)  # (img_width, img_height, colour channels)
        # TODO: Import dataset
        self.path_tumor = dataset_file_name + "/" + tumor_filename + "/*." + image_format
        self.path_no_tumor = dataset_file_name + "/" + no_tumor_filename + "/*." + image_format
        for filename in glob.glob(self.path_tumor):
            image = np.asarray(cv2.imread(filename))  # load dataset
            self.images.append(image)
            self.labels.append(1)  # add label 1 if image has a tumor
        for filename in glob.glob(self.path_no_tumor):
            image = np.asarray(cv2.imread(filename))
            self.images.append(image)
            self.labels.append(0)
        for image in self.images:
            self.processed_images.append(preprocess_image(image))
        # convert to np array
        self.processed_images = np.asarray(self.processed_images)
        self.labels = np.asarray(self.labels)
        # convert array to float32:
        self.processed_images = self.processed_images.astype("float32")
        self.processed_images /= 255  # 1-255 (uint8) --> 0-1 (float32)

# TODO: Preprocess data --> reshape image to (225,225,3) (width,height,colour channels)
def preprocess_image(image):
    if image.shape != (225, 225, 3):
        # fixed aspect ratio resize
        max_dimension = max(image.shape[0], image.shape[1])  # largest dimension to scale to 225
        scaling_factor = float(225 / max_dimension)  # factor by which to scale
        resized = cv2.resize(image, (0, 0), fx=scaling_factor,
                             fy=scaling_factor)  # fixed aspect ratio rescaling
        # zero padding the dimension that isn't 225px
        if resized.shape[0] != 225:  # height padding
            height_pad = 225 - resized.shape[0]  # height pad on each side of image
            width_pad = 0
        elif resized.shape[1] != 0:  # width padding
            width_pad = 225 - resized.shape[1]
            height_pad = 0
        reshaped = cv2.copyMakeBorder(resized, height_pad, 0, width_pad, 0, cv2.BORDER_CONSTANT, 0)
        return reshaped
    else:
        return image
#load the data
data = Data("../dataset", "yes", "no", "jpg")
train_images, test_images, train_labels, test_labels = train_test_split(
    data.processed_images, data.labels, train_size=0.8, shuffle=True)
# TODO: Create Network
def run_model(epochs):
    # create the model
    model = models.Sequential()
    model.add(layers.Conv2D(32, kernel_size=(3, 3), input_shape=data.img_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, kernel_size=(3, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation="relu"))
    model.add(layers.Dense(1, activation="sigmoid"))
    model.summary()
    model.compile(optimizer='adam',
                  loss=losses.binary_crossentropy,
                  metrics=[tf.keras.metrics.BinaryAccuracy()])
    # train the model
    model.fit(train_images, train_labels, epochs=epochs, validation_data=(test_images, test_labels))
    return model
model = run_model(1)
# define a masker that is used to mask out partitions of the input image.
masker = shap.maskers.Image("inpaint_telea", test_images[0].shape)
# create an explainer with model and image masker
explainer = shap.Explainer(model.predict, masker, output_names=data.class_names)
# here we explain two images using 500 evaluations of the underlying model to estimate the SHAP values
shap_values = explainer(test_images[1:3].astype("double"), max_evals=500, batch_size=50, outputs=shap.Explanation.argsort.flip[:1])
# output with shap values
shap.image_plot(shap_values)
I have been trying to train a network consisting of a VGG16 network followed by some LSTM layers. Since my images are quite big, and because VGG16 does not scale with image size, I decided to break my images into small patches and train an LSTM to read an image piece by piece. Since I have a large dataset, I need the model to load the data batch-wise. I have made an attempt to build a custom data generator; however, I am not sure my implementation is correct, since I don't know how to see which images the model has loaded while it is training. I also don't know how to make it adaptable if I have a different number of patches at each epoch.
My dataset is organised as follows:
data|
    |---train|
        |---class1|
            |---image1|
                |---im1_patch1.tif
                |---im1_patch2.tif
                ...
                |---im1_patch352.tif
            |---image2|
                |---im2_patch1.tif
                |---im2_patch2.tif
                ...
                |---im2_patch352.tif
        |---class2|
            |---image3|
                |---im3_patch1.tif
                |---im3_patch2.tif
                ...
                |---im3_patch352.tif
            |---image4|
                |---im4_patch1.tif
                |---im4_patch2.tif
                ...
                |---im4_patch352.tif
As you can see, my images are already broken into patches, and I would like to load them batch-wise such that my tensor X for each batch has the dimensions [batch_size, n_patches, w, h, n_channels]: batch_size is the number of images in each batch, n_patches is the number of patches per image, w and h are the dimensions of each patch (fixed), and n_channels is the number of channels of each patch (fixed to 3).
First, I have some questions:
Does it make sense to have a batch_size > 1 for each epoch? I guess it doesn't make sense if my images have different numbers of patches, am I right?
Can I feed my LSTM a different number of patches at each epoch, for example if my images have different numbers of patches? I am not sure how to do it.
Let's say I load one image (consisting of n_patches) per epoch; what happens when the number of epochs is bigger than the number of images? Does the loading restart from 0 (it doesn't seem to in my code)? Maybe I should pick images randomly, what do you think?
My network architecture is the following:
vgg = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3)
)

for layer in vgg.layers:
    layer.trainable = False

model = Sequential()
model.add(TimeDistributed(vgg, input_shape=(npatches, w, h, nchannels)))
model.add(TimeDistributed(Flatten(name="flatten")))
model.add(TimeDistributed(Dense(4096, activation="relu")))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(Dense(4096, activation="relu")))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(Flatten(name="flatten")))
model.add(LSTM(264, activation='tanh', return_sequences=True))
model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(LSTM(64, activation='tanh', return_sequences=False))
model.add(Dense(64, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(1, activation='sigmoid'))
model.summary()
Finally, my attempt to build a custom image generator:
def LoadImages(folder, batch_size, npatches):
    batch_start = 0
    batch_end = n_patches - 1
    epoch_id = 0

    files = list(paths.list_images(folder))
    n_images = len(files)
    StopCriteria = n_images / (batch_size * n_patches)

    while True:
        while epoch_id < StopCriteria:
            patch_id = 0
            X = np.empty([batch_size, n_patches, 224, 224, 3])
            Y = np.empty([batch_size, n_patches])
            for image_path in files[batch_start:batch_end]:
                img = tf.keras.preprocessing.image.load_img(image_path, color_mode="rgb")
                input_arr = keras.preprocessing.image.img_to_array(img)
                X[epoch_id, patch_id, :, :, :] = input_arr
                if image_path.split("\\")[-3].split('/')[-1] == 'long':
                    Y[epoch_id, patch_id] = 0
                if image_path.split("\\")[-3].split('/')[-1] == 'short':
                    Y[epoch_id, patch_id] = 1
                patch_id += 1
            yield (X, Y)
            batch_start += batch_size + n_patches - 1
            batch_end += batch_size + n_patches - 1
            epoch_id += 1
Thank you!
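For reference, one way to make the loading explicit is tf.keras.utils.Sequence, which Keras indexes batch by batch. Below is only a rough sketch under the assumption that every image has the same number of patches and follows the directory layout shown above; all names are placeholders, and the class folders are mapped to labels 0/1 purely for illustration:
import os
import numpy as np
import tensorflow as tf

class PatchSequence(tf.keras.utils.Sequence):
    def __init__(self, root, batch_size, n_patches, h, w):
        self.batch_size = batch_size
        self.n_patches = n_patches
        self.h, self.w = h, w
        # one entry per image directory: (path_to_image_dir, label)
        self.samples = []
        for label, class_dir in enumerate(sorted(os.listdir(root))):
            for image_dir in sorted(os.listdir(os.path.join(root, class_dir))):
                self.samples.append((os.path.join(root, class_dir, image_dir), label))

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.samples) / self.batch_size))

    def __getitem__(self, idx):
        batch = self.samples[idx * self.batch_size:(idx + 1) * self.batch_size]
        X = np.empty((len(batch), self.n_patches, self.h, self.w, 3), dtype="float32")
        Y = np.empty((len(batch),), dtype="float32")
        for i, (image_dir, label) in enumerate(batch):
            patch_files = sorted(os.listdir(image_dir))[:self.n_patches]
            for j, patch_file in enumerate(patch_files):
                img = tf.keras.preprocessing.image.load_img(
                    os.path.join(image_dir, patch_file), target_size=(self.h, self.w))
                X[i, j] = tf.keras.preprocessing.image.img_to_array(img) / 255.0
            Y[i] = label  # one label per image, matching the final Dense(1, 'sigmoid')
        return X, Y
Each batch then has shape (batch_size, n_patches, h, w, 3) with one label per image, the instance can be passed directly to model.fit(...), and batch idx always corresponds to samples[idx*batch_size:(idx+1)*batch_size], which makes it easy to see what was loaded.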
I have an image containing 6 different bands.
I have pre-processed the image to make it ready for the model as follows:
ds1, image = raster.read(imagePath, bands='all')
then I reshaped it with pyrsgis
image = changeDimension(image)
Finally, I divide the values by 255
xTrain = xTrain / 255.0
and reshape it to be coupled with manually tagged classes
image= image.reshape((image.shape[0], 1, image.shape[1]))
I am trying to pass the image through an autoencoder that I've built with Keras and has the following structure:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(1, nBands)),
    keras.layers.Dense(14, activation='relu'),
    keras.layers.Dense(2, activation='relu', activity_regularizer=l1(10e-6)),
    keras.layers.Dense(14, activation='relu'),
    keras.layers.Dense(6, activation='sigmoid')])
The training works well.
When I use predicted = model.predict(newImg) I get the predicted array with the shape of (1391808, 6).
But the problem is, I do not know how I can reconstruct the image from the output so that I can finally save it.
In case anyone has the same issue, this was the solution:
predicted = model.predict(featuresHyderabad)
predictedArr = np.zeros(shape=(nBands, imageHeight, imageWidth))
for i in range(nBands):
    prediction = np.reshape(predicted[:, i], (imageHeight, imageWidth))
    predictedArr[i] = prediction
raster.export(predictedArr, metaData, filename=outFile, dtype='float', bands='all')
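As a side note, the per-band loop can be replaced by a single vectorized reshape, assuming predicted has shape (imageHeight * imageWidth, nBands) as above:
# (imageHeight * imageWidth, nBands) -> (nBands, imageHeight, imageWidth)
predictedArr = predicted.T.reshape(nBands, imageHeight, imageWidth)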
I have an image data set that I want to use to train a CNN. I have initialized a "tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter" object, which I understand is essentially an iterator that caches the training images in batches so that the entire data set need not be loaded at once.
I have received this error when trying to call model.fit():
ValueError: Error when checking input: expected conv2d_input to have 4 dimensions, but got array with
shape (None, 1)
I understand that I need to add a dimension to my model input. I want to add a channels dimension to my images. I have tried to use np.expand_dims() and tf.expand_dims() on my "tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter" object, but the former changes the object type and the latter is not supported for this class. Any help is appreciated. Below is my model structure:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.summary()
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
history = model.fit(train_data, epochs=10, validation_data=val_data, steps_per_epoch=x,
                    validation_steps=y)
I have been following the tutorial in the example listed here, https://www.tensorflow.org/tutorials/load_data/images, but have tried to create and load in my own data set.
Below is my tf pipeline:
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)
data_dir = 'C:\\Users\\rtlum\\Documents\\DataSci_Projects\\PythonTensorFlowProjects\\google-images-download\\images'
list_ds = tf.data.Dataset.list_files(str(data_dir+"*.jpg")) #Make dataset of file paths
class_names = ['sad', 'angry']
size = 0
for count in enumerate(list_ds):
    size += 1
val_data_size = size * .2

for f in list_ds.take(5):  # test for correct file paths
    print(f.numpy())
def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class-directory
    return parts[-2] == class_names

def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size.
    return tf.image.resize(img, [64, 64])

def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = list_ds.map(process_path)
for image, label in labeled_ds.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
shuffle_buffer_size = 1000

def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
    # This is a small dataset, only load it once, and keep it in memory.
    # use `.cache(filename)` to cache preprocessing work for datasets that don't
    # fit in memory.
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
        else:
            ds = ds.cache()
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    # Repeat forever
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return ds

ds = prepare_for_training(list_ds)

val_data = ds.take(int(val_data_size))
train_data = ds.skip(int(val_data_size))
Two problems,
Your wildcard for directory matching appears to be incorrect.
By looking at your code, it seems that your data needs to follow a structure like,
data
|- sad
   |- 1.jpg
   ...
|- angry
   |- 1.jpg
   ...
This is not what you're matching when you say,
tf.data.Dataset.list_files(str(data_dir+"*.jpg"))
it should be,
tf.data.Dataset.list_files(str(data_dir+os.path.sep+"*"+os.path.sep+"*.jpg"))
You have the wrong dataset,
You have,
ds = prepare_for_training(list_ds)
should be,
ds = prepare_for_training(labeled_ds)
Other issues,
You are resizing data to 64x64, but your model expects a 32x32 input.
You have 2 labels, but your model expects 10 classes.
You don't have a model compilation line (i.e. model.compile(...))
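Putting the last three points together, the adjustments could look roughly like this (a sketch that keeps the 64x64 resize from decode_img and the two sad/angry classes; adapt as needed):
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))  # match the 64x64 resize
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))  # 2 classes: sad, angry

# get_label returns a one-hot style boolean vector per image; you may need to cast
# it to float32 in process_path before using it with this loss
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])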