Tensorflow dataset with multiple inputs and target - python

I am trying to implement a model with the ArcFace Layer:
https://github.com/4uiiurz1/keras-arcface
To this end, I created a tf.data.Dataset like so:
images= tf.data.Dataset.from_tensor_slices(train.A_image.to_numpy())
target = tf.keras.utils.to_categorical(
train.Label.to_numpy(), num_classes=n_class, dtype='float32'
)
target = tf.data.Dataset.from_tensor_slices(target)
images= images.map(transform_img)
dataset = tf.data.Dataset.zip((images, target, target))
when I call model.fit(dataset)
I get the following error:
ValueError: Layer model expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=<unknown> dtype=float32>]
But this should work according to:
tf.data with multiple inputs / outputs in Keras
Can someone point out my folly?
Thanks!
Edit:
this solves some problems:
#reads in filepaths to images from dataframe train
images = tf.data.Dataset.from_tensor_slices(train.image.to_numpy())
#converts labels to one hot encoding vector
target = tf.keras.utils.to_categorical(train.Label.to_numpy(), num_classes=n_class, dtype='float32')
#reads in the image and resizes it
images= images.map(transform_img)
input_1 = tf.data.Dataset.zip((anchors, target))
dataset = tf.data.Dataset.zip((input_1, target))
And I think this is what we are trying to do. But I get a shape error for the targets: the shape is (n_class, 1) instead of just (n_class,).
I.e. the fit method throws this error
ValueError: Shapes (n_class, 1) and (n_class, n_class) are incompatible
and this warning
input expected is (None, n_class) but received an input of (n_class, 1)

I've adapted the solution to ArcFace, as you wanted. Here is the code; I've managed to train the model with it.
The first version builds the dataset with from_tensor_slices, using a dict as the original input. I used MNIST to test it out.
def map_data(inputs, outputs):
    """Convert one (features, label) example into the two-input form.

    Args:
        inputs: dict holding the raw image under 'image_input'. Its
            'label_input' entry is not read; it is rebuilt from `outputs`.
        outputs: integer class label of the example.

    Returns:
        A tuple ``({'image_input': image, 'label_input': labels}, labels)``.
        The same one-hot label is passed as a model input and as the target.
    """
    image = tf.cast(inputs['image_input'], tf.float32)
    # Scale to the 0 - 1 range (assumes 8-bit pixel values, as in MNIST).
    image = image / 255.
    # Insert a channel axis at position 2, e.g. (28, 28) -> (28, 28, 1).
    image = tf.expand_dims(image, axis=2)
    labels = tf.one_hot(outputs, 10)  # 10 classes, matching the MNIST test data
    return {'image_input': image, 'label_input': labels}, labels
dataset = tf.data.Dataset.from_tensor_slices(({
'image_input': x_train, 'label_input': y_train
}, y_train))
dataset = dataset.map(map_data)
dataset = dataset.batch(2)
Here is the second version I tried: it starts from a plain from_tensor_slices dataset, which I then convert to the multiple-input form in the map function, since the same labels are used for both the input and the output.
def map_data(images, annot_labels):
    """Convert one (image, label) example into the two-input form.

    Args:
        images: raw image of a single example.
        annot_labels: integer class label of the example.

    Returns:
        A tuple ``({'image_input': image, 'label_input': labels}, labels)``.
        The same one-hot label is passed as a model input and as the target.
    """
    image = tf.cast(images, tf.float32)
    image = image / 255.  # convert to 0 - 1 range (assumes 8-bit pixel values)
    # Insert a channel axis at position 2, e.g. (28, 28) -> (28, 28, 1).
    image = tf.expand_dims(image, axis=2)
    labels = tf.one_hot(annot_labels, 10)  # 10 classes, matching the MNIST test data
    return {'image_input': image, 'label_input': labels}, labels
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.map(map_data)
dataset = dataset.batch(2)

I think you should do it like this:
target = tf.keras.utils.to_categorical(train.Label.to_numpy(), num_classes=n_class, dtype='float32')
images_target = tf.data.Dataset.from_tensor_slices((train.A_image.to_numpy(), target))
images_target = images_target.map(lambda x, y: (transform_img(x), y))
target = tf.data.Dataset.from_tensor_slices(target)
dataset = tf.data.Dataset.zip((images_target, target))

Related

ResNet50 base model for multi-label classification predicting only label

I am using a ResNet50 as base model to predict multiple label in an image and sum up the respective values of the labels.
reading the data:
#read the data
data_path = '/content/drive/MyDrive/Notifyer-dataset/dataset'
def load_dataset(folder):
    """Load every image in `folder` and one-hot encode the labels.

    The label of a file is the part of its name before the first '_'.

    Args:
        folder: path of a directory whose entries are all image files.

    Returns:
        A tuple ``(X, y, num_classes)``: ``X`` is an array of shape
        (n_images, 200, 200, 3) with pixel values scaled to [0, 1], ``y`` is
        the one-hot encoded label array and ``num_classes`` is the number of
        distinct labels found.
    """
    X = []  # images
    y = []  # labels
    for filename in os.listdir(folder):
        # get the label from the filename
        label = filename.split('_')[0]
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(os.path.join(folder, filename)) as img:
            # resize to 200x200, then force 3 channels (RGB)
            pixels = np.array(img.resize((200, 200)).convert('RGB'))
        X.append(pixels)
        y.append(label)
    # Stack to (n_images, 200, 200, 3). The explicit reshape keeps that rank
    # even when the folder is empty.
    X = np.array(X).reshape(-1, 200, 200, 3)
    # Normalize exactly once. Dividing by 255 both per image and on the stacked
    # array would squeeze the values into [0, 1/255].
    X = X / 255.0
    # one hot encoding
    y = np.array(y)
    num_classes = len(np.unique(y))
    # NOTE(review): to_categorical presumably needs integer-like labels in
    # 0..num_classes-1 -- confirm the file-name prefixes satisfy that.
    y = to_categorical(y, num_classes)
    return X, y, num_classes
X, y, num_classes = load_dataset(data_path)
building the model:
def build_r_cnn_model(num_classes, output_activation='softmax'):
    """Build an image classifier on top of a frozen ResNet50 backbone.

    Despite the function name, the network contains no region-proposal stage:
    it is ResNet50 without its top, followed by global average pooling, a
    1024-unit dense layer, dropout and a dense classification layer.

    Parameters:
        num_classes (int): number of classes to classify
        output_activation (str): activation of the final layer. The default
            'softmax' makes the class scores compete (one label per image);
            pass 'sigmoid' to score every class independently, which is what
            multi-label prediction needs.
    Returns:
        Model: the uncompiled Keras model
    """
    # load the ResNet50 model pre-trained on ImageNet
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(200, 200, 3))
    # freeze the base model layers so only the new head is trained
    for layer in base_model.layers:
        layer.trainable = False
    # new classification head
    x = base_model.output
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    predictions = tf.keras.layers.Dense(num_classes, activation=output_activation)(x)
    # build the model
    model = Model(inputs=base_model.input, outputs=predictions)
    return model
compiling the model:
# build and compile the model
model = build_r_cnn_model(num_classes)
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
training the model:
#train
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val))
function to sum up all label values in the image:
#function to calculate total sum of value of predicted labels
def predict_total_sum(model, image, value_lookup=None, threshold=None):
    """Sum the values associated with the labels predicted for `image`.

    Args:
        model: object with a ``predict(image)`` method returning one row of
            class scores per input sample.
        image: batch passed unchanged to ``model.predict``.
        value_lookup: optional dict mapping class index -> value. Defaults to
            ``{0: 1, 1: 2}``.
        threshold: optional float. When None (default), only the highest
            scoring class of each row contributes, so each sample adds exactly
            one value. When given, every class whose score is >= threshold
            contributes, which allows several labels per sample.

    Returns:
        The total of the looked-up values over all rows (0 for an empty batch).

    Raises:
        KeyError: if a selected class index is missing from `value_lookup`.
    """
    if value_lookup is None:
        value_lookup = {
            0: 1,  # class 0 corresponds to value 1
            1: 2,  # class 1 corresponds to value 2
        }
    y_pred = model.predict(image)  # classify the image
    total_sum = 0
    for prediction in y_pred:
        if threshold is None:
            # single-label mode: class with the highest predicted score
            class_index = int(np.argmax(prediction))
            print(class_index)
            total_sum += value_lookup[class_index]
        else:
            # multi-label mode: every class that clears the threshold
            for class_index in np.flatnonzero(np.asarray(prediction) >= threshold):
                total_sum += value_lookup[int(class_index)]
    return total_sum
It gives the value 1 or 2 for every image, for each model compilation, which means it is predicting only one label even if the image has multiple objects of both labels.
My dataset is small and every image in it contains object of one of the label, do I need to diversify my dataset to make the model identify both labels in an image or is there something wrong with the model architecture? I have also tried to build a CNN model from scratch but it is giving the same result...
I think the output of model.predict has shape [1, num_of_classes] (you can verify this by printing its shape). Hence, when you loop over y_pred, you iterate only once and add the value of a single class index to total_sum. Even if the shape were [num_of_classes], I think this is not how you should approach multi-class classification. I would suggest reading more about how multiclass classification is done.
You can take help from this link: https://www.kaggle.com/code/prateek0x/multiclass-image-classification-using-keras

ValueError: `decode_predictions` expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 26)

I am using a model trained by myself to translate braille digits into plain text. As you can see this is a classification problem with 26 classes, one for each letter in the alphabet.
This is the dataset that I used to train my model: https://www.kaggle.com/datasets/shanks0465/braille-character-dataset
This is how I am generating my training and validation set:
import string

# Create ./images/ with one sub-folder per letter a-z. makedirs(exist_ok=True)
# lets the script be re-run without failing on folders that already exist.
os.makedirs('./images/', exist_ok=True)
for alpha in string.ascii_lowercase:
    os.makedirs('./images/' + alpha, exist_ok=True)
rootdir = "C:\\Users\\ffernandez\\Downloads\\capstoneProject\\Braille Dataset\\Braille Dataset\\"
# The first character of each file name selects the sub-folder it is copied to.
for file in os.listdir(rootdir):
    letter = file[0]
    copyfile(rootdir + file, './images/' + letter + '/' + file)
The resulting folder looks like this:
folder structure
And this is how I create the train and validation split:
datagen = ImageDataGenerator(rotation_range=20,
shear_range=10,
validation_split=0.2)
train_generator = datagen.flow_from_directory('./images/',
target_size=(28,28),
subset='training')
val_generator = datagen.flow_from_directory('./images/',
target_size=(28,28),
subset='validation')
Finally this is the code corresponding to the design, compilation and training of the model:
K.clear_session()
model_ckpt = ModelCheckpoint('BrailleNet.h5',save_best_only=True)
reduce_lr = ReduceLROnPlateau(patience=8,verbose=0)
early_stop = EarlyStopping(patience=15,verbose=1)
entry = L.Input(shape=(28,28,3))
x = L.SeparableConv2D(64,(3,3),activation='relu')(entry)
x = L.MaxPooling2D((2,2))(x)
x = L.SeparableConv2D(128,(3,3),activation='relu')(x)
x = L.MaxPooling2D((2,2))(x)
x = L.SeparableConv2D(256,(2,2),activation='relu')(x)
x = L.GlobalMaxPooling2D()(x)
x = L.Dense(256)(x)
x = L.LeakyReLU()(x)
x = L.Dense(64,kernel_regularizer=l2(2e-4))(x)
x = L.LeakyReLU()(x)
x = L.Dense(26,activation='softmax')(x)
model = Model(entry,x)
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
history = model.fit_generator(train_generator,validation_data=val_generator,epochs=666,
callbacks=[model_ckpt,reduce_lr,early_stop],verbose=0)
Then this is the code for testing an image of the letter 'a' in braille, which has the same size as the images in the training and validation sets (28x28):
img_path = "./test/a1.JPG10whs.jpg"
img = plt.imread(img_path)
img_array = tf.keras.utils.img_to_array(img)
img_batch = np.expand_dims(img_array, axis=0)
img_preprocessed = tf.keras.applications.resnet50.preprocess_input(img_batch)
prediction = model.predict(img_preprocessed)
print(tf.keras.applications.imagenet_utils.decode_predictions(prediction, top=3)[0])
Just when I execute that last line of code this error appears:
ValueError: decode_predictions expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 26)
A similar question I found here on stackoverflow (ValueError: `decode_predictions` expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 7)).
I've seen that using "decode_predictions" only makes sense if your model outputs the ImageNet classes (1000-dimensional) but if I can't use "decode_predictions" I don't know how to get my predictions.
My desired output would be like:
prediction = model.predict(img_preprocessed)
print(prediction)
output: 'a'
Any hint or suggestion on how to solve this issue is highly appreciated.
If we take a look at what the prediction object actually is, we can see that it has 26 values. These values are the probabilities that the model predicts for each letter:
So we need a way to map the prediction values to the respective letters.
A simple way to do this is to create a list of all 26 possible letters and look up the position of the max value in the prediction array. Example:
import string

# Create prediction labels from a-z.
# NOTE(review): this assumes class index i corresponds to the i-th letter,
# i.e. the generator numbered the a..z folders in alphabetical order -- confirm
# against train_generator.class_indices.
labels = list(string.ascii_lowercase)
# Look up the letter at the position of the max value in prediction
labels[np.argmax(prediction)]
The output should be the character with the highest probability:

How to train transfer-learning model on custom dataset? ValueError: Shape must be rank 4

I am trying to build a transfer learning model to classify images. The images are grayscale (2D). Previously I used the image_dataset_from_directory method to read the images and there was no problem. However, I am now trying to use a custom read function to have more control over and access to the data, such as knowing how many images are in each class. When using this custom read function, I get an error (shown below) while trying to train the model. I am not sure what caused this error.
part1: reading the dataset
import numpy as np
import os
import tensorflow as tf
import cv2
from tensorflow import keras
# neural network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental import preprocessing
IMG_WIDTH=160
IMG_HEIGHT=160
DATA_PATH = r"C:\Users\user\Documents\chest_xray"
TRAIN_DIR = os.path.join(DATA_PATH, 'train')
def create_dataset(img_folder):
    """Read every image below `img_folder`, one sub-directory per class.

    Args:
        img_folder: directory whose sub-directories each hold the images of
            one class; the sub-directory name is used as the class name.

    Returns:
        A tuple ``(img_data_array, class_name)`` of two equally long lists:
        the images as 2-D float32 arrays (grayscale, resized to
        IMG_WIDTH x IMG_HEIGHT, divided by 255) and the class name of each.
        Entries that are not directories and files OpenCV cannot decode are
        skipped.
    """
    img_data_array = []
    class_name = []
    for dir1 in os.listdir(img_folder):
        class_dir = os.path.join(img_folder, dir1)
        if not os.path.isdir(class_dir):
            # stray file next to the class folders
            continue
        for file in os.listdir(class_dir):
            image_path = os.path.join(class_dir, file)
            image = cv2.imread(image_path, 0)  # flag 0: load as grayscale
            if image is None:
                # cv2.imread returns None instead of raising on unreadable files
                continue
            # cv2.resize takes dsize as (width, height)
            image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
            image = image.astype('float32')
            image /= 255
            img_data_array.append(image)
            class_name.append(dir1)
    return img_data_array, class_name
# extract the image array and class name
img_data, class_name =create_dataset(TRAIN_DIR)
target_dict={k: v for v, k in enumerate(np.unique(class_name))}
target_dict
target_val= [target_dict[class_name[i]] for i in range(len(class_name))]
This part will produce a list of size 5232. Inside the list there are NumPy arrays of size 160x160 (float32).
part 2: creating the model
def build_model():
    """Build and compile a binary classifier on a frozen EfficientNetB7.

    The network takes (160, 160, 3) inputs, applies random augmentation
    layers, runs the ImageNet-pretrained EfficientNetB7 without its top, and
    ends in a single-unit dense layer that outputs a raw logit.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    inputs = tf.keras.Input(shape=(160, 160, 3))
    x = Sequential(
        [
            preprocessing.RandomRotation(factor=0.15),
            preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
            preprocessing.RandomFlip(),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="img_augmentation",
    )(inputs)
    base_model = tf.keras.applications.EfficientNetB7(include_top=False,
                                                      drop_connect_rate=0.4,
                                                      weights='imagenet',
                                                      input_tensor=x)
    # Freeze the pretrained weights
    base_model.trainable = False
    # Rebuild top
    x = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(base_model.output)
    x = tf.keras.layers.BatchNormalization()(x)
    top_dropout_rate = 0.2
    x = tf.keras.layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = tf.keras.layers.Dense(1, name="pred")(x)  # no activation: logit
    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        # The output is a logit, so the decision boundary is 0, not the 0.5
        # that the plain "accuracy" shortcut would apply to it. The metric
        # keeps the name "accuracy" so the history keys are unchanged.
        metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy", threshold=0.0)]
    )
    return model
model = build_model()
part 3: train the model
history = model.fit(x=np.array(img_data), y=np.array(target_val), epochs=5)
the error I get:
ValueError: Shape must be rank 4 but is rank 3 for '{{node
EfficientNet/img_augmentation/random_rotation_1/transform/ImageProjectiveTransformV3}} =
ImageProjectiveTransformV3[dtype=DT_FLOAT, fill_mode="REFLECT", interpolation="BILINEAR"]
(IteratorGetNext, EfficientNet/img_augmentation/random_rotation_1/rotation_matrix/concat,
EfficientNet/img_augmentation/random_rotation_1/transform/strided_slice,
EfficientNet/img_augmentation/random_rotation_1/transform/fill_value)' with input shapes:
[?,160,160], [?,8], [2], [].
The problem in the code is that OpenCV reads the image in grayscale format, but the grayscale format of the image returned is not (160,160,1) but (160,160).
Because of this fact, the error is thrown.
I managed to replicate your problem by testing it locally.
Say we randomly train on 12 samples.
Possible input formats:
#This one works
1. history = model.fit(x=np.random.rand(12,160,160,3), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) WORKS
#This one works
2. history = model.fit(x=np.random.rand(12,160,160,1), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) WORKS
#This one fails
3. history = model.fit(x=np.random.rand(12,160,160), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) FAILS
(1) and (2) work.
(3) fails, yielding:
ValueError: Shape must be rank 4 but is rank 3 for '{{node
EfficientNet/img_augmentation/random_rotation_4/transform/ImageProjectiveTransformV2}} = ImageProjectiveTransformV2[dtype=DT_FLOAT, fill_mode="REFLECT", interpolation="BILINEAR"](IteratorGetNext,
EfficientNet/img_augmentation/random_rotation_4/rotation_matrix/concat,
EfficientNet/img_augmentation/random_rotation_4/transform/strided_slice)'
with input shapes: [?,160,160], [?,8], [2].
Therefore, ensure that your data format is in the shape (160,160,1) or (160,160,3).
As an alternative, after you read the image with OpenCV, you can use
image = np.expand_dims(image,axis=-1)
to programmatically insert the last axis (the grayscale channel).

How to reshape data in Tensorflow dataset?

I am writing a data pipeline to feed batches of time-series sequences and corresponding labels into an LSTM model which requires a 3D input shape. I currently have the following:
def split(window):
    """Split one window into an (inputs, label) pair.

    The inputs are all elements except the last `label_length` ones; the label
    is the single element at index ``-label_length``.
    """
    # NOTE(review): for label_length > 1 this picks one element, not the last
    # label_length elements -- confirm that is intended.
    return window[:-label_length], window[-label_length]
dataset = tf.data.Dataset.from_tensor_slices(data.sin)
dataset = dataset.window(input_length + label_length, shift=label_shift, stride=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(input_length + label_length))
dataset = dataset.map(split, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.cache()
dataset = dataset.shuffle(shuffle_buffer, seed=shuffle_seed, reshuffle_each_iteration=False)
dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
dataset = dataset.prefetch(tf.data.AUTOTUNE)
The resulting shape of x in `for x, y in dataset.take(1): x.shape` is (32, 20), where 32 is the batch size and 20 the length of the sequence, but I need a shape of (32, 20, 1), where the additional dimension denotes the feature.
My question is how I can reshape, ideally in the split function that is passed into the dataset.map function before caching the data?
That's easy. Do this in your split function
def split(window):
    """Split one window into (inputs, label), adding the feature axis.

    ``tf.newaxis`` appends a trailing axis of size 1 to the inputs, so a
    length-20 sequence becomes (20, 1) and a batch of 32 becomes (32, 20, 1).
    The label is indexed with two new axes, giving it shape (1, 1).
    """
    return window[:-label_length, tf.newaxis], window[-label_length, tf.newaxis, tf.newaxis]

Tensorflow get validation loss issue

I am confused about how to calculate the validation loss for my model. My problem is classifying the pixels of an image.
So, after training, in the validation step, I can get my prediction image with this code:
output_image = sess.run(network,feed_dict={net_input:input_image})
But, if i run:
cost , output_image = sess.run([loss,network],feed_dict={net_input: input_image, net_output: output_image})
I get this exception: Cannot feed value of shape (1, 480, 480, 3) for Tensor 'Placeholder_1:0', which has shape '(?, ?, ?, 2)'
I don't understand why, the shape of input and output images are the same (1, 480, 480, 3).
What is the way to get the cost of validation image?
EDIT:
This is my code:
net_input = tf.placeholder(tf.float32,shape=[None,None,None,3])
net_output = tf.placeholder(tf.float32,shape=[None,None,None,num_classes])
network, _ = model_builder.build_model(args.model, net_input=net_input, num_classes=num_classes, crop_width=args.crop_width, crop_height=args.crop_height)
sess.run(tf.global_variables_initializer())
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=network, labels=net_output))
opt = tf.train.RMSPropOptimizer(learning_rate=0.0001, decay=0.995).minimize(loss, var_list=[var for var in tf.trainable_variables()])
saver=tf.train.Saver(max_to_keep=1000)
sess.run(tf.global_variables_initializer())
# Set random seed to make sure models are validated on the same validation images.
# So you can compare the results of different models more intuitively.
val_indices=random.sample(range(0,len(val_input_names)),num_vals)
# Do the training here
for epoch in range(args.epoch_start_i, args.num_epochs):
current_losses = []
# Equivalent to shuffling
id_list = np.random.permutation( len(train_input_names) )
num_iters = int(np.floor(len(id_list) / args.batch_size))
#for i in range(num_iters):
description_train = '[i]Train Epoch {:>2}/{}'.format(epoch + 1, args.num_epochs)
for i in tqdm(range(num_iters), desc=description_train, unit='batch'):
input_image_batch = []
output_image_batch = []
# Collect a batch of images
for j in range(args.batch_size):
index = i*args.batch_size + j
id = id_list[index]
input_image = utils.load_image(train_input_names[id])
output_image = utils.load_image(train_output_names[id])
with tf.device('/cpu:0'):
input_image, output_image = data_augmentation(input_image, output_image)
# Prep the data. Make sure the labels are in one-hot format
input_image = np.float32(input_image) / 255.0
output_image = np.float32(helpers.one_hot_it(label=output_image, label_values=label_values))
input_image_batch.append(np.expand_dims(input_image, axis=0))
output_image_batch.append(np.expand_dims(output_image, axis=0))
if args.batch_size == 1:
input_image_batch = input_image_batch[0]
output_image_batch = output_image_batch[0]
else:
input_image_batch = np.squeeze(np.stack(input_image_batch, axis=1))
output_image_batch = np.squeeze(np.stack(output_image_batch, axis=1))
# Do the training
_,current=sess.run([opt,loss],feed_dict={net_input:input_image_batch,net_output:output_image_batch})
# Do the validation on a small set of validation images
description_val = '[i]Validazione {:>2}/{}'.format(epoch + 1, args.num_epochs)
loss_val = [];
for ind in tqdm(val_indices, total=len(val_indices), desc=description_val, unit='img'):
input_image = np.expand_dims(np.float32(utils.load_image( val_input_names[ind] )[:args.crop_height, :args.crop_width]),axis=0)/255.0
output_image = np.expand_dims(np.float32(utils.load_image(val_output_names[ind])[:args.crop_height, :args.crop_width]), axis=0) / 255.0
#Do the validation
output_image = sess.run(network, feed_dict={net_input: input_image})  # forward pass only
Looks like the number of classes (num_classes) is two in your case. So output_image you are feeding to sess.run() as net_output should have only two channels. But in your case, you have three channels and that's why you are getting this error.
Use helpers.one_hot_it() to get a one-hot mask of your output image. You will have to expand its dimensions using np.expand_dims() to make it a batch of one image, since the network accepts a batch at a time, not a single image.
You can make use of the following code snippet to get validation loss:
# Do the validation on a small set of validation images
description_val = '[i]Validazione {:>2}/{}'.format(epoch + 1, args.num_epochs)
loss_val = [];
for ind in tqdm(val_indices, total=len(val_indices), desc=description_val, unit='img'):
input_image = np.expand_dims(np.float32(utils.load_image( val_input_names[ind] )[:args.crop_height, :args.crop_width]),axis=0)/255.0
output_image = utils.load_image(val_output_names[ind])[:args.crop_height, :args.crop_width]
output_one_hot = helpers.one_hot_it(output_image, label_values)
#Do the validation
output_image, loss_value = sess.run([network, loss], feed_dict={net_input: input_image, net_output: np.expand_dims(output_one_hot, axis=0)})
The problem lies in net_output = tf.placeholder(tf.float32,shape=[None,None,None,num_classes]). If this is the known classification of the image, then it should look like this (if those are already logits):
net_output = tf.placeholder(tf.float32,shape=[None, num_classes])
To train your network you would do:
_, eval_loss = sess.run([opt, loss], feed_dict={net_input: input_image_batch, net_output: output_image_batch})
Because you want to run the optimizer you fetch opt, and because you want to monitor the loss you fetch loss.
To get the classification of an image you would do (after training):
classified_images = sess.run([network], feed_dict={net_input: input_image_batch})

Categories