TensorFlow batch tensors with different shapes - python

I am studying neural networks and have run into what is probably a silly problem that I can't figure out. For my first ever network, I have to create a flower image classifier in Keras and TensorFlow using the oxford_flowers102 dataset and the pre-trained MobileNet model from TensorFlow Hub.
The issue seems to be that the images are not re-sized to (224,224,3); they keep their original shapes, which differ from one another. However, according to my class material, my re-sizing code is correct, so I don't understand what is going on and what I am doing wrong.
Thank you very much for all your help.
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub

# LOADING
dataset, dataset_info = tfds.load('oxford_flowers102', as_supervised=True, with_info=True)
training_set, testing_set, validation_set = dataset['train'], dataset['test'], dataset['validation']

# PROCESSING AND BATCHES
def normalize(img, lbl):
    img = tf.cast(img, tf.float32)
    img = tf.image.resize(img, size=(224, 224))
    img /= 255
    return img, lbl

batch_size = 64
# train_examples is assumed to be dataset_info.splits['train'].num_examples
training_batches = training_set.cache().shuffle(train_examples//4).batch(batch_size).map(normalize).prefetch(1)
validation_batches = validation_set.cache().batch(batch_size).map(normalize).prefetch(1)
testing_batches = testing_set.cache().batch(batch_size).map(normalize).prefetch(1)
# BUILDING THE NETWORK
URL = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
mobile_net = hub.KerasLayer(URL, input_shape=(224, 224, 3))
mobile_net.trainable = False

skynet = tf.keras.Sequential([
    mobile_net,
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')  # num_classes = 102 for oxford_flowers102
])

# TRAINING THE NETWORK
skynet.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

EPOCHS = 25
history = skynet.fit(training_batches,
                     epochs=EPOCHS,
                     validation_data=validation_set,
                     callbacks=[early_stopping])
ERROR:
InvalidArgumentError: Cannot batch tensors with different shapes in component 0. First element had shape [590,501,3] and element 1 had shape [500,752,3].
[[node IteratorGetNext (defined at /opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_18246]
Function call stack:
distributed_function

The problem is that in your input pipeline you batch the dataset before you make the images the same size. Your normalize(img, lbl) function is written to handle a single image, not a complete batch.
So, to make your code run, call the map API before the batch API, as shown below.
batch_size = 64
training_batches = training_set.cache().map(normalize).batch(batch_size).prefetch(1)
validation_batches = validation_set.cache().map(normalize).batch(batch_size).prefetch(1)
testing_batches = testing_set.cache().map(normalize).batch(batch_size).prefetch(1)
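The snippet above drops the shuffle call from your original training pipeline. If you still want the training examples shuffled each epoch, keep the shuffle, but make sure it also runs before batching. A minimal sketch, assuming num_train_examples is read from dataset_info (it plays the role of the train_examples variable in the question):

num_train_examples = dataset_info.splits['train'].num_examples   # 1020 images in the oxford_flowers102 train split

training_batches = (training_set
                    .cache()
                    .shuffle(num_train_examples // 4)   # shuffle individual examples
                    .map(normalize)                     # resize every image to (224, 224, 3)
                    .batch(batch_size)                  # batching works once all shapes match
                    .prefetch(1))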

Related

Why is TensorFlow putting all data into system memory?

I keep getting an OOM error for system memory (not GPU memory), but I'm not sure which function is causing TensorFlow to load everything into RAM. A month ago I ran an image classifier on a different dataset half the current size and copied the code with some small changes. There are two changes that could cause OOM compared to the previous dataset: 1) the image sizes are much larger, but I resize them to 224x224 early on, so I don't think that should have any effect at runtime; 2) the dataset is double the size, but I am not using cache or shuffle this time around, so I'm not sure why it isn't just the batch size being loaded into memory.
def read_and_decode(filename, label):
    # Returns a tensor with byte values of the entire contents of the input filename.
    img = tf.io.read_file(filename)
    # Decoding raw JPEG tensor data into 3D (RGB) uint8 pixel value tensor
    img = tf.io.decode_jpeg(img, channels=3)
    # Resize
    img = tf.image.resize_with_pad(
        img,
        224,
        224,
        method=tf.image.ResizeMethod.BILINEAR,
        antialias=False
    )
    img = preprocess_input(img)
    return img, label

ds_oh = tf.data.Dataset.from_tensor_slices((img_paths, oh_input))
ds_oh = ds_oh.map(read_and_decode)
All data is now in ds_oh, size 224x224, with correct labels.
def ds_split(ds, ds_size, shuffle_size, train_split=0.8, val_split=0.2, shuffle=True):
    assert (train_split + val_split) == 1
    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=99)
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    return train_ds, val_ds

train_ds, val_ds = ds_split(ds_oh, len(img_paths), len(img_paths), train_split=0.8, val_split=0.2, shuffle=True)
Split into train and validate datasets, shuffled.
#One hot
#train_ds = train_ds.cache()
#train_ds = train_ds.shuffle(buffer_size=len(img_paths), reshuffle_each_iteration=True)
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
#val_ds = val_ds.cache()
val_ds = val_ds.batch(BATCH_SIZE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)
Batching and prefetching; caching and shuffling removed because of the OOM error.
# input layers
inputs = tf.keras.Input(shape=(224, 224, 3))
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))(inputs)
# creating our new model head to combine with the ResNet base model
head_model = MaxPool2D(pool_size=(4, 4))(base_model)
head_model = Flatten(name='flatten')(head_model)
head_model = Dense(1024, activation='relu')(head_model)
head_model = Dropout(0.2)(head_model)
head_model = Dense(512, activation='relu')(head_model)
head_model = Dropout(0.2)(head_model)
head_model = Dense(29, activation='softmax')(head_model)
# final configuration
model = Model(inputs, head_model)
model.layers[2].trainable = False
optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
Model built
INITIAL_EPOCHS = 35
history = model.fit(train_ds,
                    epochs=INITIAL_EPOCHS,
                    validation_data=val_ds)
Epoch 1/35
Fails before first epoch
For anyone wondering, the problem was in splitting my tf.data dataset (all images are in one combined folder location) into train and val. The function I found online (ds_split) caused a memory leak for some reason; even using take(1) on the dataset caused an OOM error. I tried a second, similar function I found online and ran into the same issues.
I decided to use train_test_split from scikit-learn on the list of image file paths and labels, and to build two tf.data datasets right off the bat. Everything appears to be working fine now; a rough sketch of that approach is below.
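That approach looks roughly like the sketch below; make_ds is a helper name introduced here, and img_paths, oh_input, read_and_decode and BATCH_SIZE are the same objects as in the question:

from sklearn.model_selection import train_test_split
import tensorflow as tf

# Split the lists of file paths and one-hot labels before building any tf.data pipeline.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    img_paths, oh_input, test_size=0.2, random_state=99)

def make_ds(paths, labels, training=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    ds = ds.map(read_and_decode, num_parallel_calls=tf.data.AUTOTUNE)
    if training:
        ds = ds.shuffle(1024)   # small fixed buffer instead of shuffling the whole dataset
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

train_ds = make_ds(train_paths, train_labels, training=True)
val_ds = make_ds(val_paths, val_labels)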

Decode prediction of custom keras model

Some days ago I started with ML because I wanted to build an hCaptcha solver. I have everything ready; I just need to train a model that will classify the captcha images so I can send a request with the correct answer and get the captcha token.
I've looked into some tutorials on how to train my own model with several classes. I have it set up the following way:
1 trainer folder, 1 validation folder and 1 testing folder. In the trainer and validation folders there are subfolders named airplane, truck, boat, train, ..., each containing approx. 20 images. The testing folder holds some random images related to the classes I have.
I have trained the model and it seems like I'm getting an accuracy of 1. Then I take some of the random testing images and try to predict them using this saved model. It does its job and predicts them, returning an array of numbers. The thing is, I don't know how to decode those predictions, nor how to see the list of classes with their corresponding integers before predicting.
I'm super new on this so I'm sure anything will help :)
My code below:
import os
from keras.preprocessing import image
from keras.models import Sequential
from keras import layers
from keras.models import load_model
import numpy as np
trainer_path = "./img/trainer"
validator_path = "./img/validator"
testing_path = "./img/tester"
WIDTH = 128
HEIGHT = 128
BATCH = 30
EPOCHS = 15
train_dataset = image.image_dataset_from_directory(
    trainer_path,
    label_mode="int",
    batch_size=BATCH,
    image_size=(WIDTH, HEIGHT)
)
validator_dataset = image.image_dataset_from_directory(
    validator_path,
    label_mode="int",
    batch_size=BATCH,
    image_size=(WIDTH, HEIGHT)
)
model = Sequential([
    layers.Input((WIDTH, HEIGHT, 3)),
    layers.Conv2D(16, 3, padding="same"),
    layers.Conv2D(32, 3, padding="same"),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(10)
])
model.compile(
    optimizer="adam",
    loss=[
        "sparse_categorical_crossentropy"
    ],
    metrics=["accuracy"]
)
model_fit = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=validator_dataset,
    verbose=2
)
# loading the saved model (this assumes the model was saved earlier, e.g. with model.save("./model"))
model = load_model("./model")

for i in os.listdir(testing_path):
    img = image.load_img(testing_path + "/" + i, target_size=(WIDTH, HEIGHT, 3))
    img_array = image.img_to_array(img)
    img_batch = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_batch)
    print(prediction)
    print()
Output example:
[[ 875.5614 3123.8257 1521.7046 90.056526 335.5274
-785.3671 1075.9199 1105.3068 -14.917503 -3745.6494 ]]
You have to apply an activation function on the last Dense layer; if you want to classify the image it should be softmax (you will get probabilities for all classes). Here is the link:
https://keras.io/api/layers/activations/
When it comes to class names, they are sorted by alphanumerical values by default; you can also pass the class_names argument. A short sketch of how to decode the predictions follows the link below, which documents the arguments of this function:
https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory
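Putting both hints together, a hedged sketch of the decoding step (class_names is read from the dataset returned by image_dataset_from_directory; the example names in the comment are only illustrative):

import numpy as np

# Give the last layer a softmax activation so the outputs are class probabilities:
#     layers.Dense(10, activation="softmax")

# The subfolder names, sorted alphanumerically, define the integer labels.
class_names = train_dataset.class_names    # e.g. ['airplane', 'boat', 'train', 'truck', ...]

prediction = model.predict(img_batch)              # shape (1, 10): one probability per class
predicted_index = int(np.argmax(prediction[0]))
print(class_names[predicted_index], float(prediction[0][predicted_index]))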

Why is my model giving poor accuracy when the data is loaded using tf.data?

I am new to the tf.data API and trying to use it to load images from disk for the Dogs vs. Cats Redux: Kernels Edition Kaggle competition. To do this, I first created a pandas DataFrame named train_df with two columns - file_path containing the relative paths of the images and target containing the target labels 0 (for cat) and 1 (for dog). Here's what the first 10 rows of the DataFrame look like:
Then, I tried loading the images with the following code:
import tensorflow as tf
BATCH_SIZE = 128
IMG_HEIGHT = 224
IMG_WIDTH = 224
def read_images(X, y):
    X = tf.io.read_file(X)
    X = tf.io.decode_image(X, expand_animations=False, dtype=tf.float32, channels=3)
    X = tf.image.resize(X, [IMG_HEIGHT, IMG_WIDTH])
    X = tf.keras.applications.efficientnet.preprocess_input(X, data_format="channels_last")
    return (X, y)

def build_data_pipeline(X, y):
    data = tf.data.Dataset.from_tensor_slices((X, y))
    data = data.map(read_images)
    data = data.batch(BATCH_SIZE)
    data = data.prefetch(tf.data.AUTOTUNE)
    return data

tf_data = build_data_pipeline(train_df["file_path"], train_df["target"])
After this, I tried training my model using the following code
model.fit(tf_data, epochs=10)
but got a training accuracy of only 50%, whereas with ImageDataGenerator I am getting an accuracy of 99%. Thus the problem lies somewhere in the data loading part, which I am not able to find.
I have used EfficientNetB0 with weights pre-trained on ImageNet as the feature extractor and a single-neuron layer at the end as the classifier.
Pretrained EfficientNetB0 model:
pretrained_model = tf.keras.applications.EfficientNetB0(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    include_top=False,
    weights="imagenet"
)
for layer in pretrained_model.layers:
    layer.trainable = False
Dense layer with one neuron at the end of the EfficientNetB0:
pretrained_output = pretrained_model.get_layer('top_activation').output
x = tf.keras.layers.GlobalAveragePooling2D()(pretrained_output)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(1, activation="sigmoid")(x)
model = tf.keras.models.Model(pretrained_model.input, x)
Compiling the model:
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)
In the above notebook, change the input reading function read_images as follows:
def read_images(X, y):
    X = tf.io.read_file(X)
    X = tf.image.decode_jpeg(X, channels=3)
    X = tf.image.resize(X, [IMG_HEIGHT, IMG_WIDTH])  # /255.0
    return (X, y)
Also note that tf.keras.applications.EfficientNetBx models have a built-in normalization layer, so it's better not to normalize the data in the above function (i.e. no /255.0). A quick way to sanity-check what the pipeline now produces is sketched below.
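This check just pulls one batch from the tf_data pipeline built in the question and prints its shape, dtype and pixel range; raw floats in roughly the 0-255 range are what EfficientNet's built-in rescaling expects:

images, labels = next(iter(tf_data.take(1)))
print(images.shape)       # (128, 224, 224, 3)
print(images.dtype)       # float32 (tf.image.resize casts the uint8 pixels to float32)
print(float(tf.reduce_min(images)), float(tf.reduce_max(images)))   # roughly 0.0 .. 255.0
print(labels[:10])        # 0 = cat, 1 = dog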

Gradcam with guided backprop for transfer learning in Tensorflow 2.0

I get an error using gradient visualization with transfer learning in TF 2.0. The gradient visualization works on a model that does not use transfer learning.
When I run my code I get the error:
assert str(id(x)) in tensor_dict, 'Could not compute output ' + str(x)
AssertionError: Could not compute output Tensor("block5_conv3/Identity:0", shape=(None, 14, 14, 512), dtype=float32)
When I run the code below it errors. I think there's an issue with the naming conventions or connecting inputs and outputs from the base model, vgg16, to the layers I'm adding. Really appreciate your help!
"""
Broken example when grad_model is created.
"""
!pip uninstall tensorflow
!pip install tensorflow==2.0.0
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
IMAGE_PATH = '/content/cat.3.jpg'
LAYER_NAME = 'block5_conv3'
model_layer = 'vgg16'
CAT_CLASS_INDEX = 281
imsize = (224,224,3)
img = tf.keras.preprocessing.image.load_img(IMAGE_PATH, target_size=(224, 224))
plt.figure()
plt.imshow(img)
img = tf.io.read_file(IMAGE_PATH)
img = tf.image.decode_jpeg(img)
img = tf.cast(img, dtype=tf.float32)
# img = tf.keras.preprocessing.image.img_to_array(img)
img = tf.image.resize(img, (224,224))
img = tf.reshape(img, (1, 224,224,3))
input = layers.Input(shape=(imsize[0], imsize[1], imsize[2]))
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet',
input_shape=(imsize[0], imsize[1], imsize[2]))
# base_model.trainable = False
flat = layers.Flatten()
dropped = layers.Dropout(0.5)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
fc1 = layers.Dense(16, activation='relu', name='dense_1')
fc2 = layers.Dense(16, activation='relu', name='dense_2')
fc3 = layers.Dense(128, activation='relu', name='dense_3')
prediction = layers.Dense(2, activation='softmax', name='output')
for layr in base_model.layers:
    if 'block5' in layr.name:
        layr.trainable = True
    else:
        layr.trainable = False

x = base_model(input)
x = global_average_layer(x)
x = fc1(x)
x = fc2(x)
x = prediction(x)
model = tf.keras.models.Model(inputs=input, outputs=x)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])
This portion of the code is where the error lies. I'm not sure what the correct way to specify the inputs and outputs is.
# Create a graph that outputs the target convolution and the model output
grad_model = tf.keras.models.Model(inputs=[model.input, model.get_layer(model_layer).input],
                                   outputs=[model.get_layer(model_layer).get_layer(LAYER_NAME).output,
                                            model.output])
print(model.get_layer(model_layer).get_layer(LAYER_NAME).output)

# Get the score for the target class
with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img)
    loss = predictions[:, 1]
The section below is for plotting a heatmap of gradcam.
print('Prediction shape:', predictions.get_shape())
# Extract filters and gradients
output = conv_outputs[0]
grads = tape.gradient(loss, conv_outputs)[0]
# Apply guided backpropagation
gate_f = tf.cast(output > 0, 'float32')
gate_r = tf.cast(grads > 0, 'float32')
guided_grads = gate_f * gate_r * grads
# Average gradients spatially
weights = tf.reduce_mean(guided_grads, axis=(0, 1))
# Build a ponderated map of filters according to gradients importance
cam = np.ones(output.shape[0:2], dtype=np.float32)
for index, w in enumerate(weights):
    cam += w * output[:, :, index]
# Heatmap visualization
cam = cv2.resize(cam.numpy(), (224, 224))
cam = np.maximum(cam, 0)
heatmap = (cam - cam.min()) / (cam.max() - cam.min())
cam = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
output_image = cv2.addWeighted(cv2.cvtColor(img.astype('uint8'), cv2.COLOR_RGB2BGR), 0.5, cam, 1, 0)
plt.figure()
plt.imshow(output_image)
plt.show()
I also asked this to the tensorflow team on github at https://github.com/tensorflow/tensorflow/issues/37680.
I figured it out. If you set up the model extending the vgg16 base model with your own layers, rather than inserting the base model into a new model like a layer, then it works.
First set up the model and be sure to declare the input_tensor.
inp = layers.Input(shape=(imsize[0], imsize[1], imsize[2]))
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_tensor=inp,
input_shape=(imsize[0], imsize[1], imsize[2]))
This way we don't have to include a line like x=base_model(inp) to show what input we want to put in. That's already included in tf.keras.applications.VGG16(...).
Instead of putting this vgg16 base model inside another model, it's easier to do gradcam by adding layers to the base model itself. I grab the output of the last layer of VGG16 (with the top removed), which is the pooling layer.
block5_pool = base_model.get_layer('block5_pool')
x = global_average_layer(block5_pool.output)
x = fc1(x)
x = prediction(x)
model = tf.keras.models.Model(inputs = inp, outputs = x)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
loss='binary_crossentropy',
metrics=['accuracy'])
Now, I grab the layer for visualization, LAYER_NAME='block5_conv3'.
# Create a graph that outputs the target convolution and the model output
grad_model = tf.keras.models.Model(inputs=[model.input],
                                   outputs=[model.output, model.get_layer(LAYER_NAME).output])
print(model.get_layer(LAYER_NAME).output)

# Get the score for the target class
with tf.GradientTape() as tape:
    predictions, conv_outputs = grad_model(img)
    loss = predictions[:, 1]
print('Prediction shape:', predictions.get_shape())
# Extract filters and gradients
output = conv_outputs[0]
grads = tape.gradient(loss, conv_outputs)[0]
We (I plus a number of team members developing a project) found a similar problem with code implementing Grad-CAM that we found in a tutorial.
That code didn't work with a model consisting of the base model of VGG19 plus a few extra layers added on top of it. The problem was that the VGG19 base model was inserted as a "layer" inside our model, and apparently the GradCAM code didn't know how to deal with it - we were getting a "Graph disconnected..." error. Then after some debugging (carried out by another team member, not me) we managed to modify the original code to make it work for this kind of model that contains another model inside it. The idea is to add the inner model as an extra argument of the class GradCAM. Since this may be helpful to others I am including the modified code below (we also renamed the GradCAM class as My_GradCAM).
class My_GradCAM:
    def __init__(self, model, classIdx, inner_model=None, layerName=None):
        self.model = model
        self.classIdx = classIdx
        self.inner_model = inner_model
        if self.inner_model == None:
            self.inner_model = model
        self.layerName = layerName

    [...]

        gradModel = tensorflow.keras.models.Model(inputs=[self.inner_model.inputs],
                                                  outputs=[self.inner_model.get_layer(self.layerName).output,
                                                           self.inner_model.output])
Then the class can be instantiated by adding the inner model as the extra argument, e.g.:
cam = My_GradCAM(model, None, inner_model=model.get_layer("vgg19"), layerName="block5_pool")
I hope this helps.
Edit: Credit to Mirtha Lucas for doing the debugging and finding the solution.
After a lot of struggle, I have condensed the way to draw the heat map when you are using transfer learning. Here is the Keras official tutorial.
The issue I encountered is that, when trying to draw the heat map from my model, the DenseNet can only be seen as a functional layer inside my model, so make_gradcam_heatmap cannot find the layers nested inside that functional layer (this is what the 5th layer of the model summary shows).
Therefore, to mirror the Keras official example, I need to use only the DenseNet as the model for visualization. Here are the steps:
Take the inner model out of your model:
dense_model = dense_model.get_layer('densenet121')
Copy the weights from the dense model into your newly initialized model:
inputs = tf.keras.Input(shape=(224, 224, 3))
model = model_builder(weights="imagenet", include_top=True, input_tensor=inputs)
for layer, dense_layer in zip(model.layers[1:], dense_model.layers[1:]):
    layer.set_weights(dense_layer.get_weights())
relu = model.get_layer('relu')
x = tf.keras.layers.GlobalAveragePooling2D()(relu.output)
outputs = tf.keras.layers.Dense(5)(x)
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
Draw the heat map
preprocess_input = keras.applications.densenet.preprocess_input
img_array = preprocess_input(get_img_array(img_path, size=(224, 224)))
heatmap = make_gradcam_heatmap(img_array, model, 'bn')
plt.matshow(heatmap)
plt.show()
get_img_array, make_gradcam_heatmap and save_and_display_gradcam are kept as they are in the tutorial. Follow the Keras tutorial and you are good to go; a condensed sketch of make_gradcam_heatmap is included below for reference.
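The sketch below is a paraphrase of what the tutorial's make_gradcam_heatmap does, not the tutorial's verbatim code, so prefer the original when in doubt:

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    # Map the input image to the target layer's activations and the model's predictions.
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
    )
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]
    # Gradient of the class score w.r.t. the feature map, averaged over each channel.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # Weight the feature-map channels by the pooled gradients, then normalize to [0, 1].
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()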

How do I structure a Keras model for a custom image regression problem?

I'm attempting to develop a regression model using TensorFlow 2 and the Keras API with a custom data set of png images. However, I'm not entirely sure what layers I should be using and how. I put together what I thought was a very simple model as a starting point; however, when I attempt to train it, the loss and accuracy values printed out are consistently 0. This leads me to believe my loss calculation is not working, but I have no idea why. Below is a snippet of my source code; the full project can be found here:
import tensorflow as tf
import os
import random
import pathlib

AUTOTUNE = tf.data.experimental.AUTOTUNE
TRAINING_DATA_DIR = r'specgrams'

def gen_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def fetch_batch(batch_size=1000):
    all_image_paths = []
    all_image_labels = []
    data_root = pathlib.Path(TRAINING_DATA_DIR)
    files = data_root.iterdir()
    for file in files:
        file = str(file)
        all_image_paths.append(os.path.abspath(file))
        label = file[:-4].split('-')[2:3]
        label = float(label[0]) / 200
        all_image_labels.append(label)

    def preprocess_image(path):
        img_raw = tf.io.read_file(path)
        image = tf.image.decode_png(img_raw, channels=3)
        image = tf.image.resize(image, [256, 128])
        image /= 255.0
        return image

    def preprocess(path, label):
        return preprocess_image(path), label

    path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
    image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
    ds = tf.data.Dataset.zip((image_ds, label_ds))
    ds = ds.shuffle(buffer_size=len(os.listdir(TRAINING_DATA_DIR)))
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

ds = fetch_batch()
model = gen_model()
model.fit(ds, epochs=1, steps_per_epoch=10)
The code above is supposed to read in some spectrograms stored as 256 x 128 px png files, convert them to tensors and fit a regression model to predict a value (in this case the BPM of the music used to generate the spectrogram). The image file names contain the BPM, which is divided by 200 to produce a value between 0 and 1 as the label.
As stated before, this code does run successfully, but after each training step the loss and accuracy values printed out are always exactly 0.00000 and do not change.
It's also worth noting that I actually want my model to predict multiple values, not just a single BPM value but this is a separate issue and as such I have posted a separate question for that here.
Anyway, for the answer: a regression model requires a regression loss function, such as 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error' or 'mean_squared_logarithmic_error'. A short usage sketch follows the corrected model below.
def gen_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='mean_squared_error',
                  metrics=['accuracy'])
    return model
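As a quick usage sketch (the file name below is hypothetical), once the model is trained you can recover a BPM value from a prediction by undoing the /200 scaling that was applied to the labels:

model = gen_model()
model.fit(ds, epochs=10, steps_per_epoch=10)

# Load and preprocess one spectrogram the same way as in fetch_batch.
img_raw = tf.io.read_file('specgrams/example-song-120.png')   # hypothetical file name
img = tf.image.decode_png(img_raw, channels=3)
img = tf.image.resize(img, [256, 128])
img = img / 255.0
img = tf.expand_dims(img, axis=0)                              # add a batch dimension

predicted_bpm = float(model.predict(img)[0][0]) * 200          # undo the /200 label scaling
print(predicted_bpm)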
