I have run the base model to a good accuracy and now i want to load these weights and use them for a model with a few additional layers and later for hyperparameter tuning.
First i construct this new model
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(1024, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
kernel_initializer=tf.keras.initializers.HeNormal(), kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.1)(x)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
kernel_initializer=tf.keras.initializers.HeNormal(), kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
Then i compile it because that is necessary at this stage because i have to run the model fit with dummy input before i load the weights. (i think, i have tried to put these code blocks in many different orders to make it work, but i have failed each time)
opt = tfa.optimizers.LazyAdam(lr=0.000074)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=opt,
metrics=['accuracy']
)
dummy_input = tf.random.uniform([32, 224, 224, 3])
dummy_label = tf.random.uniform([32,])
hist = model.fit(dummy_input, dummy_label)
Then i load the weights for the base model:
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights2.h5', by_name=True)
Then i load the weights for the optimizer:
import pickle
with open("/content/drive/MyDrive/weight_values2optimizer.pkl", "rb") as f:
weights = pickle.load(f)
opt = model.optimizer.set_weights(weights)
This results in the following error:
ValueError: You called `set_weights(weights)` on optimizer LazyAdam
with a weight list of length 1245,
but the optimizer was expecting 13 weights.
Provided weights: [63504, array([[[[ 0.00000000e+00, -5.74126025e-04...
Anyone have ideas on how to solve this?
If you have a solution with Adam instead of LazyAdam that is fine too.(i have no idea if that would make a difference)
edit:
I have tried many new things last couple of days but nothing is working. Here is the entire code where i stand right now. It includes both the part where i am saving and the part where i am loading.
import tarfile
my_tar2 = tarfile.open('test.tgz')
my_tar2.extractall('test') # specify which folder to extract to
my_tar2.close()
import zipfile
with zipfile.ZipFile("/content/tot_train_bremoved2.zip", 'r') as zip_ref:
zip_ref.extractall("/content/train/")
import pandas as pd
train_info = pd.read_csv("/content/drive/MyDrive/train_info.csv")
test_info = pd.read_csv("/content/drive/MyDrive/test_info.csv")
train_folder = "/content/train"
test_folder = "/content/test/test"
import tensorflow as tf
import tensorflow.keras as keras
from keras.layers import Input, Lambda, Dense, Flatten, BatchNormalization, Dropout, PReLU, GlobalAveragePooling2D, LeakyReLU, MaxPooling2D
from keras.models import Model
from tensorflow.keras.applications.resnet_v2 import ResNet152V2, preprocess_input
from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.losses import sparse_categorical_crossentropy
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, TensorBoard
import tensorflow_addons as tfa
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
num_classes = 423
epochs = 20
batch_size = 32
img_height = 224
img_width = 224
IMAGE_SIZE = [img_height, img_width]
_train_generator = ImageDataGenerator(
rotation_range=180,
zoom_range=0.2,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.3,
horizontal_flip=True,
vertical_flip=True,
preprocessing_function=preprocess_input)
_val_generator = ImageDataGenerator(
preprocessing_function=preprocess_input)
train_generator = _train_generator.flow_from_dataframe(dataframe = train_info,
directory = train_folder, x_col = "filename",
y_col = "artist", seed = 42,
batch_size = batch_size, shuffle = True,
class_mode="sparse", target_size = IMAGE_SIZE)
valid_generator = _val_generator.flow_from_dataframe(dataframe = test_info,
directory = test_folder, x_col = "filename",
y_col = "artist", seed = 42,
batch_size = batch_size, shuffle = True,
class_mode="sparse", target_size = IMAGE_SIZE)
def get_uncompiled_model():
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
layer.trainable = True
x = Flatten()(base_model.output)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
return model
opt = keras.optimizers.Adam(lr=0.000074)
def get_compiled_model():
model = get_uncompiled_model()
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=opt,
metrics=['accuracy']
)
return model
earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_delta=1e-4, mode='min')
model = get_compiled_model()
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
model.fit(
train_generator,
validation_data=valid_generator,
epochs=epochs,
verbose = 1,
steps_per_epoch=len_train // batch_size,
validation_steps=len_test // batch_size,
callbacks=[earlyStopping, reduce_lr]
)
import keras.backend as K
import pickle
model.save_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5')
symbolic_weights = getattr(model.optimizer, 'weights')
weight_values = K.batch_get_value(symbolic_weights)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'wb') as f:
pickle.dump(weight_values, f)
#Here i am building the new model and its from here i am having problems
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
kernel_initializer=tf.keras.initializers.HeNormal(),
kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
epochs = 2
model.fit(
train_generator,
validation_data=valid_generator,
epochs=epochs,
steps_per_epoch=len_train // batch_size,
validation_steps=len_test // batch_size,
verbose = 1,
callbacks=[earlyStopping, reduce_lr]
)
Now i am getting the following error running this code block (which above in the complete code is right before the model.fit):
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
ValueError: You called `set_weights(weights)` on optimizer Adam with a weight list of length 1245, but the optimizer was expecting 13 weights. Provided weights: [11907, array([[[[ 0.00000000e+00, -8.27514916e-04...
All i am trying to do is to save the weights for the model and optimizer and then build a new model where i am adding a few layers and loading the weights from the base of the model and the weights from the optimizer.
Both models have different architectures so weights of one can't be loaded into another,irrespective of that they inherited same base model. I think it is a simple case of fine-tuning a model (saved model in your case).
What you should do is change the way to create new model, i.e. rather than loading the original resnet model as base model with include_top = False, you should try loading the saved model and implementing your own include_top. This can be done as:
for layer in saved_model.layers[:]:
layer.trainable = False
x = Flatten()(saved_model.layers[-2].output)
Here the key thing is saved_model.layers[-2].output which means output from the second last layer.
Hope it helps, if not please clarify your doubts or let me know what I missed.
Please refer to the following notebook:
https://colab.research.google.com/drive/1j_zLqG1zUMi6UYPdc6gtmkJvHuawL4Sk?usp=sharing
{save,load}_weights on a model includes the weights of the optimizer. That would be the preferred way to initialise the optimizer weights.
You can copy the optimizer from one model to another.
The reason you are getting the error above is that the optimizer doesn't allocate its weights until training starts; If you really want to do it manually, just trigger model.fit() for 1 epoch 1 datapoint and then load the data manually.
You can replace
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
with:
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
model.optimizer = base_model.optimizer
After saving the first model's weights with model.save_weights('name.h5'), you should build a second model, exactly like the first one, let's call it model2. Then load the weights you saved before into it. The code should be model2.load_weights('name.h5'). See model.summary() to see the names and number of the first model's layers. For each layer, you need to define a variable and add those weights (and also biases) to that variable with a method called get_weights() . Here is an example:
x1 = model2.layers[1].get_weights()
Here, I put the weights and biases of the first layer (which in mine was a convolution layer) in the variable x1.
x1[0] is a list of the weights of the layer #1.
x1[1] is a list of the biases of the layer #1.
Related
I'm trying to use Imagenet V2 with transfer-learning for multiclass classification (6 classes), but getting the following error. Can anyone please help?
ValueError: `logits` and `labels` must have the same shape, received ((None, 6) vs (None, 1)).
I borrowed this code from Andrew Ng's CNN course I took a while back but the original code was for binary classification. I tried to modify it for multiclass classification but got this error. Here's my code:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras.layers as tfl
import datetime
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
training_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Training/Actin"
validation_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Validation/Actin"
train_dataset = image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
Output:
Found 600 files belonging to 6 classes.
Found 600 files belonging to 6 classes.
Code Continued...
class_names = train_dataset.class_names
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
# Freeze the base model by making it non trainable
# base_model.trainable = None
# create the input layer (Same as the imageNetv2 input size)
# inputs = tf.keras.Input(shape=None)
# apply data augmentation to the inputs
# x = None
# data preprocessing using the same weights the model was trained on
# x = preprocess_input(None)
# set training to False to avoid keeping track of statistics in the batch norm layer
# x = base_model(None, training=None)
# Add the new Binary classification layers
# use global avg pooling to summarize the info in each channel
# x = None()(x)
#include dropout with probability of 0.2 to avoid overfitting
# x = None(None)(x)
# create a prediction layer with one neuron (as a classifier only needs one)
# prediction_layer = None
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tfl.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
# YOUR CODE ENDS HERE
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
# Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
# layer.trainable = None
# Define a BinaryCrossentropy loss function. Use from_logits=True
# loss_function=None
# Define an Adam optimizer with a learning rate of 0.1 * base_learning_rate
# optimizer = None
# Use accuracy as evaluation metric
# metrics=None
base_learning_rate = 0.01
# YOUR CODE STARTS HERE
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
loss_function=tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer= tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
metrics=['accuracy']
# YOUR CODE ENDS HERE
model2.compile(loss=loss_function,
optimizer = optimizer,
metrics=metrics)
initial_epochs = 5
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=initial_epochs)
Looks like you yet have to one-hot-encode your labels, i.e. instead of having number i (between 0 and 5, inclusive) for a label of an image that belongs to the i-th class, which is of shape (None, 1), provide an array of all 0's except a 1 at index i, which is of shape (None, 6). Then labels has the same shape as logits.
It is easy you need to match the logits output or you need to remove softmax or distribution at the end of the model.
Almost correct, I change a bit on un-defined data_augmentation that is working.
It will have the output but the calculation is based on output expectation try to use meanquears you see errors or use class entropy that will provide different behavior.
Somebody told it boosted up accuracy output as they are using Binary cross entropy but not this way, it will highly boosted up when using with sequences of binary see the example of ALE games ( Street Fighters )
[ Sample ]:
import os
from os.path import exists
import tensorflow as tf
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
PATH = 'F:\\datasets\\downloads\\sample\\cats_dogs\\training'
training_directory = os.path.join(PATH, 'train')
validation_directory = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(training_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_directory,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE,
seed=42)
class_names = train_dataset.class_names
print( "class_names: " + str( class_names ) )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def huvec_model (image_shape=IMG_SIZE, data_augmentation = tf.keras.Sequential([ tf.keras.layers.RandomFlip('horizontal'), tf.keras.layers.RandomRotation(0.2), ])):
# def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
''' Define a tf.keras model for binary classification out of the MobileNetV2 model
Arguments:
image_shape -- Image width and height
data_augmentation -- data augmentation function
Returns:
Returns:
tf.keras.model
'''
input_shape = image_shape + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = False
inputs = tf.keras.Input(shape=input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(.2)(x)
prediction_layer = tf.keras.layers.Dense(units = len(class_names), activation='softmax')
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)
return model
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=True,
weights='imagenet')
base_model.summary()
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
base_learning_rate = 0.01
for layer in base_model.layers[:fine_tune_at]:
layer.trainable = False
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model2.compile(optimizer=optimizer, loss=lossfn, metrics=[ 'accuracy' ])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=5)
input('...')
[ Output ]
...
Found the bug:
I had to redefine loss in the model compiler as loss='sparse_categorical_crossentropy' which was initially defined as loss=tf.keras.losses.BinaryCrossentropy(from_logits=True).
Refer to the SO thread Changing Keras Model from Binary Classification to Multi-classification for more details.
I created a model for mask-detection through the transfer learning of a MobileNet CNN, for a multiclass problem: NoMask, Mask, UncorrectMask.
Below is the code:
import matplotlib.pyplot as plt
import tensorflow
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
IMAGE_SIZE = (224, 224)
IMG_SHAPE = IMAGE_SIZE + (3,)
DATASET_DIR = "./DATASET/Mask_Detection/"
BATCH_SIZE = 32
EPOCHS = 15
datagen = ImageDataGenerator(
validation_split = 0.2,
rescale = 1./255, #per processare più velocemente i dati
brightness_range=[1,2]
)
train_generator = datagen.flow_from_directory(
DATASET_DIR,
target_size = IMAGE_SIZE,
batch_size = BATCH_SIZE,
class_mode = "categorical",
subset = "training"
)
test_generator = datagen.flow_from_directory(
DATASET_DIR,
target_size = IMAGE_SIZE,
batch_size = BATCH_SIZE,
class_mode = "categorical",
subset = "validation"
)
mobielNetV2 = tensorflow.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,include_top=False,weights='imagenet')
for layer in mobielNetV2.layers:
layer.trainable = False
x = Flatten()(mobielNetV2.output)
prediction = Dense(3, activation='softmax')(x)
model = Model(inputs=mobielNetV2.input, outputs=prediction)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
r = model.fit(train_generator, validation_data=test_generator, epochs=EPOCHS,steps_per_epoch=len(train_generator),validation_steps=len(test_generator))
model.save("MobileNet.h5")
I have a problem, I would like to calculate the precision, recall, F1 and confusion matrix for my model, but I can't figure out how to do it, can someone help me?
I was able to easily calculate the accuracy and logloss.
See this tutorial. Basically you should get your model's predictions on your test set by
y_pred_logits = model.predict(test_generator)
y_pred = tf.math.argmax(y_pred_logits)
The link above also has a example for displaying the confusion matrix.
Precision, recall and F1 could be computed with sklearn as in https://stackoverflow.com/a/9092866/11664800.
I am working on a small project where i wanted to classify some images by training them within a convolutional network, with a pretrained VGG16 network.
These are the steps i took:
#Setting Conditions
img_size = (180,180)
batch_size = 600
num_classes = len(class_names)
#Using Keras built in preprocessing method which facilitates the preprocessing instead of having to do it manually.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
"Data/Train",
validation_split = 0.2,
subset = "training",
seed = 1337, # creating a seed so t
image_size = img_size,
batch_size = batch_size,
)
valid_ds = tf.keras.preprocessing.image_dataset_from_directory(
"Data/Train",
validation_split = 0.2,
subset = "validation",
seed = 1337, # creating a seed so t
image_size = img_size,
batch_size = batch_size,
)
BUILDING MODEL
#Creating Model
model = Sequential()
#ADDING The VGG16 Pre trained network
model.add(VGG16(pooling ='avg',weights="imagenet", include_top=False))
#adding a dense layer
model.add(Dense(num_classes,activation = 'softmax'))
#Setting the trainable parameter for VGG16 to false, as we want to use this pretrained network, and train the new images.
model.layers[0].trainable = False
#The compile() method: specifying a loss, metrics, and an optimizer To train a model with fit(), #you need to specify a loss function, an optimizer, and optionally, some metrics to monitor.
#You pass these to the model as arguments to the compile() method
model.compile(optimizer = 'adam',loss = 'categorical_crossentropy', metrics =['accuracy'])
epoch_train = len(train_ds)
opoch_val = len(valid_ds)
numbers_epochs = 2
fit_model = model.fit(train_ds, steps_per_epoch = epoch_train,verbose = 1, validation_data = valid_ds, validation_steps = opoch_val,)
When i try to fit the model, i get the following error:
ValueError: Shapes (None, 1) and (None, 43) are incompatible
If there is an expert out there to call out what i did wrong or what steps i skipped... I would be very greatful!
You need to set label_mode='categorical in image_dataset_from_directory if you plan to train your model with categorical_crossentropy. By default, the image_dataset_from_directory method assumes that the labels are encoded as integers (label_mode='int'), meaning you should being using the sparse_categorical_crossentropy loss. Here is a working example:
import tensorflow as tf
import pathlib
import matplotlib.pyplot as plt
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)
batch_size = 32
num_classes = 5
train_ds = tf.keras.utils.image_dataset_from_directory(data_dir, shuffle=True, batch_size=batch_size, label_mode='categorical')
model = tf.keras.Sequential()
model.add(tf.keras.applications.VGG16(pooling ='avg',weights="imagenet", include_top=False))
model.add(tf.keras.layers.Dense(num_classes, activation = 'softmax'))
model.layers[0].trainable = False
model.compile(optimizer = 'adam',loss = 'categorical_crossentropy', metrics =['accuracy'])
numbers_epochs = 2
fit_model = model.fit(train_ds, epochs=numbers_epochs)
My goal is to first train only using ResNet152 and then save the learned weights. Then i want to use these weights as a base for a more complex model with added layers which i ultimately want to do hyperparameter tuning on. The reason for this approach is that doing it all at once takes a very long time. The problem i am having is that my code doesnt seem to work. I dont get an error message but when i start training the more complex model it seems to start from 0 again and not using the learned ResNet152 weights.
Here is the code:
First i am only using ResNet152 and the output layer
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
layer.trainable = True¨
x = Flatten()(base_model.output)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
model.fit(
train_generator,
validation_data=valid_generator,
epochs=epochs,
steps_per_epoch=len_train // batch_size,
validation_steps=len_val // batch_size,
callbacks=[earlyStopping, reduce_lr]
)
Then i am saving the weights:
model.save_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights.h5')
Adding more layers.
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(1024, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
kernel_initializer=tf.keras.initializers.HeNormal(),
kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.1)(x)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
kernel_initializer=tf.keras.initializers.HeNormal(),
kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
Loading the weights after the added layers and using by_name=True, both according to the keras tutorial.
model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights.h5', by_name=True)
Then i start training again.
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=opt,
metrics=['accuracy']
)
model.fit(
train_generator,
validation_data=valid_generator,
epochs=epochs,
steps_per_epoch=len_train // batch_size,
validation_steps=len_val // batch_size,
callbacks=[earlyStopping, reduce_lr]
)
But it is starting at a very low accuracy, basically from 0 again, so im guessing something is wrong here. Any ideas on how to fix this?
When you use adam and save model weights only - you have to save/load optimizer weights as well:
weight_values = model.optimizer.get_weights()
with open(output_path+'optimizer.pkl', 'wb') as f:
pickle.dump(weight_values, f)
dummy_input = tf.random.uniform(inp_shape) # create a tensor of input shape
dummy_label = tf.random.uniform(label_shape) # create a tensor of label shape
hist = model.fit(dummy_input, dummy_label)
with open(path_to_saved_model+'optimizer.pkl', 'rb') as f:
weight_values = pickle.load(f)
optimizer.set_weights(weight_values)
I am using resnet50 transfer learning for the Oxford-IIIT Pet Dataset to classify 37 breeds of cats and dogs. The idea is to follow fastai implementation closely using Keras code. However, I managed to get a training accuracy as high as 90% but can't seem to increase my val_accuracy higher than a random guess (1/37 or ~ 3% val_acc).
Any idea how do Keras compute the validation acc and how can I improve it? Or is there something wrong with my preprocessing steps? Thanks a lot.
To get my validation steps, I use sklearn StratifiedShuffleSplit to get a balanced validation set.
# Create dataframe with labels and filenames
annotations = pd.read_csv("annotation/list.txt",header=None,delim_whitespace=True)
annotations.drop([1,2,3],axis=1, inplace=True)
annotations.columns = ["filenames"]
# Create label columns
trans = str.maketrans("_0123456789"," ")
annotations["labels"] = annotations["filenames"].str.translate(trans).str.strip()
annotations["filenames"] = annotations["filenames"] +".jpg"
# Creating a validation set
from sklearn.model_selection import StratifiedShuffleSplit
df_array = annotations.to_numpy(copy=True)
sss = StratifiedShuffleSplit(n_splits = 1, test_size=0.2)
valid_idx = [test for _,test in sss.split(df_array[:,0],df_array[:,1])]
validation = annotations.iloc[valid_idx[0]]
annotations.drop(valid_idx[0], inplace=True)
Then, constructing my generator and training my model.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
bs = 64
def normalize(x):
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape(1,1,3)
imagenet_sd = np.array([0.229, 0.224, 0.225]).reshape(1,1,3)
return (x- imagenet_mean)/imagenet_sd
train_datagen = ImageDataGenerator(rescale=1/255.,
horizontal_flip = True,
rotation_range=10,
width_shift_range = 0.1,
height_shift_range =0.1,
brightness_range =(0.9,1.1),
shear_range =0.1,
preprocessing_function=normalize)
train_generator = train_datagen.flow_from_dataframe(dataframe=annotations,
directory =os.getcwd(),
x_col="filenames",
y_col="labels",
target_size = (224,224),
batch_size = bs,
)
val_datagen = ImageDataGenerator(rescale=1/255.,
preprocessing_function=normalize)
validation_generator = val_datagen.flow_from_dataframe(dataframe=validation,
directory =os.getcwd(),
x_col="filenames",
y_col="labels",
target_size = (224,224),
batch_size=bs,
)
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import optimizers
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten, BatchNormalization, Dropout
base_model = ResNet50(include_top=False,weights="imagenet")
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = BatchNormalization(epsilon=1e-05,momentum=0.1)(x)
x = Dropout(0.25)(x)
x = Dense(512,activation="relu")(x)
x = BatchNormalization(epsilon=1e-05,momentum=0.1)(x)
x = Dropout(0.5)(x)
predictions = Dense(37,activation="softmax")(x)
model = Model(inputs=base_model.input,outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
lr= 0.001
opti = optimizers.Adam(lr=lr, decay=lr/50)
model.compile(optimizer=opti,
loss="categorical_crossentropy",
metrics=["accuracy"])
model.fit_generator(train_generator,
epochs=10,
validation_data = validation_generator)
for layer in base_model.layers:
layer.trainable = True
model.fit_generator(train_generator,
epochs=10,
validation_data = validation_generator)
By the 10 epochs before unfreezing my layers
loss = 0.2189
acc = 0.9255
val_loss = 5.5082
val_acc = 0.0401