Use the CustomCallback() class to freeze layers while training - Python

I am trying to train a custom CNN model in TensorFlow. I want to freeze some layers of the model at specific epochs while the training is still running. I have achieved freezing the layers, but I had to train the model for some epochs, then set the trainable attribute to False on the specific layers I wanted to freeze, then compile the model, and then start training again.
I have tried to implement this using the CustomCallback() class, freezing some layers at certain epochs, but it didn't seem to work. TensorFlow's docs say that after changing a layer's .trainable attribute you have to compile the model for the change to take effect, but when I do that an error emerges: "TypeError: 'NoneType' object is not callable".
This is my code:
Load libraries
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import Sequence
from keras.models import load_model
Load dataset
#Load dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
#Normalize
X_train, X_test = X_train/255.0, X_test/255.0
Build model
cnn = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(1,1), padding="same", activation="relu", input_shape=(32,32,3)),
    layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu"),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu"),
    layers.Conv2D(filters=128, kernel_size=(5,5), padding="same", activation="relu"),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax")
])
Create CustomCallback Class
class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs=None):
        if epoch == 5:
            cnn.layers[0].trainable, cnn.layers[1].trainable, cnn.layers[2].trainable = (False, False, False)
            cnn.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        elif epoch == 10:
            cnn.layers[3].trainable, cnn.layers[4].trainable, cnn.layers[5].trainable = (False, False, False)
            cnn.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        elif epoch == 15:
            cnn.layers[6].trainable, cnn.layers[7].trainable, cnn.layers[8].trainable = (False, False, False)
            cnn.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
Define optimizer and compile
#Define the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
#Compile the model
cnn.compile(optimizer = optimizer , loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
Train model
results = cnn.fit(X_train, y_train, epochs = 20, validation_data = (X_test, y_test), batch_size = 1024, callbacks = [CustomCallback()])
An error pops up: "TypeError: 'NoneType' object is not callable".
If I don't compile the model after freezing the layers, no error occurs, but then all layers keep being updated in every epoch.

Based on the official docs, the way to access the model inside a callback is self.model, not the outer cnn variable. Try this:
class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs=None):
        if epoch == 5:
            self.model.layers[0].trainable, self.model.layers[1].trainable, self.model.layers[2].trainable = (False, False, False)
            self.model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        elif epoch == 10:
            self.model.layers[3].trainable, self.model.layers[4].trainable, self.model.layers[5].trainable = (False, False, False)
            self.model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        elif epoch == 15:
            self.model.layers[6].trainable, self.model.layers[7].trainable, self.model.layers[8].trainable = (False, False, False)
            self.model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
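As a quick sanity check (my own sketch, not part of the original answer), you can log each layer's trainable flag at the start of every epoch to confirm the freeze actually took effect:
class TrainableLogger(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs=None):
        # Print which layers are currently trainable
        flags = [layer.trainable for layer in self.model.layers]
        print(f"epoch {epoch}: trainable = {flags}")

results = cnn.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test),
                  batch_size=1024, callbacks=[CustomCallback(), TrainableLogger()])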

OK, as pointed out, in order to change the status of a layer one has to recompile the model. So what I did was train the model for 5 epochs. Then I saved the weights to a file. Then I set layer 7 to not trainable and recompiled the model. Then I loaded the saved weights into the model and ran 5 more epochs. At the end of those epochs I compared the weights with those I loaded, and they are the same. The code is shown below, starting after the model was compiled:
print('{0:^8s}{1:^80s}{2:^12s}'.format('Layer', 'Layer Description', 'Trainable'))
for i, layer in enumerate(cnn.layers):
    print('{0:^8s}{1:^80s}{2:^12s}'.format(str(i), str(layer), str(layer.trainable)))
This just gives the information for each layer in the model per the printout shown below
Layer Layer Description Trainable
0 <keras.layers.convolutional.Conv2D object at 0x00000261CCB7A370> True
1 <keras.layers.convolutional.Conv2D object at 0x00000261E55F4700> True
2 <keras.layers.pooling.MaxPooling2D object at 0x00000261E55F4970> True
3 <keras.layers.convolutional.Conv2D object at 0x00000261E567B160> True
4 <keras.layers.convolutional.Conv2D object at 0x00000261E567B280> True
5 <keras.layers.pooling.MaxPooling2D object at 0x00000261E55F44C0> True
6 <keras.layers.core.flatten.Flatten object at 0x00000261E567B700> True
7 <keras.layers.core.dense.Dense object at 0x00000261E567BD30> True
8 <keras.layers.core.dense.Dense object at 0x00000261E5680070> True
9 <keras.layers.core.dense.Dense object at 0x00000261E56802B0> True
10 <keras.layers.core.dense.Dense object at 0x00000261E56805B0> True
Then I trained the model for 5 epochs and printed out the weights and biases; the code is below:
history = cnn.fit(x=train_gen, epochs=5, verbose=1, validation_data=valid_gen,
                  validation_steps=None, shuffle=True, initial_epoch=0)  # train the model
weights_and_biases = cnn.layers[7].get_weights()
weights = weights_and_biases[0]
print('shape of weights is= ', weights.shape)  # has 64 nodes receiving 131072 inputs from the flatten layer
biases = weights_and_biases[1]
print('shape of biases is- ', biases.shape)
first_10_weights = weights[0][0:10]
print(first_10_weights)
first_10_biases = biases[0:10]
print(first_10_biases)
The printout of the weights and biases at the end of the 5th epoch is shown below
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
Then I saved the weights to a file. I changed the state of layer 7 to not trainable and recompiled the model. After compiling I loaded the saved weights into the model and again printed out the weights and biases to make sure they loaded correctly. Code is below
filepath=r'C:\DATASETS\spiders\run1.h5' # save the weights at the end of 5 epochs to this file
cnn.save_weights(filepath, overwrite=True, save_format=None, options=None) # save the weights
cnn.layers[7].trainable=False # make layer 7 not trainable
cnn.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics = ["accuracy"]) # compile the model
cnn.load_weights(filepath, by_name=False, skip_mismatch=False, options=None) # load the model with the saved weights
weights_and_biases=cnn.layers[7].get_weights() #get the weights to make sure they are the same as at the end of epoch 5
weights=weights_and_biases[0] # print out the weights
print ('shape of weights is= ',weights.shape) # has 64 nodes receiving 131072 inputs from the flatten layer
biases=weights_and_biases[1]
print ('shape of biases is- ',biases.shape)
first_10_weights=weights[0][0:10]
print (first_10_weights)
first_10_biases=biases[0:10]
print (first_10_biases)
The printed results shown below were as expected
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
Then I trained for 5 more epochs. At the end of those epochs I printed out the layer 7 weights which should not have changed. The code is shown below
history = cnn.fit(x=train_gen, epochs=5, verbose=1, validation_data=valid_gen,
                  validation_steps=None, shuffle=True, initial_epoch=0)  # train the model
weights_and_biases = cnn.layers[7].get_weights()
weights = weights_and_biases[0]
print('shape of weights is= ', weights.shape)  # has 64 nodes receiving 131072 inputs from the flatten layer
biases = weights_and_biases[1]
print('shape of biases is- ', biases.shape)
first_10_weights = weights[0][0:10]
print(first_10_weights)
first_10_biases = biases[0:10]
print(first_10_biases)
The resultant printout shown below shows the weights and biases did not change
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
So the process is: build and compile your model; run for N epochs; save the weights to a file; change the training status of the layers; recompile the model; load the saved weights; continue training.
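Put together, a minimal sketch of that procedure (reusing cnn, optimizer, X_train and y_train from the question; the file name is a placeholder) might look like this:
# Phase 1: train with all layers trainable
cnn.fit(X_train, y_train, epochs=5, batch_size=1024)
# Save the weights learned so far
cnn.save_weights("phase1.h5")
# Freeze the first three layers
for layer in cnn.layers[:3]:
    layer.trainable = False
# Recompile so the new trainable flags take effect
cnn.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# Restore the saved weights and continue training with the frozen layers
cnn.load_weights("phase1.h5")
cnn.fit(X_train, y_train, epochs=5, batch_size=1024)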

Related

Tensorflow keras: Problem with loading the weights of the optimizer

I have run the base model to a good accuracy and now I want to load these weights and use them for a model with a few additional layers, and later for hyperparameter tuning.
First I construct this new model:
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
    layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(1024, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
          kernel_initializer=tf.keras.initializers.HeNormal(), kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.1)(x)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
          kernel_initializer=tf.keras.initializers.HeNormal(), kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
Then I compile it, because that is necessary at this stage: I have to run model.fit with dummy input before I load the weights. (I think; I have tried putting these code blocks in many different orders to make it work, but I have failed each time.)
opt = tfa.optimizers.LazyAdam(lr=0.000074)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
dummy_input = tf.random.uniform([32, 224, 224, 3])
dummy_label = tf.random.uniform([32,])
hist = model.fit(dummy_input, dummy_label)
Then I load the weights for the base model:
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights2.h5', by_name=True)
Then I load the weights for the optimizer:
import pickle
with open("/content/drive/MyDrive/weight_values2optimizer.pkl", "rb") as f:
    weights = pickle.load(f)
opt = model.optimizer.set_weights(weights)
This results in the following error:
ValueError: You called `set_weights(weights)` on optimizer LazyAdam
with a weight list of length 1245,
but the optimizer was expecting 13 weights.
Provided weights: [63504, array([[[[ 0.00000000e+00, -5.74126025e-04...
Does anyone have ideas on how to solve this?
If you have a solution with Adam instead of LazyAdam, that is fine too (I have no idea if that would make a difference).
Edit:
I have tried many new things over the last couple of days but nothing is working. Here is the entire code as it stands right now. It includes both the part where I am saving and the part where I am loading.
import tarfile
my_tar2 = tarfile.open('test.tgz')
my_tar2.extractall('test') # specify which folder to extract to
my_tar2.close()
import zipfile
with zipfile.ZipFile("/content/tot_train_bremoved2.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/train/")
import pandas as pd
train_info = pd.read_csv("/content/drive/MyDrive/train_info.csv")
test_info = pd.read_csv("/content/drive/MyDrive/test_info.csv")
train_folder = "/content/train"
test_folder = "/content/test/test"
import tensorflow as tf
import tensorflow.keras as keras
from keras.layers import Input, Lambda, Dense, Flatten, BatchNormalization, Dropout, PReLU, GlobalAveragePooling2D, LeakyReLU, MaxPooling2D
from keras.models import Model
from tensorflow.keras.applications.resnet_v2 import ResNet152V2, preprocess_input
from keras import applications
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.losses import sparse_categorical_crossentropy
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, TensorBoard
import tensorflow_addons as tfa
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
num_classes = 423
epochs = 20
batch_size = 32
img_height = 224
img_width = 224
IMAGE_SIZE = [img_height, img_width]
_train_generator = ImageDataGenerator(
    rotation_range=180,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input)
_val_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input)
train_generator = _train_generator.flow_from_dataframe(dataframe=train_info,
                                                       directory=train_folder, x_col="filename",
                                                       y_col="artist", seed=42,
                                                       batch_size=batch_size, shuffle=True,
                                                       class_mode="sparse", target_size=IMAGE_SIZE)
valid_generator = _val_generator.flow_from_dataframe(dataframe=test_info,
                                                     directory=test_folder, x_col="filename",
                                                     y_col="artist", seed=42,
                                                     batch_size=batch_size, shuffle=True,
                                                     class_mode="sparse", target_size=IMAGE_SIZE)
def get_uncompiled_model():
    input_tensor = Input(shape=train_generator.image_shape)
    base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
    for layer in base_model.layers[:]:
        layer.trainable = True
    x = Flatten()(base_model.output)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

opt = keras.optimizers.Adam(lr=0.000074)

def get_compiled_model():
    model = get_uncompiled_model()
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    return model
earlyStopping = EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_delta=1e-4, mode='min')
model = get_compiled_model()
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    verbose=1,
    steps_per_epoch=len_train // batch_size,
    validation_steps=len_test // batch_size,
    callbacks=[earlyStopping, reduce_lr]
)
import keras.backend as K
import pickle
model.save_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5')
symbolic_weights = getattr(model.optimizer, 'weights')
weight_values = K.batch_get_value(symbolic_weights)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'wb') as f:
    pickle.dump(weight_values, f)
#Here i am building the new model and its from here i am having problems
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
    layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
          kernel_initializer=tf.keras.initializers.HeNormal(),
          kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
    weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
epochs = 2
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    steps_per_epoch=len_train // batch_size,
    validation_steps=len_test // batch_size,
    verbose=1,
    callbacks=[earlyStopping, reduce_lr]
)
Now I am getting the following error running this code block (which in the complete code above is right before the model.fit):
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
    weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
ValueError: You called `set_weights(weights)` on optimizer Adam with a weight list of length 1245, but the optimizer was expecting 13 weights. Provided weights: [11907, array([[[[ 0.00000000e+00, -8.27514916e-04...
All I am trying to do is save the weights for the model and optimizer, and then build a new model where I add a few layers and load the weights from the base of the model and the weights from the optimizer.
Both models have different architectures, so the weights of one can't be loaded into the other, irrespective of the fact that they share the same base model. I think it is a simple case of fine-tuning a model (the saved model in your case).
What you should do is change the way you create the new model, i.e. rather than loading the original ResNet model as the base model with include_top=False, you should try loading the saved model and implementing your own top. This can be done as:
for layer in saved_model.layers[:]:
    layer.trainable = False
x = Flatten()(saved_model.layers[-2].output)
Here the key thing is saved_model.layers[-2].output, which is the output of the second-to-last layer.
Hope it helps; if not, please clarify your doubts or let me know what I missed.
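For illustration, a minimal sketch of that approach (the path is a placeholder, and it assumes the first model was saved in full with model.save rather than save_weights; num_classes is from the question):
from tensorflow import keras

# Load the previously trained model in full
saved_model = keras.models.load_model('/content/drive/MyDrive/MODELS_SAVED/model1.h5')

# Freeze the already-trained layers
for layer in saved_model.layers[:]:
    layer.trainable = False

# Implement your own top on the output of the second-to-last layer
x = keras.layers.Flatten()(saved_model.layers[-2].output)
x = keras.layers.Dense(512, activation='relu')(x)
predictions = keras.layers.Dense(num_classes, activation='softmax')(x)

model = keras.Model(inputs=saved_model.input, outputs=predictions)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])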
Please refer to the following notebook:
https://colab.research.google.com/drive/1j_zLqG1zUMi6UYPdc6gtmkJvHuawL4Sk?usp=sharing
{save,load}_weights on a model includes the weights of the optimizer. That would be the preferred way to initialise the optimizer weights.
You can copy the optimizer from one model to another.
The reason you are getting the error above is that the optimizer doesn't allocate its weights until training starts; if you really want to do it manually, just trigger model.fit() for one epoch on a single datapoint and then set the weights manually.
You can replace
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
    weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)
with:
base_model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights5.h5', by_name=True)
model.optimizer = base_model.optimizer
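If you still want to set the optimizer weights manually, the one-step dummy fit mentioned above could look like this (a sketch; the single random sample exists only to make the optimizer allocate its slot variables, and num_classes is from the question):
import pickle
import tensorflow as tf

# One tiny training step so the optimizer creates its weights
dummy_input = tf.random.uniform([1, 224, 224, 3])
dummy_label = tf.random.uniform([1], maxval=num_classes, dtype=tf.int32)
model.fit(dummy_input, dummy_label, epochs=1, verbose=0)

# Now set_weights() has matching slots to fill
with open('/content/drive/MyDrive/MODELS_SAVED/optimizer3.pkl', 'rb') as f:
    weight_values = pickle.load(f)
model.optimizer.set_weights(weight_values)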
After saving the first model's weights with model.save_weights('name.h5'), you should build a second model exactly like the first one; let's call it model2. Then load the weights you saved before into it with model2.load_weights('name.h5'). Use model.summary() to see the names and number of the first model's layers. For each layer, you can define a variable and put that layer's weights (and also biases) into it with the get_weights() method. Here is an example:
x1 = model2.layers[1].get_weights()
Here, I put the weights and biases of the first layer (which in mine was a convolution layer) in the variable x1.
x1[0] is a list of the weights of the layer #1.
x1[1] is a list of the biases of the layer #1.
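To push those values into the corresponding layer of model2, pair get_weights() with set_weights() (a short sketch, assuming the layer indices line up between the two models):
x1 = model.layers[1].get_weights()  # [weights, biases] of layer #1
model2.layers[1].set_weights(x1)    # copy them into the same layer of model2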

The application of self-attention layer raised index error

So I am doing classification machine learning with input of shape (batch, step, features).
In order to improve the accuracy of this model, I intend to apply a self-attention layer to it.
I am unfamiliar with how to use it for my case, since most examples online concern NLP embedding models.
def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')

def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128, learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('acc') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True
    callbacks = myCallback()
    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))
    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])
    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)
    yhat = model.predict(X_test)
    return history, yhat
This led to IndexError: list index out of range.
What is the correct way to apply this layer to my model?
As requested, one may use the following code to simulate a dataset:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM
from keras_self_attention import SeqSelfAttention
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], 700)
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], 100)
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(X_train,y_train,X_test,y_test,loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer)
Here is how I would rewrite the code -
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM, Reshape
from keras_self_attention import SeqSelfAttention
import numpy as np
def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')

def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('accuracy') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True
    callbacks = myCallback()
    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Reshape((-1, model.output.shape[1]*model.output.shape[2])))
    model.add(Dense(num_classes, activation='softmax'))
    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])
    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)
    yhat = model.predict(X_test)
    return history, yhat
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], (700, 1))
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], (100, 1))
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his, yhat = LSTM_attention_model(
    X_train, y_train, X_test, y_test,
    loss=loss, num_classes=num_classes, batch_size=batch_size, units=32,
    learning_rate=learning_rate, epochs=epochs, dropout=0.5, recurrent_dropout=Rdropout, optimizer=optimizer
)
These are the changes I had to make to get this to start training -
The original issue was caused by the LSTM layer outputting the wrong dimensions. The SeqSelfAttention layer needs a 3D input (one dimension corresponding to the sequence of the data), which was missing from the output of the LSTM layer. As mentioned by @today in the comments, this can be solved by adding return_sequences=True to the LSTM layer.
But even with that modification, the code still gives an error when trying to compute the cost function. The issue is that the output of the self-attention layer is (None, 50, 64); when this is passed directly into the Dense layer, the final output of the network becomes (None, 50, 1). This doesn't make sense for what we are trying to do, because the final output should contain just a single label for each datapoint (it should have the shape (None, 1)). The problem is that the output from the self-attention layer is 3-dimensional (each data point has a (50, 64) feature matrix), and it needs to be reshaped into a one-dimensional feature vector for the computation to make sense. So I added a reshape layer, model.add(Reshape((-1, ))), between the attention layer and the Dense layer.
In addition, the myCallback class tests whether logs.get('acc') is > 0.9, but I think it should be logs.get('accuracy').
To comment on OP's question in the comments about what kind of column should be added: in this case, it was just a matter of extracting the full sequential data from the LSTM layer. Without the return_sequences flag, the output from the LSTM layer is (None, 64); this is simply the final features of the LSTM, without the intermediate sequential data.
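To see the shape difference concretely, here is a small sketch (the unit count of 32 is just an example) comparing the LSTM output with and without return_sequences:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Bidirectional

inp = Input(shape=(50, 34))  # (steps, features), as in the simulated data
print(Bidirectional(LSTM(32))(inp).shape)                         # (None, 64): final step only
print(Bidirectional(LSTM(32, return_sequences=True))(inp).shape)  # (None, 50, 64): one vector per step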

Training a model with Resnet152, saving the weights, loading them and adding more layers issue

My goal is to first train using only ResNet152 and then save the learned weights. Then I want to use these weights as a base for a more complex model with added layers, which I ultimately want to do hyperparameter tuning on. The reason for this approach is that doing it all at once takes a very long time. The problem I am having is that my code doesn't seem to work. I don't get an error message, but when I start training the more complex model it seems to start from 0 again, not using the learned ResNet152 weights.
Here is the code:
First I am only using ResNet152 and the output layer:
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
    layer.trainable = True
x = Flatten()(base_model.output)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'])
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    steps_per_epoch=len_train // batch_size,
    validation_steps=len_val // batch_size,
    callbacks=[earlyStopping, reduce_lr]
)
Then I am saving the weights:
model.save_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights.h5')
Adding more layers.
input_tensor = Input(shape=train_generator.image_shape)
base_model = applications.ResNet152(weights='imagenet', include_top=False, input_tensor=input_tensor)
for layer in base_model.layers[:]:
    layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(1024, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
          kernel_initializer=tf.keras.initializers.HeNormal(),
          kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.1)(x)
x = Dense(512, kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
          kernel_initializer=tf.keras.initializers.HeNormal(),
          kernel_constraint=tf.keras.constraints.UnitNorm(axis=0))(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)
Then I load the weights after the added layers, using by_name=True, both according to the Keras tutorial:
model.load_weights('/content/drive/MyDrive/MODELS_SAVED/model_RESNET152/model_weights.h5', by_name=True)
Then I start training again:
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    steps_per_epoch=len_train // batch_size,
    validation_steps=len_val // batch_size,
    callbacks=[earlyStopping, reduce_lr]
)
But it is starting at a very low accuracy, basically from 0 again, so I'm guessing something is wrong here. Any ideas on how to fix this?
When you use Adam and save model weights only, you have to save/load the optimizer weights as well:
# Save the optimizer state
weight_values = model.optimizer.get_weights()
with open(output_path + 'optimizer.pkl', 'wb') as f:
    pickle.dump(weight_values, f)

# Later, before loading: run one dummy step so the optimizer allocates its weights
dummy_input = tf.random.uniform(inp_shape)    # create a tensor of input shape
dummy_label = tf.random.uniform(label_shape)  # create a tensor of label shape
hist = model.fit(dummy_input, dummy_label)

# Restore the optimizer state
with open(path_to_saved_model + 'optimizer.pkl', 'rb') as f:
    weight_values = pickle.load(f)
optimizer.set_weights(weight_values)
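Note that the pickle round-trip is only needed because the weights were saved on their own; saving the whole model keeps the optimizer state with it, so training can simply resume (a minimal sketch, with a placeholder path and epoch count):
model.save('full_model.h5')  # saves architecture, weights and optimizer state together
restored = tf.keras.models.load_model('full_model.h5')
restored.fit(train_generator, epochs=5)  # the optimizer resumes where it left off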

recurrent neural network ValueError: Found array with dim 3. Estimator expected <= 2

I am running LSTM, GRU and BiLSTM models using the following code:
# Create BiLSTM model
def create_model_bilstm(units):
    model = Sequential()
    model.add(Bidirectional(LSTM(units=units,
                                 return_sequences=True),
                            input_shape=(X_train.shape[1], X_train.shape[2])))
    #model.add(Bidirectional(LSTM(units=units)))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mse', optimizer='adam')
    return model

# Create LSTM or GRU model
def create_model(units, m):
    model = Sequential()
    model.add(m(units=units, return_sequences=True,
                input_shape=[X_train.shape[1], X_train.shape[2]]))
    model.add(Dropout(0.1))
    #model.add(m(units=units))
    #model.add(Dropout(0.2))
    model.add(Dense(units=1))
    # Compile model
    model.compile(loss='mse', optimizer='adam')
    return model

# BiLSTM
model_bilstm = create_model_bilstm(20)
# GRU and LSTM
model_gru = create_model(50, GRU)
model_lstm = create_model(20, LSTM)

# Fit BiLSTM, LSTM and GRU
def fit_model(model):
    early_stop = EarlyStopping(monitor='val_loss',
                               patience=100)
    history = model.fit(X_train, y_train, epochs=700,
                        validation_split=0.2, batch_size=32,
                        shuffle=False, callbacks=[early_stop])
    return history

history_bilstm = fit_model(model_bilstm)
history_lstm = fit_model(model_lstm)
history_gru = fit_model(model_gru)
This all runs smoothly and prints out my loss graphs, but when it comes to predictions I run the following code:
# Make prediction
def prediction(model):
    prediction = model.predict(X_test)
    prediction = scaler_y.inverse_transform(prediction)
    return prediction

prediction_bilstm = prediction(model_bilstm)
prediction_lstm = prediction(model_lstm)
prediction_gru = prediction(model_gru)
and I get the following error:
ValueError Traceback (most recent call last)
<ipython-input-387-9d45f01ae2a2> in <module>
5 return prediction
6
----> 7 prediction_bilstm = prediction(model_bilstm)
8 prediction_lstm = prediction(model_lstm)
9 prediction_gru = prediction(model_gru)
<ipython-input-387-9d45f01ae2a2> in prediction(model)
2 def prediction(model):
3 prediction = model.predict(X_test)
----> 4 prediction = scaler_y.inverse_transform(prediction)
5 return prediction
...
ValueError: Found array with dim 3. Estimator expected <= 2.
I am assuming this has something to do with my X_test shape, based on other posts I have read, so I tried to reshape it to 2d but got another error telling me "expected bidirectional_3_input to have 3 dimensions, but got array with shape (62, 36)", on line 7 again.
What am I doing wrong and how can I fix it?
Data explanation:
I am trying to predict discharge rates (target variable) using groundwater levels (34 features), precipitation and temperature as input, which gives me a total of 36 features. My data is in monthly resolution. I am using 63 observations for my test (a 5-year prediction) and the rest for my train.
What are you doing wrong? Let's assume your input data has shape X_train.shape = [d0, d1, d2]; then after setting up your BiLSTM model like
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            units=10,
            return_sequences=True),
        input_shape=(d1, d2)
    )
)
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
we can check the input- and output-shapes your model expects by
>>model.input.shape
TensorShape([None, d1, d2])
>>model.output.shape
TensorShape([None, d1, 1])
So your model expects input of shape (n_batch,d1,d2), where n_batch is the batch size of the data, and returns a shape (n_batch,d1,1), thus a 3d-tensor.
Now if you provide a 3d tensor to your model, the model.predict method will successfully return a 3d tensor; however, sklearn.preprocessing.StandardScaler.inverse_transform only works for 2d data, which is why it says
ValueError: Found array with dim 3. Estimator expected <= 2.
On the other hand, if you first reshape your data to be 2d, then model.predict complains, because it is set up to expect a 3d tensor.
How can you fix it? For further help on how to fix your code, you would need to provide more detailed information on what you expect your model to do, especially what output shape you want your BiLSTM model to have. I assume you actually want your BiLSTM model to return a scalar for each sample, so an additional Flatten layer might do the trick:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Flatten

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            units=10,
            return_sequences=True),
        input_shape=(d1, d2)
    )
)
model.add(Flatten())  # <-- additional flatten-layer
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
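With the Flatten layer in place you can confirm the output is 2d, which is what the scaler expects (a sketch continuing the model above; X_test and scaler_y are from the question):
print(model.output.shape)                # TensorShape([None, 1])
pred = model.predict(X_test)             # returns a 2d array of shape (n_samples, 1)
pred = scaler_y.inverse_transform(pred)  # no more "dim 3" error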

VGG16 different epoch and batch size generating the same result

I am trying to learn how VGG16 works. Below is my code, using VGG16 for another classification task.
# Generate a model with all layers (with top)
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)
model_vgg16_conv.summary()
# create your own input format
input = Input(shape=(128,128,3),name = 'image_input')
# Use the generated model
output_vgg16_conv = model_vgg16_conv(input)
# Add the fully-connected layers
x = Flatten(name='flatten')(output_vgg16_conv)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = Dense(5, activation='softmax', name='predictions')(x)
#Create your own model
model = Model(input=input, output=x)
#In the summary, weights and layers from VGG part will be hidden, but they will be fit during the training
model.summary()
# Specify an optimizer to use
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
# Choose loss function, optimization method, and metrics (which results to display)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.fit(X_train,y_train,epochs=10,batch_size=10,verbose=2)
# model.fit(X_train,y_train,epochs=30,batch_size=100,verbose=2)
result = model.predict(y_test) # same result
For some reason, using different epoch counts and batch sizes generates exactly the same result. Am I doing something wrong?
