Semantic Segmentation Model's loss and binary IOU are not improving - python

I am new to machine learning and I am trying to make a semantic segmentation model for detecting couches in images. However, my validation loss and training loss are stuck at around 0.25, and my validation binary IoU and training binary IoU are stuck at around 0.7. Here is the code for my model and my training parameters.
def upsample(filters, size, norm_type='batchnorm', apply_dropout=False, kernel_regularizer=None):
    """Upsamples an input.

    Conv2DTranspose => Batchnorm => Dropout => Relu

    Args:
        filters: number of filters
        size: filter size
        norm_type: Normalization type; either 'batchnorm' or 'instancenorm'.
        apply_dropout: If True, adds the dropout layer
        kernel_regularizer: optional regularizer passed to the Conv2DTranspose layer

    Returns:
        Upsample Sequential Model
    """
    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2DTranspose(filters, size, strides=2,
                                        padding='same',
                                        kernel_initializer=initializer,
                                        kernel_regularizer=kernel_regularizer,
                                        use_bias=False))

    if norm_type.lower() == 'batchnorm':
        result.add(tf.keras.layers.BatchNormalization())
    elif norm_type.lower() == 'instancenorm':
        result.add(InstanceNormalization())

    if apply_dropout:
        result.add(tf.keras.layers.Dropout(0.3))

    result.add(tf.keras.layers.ReLU())
    return result
class Unet:
    output_channels = 1

    def __init__(self):
        base_model = tf.keras.applications.MobileNetV2(input_shape=[224, 224, 3], include_top=False)

        # Use the activations of these layers
        layer_names = [
            'block_1_expand_relu',
            'block_3_expand_relu',
            'block_6_expand_relu',
            'block_13_expand_relu',
            'block_16_project',
        ]
        base_model_outputs = [base_model.get_layer(name).output for name in layer_names]

        # Create the feature extraction model
        down_stack = tf.keras.Model(inputs=base_model.input, outputs=base_model_outputs)
        down_stack.trainable = False

        up_stack = [
            upsample(1024, 3, norm_type='batchnorm', apply_dropout=True, kernel_regularizer="l2"),
            upsample(512, 3, norm_type='batchnorm', apply_dropout=True, kernel_regularizer="l2"),
            upsample(256, 3, norm_type='batchnorm', apply_dropout=True, kernel_regularizer="l2"),
            upsample(128, 3, norm_type='batchnorm', apply_dropout=False, kernel_regularizer="l2"),
            upsample(64, 3, norm_type='batchnorm', apply_dropout=True, kernel_regularizer="l2"),
        ]

        inputs = tf.keras.layers.Input(shape=[224, 224, 3])

        # Downsampling through the model
        skips = down_stack(inputs)
        x = skips[-1]
        skips = reversed(skips[:-1])

        # Upsampling and establishing the skip connections
        for up, skip in zip(up_stack, skips):
            x = up(x)
            concat = tf.keras.layers.Concatenate()
            x = concat([x, skip])

        last = tf.keras.layers.Conv2DTranspose(
            self.output_channels,
            kernel_size=3, strides=2,
            padding='same', activation='sigmoid')
        x = last(x)

        optimizer = tf.keras.optimizers.Nadam(1e-4)
        metrics = [tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.5)]

        self.model = tf.keras.Model(inputs=inputs, outputs=x)
        self.model.compile(
            optimizer=optimizer,
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=metrics)
def train():
    batch_size = 4
    EPOCHS = 250
    VAL_SUBSPLITS = 2
    VALIDATION_STEPS = dataset_size // batch_size // VAL_SUBSPLITS
    STEPS_PER_EPOCH = train_dataset_size // batch_size

    unet.load_weights('./checkpoints/my_checkpoint')
    history = unet.fit(train_gen, epochs=EPOCHS,
                       steps_per_epoch=STEPS_PER_EPOCH,
                       validation_steps=VALIDATION_STEPS,
                       validation_data=val_gen,
                       callbacks=[
                           ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
                           EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
                       ],
                       )
Are there any tips to improve my model?
Current output: https://imgur.com/a/5Jqn1oX
What I've tried:
Augmented my data generator, which is based on the COCO dataset
Used an Adam optimizer with an exponentially decaying learning rate schedule (a minimal sketch is below)
Increased the batch size from 4 to 32
Tried learning rates from 0.01 down to 0.0000001
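For reference, this is roughly the decaying-schedule setup mentioned above; it is only a sketch, and the specific values here are illustrative rather than the ones actually used.

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,   # illustrative starting rate
    decay_steps=1000,             # decay every 1000 optimizer steps
    decay_rate=0.9)               # multiply the rate by 0.9 at each decay
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)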

Related

Use CustomCallback() Class to achieve freezing layers while training

I am trying to train a custom CNN model in TensorFlow. I want to freeze some layers of the model at specific epochs while training is still running. I have achieved freezing the layers, but I had to train the model for some epochs, then change the trainable attribute to False on the specific layers I wanted to freeze, then compile the model, and then start training again.
I tried to implement this with the CustomCallback() class, freezing some layers at certain epochs, but it didn't seem to work. According to the TensorFlow documentation, after changing a layer's .trainable attribute you have to recompile the model for the change to take effect, but doing so raises the error "TypeError: 'NoneType' object is not callable".
This is my code:
Load libraries
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import Sequence
from keras.models import load_model
Load dataset
#Load dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
#Normalize
X_train, X_test = X_train/255.0, X_test/255.0
Build model
cnn = models.Sequential([
    layers.Conv2D(filters = 32, kernel_size = (1,1), padding = "same", activation = "relu", input_shape = (32,32,3)),
    layers.Conv2D(filters = 64, kernel_size = (3,3), padding = "same", activation = "relu"),
    layers.MaxPool2D(pool_size = (2,2)),
    layers.Conv2D(filters = 64, kernel_size = (3,3), padding = "same", activation = "relu"),
    layers.Conv2D(filters = 128, kernel_size = (5,5), padding = "same", activation = "relu"),
    layers.MaxPool2D(pool_size = (2,2)),
    layers.Flatten(),
    layers.Dense(64, activation = "relu"),
    layers.Dense(128, activation = "relu"),
    layers.Dense(64, activation = "relu"),
    layers.Dense(10, activation = "softmax")
])
Create CustomCallback Class
class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs = None):
        if epoch == 5:
            cnn.layers[0].trainable, cnn.layers[1].trainable, cnn.layers[2].trainable = (False, False, False)
            cnn.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
        elif epoch == 10:
            cnn.layers[3].trainable, cnn.layers[4].trainable, cnn.layers[5].trainable = (False, False, False)
            cnn.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
        elif epoch == 15:
            cnn.layers[6].trainable, cnn.layers[7].trainable, cnn.layers[8].trainable = (False, False, False)
            cnn.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
Define optimizer and compile
#Define the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
#Compile the model
cnn.compile(optimizer = optimizer , loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
Train model
results = cnn.fit(X_train, y_train, epochs = 20, validation_data = (X_test, y_test), batch_size = 1024, callbacks = [CustomCallback()])
An error pops up: "TypeError: 'NoneType' object is not callable".
If I don't compile the model after freezing some layers, no error appears, but all layers keep being updated in every epoch during training.
Based on the official docs, the way to access the model inside the callback is self.model, not cnn. Try this:
class CustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs = None):
        if epoch == 5:
            self.model.layers[0].trainable, self.model.layers[1].trainable, self.model.layers[2].trainable = (False, False, False)
            self.model.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
        elif epoch == 10:
            self.model.layers[3].trainable, self.model.layers[4].trainable, self.model.layers[5].trainable = (False, False, False)
            self.model.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
        elif epoch == 15:
            self.model.layers[6].trainable, self.model.layers[7].trainable, self.model.layers[8].trainable = (False, False, False)
            self.model.compile(optimizer = optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])
OK, as pointed out, in order to change the status of a layer one has to recompile the model. So what I did was train the model for 5 epochs, then save the weights to a file. Then I set layer 7 to not trainable and recompiled the model. Then I loaded the saved weights into the model and ran 5 more epochs. At the end of those epochs I compared the weights with those I had loaded, and they were the same. The code is shown below, starting after the model was compiled:
print('{0:^8s}{1:^80s}{2:^12s}'.format('Layer', 'Layer Description', 'Trainable'))
for i, layer in enumerate(cnn.layers):
    print('{0:^8s}{1:^80s}{2:^12s}'.format(str(i), str(layer), str(layer.trainable)))
This just prints the information for each layer in the model, as shown below:
Layer Layer Description Trainable
0 <keras.layers.convolutional.Conv2D object at 0x00000261CCB7A370> True
1 <keras.layers.convolutional.Conv2D object at 0x00000261E55F4700> True
2 <keras.layers.pooling.MaxPooling2D object at 0x00000261E55F4970> True
3 <keras.layers.convolutional.Conv2D object at 0x00000261E567B160> True
4 <keras.layers.convolutional.Conv2D object at 0x00000261E567B280> True
5 <keras.layers.pooling.MaxPooling2D object at 0x00000261E55F44C0> True
6 <keras.layers.core.flatten.Flatten object at 0x00000261E567B700> True
7 <keras.layers.core.dense.Dense object at 0x00000261E567BD30> True
8 <keras.layers.core.dense.Dense object at 0x00000261E5680070> True
9 <keras.layers.core.dense.Dense object at 0x00000261E56802B0> True
10 <keras.layers.core.dense.Dense object at 0x00000261E56805B0> True
Then I trained the model for 5 epochs and printed out the weights and biases; the code is below:
history = cnn.fit(x=train_gen, epochs=5, verbose=1, validation_data=valid_gen,
                  validation_steps=None, shuffle=True, initial_epoch=0)  # train the model
weights_and_biases=cnn.layers[7].get_weights()
weights=weights_and_biases[0]
print ('shape of weights is= ',weights.shape) # has 64 nodes receiving 131072 inputs from the flatten layer
biases=weights_and_biases[1]
print ('shape of biases is- ',biases.shape)
first_10_weights=weights[0][0:10]
print (first_10_weights)
first_10_biases=biases[0:10]
print (first_10_biases)
The printout of the weights and biases at the end of the 5th epoch is shown below
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
Then I saved the weights to a file. I changed the state of layer 7 to not trainable and recompiled the model. After compiling I loaded the saved weights into the model and again printed out the weights and biases to make sure they loaded correctly. Code is below
filepath=r'C:\DATASETS\spiders\run1.h5' # save the weights at the end of 5 epochs to this file
cnn.save_weights(filepath, overwrite=True, save_format=None, options=None) # save the weights
cnn.layers[7].trainable=False # make layer 7 not trainable
cnn.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics = ["accuracy"]) # compile the model
cnn.load_weights(filepath, by_name=False, skip_mismatch=False, options=None) # load the model with the saved weights
weights_and_biases=cnn.layers[7].get_weights() #get the weights to make sure they are the same as at the end of epoch 5
weights=weights_and_biases[0] # print out the weights
print ('shape of weights is= ',weights.shape) # has 64 nodes receiving 131072 inputs from the flatten layer
biases=weights_and_biases[1]
print ('shape of biases is- ',biases.shape)
first_10_weights=weights[0][0:10]
print (first_10_weights)
first_10_biases=biases[0:10]
print (first_10_biases)
The printed results shown below were as expected
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
Then I trained for 5 more epochs. At the end of those epochs I printed out the layer 7 weights which should not have changed. The code is shown below
history = cnn.fit(x=train_gen, epochs=5, verbose=1, validation_data=valid_gen,
                  validation_steps=None, shuffle=True, initial_epoch=0)  # train the model
weights_and_biases=cnn.layers[7].get_weights()
weights=weights_and_biases[0]
print ('shape of weights is= ',weights.shape) # has 64 nodes receiving 131072 inputs from the flatten layer
biases=weights_and_biases[1]
print ('shape of biases is- ',biases.shape)
first_10_weights=weights[0][0:10]
print (first_10_weights)
first_10_biases=biases[0:10]
print (first_10_biases)
The resultant printout shown below shows the weights and biases did not change
shape of weights is= (131072, 64)
shape of biases is- (64,)
[-0.00171461 -0.00061654 -0.0004427 0.006399 0.00065272 0.00117902
0.00206342 -0.00248441 -0.00172774 0.00399113]
[-0.0098094 -0.01114658 -0.00550008 0.00675221 -0.00647649 0.01904665
0.0103933 0.01889692 -0.01373082 0.00189758]
So the process is: build and compile your model, run for N epochs, save the weights to a file, change the training status of the layers, recompile the model, load the saved weights, and continue training. A condensed sketch of that sequence is below.
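This is only a recap of the steps already shown above; it assumes the cnn model, optimizer, and the train_gen/valid_gen generators from the earlier snippets are already defined, and the file name is hypothetical.

filepath = 'stage1_weights.h5'                              # hypothetical file name

cnn.fit(x=train_gen, epochs=5, validation_data=valid_gen)   # stage 1: train for N epochs
cnn.save_weights(filepath)                                  # snapshot the weights

cnn.layers[7].trainable = False                             # change the layer's training status
cnn.compile(optimizer=optimizer,                            # recompile so the change takes effect
            loss='categorical_crossentropy',
            metrics=['accuracy'])

cnn.load_weights(filepath)                                  # restore the snapshot
cnn.fit(x=train_gen, epochs=5, validation_data=valid_gen)   # continue training; layer 7 stays frozen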

Moving tensorflow 1D convolution code to pytorch

I am completely new to PyTorch. I would like to move my TF code to PyTorch, and I think I am missing something.
I have X as input and Y as output. X is a time series data, on which I would like to do 1D convolution. Y is just a plain number.
X has a shape of (1050589, 81, 21). I have 1050589 experiments, each experiment has 81 timestamps, and each timestamp has 21 points of data. This is the required format for TF, but as far as I could find out, in PyTorch the time dimension should be the last one.
My data is in a NumPy array, so I first transposed each sample to fit PyTorch's layout and collected the pairs into a list.
a = []
for n, i in enumerate(X):
    a.append([X[n].T, Y[n]])

train_data = DataLoader(a, batch_size=128)
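For clarity, an equivalent way to build the same loader directly from the NumPy arrays is sketched below; it assumes X and Y are the arrays described above and keeps everything in float32, which is the dtype the model's layers use by default.

import torch
from torch.utils.data import DataLoader, TensorDataset

# Sketch: convert once, move the feature/channel axis in front of the time axis,
# and keep float32 so the inputs match the Conv1d weights' dtype.
X_t = torch.from_numpy(X).float().permute(0, 2, 1)   # (1050589, 21, 81)
y_t = torch.from_numpy(Y).float()
train_data = DataLoader(TensorDataset(X_t, y_t), batch_size=128)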
My model looks like this:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Conv1d(EMBED_SIZE, 32, 7, padding='same'),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(81*32, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits.double()
The architecture is simple, as I want to keep it the same as I have in Tensorflow. One convolution with a kernel of 7 and 32 channels, followed by a dense layer and a single output layer.
Same network in Tensorflow:
def conv_1d_model():
    model = Sequential(name="model_conv1D")
    model.add(Conv1D(filters=32, kernel_size=7, activation='relu', input_shape=(81, 21), padding="same"))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    return model
Now when I try to optimize this network in PyTorch my losses are all over the place, not decreasing at all, while in TensorFlow it runs perfectly well.
I am sure I am missing something, can anyone point me in the right direction?
My optimization function in PyTorch:
model = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = torch.squeeze(model(X))  # I was getting a warning about the pred being in a different shape than y, so I squeezed it
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
Optimization in Tensorflow
model = conv_1d_model()
opt = Adam(learning_rate=learning_rate)
model.compile(loss='mse', optimizer=opt, metrics=['mae'])
model_history = model.fit(X, Y, validation_split=0.2, epochs=epochs, batch_size=batch_size, verbose=1)

model.predict () return an array instead of a number/label

I am trying to use a trained model to predict on new testing data for classification with model.predict(data). However, instead of a number/label, the program returns an array. How do I modify my training code to get the output correctly? Thank you. Here is my code.
def make_model(input_shape):
    input_layer = keras.layers.Input(input_shape)

    conv1 = keras.layers.Conv1D(filters=150, kernel_size=100, padding="same")(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.ReLU()(conv1)

    conv2 = keras.layers.Conv1D(filters=150, kernel_size=100, padding="same")(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.ReLU()(conv2)

    conv3 = keras.layers.Conv1D(filters=150, kernel_size=100, padding="same")(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.ReLU()(conv3)

    gap = keras.layers.GlobalAveragePooling1D()(conv3)
    output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap)

    return keras.models.Model(inputs=input_layer, outputs=output_layer)
model = make_model(input_shape=x_train.shape[1:])
keras.utils.plot_model(model, show_shapes=True)
epochs = 400
batch_size = 16
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)
Your model is performing as expected. Your last layer uses softmax, so it produces an array of probabilities expressing how "sure" the model is about each label.
If you want the actual predicted label, you can use argmax along the correct axis; it returns the index (= label) with the maximum probability. In any typical fit function, the accuracy is normally calculated by applying argmax to the model's output.
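For example, assuming data is batched like the training samples, something along these lines (a sketch) recovers a single label per sample:

import numpy as np

probs = model.predict(data)          # shape: (num_samples, num_classes)
labels = np.argmax(probs, axis=-1)   # index of the most probable class per sample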

The application of self-attention layer raised index error

So I am doing a machine learning classification task with inputs of shape (batch, step, features).
In order to improve the accuracy of this model, I intended to apply a self-attention layer to it.
I am unfamiliar with how to use it for my case, since most examples online are concerned with NLP embedding models.
def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')
def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('acc') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()

    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))

    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])

    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)

    yhat = model.predict(X_test)

    return history, yhat
This led to IndexError: list index out of range
What is the correct way to apply this layer to my model?
As requested, one may use the following code to simulate a dataset.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM
from keras_self_attention import SeqSelfAttention
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], 700)
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], 100)
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(X_train,y_train,X_test,y_test,loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer)
Here is how I would rewrite the code -
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM, Reshape
from keras_self_attention import SeqSelfAttention
import numpy as np
def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')

def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('accuracy') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()

    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Reshape((-1, model.output.shape[1] * model.output.shape[2])))
    model.add(Dense(num_classes, activation='softmax'))

    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])

    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)

    yhat = model.predict(X_test)

    return history, yhat
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], (700, 1))
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], (100, 1))
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(
X_train,y_train,X_test,y_test,
loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,
learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer
)
These are the changes I had to make to get this to start training -
The original issue was caused by the LSTM layer outputting the wrong dimensions. The SeqSelfAttention layer needs a 3D input (one dimension corresponding to the sequence of the data), which was missing from the output of the LSTM layer. As mentioned by #today in the comments, this can be solved by adding return_sequences=True to the LSTM layer.
But even with that modification, the code still gives an error when trying to compute the cost function. The issue is that the output of the self-attention layer is (None, 50, 64); when this is passed directly into the Dense layer, the final output of the network becomes (None, 50, 1). This doesn't make sense for what we are trying to do, because the final output should just contain a single label for each datapoint (it should have the shape (None, 1)). The problem is that the output from the self-attention layer is 3-dimensional (each data point has a (50, 64) feature map), and it needs to be reshaped into a single-dimensional feature vector for the computation to make sense. So I added a reshape layer, model.add(Reshape((-1, model.output.shape[1]*model.output.shape[2]))), between the attention layer and the Dense layer.
In addition, the myCallback class tests whether logs.get('acc') is > 0.9, but I think it should be logs.get('accuracy').
To address the OP's question in the comments about what kind of column should be added: in this case it was just a matter of extracting the full sequential data from the LSTM layer. Without the return_sequences flag, the output from the LSTM layer is (None, 64); this is simply the final features of the LSTM, without the intermediate sequential data.
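A quick way to see that shape difference, assuming the same 50-step, 34-feature input used above (a sketch):

import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, Input, LSTM

x = Input(shape=(50, 34))
print(Bidirectional(LSTM(32))(x).shape)                         # (None, 64): only the final features
print(Bidirectional(LSTM(32, return_sequences=True))(x).shape)  # (None, 50, 64): full sequence, as SeqSelfAttention expects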

Loading weights TensorFlow 2.0 model error

I am using Python 3.X and TensorFlow 2.0, along with the "tensorflow_model_optimization" package, for neural network pruning. The code I have is as follows-
from tensorflow_model_optimization.sparsity import keras as sparsity
l = tf.keras.layers
# Original model without pruning-
model = Sequential()
model.add(l.InputLayer(input_shape = (784, )))
model.add(Flatten())
model.add(Dense(units = 300, activation='relu', kernel_initializer = tf.initializers.GlorotUniform()))
model.add(l.Dropout(0.2))
model.add(Dense(units = 100, activation='relu', kernel_initializer = tf.initializers.GlorotUniform()))
model.add(l.Dropout(0.1))
model.add(Dense(units = num_classes, activation='softmax'))
# Define callbacks-
callbacks = [
    # tf.keras.callbacks.TensorBoard(log_dir=logdir, profile_batch = 0),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 3)
]
# Compile designed Neural Network-
model.compile(
    loss = tf.keras.losses.categorical_crossentropy,
    optimizer = 'adam',
    metrics = ['accuracy'])
# Save untrained and initial weights to disk-
model.save_weights("Initial_non_trained_weights.h5")
epochs = 12
num_train_samples = X_train.shape[0]
end_step = np.ceil(1.0 * num_train_samples / batch_size).astype(np.int32) * epochs
print("end_step parameter for this dataset = {0}".format(end_step))
# end_step = 5628
# Specify the parameters to be used for layer-wise pruning:
pruning_params = {
    'pruning_schedule': sparsity.PolynomialDecay(
        initial_sparsity=0.50, final_sparsity=0.90,
        begin_step=2000, end_step=end_step, frequency=100)
}
# Neural network which is to be pruned-
pruned_model = Sequential()
pruned_model.add(l.InputLayer(input_shape=(784, )))
pruned_model.add(Flatten())
pruned_model.add(sparsity.prune_low_magnitude(Dense(units = 300, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
                                              **pruning_params))
pruned_model.add(l.Dropout(0.2))
pruned_model.add(sparsity.prune_low_magnitude(Dense(units = 100, activation='relu', kernel_initializer=tf.initializers.GlorotUniform()),
                                              **pruning_params))
pruned_model.add(l.Dropout(0.1))
pruned_model.add(sparsity.prune_low_magnitude(Dense(units = num_classes, activation='softmax'), **pruning_params))
# Compile pruned CNN-
pruned_model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'])
# Load weights from before-
pruned_model.load_weights("Initial_non_trained_weights.h5")
This last line, which loads the initial weights into the pruned model, gives me an error:
ValueError: Layer #0 (named "prune_low_magnitude_dense_9" in the current model) was found to correspond to layer dense in the save file.
However the new layer prune_low_magnitude_dense_9 expects 5 weights, but the saved weights have 2 elements.
What's going wrong?
Thanks!
