Validation accuracy initially high then low - Python

I am training a CNN model for text sentiment classification. The validation accuracy is initially higher than the training accuracy and then it decreases. Is this behavior acceptable? If not, what can be the reason, and how can I solve it?
My model:
class hyper():
    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob, hidden_dims, batch_size, num_epochs):
        # Model Hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self, sequenceLength, max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words

m_hyper = hyper(embedding_dim=embed_dim, filter_sizes=(3, 4, 5, 6, 8), num_filters=80,
                dropout_prob=(0.2, 0.5), hidden_dims=50, batch_size=128, num_epochs=30)
pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
Model architecture:
def build_model(pr_hyper, m_hyper):
    # Convolutional block
    model_input = Input(shape=(pr_hyper.sequenceLength))
    # use a random embedding for the text
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim, weights=[emb], trainable=False)(model_input)
    # x = SpatialDropout1D(m_hyper.dropout_prob[0])(x)

    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)

    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             # padding="same",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg
                             )(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)

    # merge
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]
    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)

    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                  metrics=["accuracy"])  # categorical_crossentropy
    print(model.summary())
    tf.keras.utils.plot_model(model, show_shapes=True)  # , to_file='multichannel.png')
    return model
Initial epochs: (training log screenshot not included)

There are several reasons why this can happen; for example, dropout layers are active during training but disabled during validation, so validation metrics can initially look better than training metrics. For more information I would suggest you see this, which describes several possible reasons for this behavior.
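As a quick sanity check (a minimal sketch using hypothetical x_train/y_train/x_val/y_val arrays, not variables from the question), you can re-evaluate the training set in inference mode, where dropout is disabled, and compare it against the validation metrics:

# hypothetical array names; evaluate() runs in inference mode, so dropout is off for both calls
train_loss, train_acc = model.evaluate(x_train, y_train, verbose=0)
val_loss, val_acc = model.evaluate(x_val, y_val, verbose=0)
print(f"train acc (inference mode): {train_acc:.3f}  val acc: {val_acc:.3f}")

If the gap largely disappears in this comparison, the behavior is explained by train-only regularization rather than by a data or training problem.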

Related

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)` - but I compiled my model already

I got the following error: RuntimeError: You must compile your model before training/testing. Use model.compile(optimizer, loss). However, I have already compiled my model, so I don't understand what the problem is.
vgg = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in vgg.layers:
    layer.trainable = False  # making all the layers non-trainable

x = Flatten()(vgg.output)  # flattening out the last layer
predictions = Dense(2, activation='sigmoid')(x)  # Dense layer to predict whether there is pneumonia or not
model = Model(inputs=vgg.input, outputs=predictions)

early_stopping_callbacks = tensorflow.keras.callbacks.EarlyStopping(patience=15,
                                                                    restore_best_weights=True,
                                                                    verbose=1)

base_model1 = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3), pooling="max",
                    classes=2)
# base_model1.load_weights("../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5")
base_model1.summary()

model2 = Sequential()
model2.add(base_model1)
model2.add(Flatten())
model2.add(Dense(128, activation="relu"))
model2.add(Dense(64, activation="relu"))
model2.add(Dense(32, activation="relu"))
model2.add(Dense(1, activation="sigmoid"))

# freeze the layers
for layer in base_model1.layers:
    layer.trainable = False

model2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model2.fit_generator(train_generator, epochs=EPOCH, validation_data=val_generator, steps_per_epoch=10,
                               callbacks=[early_stopping_callbacks])

test_loss, test_accuracy = base_model1.evaluate(test_generator, steps=50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss * 100, "%")
base_model1 is my VGG16 model and test_generator is my test set.
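One observation about the snippet as posted (not an answer from the original thread): model2 is the model that gets compiled and trained, but evaluate is called on base_model1, which is never compiled, and that is exactly what the RuntimeError complains about. A minimal sketch evaluating the compiled model instead:

# evaluate the compiled, trained model rather than the bare VGG16 base
test_loss, test_accuracy = model2.evaluate(test_generator, steps=50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss)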

Dropout not reducing loss in PyTorch but works fine with Keras

I was comparing the loss for two simple MLP models, with and without dropout, on both the TF/Keras and PyTorch frameworks (using the Keras IMDB dataset). With PyTorch, however, I am not getting the results I hoped for, and I was wondering what I am doing incorrectly.
# Keras - IMDB Dataset
model = Sequential()
model.add(Dense(16, activation="relu", input_shape=(10000,)))
model.add(Dropout(0.5))  # comment out this line for no dropout model
model.add(Dense(16, activation="relu"))
model.add(Dropout(0.5))  # comment out this line for no dropout model
model.add(Dense(1, activation="sigmoid"))

model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=512,
    validation_data=(X_val, y_val)
)
These are the results I obtained in Keras (left figure without dropout, right figure with dropout; figures not included here).
# Pytorch - same IMDB dataset from keras
class MLP(nn.Module):
    def __init__(self, in_dims, l1, l2, out_dims):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(in_dims, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, out_dims)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, X):
        out = F.relu(self.fc1(X))
        out = self.dropout(out)  # comment out this line for no dropout model
        out = F.relu(self.fc2(out))
        out = self.dropout(out)  # comment out this line for no dropout model
        out = F.sigmoid(self.fc3(out))
        return out

model = MLP(10000, 16, 16, 1)
optimizer = optim.RMSprop(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

min_val_loss = np.inf
losses = []
val_losses = []
accuracy = []
val_accuracy = []

for e in range(0, 20):
    running_loss = 0
    for i, (X_train, y_train) in enumerate(train_loader):
        yhat = model.forward(X_train)
        loss = criterion(yhat.flatten(), y_train)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    losses.append(running_loss / (i+1))  # note its i+1 since i starts from 0

    model.eval()
    with torch.no_grad():
        running_val_loss = 0
        for i, (X_val, y_val) in enumerate(val_loader):
            yhat_val = model.forward(X_val)
            val_loss = criterion(yhat_val.flatten(), y_val)
            running_val_loss += val_loss.item()
        val_losses.append(running_val_loss / (i + 1))

    if val_loss < min_val_loss:
        best_params = model.state_dict()
        min_val_loss = val_loss

    print(f"epochs : {e}, train_loss : {loss}, val_loss : {val_loss}")
The figure on the left shows the result from the no-dropout model, which is similar to the Keras results. However, the model with dropout does not show the same behaviour.
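One detail worth flagging in the training loop as posted (an observation about the code itself, not something from the original thread): model.eval() is called after the first epoch and model.train() is never called, so dropout is silently disabled from the second epoch onward, which means the two runs are not comparing what they are meant to compare. A minimal sketch of the loop with that single change:

for e in range(20):
    model.train()                                  # re-enable dropout for the training phase
    running_loss = 0.0
    for i, (X_batch, y_batch) in enumerate(train_loader):
        yhat = model(X_batch)
        loss = criterion(yhat.flatten(), y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    losses.append(running_loss / (i + 1))

    model.eval()                                   # dropout off for validation
    with torch.no_grad():
        running_val_loss = 0.0
        for i, (X_v, y_v) in enumerate(val_loader):
            running_val_loss += criterion(model(X_v).flatten(), y_v).item()
        val_losses.append(running_val_loss / (i + 1))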

How to solve "AttributeError: 'KerasTensor' object has no attribute '_id'"?

I was training a network using RSS data. The network is trained after preprocessing the data, but I am getting the error:
AttributeError: 'KerasTensor' object has no attribute '_id'
My model is shown below.
class DANN(Model):
    def __init__(self):
        super().__init__()
        # Feature Extractor
        self.feature_extractor_layer0 = Input(shape=(num_batch, num_features))
        self.feature_extractor_layer1 = Dense(100, activation='relu')
        self.feature_extractor_layer2 = Dense(100, activation='relu')
        self.feature_extractor_layer3 = Dense(100, activation='relu')
        self.feature_extractor_layer4 = Dropout(0.5)
        self.feature_extractor_layer5 = Dense(100, activation='relu')
        self.feature_extractor_layer6 = Dense(100, activation='relu')
        # Label Predictor
        self.label_predictor_layer0 = Dense(100, activation='relu')
        self.label_predictor_layer1 = Dense(100, activation='relu')
        self.label_predictor_layer2 = Dense(2, activation=None)

    def call(self, x):
        # Feature Extractor
        x = Input(shape=(num_features,))
        x = self.feature_extractor_layer1(x)
        x = self.feature_extractor_layer2(x)
        x = self.feature_extractor_layer3(x)
        x = self.feature_extractor_layer4(x)
        x = self.feature_extractor_layer5(x)
        x = self.feature_extractor_layer6(x)
        label_pred = self.label_predictor_layer0(x)
        label_pred = self.label_predictor_layer1(label_pred)
        label_pred = self.label_predictor_layer2(label_pred)
        return label_pred

# creating a model object
model = DANN()
My training loop is:
# initializing parameters before training the model
lr = 1e-3
optimizer = tf.optimizers.SGD()
loss_fn_label = keras.losses.mean_squared_error
max_batches = len(dx_source)
source_label_loss = []

# training loop
for epoch in range(num_epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            loss_value = loss_fn_label(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        while ep == ls[ls_i]:
            sys.stdout.write("█")
            ls_i = ls_i + 1
            break
        ep = ep + 1
    sys.stdout.write("|")
    print(f'\ts_label_loss: {loss_s_label:.4f}')
    source_label_loss.append(loss_s_label)
Please help me to solve this error.
dx_source_tensor = tf.convert_to_tensor(X_train_source, dtype=tf.float32)
dy_source_tensor = tf.convert_to_tensor(Y_train_source, dtype=tf.float32)
train_dataset_source = tf.data.Dataset.from_tensor_slices((dx_source_tensor, dy_source_tensor))
dx_source_test_tensor = tf.convert_to_tensor(X_test_source, dtype=tf.float32)
dy_source_test_tensor = tf.convert_to_tensor(Y_test_source, dtype=tf.float32)
test_dataset_source = tf.data.Dataset.from_tensor_slices((dx_source_test_tensor, dy_source_test_tensor))
#training dataset
train_source = tf.data.Dataset.from_tensor_slices(train_dataset_source).batch(num_batch)
#testing dataset
test_source = tf.data.Dataset.from_tensor_slices(test_dataset_source).batch(num_batch)
This is how I have given the dataset as input. This code was working for the MNIST dataset, but it is not producing results for a normal row-by-row data frame. If you know any other methods to train these types of models, that is also acceptable to me. Thank you!
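One observation about the model as posted (not an answer from the original thread): inside call(), the incoming batch x is immediately overwritten by x = Input(shape=(num_features,)), so the layers are applied to a symbolic KerasTensor instead of the real data, and KerasTensors cannot be used inside an eager GradientTape loop, which matches the '_id' error. A minimal sketch of call() that simply uses the tensor passed in (the Input layers in __init__ and call can be removed):

def call(self, x):
    # use the batch passed in by the training loop; no Input() placeholder here
    x = self.feature_extractor_layer1(x)
    x = self.feature_extractor_layer2(x)
    x = self.feature_extractor_layer3(x)
    x = self.feature_extractor_layer4(x)
    x = self.feature_extractor_layer5(x)
    x = self.feature_extractor_layer6(x)
    label_pred = self.label_predictor_layer0(x)
    label_pred = self.label_predictor_layer1(label_pred)
    return self.label_predictor_layer2(label_pred)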

Tensorflow Prune Layer Not Supported

I am trying to prune a model in TensorFlow, but I am coming across an error I don't know how to tackle. The error is ValueError: Please initialize "Prune" with a supported layer. Layers should either be a "PrunableLayer" instance, or should be supported by the PruneRegistry. You passed: <class 'base_transformer_tf.TransformerEncoder'>
The model is created using the following:
def transformer_encoder(num_columns, num_labels, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate):
    inp = tf.keras.layers.Input(shape=(window_size, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dense(d_model)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('swish')(x)
    x = tf.keras.layers.SpatialDropout1D(dropout_rate)(x)
    x = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(x)
    out = tf.keras.layers.Dense(num_labels, activation='sigmoid', dtype=tf.float32)(x[:, -1, :])

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=tfa.optimizers.AdamW(weight_decay=weight_decay, learning_rate=learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=tf.keras.metrics.AUC(name='AUC'),
                  )
    return model
The pruning portion of the code is the following:
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.00,
                                                             final_sparsity=0.50,
                                                             begin_step=0,
                                                             end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])

logdir = tempfile.mkdtemp()

callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

model_for_pruning.fit(np.concatenate((X_tr2, X_val)), np.concatenate((y_tr2, y_val)),
                      batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                      callbacks=callbacks)
Any help would be appreciated.
TensorFlow does not know how to prune your custom TransformerEncoder Keras layer. You should specify which weights to sparsify, as in this example: Prune custom Keras layer or modify parts of layer to prune.
That would look like:
class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):

    def get_prunable_weights(self):
        return [self.my_weight, ..]
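For illustration, a minimal sketch of a custom layer that implements PrunableLayer end to end (the layer and weight names here are made up, not taken from the question's TransformerEncoder):

import tensorflow as tf
import tensorflow_model_optimization as tfmot

class MyDenseBlock(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # create the weights this layer owns
        self.kernel = self.add_weight(name="kernel", shape=(int(input_shape[-1]), self.units))
        self.bias = self.add_weight(name="bias", shape=(self.units,), initializer="zeros")

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel) + self.bias

    def get_prunable_weights(self):
        # only the kernel is sparsified; the bias stays dense
        return [self.kernel]

With this in place, prune_low_magnitude can wrap a model containing MyDenseBlock; the same pattern applies to the weights inside your TransformerEncoder.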

GPU volatility in Keras very low

I am training a multi-label image classification model; this is mostly the entire code:
class DataGenerator(keras.utils.Sequence):
    """Generates data for Keras."""

    def __init__(self, csv_path, batch_size=32, dim=(224, 224), n_channels=3,
                 n_classes=8, shuffle=True):
        self.img_files = pd.read_csv(csv_path)
        self.labels = self.img_files.iloc[:, 7:16].values
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch."""
        return int(np.floor(len(self.img_files) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Find list of IDs
        img_files_temp = [self.img_files['Left-Fundus'][k] for k in indexes]
        X, y = self.__data_generation(img_files_temp)
        return X, y

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.img_files))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, img_files_temp):
        X = np.empty((self.batch_size, self.dim[0], self.dim[1], self.n_channels))
        y = np.empty((self.batch_size, self.n_classes))
        for i, img_file in enumerate(img_files_temp):
            img = skimage.io.imread(os.path.join('../Customized-DataLoader/data/train_img', img_file))
            img = skimage.transform.resize(img, output_shape=self.dim, mode='constant', preserve_range=True)
            X[i, :, :, :] = img
            index_of_label = int(self.img_files.index[self.img_files['Left-Fundus'] == img_file][0])
            if len(self.img_files.loc[index_of_label][7:16].values) != 8:
                continue
            y[:, ] = self.img_files.loc[index_of_label][7:16].values
        return X, y

model = keras.applications.densenet.DenseNet121(include_top=False, input_shape=(224, 224, 3))
x = model.output
x = Flatten()(x)
x = Dense(512)(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)
output1 = Dense(1, activation='sigmoid')(x)
output2 = Dense(1, activation='sigmoid')(x)
output3 = Dense(1, activation='sigmoid')(x)
output4 = Dense(1, activation='sigmoid')(x)
output5 = Dense(1, activation='sigmoid')(x)
output6 = Dense(1, activation='sigmoid')(x)
output7 = Dense(1, activation='sigmoid')(x)
output8 = Dense(1, activation='sigmoid')(x)

model = Model(model.inputs, [output1, output2, output3, output4, output5, output6, output7, output8])
# print(model.summary())
model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),
              loss=["binary_crossentropy", "binary_crossentropy", "binary_crossentropy", "binary_crossentropy",
                    "binary_crossentropy", "binary_crossentropy", "binary_crossentropy", "binary_crossentropy"],
              metrics=["accuracy"])

def generator_wrapper(generator):
    for batch_x, batch_y in generator:
        yield (batch_x, [batch_y[:, i] for i in range(8)])

train_file = pd.read_csv('train.csv')
test_file = pd.read_csv('test.csv')
train_generator = DataGenerator(csv_path='train.csv')
valid_generator = DataGenerator(csv_path='test.csv')

batch_size = 32
num_epochs = 1
STEP_SIZE_VALID = len(train_file)//batch_size
STEP_SIZE_TRAIN = len(test_file)//batch_size

model.fit_generator(generator=generator_wrapper(train_generator),
                    steps_per_epoch=STEP_SIZE_TRAIN, validation_data=generator_wrapper(valid_generator),
                    validation_steps=STEP_SIZE_VALID,
                    epochs=5, verbose=1, workers=12, use_multiprocessing=True)
This trains without any errors, but the training is very slow: the GPU volatility is high for some time, then drops to 0%, and after a while shoots back up to 90%. Can someone please help me fix this bottleneck?
Any suggestions to keep the volatility at 100% would be very helpful; please let me know if any more info is needed.
Thanks in advance.
In short: you are not using the capabilities of keras.utils.Sequence, so although you specified 12 workers for multiprocessing, in fact you're only using 1 (you can check CPU usage to see how many cores your code is using).
How to improve the code?
Instead of defining a generator wrapper that yields the samples (you can't parallelize yield, so this code will execute on ONE thread):
def generator_wrapper(generator):
    # THIS IS THE CULPRIT
    for batch_x, batch_y in generator:
        yield (batch_x, [batch_y[:, i] for i in range(8)])
...
model.fit_generator(
    # THIS IS REALLY BAD
    generator=generator_wrapper(train_generator), ...)
you should pass the Sequence into model.fit_generator directly, as follows (please refer to this tutorial; this will make proper use of all 12 of the cores specified):
model.fit_generator(
    generator=train_generator,  # other params
    workers=12, use_multiprocessing=True)
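Since the model has eight separate output heads, dropping the wrapper also means the Sequence itself has to return the labels in multi-output form. A minimal sketch of that change (an addition to this answer, not code from the question) would move the split into DataGenerator.__getitem__:

def __getitem__(self, index):
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    img_files_temp = [self.img_files['Left-Fundus'][k] for k in indexes]
    X, y = self.__data_generation(img_files_temp)
    # return one label array per output head, so no wrapper generator is needed
    return X, [y[:, i] for i in range(self.n_classes)]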
Don't use skimage.io; use cv2 instead. The reason is that skimage.io internally uses Pillow, which is really slow compared to cv2. You can find more info in this benchmark on Kaggle.
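A sketch of the corresponding change inside __data_generation, assuming cv2 (OpenCV) is installed; note that cv2.imread returns BGR, so the channels are converted, and cv2.resize expects (width, height):

import cv2

img = cv2.imread(os.path.join('../Customized-DataLoader/data/train_img', img_file))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)          # cv2 loads BGR; convert to RGB
img = cv2.resize(img, (self.dim[1], self.dim[0]))   # dsize is (width, height)
X[i, :, :, :] = img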
Think about which steps you DON'T really have to do in real time. Do you HAVE to read the files again and again? Do you really NEED to resize the files again and again? I suggest you pack your files into an HDF5 database; more info here. You can easily wrap it in a keras.utils.Sequence class.
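As an illustration only (the file name and dataset keys below are made up), a minimal sketch of packing the resized images once with h5py, so the per-epoch work is reduced to reading slices from one file:

import os
import cv2
import h5py
import pandas as pd

train_file = pd.read_csv('train.csv')
files = train_file['Left-Fundus'].values

with h5py.File('train_images.h5', 'w') as f:
    images = f.create_dataset('images', shape=(len(files), 224, 224, 3), dtype='uint8')
    for i, img_file in enumerate(files):
        img = cv2.imread(os.path.join('../Customized-DataLoader/data/train_img', img_file))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images[i] = cv2.resize(img, (224, 224))
    # labels stored alongside, mirroring the column slice used in the question
    f.create_dataset('labels', data=train_file.iloc[:, 7:16].values.astype('float32'))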
