I was trying to create a neural network based on the model presented in this paper by Yoon Kim (https://arxiv.org/pdf/1408.5882.pdf) for sentence classification. I built it in TensorFlow Keras, using padded sentences (with the words lemmatized) as input and 3 categories ("positive", "neutral" or "negative") as output.
Below is the model I've built:
def create_CNN_model(window_sizes, feature_maps, sent_size, num_categs, embedding_matrix:np.array):
inputs = Input(shape=(sent_size), dtype='float32', name='text_inputs') # dim = (BATCH_SIZE, sent_size)
# initialize the embeddings with my own embeddings matrix
embed = Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1],
mask_zero=True, input_length=sent_size,
weights=[embedding_matrix])(inputs)
#create array for max pooled vectors of features
ta = []
# as we have multiple window sizes:
for n_window in window_sizes:
con = Conv1D(feature_maps, n_window, padding='causal',
activation="relu", use_bias=True)(embed) # (BATCH_SIZE, sent_size-window_size+1, feature_maps)
# the convolved tensor contains, for this window size, feature_maps feature maps
pooled = GlobalMaxPool1D(data_format='channels_last')(con) # (BATCH_SIZE, feature_maps)
# the max pooling operation then extracts the maximum of each feature map, reducing the rank of the tensor
# the max pooled tensor contains one feature per feature map for this window size
ta.append(pooled)
concat = concatenate(ta, axis=1)
dropped = Dropout(0.5)(concat)
outputs = Dense(num_categs,activation="softmax",use_bias=True, kernel_regularizer=l2(l=3),
kernel_constraint=Dropout(0.5))(dropped)
# create the model
model = Model(inputs=[inputs], outputs=[outputs])
#return the model
return model
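For reference, this is roughly how the model gets instantiated (the window sizes, number of feature maps and embedding matrix below are placeholder values, not my real ones):
import numpy as np

# Placeholder embedding matrix: 5000-word vocabulary, 300-dimensional vectors,
# with row 0 reserved for the padding index (values here are made up)
embedding_matrix = np.random.uniform(-0.25, 0.25, (5000, 300)).astype('float32')

model = create_CNN_model(window_sizes=[3, 4, 5], feature_maps=100,
                         sent_size=50, num_categs=3,
                         embedding_matrix=embedding_matrix)
model.summary()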
I've tried training this model with just 200 sentences, simply to see whether it overfits the data. But instead of overfitting, the loss value just goes up and down between 0 and 1. I've also tried lowering the learning rate to a value as small as 1e-8, but it did nothing.
Below is the function I've used for training:
def train_model(X_data, y_data, batch_sz, tf_model, max_patience, num_epochs, ln_rate):
# Instantiate an optimizer to train the model.
# optimizer = Adadelta(learning_rate=1e-3)
optimizer = Adam(learning_rate=ln_rate)
# Instantiate a loss function.
loss_fn = CategoricalCrossentropy()
# Prepare the metrics
train_acc_metric = CategoricalAccuracy()
val_acc_metric = CategoricalAccuracy()
buffer_sz = len(X_data)
patience = 0
epochs = num_epochs
last_val_acc = 0
# Set the random seed for better reproducibility
np.random.seed(123)
# Create the checkpoints
ckpt = train.Checkpoint(step=tf.Variable(1), optimizer=optimizer,
model=tf_model)
manager = train.CheckpointManager(ckpt, './tf_ckpts', max_to_keep=3)
# Create directory to save the trained model
path = "./saved_model"
print("\n----------------------------------------------")
if not os.path.isdir(path):
try:
os.mkdir(path)
except OSError:
print ("\nCreation of the directory %s failed \n" % path)
else:
print ("\nSuccessfully created the directory %s \n" % path)
else:
print("\nDirectory %s already exists" % path)
print("\n----------------------------------------------")
print("\nStarting run script...\n",
"Model will be saved to ", path,"\n",
"Checkpoints will be restored from and saved to .\tf_ckpts")
# Save model prior to training
tf_model.save("./saved_model/tf_model")
# Restart from last checkpoint, if available
ckpt.restore(manager.latest_checkpoint)
print("\n----------------------------------------------")
if manager.latest_checkpoint:
print("\nRestored from {}".format(manager.latest_checkpoint))
else:
print("\nInitializing from scratch.")
# beginning of the training loop
for epoch in range(epochs):
print("\n----------------------------------------------")
print('Start of epoch %d' % (epoch,))
# re-shuffle data before each epoch
np.random.shuffle(X_data)
np.random.shuffle(y_data)
# create the training dataset with 10-fold crossvalidation
train_dataset = make_dataset(X_data,y_data,10)
# Iterate over the batches of the dataset.
for x_train, y_train, x_val, y_val in train_dataset:
train_batches = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_batches = train_batches.batch(batch_sz)
for x_batch_train, y_batch_train in train_batches:
with tf.GradientTape() as tape:
# calculate the forward run
logits = tf_model(x_batch_train)
# assert if output and true label tensor shapes are equal
get_shape = y_batch_train.shape
tf.debugging.assert_shapes([
(logits,get_shape),
], data=(y_batch_train, logits),
summarize=3, message="Inconsistent shape (labels,output): ",
name="assert_shapes")
# calculate loss function
loss_value = loss_fn(y_batch_train, logits)
# add 1 to the step variable
ckpt.step.assign_add(1)
# Add extra losses created during this forward pass:
loss_value += sum(tf_model.losses)
# calculate gradients
grads = tape.gradient(loss_value, tf_model.trainable_weights)
# backpropagate the gradients
optimizer.apply_gradients(zip(grads, tf_model.trainable_weights))
# Update training metric.
train_acc_metric(y_batch_train, logits)
# Save & log every 100 batches.
if int(ckpt.step) % 100 == 0:
save_path = manager.save()
print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
print("loss {:1.2f}".format(loss_value))
print('Seen so far: %s samples' % (int(ckpt.step) * batch_sz))
# Run a cross-validation loop on each 10-fold dataset
val_logits = tf_model(x_val)
# Update val metrics
val_acc_metric(y_val, val_logits)
# Display metrics at the end of each epoch.
train_acc = train_acc_metric.result()
print('Training accuracy: ', float(train_acc))
# Reset training metrics at the end of each epoch
train_acc_metric.reset_states()
print("----------")
val_acc = val_acc_metric.result()
print('Validation accuracy: ', float(val_acc))
print("----------------------------------------------\n")
val_acc_metric.reset_states()
# Early stopping part
if val_acc < last_val_acc:
# If the max_patience is exceeded stop the training
if patience >= max_patience:
print("\n------------------------------------------------")
print("Early stopping training to prevent over-fitting!")
print("------------------------------------------------\n")
break
else:
patience += 1
# update the validation accuracy
last_val_acc = val_acc
# save the trained model
tf_model.save("./saved_model/tf_model")
print("\n------------------------------------------------")
print("\nEnd of Training!\n")
And the result of the training:
----------------------------------------------
Successfully created the directory ./saved_model
----------------------------------------------
Starting run script...
Model will be saved to ./saved_model
Checkpoints will be restored from and saved to . f_ckpts
INFO:tensorflow:Assets written to: ./saved_model/tf_model/assets
----------------------------------------------
Initializing from scratch.
----------------------------------------------
Start of epoch 0
Training accuracy: 0.38999998569488525
----------
Validation accuracy: 0.38999998569488525
----------------------------------------------
----------------------------------------------
Start of epoch 1
Saved checkpoint for step 100: ./tf_ckpts/ckpt-1
loss 1.05
Seen so far: 2000 samples
Training accuracy: 0.4050000011920929
----------
Validation accuracy: 0.4050000011920929
----------------------------------------------
----------------------------------------------
Start of epoch 2
Saved checkpoint for step 200: ./tf_ckpts/ckpt-2
loss 1.10
Seen so far: 4000 samples
Training accuracy: 0.36000001430511475
----------
Validation accuracy: 0.36000001430511475
----------------------------------------------
----------------------------------------------
Start of epoch 3
Saved checkpoint for step 300: ./tf_ckpts/ckpt-3
loss 1.15
Seen so far: 6000 samples
Training accuracy: 0.375
----------
Validation accuracy: 0.375
----------------------------------------------
----------------------------------------------
Start of epoch 4
Saved checkpoint for step 400: ./tf_ckpts/ckpt-4
loss 1.17
Seen so far: 8000 samples
Training accuracy: 0.38999998569488525
----------
Validation accuracy: 0.38999998569488525
----------------------------------------------
----------------------------------------------
Start of epoch 5
Saved checkpoint for step 500: ./tf_ckpts/ckpt-5
loss 1.18
Seen so far: 10000 samples
Training accuracy: 0.3799999952316284
----------
Validation accuracy: 0.3799999952316284
----------------------------------------------
----------------------------------------------
Start of epoch 6
Saved checkpoint for step 600: ./tf_ckpts/ckpt-6
loss 1.09
Seen so far: 12000 samples
Training accuracy: 0.35499998927116394
----------
Validation accuracy: 0.35499998927116394
----------------------------------------------
----------------------------------------------
Start of epoch 7
Saved checkpoint for step 700: ./tf_ckpts/ckpt-7
loss 1.12
Seen so far: 14000 samples
Training accuracy: 0.3700000047683716
----------
Validation accuracy: 0.3700000047683716
----------------------------------------------
Any suggestions on how to make it converge?
Related
I am following some PyTorch code on deep learning, where I saw the model evaluation taking place within the training epoch loop!
Q) Should torch.no_grad() and model.eval() be outside of the training epoch loop?
Q) And how can I determine which parameters (weights) are being optimised by the optimiser during back-propagation?
...
for l in range(1):
model = GTN(num_edge=A.shape[-1],
num_channels=num_channels,w_in = node_features.shape[1],w_out = node_dim,
num_class=num_classes,num_layers=num_layers,norm=norm)
if adaptive_lr == 'false':
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)
else:
optimizer = torch.optim.Adam([{'params':model.weight},{'params':model.linear1.parameters()},{'params':model.linear2.parameters()},
{"params":model.layers.parameters(), "lr":0.5}], lr=0.005, weight_decay=0.001)
loss = nn.CrossEntropyLoss()
# Train & Valid & Test
best_val_loss = 10000
best_train_loss = 10000
best_train_f1 = 0
best_val_f1 = 0
for i in range(epochs):
print('Epoch: ',i+1)
model.zero_grad()
model.train()
loss,y_train,Ws = model(A, node_features, train_node, train_target)
train_f1 = torch.mean(f1_score(torch.argmax(y_train.detach(),dim=1), train_target, num_classes=num_classes)).cpu().numpy()
print('Train - Loss: {}, Macro_F1: {}'.format(loss.detach().cpu().numpy(), train_f1))
loss.backward()
optimizer.step()
model.eval()
# Valid
with torch.no_grad():
val_loss, y_valid,_ = model.forward(A, node_features, valid_node, valid_target)
val_f1 = torch.mean(f1_score(torch.argmax(y_valid,dim=1), valid_target, num_classes=num_classes)).cpu().numpy()
if val_f1 > best_val_f1:
best_val_loss = val_loss.detach().cpu().numpy()
best_train_loss = loss.detach().cpu().numpy()
best_train_f1 = train_f1
best_val_f1 = val_f1
print('---------------Best Results--------------------')
print('Train - Loss: {}, Macro_F1: {}'.format(best_train_loss, best_train_f1))
print('Valid - Loss: {}, Macro_F1: {}'.format(best_val_loss, best_val_f1))
final_f1 += best_test_f1
For each epoch, you are doing training, followed by validation/test.
For validation/test you are moving the model to evaluation mode using model.eval() and then doing forward propagation under torch.no_grad(), which is correct. Likewise, you are moving the model back to training mode using model.train() at the start of the training part. There is no issue with the code, and you are using the model modes correctly.
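As a generic sketch (a toy model, not the GTN code above), the pattern looks like this:
import torch
import torch.nn as nn

# Toy model and data, just to illustrate the train/eval pattern discussed above
model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

x_train, y_train = torch.randn(8, 4), torch.randint(0, 2, (8,))
x_valid, y_valid = torch.randn(8, 4), torch.randint(0, 2, (8,))

for epoch in range(3):
    model.train()                      # training mode: dropout / batch-norm updates active
    optimizer.zero_grad()
    loss = criterion(model(x_train), y_train)
    loss.backward()
    optimizer.step()

    model.eval()                       # evaluation mode
    with torch.no_grad():              # no autograd graph is built for validation
        val_loss = criterion(model(x_valid), y_valid)
    print(epoch, loss.item(), val_loss.item())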
In your code, if adaptive_lr is 'false' then you are optimizing the parameters given by model.parameters(), and when adaptive_lr is true you are optimizing:
model.weight
model.linear1.parameters()
model.linear2.parameters()
model.layers.parameters()
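If you want to verify this programmatically, one small sketch (generic PyTorch, using the model and optimizer defined above) is to compare model.named_parameters() against what is registered in the optimizer's param groups:
# Every tensor the optimizer will update during optimizer.step()
optimized_ids = {id(p) for group in optimizer.param_groups for p in group['params']}

for name, param in model.named_parameters():
    status = 'optimized' if id(param) in optimized_ids else 'not passed to the optimizer'
    print(f'{name}: requires_grad={param.requires_grad}, {status}')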
I trained a BertClassifier model using PyTorch. After creating my best.pt, I would like to put the model into production and use it to predict/classify a given sample, so I restore it from the checkpoint. However, after putting it in evaluation mode and freezing the model, I call .predict on my sample, and I'm encountering the AttributeError below. I had also initialized the model before loading the checkpoint. Where am I wrong? Thank you for your help!
def save_ckp(state, is_best, checkpoint_path, best_model_path):
"""
function created to save checkpoint, the latest one and the best one.
This creates flexibility: either you are interested in the state of the latest checkpoint or the best checkpoint.
state: checkpoint we want to save
is_best: is this the best checkpoint; min validation loss
checkpoint_path: path to save checkpoint
best_model_path: path to save best model
"""
f_path = checkpoint_path
# save checkpoint data to the path given, checkpoint_path
torch.save(state, f_path)
# if it is a best model, min validation loss
if is_best:
best_fpath = best_model_path
# copy that checkpoint file to best path given, best_model_path
shutil.copyfile(f_path, best_fpath)
def load_ckp(checkpoint_fpath, model, optimizer):
"""
checkpoint_path: path to save checkpoint
model: model that we want to load checkpoint parameters into
optimizer: optimizer we defined in previous training
"""
# load check point
checkpoint = torch.load(checkpoint_fpath)
# initialize state_dict from checkpoint to model
model.load_state_dict(checkpoint['state_dict'])
# initialize optimizer from checkpoint to optimizer
optimizer.load_state_dict(checkpoint['optimizer'])
# initialize valid_loss_min from checkpoint to valid_loss_min
valid_loss_min = checkpoint['valid_loss_min']
# return model, optimizer, epoch value, min validation loss
return model, optimizer, checkpoint['epoch'], valid_loss_min.item()
#Create the BertClassfier class
class BertClassifier(nn.Module):
"""Bert Model for Classification Tasks."""
def __init__(self, freeze_bert=True):
"""
#param bert: a BertModel object
#param classifier: a torch.nn.Module classifier
#param freeze_bert (bool): Set `False` to fine-tune the BERT model
"""
super(BertClassifier, self).__init__()
.......
def forward(self, input_ids, attention_mask):
''' Feed input to BERT and the classifier to compute logits.
#param input_ids (torch.Tensor): an input tensor with shape (batch_size,
max_length)
#param attention_mask (torch.Tensor): a tensor that hold attention mask
information with shape (batch_size, max_length)
#return logits (torch.Tensor): an output tensor with shape (batch_size,
num_labels) '''
# Feed input to BERT
outputs = self.bert(input_ids=input_ids,
attention_mask=attention_mask)
# Extract the last hidden state of the token `[CLS]` for classification task
last_hidden_state_cls = outputs[0][:, 0, :]
# Feed input to classifier to compute logits
logits = self.classifier(last_hidden_state_cls)
return logits
def initialize_model(epochs):
""" Initialize the Bert Classifier, the optimizer and the learning rate scheduler."""
# Instantiate Bert Classifier
bert_classifier = BertClassifier(freeze_bert=False)
# Tell PyTorch to run the model on GPU
bert_classifier = bert_classifier.to(device)
# Create the optimizer
optimizer = AdamW(bert_classifier.parameters(),
lr=lr, # Default learning rate
eps=1e-8 # Default epsilon value
)
# Total number of training steps
total_steps = len(train_dataloader) * epochs
# Set up the learning rate scheduler
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps=0, # Default value
num_training_steps=total_steps)
return bert_classifier, optimizer, scheduler
def train(model, train_dataloader, val_dataloader, valid_loss_min_input, checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True):
"""Train the BertClassifier model."""
# Start training loop
logging.info("--Start training...\n")
# Initialize tracker for minimum validation loss
valid_loss_min = valid_loss_min_input
for epoch_i in range(start_epochs, epochs):
# =======================================
# Training
# =======================================
# Print the header of the result table
logging.info((f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}"))
# Measure the elapsed time of each epoch
t0_epoch, t0_batch = time.time(), time.time()
# Reset tracking variables at the beginning of each epoch
total_loss, batch_loss, batch_counts = 0, 0, 0
# Put the model into the training mode
model.train()
# For each batch of training data...
for step, batch in enumerate(train_dataloader):
batch_counts +=1
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Zero out any previously calculated gradients
model.zero_grad()
# Perform a forward pass. This will return logits.
logits = model(b_input_ids, b_attn_mask)
# Compute loss and accumulate the loss values
loss = loss_fn(logits, b_labels)
batch_loss += loss.item()
total_loss += loss.item()
# Perform a backward pass to calculate gradients
loss.backward()
# Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update parameters and the learning rate
optimizer.step()
scheduler.step()
# Print the loss values and time elapsed for every 20 batches
if (step % 500 == 0 and step != 0) or (step == len(train_dataloader) - 1):
# Calculate time elapsed for 20 batches
time_elapsed = time.time() - t0_batch
# Print training results
logging.info(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")
# Reset batch tracking variables
batch_loss, batch_counts = 0, 0
t0_batch = time.time()
# Calculate the average loss over the entire training data
avg_train_loss = total_loss / len(train_dataloader)
logging.info("-"*70)
# =======================================
# Evaluation
# =======================================
if evaluation == True:
# After the completion of each training epoch, measure the model's performance
# on our validation set.
val_loss, val_accuracy = evaluate(model, val_dataloader)
# Print performance over the entire training data
time_elapsed = time.time() - t0_epoch
logging.info(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}")
logging.info("-"*70)
logging.info("\n")
# create checkpoint variable and add important data
checkpoint = {
'epoch': epoch_i + 1,
'valid_loss_min': val_loss,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
}
# save checkpoint
save_ckp(checkpoint, False, checkpoint_path, best_model_path)
## TODO: save the model if validation loss has decreased
if val_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,val_loss))
# save checkpoint as best model
save_ckp(checkpoint, True, checkpoint_path, best_model_path)
valid_loss_min = val_loss
logging.info("-----------------Training complete--------------------------")
def evaluate(model, val_dataloader):
"""After the completion of each training epoch, measure the model's performance on our validation set."""
# Put the model into the evaluation mode. The dropout layers are disabled during the test time.
model.eval()
# Tracking variables
val_accuracy = []
val_loss = []
# For each batch in our validation set...
for batch in val_dataloader:
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Compute logits
with torch.no_grad():
logits = model(b_input_ids, b_attn_mask)
# Compute loss
loss = loss_fn(logits, b_labels)
val_loss.append(loss.item())
# Get the predictions
preds = torch.argmax(logits, dim=1).flatten()
# Calculate the accuracy rate
accuracy = (preds == b_labels).cpu().numpy().mean() * 100
val_accuracy.append(accuracy)
# Compute the average accuracy and loss over the validation set.
val_loss = np.mean(val_loss)
val_accuracy = np.mean(val_accuracy)
return val_loss, val_accuracy
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
train(model = bert_classifier ......)
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
model.eval()
model.freeze()
sample = {
"seq": "ABCDE",}
predictions = model.predict(sample)
AttributeError: 'BertClassifier' object has no attribute 'predict'
Generally, the prediction function is written for you.
If not, you need to handle the low-level steps yourself.
After this line, you have loaded the trained parameters:
model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
After that, you need to call model.forward(input_ids, attention_mask) (or simply model(input_ids, attention_mask)) yourself; a plain nn.Module has no .predict method.
You can see the forward method in the model: def forward(self, input_ids, attention_mask).
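For example, a minimal prediction helper might look like the sketch below. The tokenizer, max_length and device handling are assumptions on my part; they have to match whatever was used when the BertClassifier was trained.
import torch
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # assumption: same tokenizer as in training

def predict(model, text, device, max_length=64):
    model.eval()
    # Encode the raw text the same way the training data was encoded
    encoded = tokenizer(text, padding='max_length', truncation=True,
                        max_length=max_length, return_tensors='pt')
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)
    with torch.no_grad():
        logits = model(input_ids, attention_mask)   # this calls forward()
    probs = torch.softmax(logits, dim=1)
    return torch.argmax(probs, dim=1).cpu().numpy(), probs.cpu().numpy()

# preds, probs = predict(model, "ABCDE", device)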
I am practicing deep learning for binary classification with PyTorch on the Breast-Cancer-Wisconsin-Diagnostic dataset.
I've tried different approaches, and the best I can get is shown below; the accuracy is still low, at 61%.
What is the way to improve the accuracy?
Thank you.
import pandas as pd
import io
dataset = pd.read_excel(base_dir + "Breast-Cancer-Wisconsin-Diagnostic.xlsx")
number_of_columns = dataset.shape[1]
# training and testing split of 70:30
dataset['diagnosis'] = pd.Categorical(dataset['diagnosis']).codes
dataset = dataset.sample(frac=1, random_state=1234)
train_input = dataset.values[:398, :number_of_columns-1]
train_target = dataset.values[:398, number_of_columns-1]
test_input = dataset.values[398:, :number_of_columns-1]
test_target = dataset.values[398:, number_of_columns-1]
import torch
torch.manual_seed(1234)
hidden_units = 5
net = torch.nn.Sequential(
torch.nn.Linear(number_of_columns-1, hidden_units),
torch.nn.ReLU(),
torch.nn.Linear(hidden_units, 2))
# choose optimizer and loss function
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1,momentum=0.9)
# train
epochs = 50
for epoch in range(epochs):
inputs = torch.autograd.Variable(torch.Tensor(train_input).float())
targets = torch.autograd.Variable(torch.Tensor(train_target).long())
optimizer.zero_grad()
out = net(inputs)
loss = criterion(out, targets)
loss.backward()
optimizer.step()
if epoch == 0 or (epoch + 1) % 10 == 0:
print('Epoch %d Loss: %.4f' % (epoch + 1, loss.item()))
# Epoch 1 Loss: 412063.1250
# Epoch 10 Loss: 0.6628
# Epoch 20 Loss: 0.6639
# Epoch 30 Loss: 0.6592
# Epoch 40 Loss: 0.6587
# Epoch 50 Loss: 0.6588
import numpy as np
inputs = torch.autograd.Variable(torch.Tensor(test_input).float())
targets = torch.autograd.Variable(torch.Tensor(test_target).long())
optimizer.zero_grad()
out = net(inputs)
_, predicted = torch.max(out.data, 1)
error_count = test_target.size - np.count_nonzero((targets == predicted).numpy())
print('Errors: %d; Accuracy: %d%%' % (error_count, 100 * torch.sum(targets == predicted) // test_target.size))
# Errors: 65; Accuracy: 61%
The features representing the samples are in different ranges, so the first thing you should do is normalize the data (see the sketch below).
You should also plot the loss and accuracy over the training epochs, for both the training and the validation/test dataset, to understand whether the model overfits or underfits the training data.
Furthermore, you can try a more complex (deeper) model. And since your training dataset has only a few samples, you can consider augmentation and transfer learning as well, if possible.
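For the first point, a minimal sketch of standardizing the features, reusing the variable names from the question and computing the statistics on the training split only:
import numpy as np

train_input = train_input.astype(np.float32)
test_input = test_input.astype(np.float32)

# zero mean / unit variance per feature, using training statistics only
mean = train_input.mean(axis=0)
std = train_input.std(axis=0) + 1e-8   # guard against constant features
train_input = (train_input - mean) / std
test_input = (test_input - mean) / std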
I am trying to use the inception_v1 module written in tf.slim, provided here, to train the model on the CIFAR-10 dataset.
The code to train and evaluate the model on the dataset is below.
# test_data = (data['images_test'], data['labels_test'])
train_data = (train_x, train_y)
val_data = (val_x, val_y)
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data).shuffle(buffer_size=10000).batch(BATCH_SIZE).map(preprocess)
# train_dataset = train_dataset.shuffle(buffer_size=10000).batch(BATCH_SIZE).map(preprocess)
val_dataset = tf.data.Dataset.from_tensor_slices(val_data).batch(BATCH_SIZE).map(preprocess)
# test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).map(preprocess)
# create a _iterator of the correct shape and type
_iter = tf.data.Iterator.from_structure(
train_dataset.output_types,
train_dataset.output_shapes
)
features, labels = _iter.get_next()
# create the initialization operations
train_init_op = _iter.make_initializer(train_dataset)
val_init_op = _iter.make_initializer(val_dataset)
# test_init_op = _iter.make_initializer(test_dataset)
# Placeholders which evaluate in the session
training_mode = tf.placeholder(shape=None, dtype=tf.bool)
dropout_prob = tf.placeholder_with_default(1.0, shape=())
reuse_bool = tf.placeholder_with_default(True, shape=())
# Init the saver Object which handles saves and restores of
# model weights
# saver = tf.train.Saver()
# Initialize the model inside the arg_scope to define the batch
# normalization layer and the appropriate parameters
with slim.arg_scope(inception_v1_arg_scope(use_batch_norm=True)) as scope:
logits, end_points = inception_v1(features,
reuse=None,
dropout_keep_prob=dropout_prob, is_training=training_mode)
# Create the cross entropy loss function
cross_entropy = tf.reduce_mean(
tf.losses.softmax_cross_entropy(tf.one_hot(labels, 10), logits))
train_op = tf.train.AdamOptimizer(1e-2).minimize(loss=cross_entropy)
# train_op = slim.learning.create_train_op(cross_entropy, optimizer, global_step=)
# Define the accuracy metric
preds = tf.argmax(logits, axis=-1, output_type=tf.int64)
acc = tf.reduce_mean(tf.cast(tf.equal(preds, labels), tf.float32))
# Count the iterations for each set
n_train_batches = train_y.shape[0] // BATCH_SIZE
n_val_batches = val_y.shape[0] // BATCH_SIZE
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# saver = tf.train.Saver([v for v in tf.all_variables()][:-1])
# for v in tf.all_variables():
# print(v.name)
# saver.restore(sess, tf.train.latest_checkpoint('./', latest_filename='inception_v1.ckpt'))
for i in range(EPOCHS):
total_loss = 0
total_acc = 0
# Init train session
sess.run(train_init_op)
with tqdm(total=n_train_batches * BATCH_SIZE) as pbar:
for batch in range(n_train_batches):
_, loss, train_acc = sess.run([train_op, cross_entropy, acc], feed_dict={training_mode: True, dropout_prob: 0.2})
total_loss += loss
total_acc += train_acc
pbar.update(BATCH_SIZE)
print("Epoch: {} || Loss: {:.5f} || Acc: {:.5f} %".\
format(i+1, total_loss / n_train_batches, (total_acc / n_train_batches)*100))
# Switch to validation
total_val_loss = 0
total_val_acc = 0
sess.run(val_init_op)
for batch in range(n_val_batches):
val_loss, val_acc = sess.run([cross_entropy, acc], feed_dict={training_mode: False})
total_val_loss += val_loss
total_val_acc += val_acc
print("Epoch: {} || Validation Loss: {:.5f} || Val Acc: {:.5f} %".\
format(i+1, total_val_loss / n_val_batches, (total_val_acc / n_val_batches) * 100))
The paradox is that I get the following results when training the model and evaluating it on the validation set:
Epoch: 1 || Loss: 2.29436 || Acc: 23.61750 %
Epoch: 1 || Validation Loss: 1158854431554614016.00000 || Val Acc: 10.03000 %
100%|███████████████████████████████████████████████████| 40000/40000 [03:52<00:00, 173.21it/s]
Epoch: 2 || Loss: 1.68389 || Acc: 36.49250 %
Epoch: 2 || Validation Loss: 27997399226326712.00000 || Val Acc: 10.03000 %
100%|██████████████████████████████████████████████████▋| 39800/40000 [03:51<00:01, 174.11it/s]
I have set training_mode to True during training and to False during validation. However, apart from the train_op, which is only run in the training phase, the model seems to behave as if it were untrained on the validation set. My guess is that the is_training flag does not handle this situation well and does not keep the batch normalization variables properly maintained for validation. Has anyone experienced a similar situation before?
I found the solution to my problem. Two things were involved.
The first was to set a smaller batch norm decay: since the dataset is much smaller than ImageNet, I had to lower it to 0.99.
batch_norm_decay=0.99
The other was to use the following line, so that the batch normalization layers' parameters (including their moving statistics) are kept up to date during training:
train_op = slim.learning.create_train_op(cross_entropy, optimizer)
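Put together, the relevant part of the graph construction looks roughly like the sketch below (assuming inception_v1_arg_scope exposes batch_norm_decay, as the standard tf.slim implementation does); create_train_op also adds the batch-norm moving-average update ops as a dependency of the train step:
with slim.arg_scope(inception_v1_arg_scope(use_batch_norm=True,
                                           batch_norm_decay=0.99)):
    logits, end_points = inception_v1(features,
                                      dropout_keep_prob=dropout_prob,
                                      is_training=training_mode)

cross_entropy = tf.reduce_mean(
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, 10), logits))

optimizer = tf.train.AdamOptimizer(1e-2)
# create_train_op wires the UPDATE_OPS (batch-norm moving averages) into the train step
train_op = slim.learning.create_train_op(cross_entropy, optimizer)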
I took a tutorial available here and tried to run it on my dataset. It compiles and begins training, but here is what I got:
the model doesn't seem to be saved at each iteration.
I also tried with 100 epochs and it didn't change anything; it keeps giving the output of the first iteration.
Do you have an idea of what the problem could be? (I know the code is long, sorry.)
def train(model, epochs, log_string):
'''Train the RNN'''
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Used to determine when to stop the training early
valid_loss_summary = []
# Keep track of which batch iteration is being trained
iteration = 0
print()
print("Training Model: {}".format(log_string))
train_writer = tf.summary.FileWriter('./logs/3/train/{}'.format(log_string), sess.graph)
valid_writer = tf.summary.FileWriter('./logs/3/valid/{}'.format(log_string))
for e in range(epochs):
state = sess.run(model.initial_state)
# Record progress with each epoch
train_loss = []
train_acc = []
val_acc = []
val_loss = []
with tqdm(total=len(x_train)) as pbar:
for _, (x, y) in enumerate(get_batches(x_train, y_train, batch_size), 1):
feed = {model.inputs: x,
model.labels: y[:, None],
model.keep_prob: dropout,
model.initial_state: state}
summary, loss, acc, state, _ = sess.run([model.merged,
model.cost,
model.accuracy,
model.final_state,
model.optimizer],
feed_dict=feed)
# Record the loss and accuracy of each training batch
train_loss.append(loss)
train_acc.append(acc)
# Record the progress of training
train_writer.add_summary(summary, iteration)
iteration += 1
pbar.update(batch_size)
avg_train_loss = np.mean(train_loss)
avg_train_acc = np.mean(train_acc)
val_state = sess.run(model.initial_state)
with tqdm(total=len(x_valid)) as pbar:
for x, y in get_batches(x_valid, y_valid, batch_size):
feed = {model.inputs: x,
model.labels: y[:, None],
model.keep_prob: 1,
model.initial_state: val_state}
summary, batch_loss, batch_acc, val_state = sess.run([model.merged,
model.cost,
model.accuracy,
model.final_state],
feed_dict=feed)
# Record the validation loss and accuracy of each epoch
val_loss.append(batch_loss)
val_acc.append(batch_acc)
pbar.update(batch_size)
# Average the validation loss and accuracy of each epoch
avg_valid_loss = np.mean(val_loss)
avg_valid_acc = np.mean(val_acc)
valid_loss_summary.append(avg_valid_loss)
# Record the validation data's progress
valid_writer.add_summary(summary, iteration)
# Print the progress of each epoch
print("Epoch: {}/{}".format(e, epochs),
"Train Loss: {:.3f}".format(avg_train_loss),
"Train Acc: {:.3f}".format(avg_train_acc),
"Valid Loss: {:.3f}".format(avg_valid_loss),
"Valid Acc: {:.3f}".format(avg_valid_acc))
# Stop training if the validation loss does not decrease after 3 epochs
if avg_valid_loss > min(valid_loss_summary):
print("No Improvement.")
stop_early += 1
if stop_early == 3:
break
# Reset stop_early if the validation loss finds a new low
# Save a checkpoint of the model
else:
print("New Record!")
stop_early = 0
checkpoint = "sauvegarde/controverse_{}.ckpt".format(log_string)
saver.save(sess,checkpoint)
Thank you very much for your answers :)