I am building a model to classify news (AG news dataset). The vocab size ~33k with custom embedding layer. I have run this for 20 epochs but the loss and accuracy (1.3 and 26% respec.) is almost constant even at the end of 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
class NewsClassifier(nn.Module):
def __init__(self, vocab_weights = None, rnn_type = 'LSTM', vocab_size = len(vocab.vocab), n_classes = 4, embed_size = 300, rnn_units = 512, \
n_layers = 2, bi_dir = True, rnn_drop = 0.0, padding_index = vocab['<unk>']):
super().__init__()
self.rnn_units = rnn_units
self.n_classes = n_classes
self.rnn_type = rnn_type
if vocab_weights:
self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
else:
self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx = padding_index)
if rnn_type == 'LSTM':
self.rnn = nn.LSTM(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
elif rnn_type == 'GRU':
self.rnn = nn.GRU(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
else:
raise NotImplementError
self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)
def forward(self, data, lens):
x_embed = self.embedding(data) # (padded_lens, batch_size, embed_dim)
x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted = False) #packing sequences and passing to RNN unit
if self.rnn_type == 'LSTM':
output_packed, (hidden,cell) = self.rnn(x_packed) #output is packed and cannot be fed to linear layers
else:
output_packed, hidden = self.rnn(x_packed) #For GRU there is only hidden state
#Though n number of layers are stacked the output is always 1
output_padded, _ = pad_packed_sequence(output_packed) #output is padded to be fed to linear layer (padded_lens, batch size, hidden_units)
#Picking only the last output --> equivalent to reutrn_sequences = False in Keras
out_reduced = torch.cat((output_padded[-1, :, : self.rnn_units], output_padded[-1, :, self.rnn_units :]), 1)
return self.fc(out_reduced)
model = NewsClassifier()
print(f'The total number of trainable parameters are : {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
My training function is:
def train(model, iterator = trainDataloader, optimizer = optimizer, loss_fn = criterion):
e_loss = e_acc = i = 0
model.train()
for inputs, leng, labels in iterator:
inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
optimizer.zero_grad()
preds = model(inputs, leng).squeeze(1)
loss = loss_fn(preds, labels.long())
acc = accuracy(preds, labels)
loss.backward()
optimizer.step()
e_loss += loss.item()
e_acc += acc.item()
i += 1
return e_loss/i, e_acc/i
def predict(model, iterator = testDataloader, loss_fn = criterion):
e_loss = e_acc = i = 0
model.eval()
with torch.no_grad():
for inputs, leng, labels in iterator:
inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
preds = model(inputs, leng).squeeze(1)
loss = loss_fn(preds, labels.long())
acc = accuracy(preds, labels)
e_loss += loss.item()
e_acc += acc.item()
i += 1
return e_loss/i, e_acc/i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
start_time = time.time()
train_loss, train_acc = train(model)
valid_loss, valid_acc = predict(model)
end_time = time.time()
epoch_mins, epoch_secs = epoch_time(start_time, end_time)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), 'tut1-model.pt')
print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
Related
This is my model:
class BiLSTM(nn.Module):
def __init__(self):
super(BiLSTM, self).__init__()
self.hidden_size = 128
drp = 0.2
n_classes = len(le.classes_)
self.embedding = nn.Embedding(max_features, embed_size)
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
self.linear = nn.Linear(self.hidden_size*4 , 128)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(drp)
self.out = nn.Linear(128, n_classes)
def forward(self, x):
#rint(x.size())
h_embedding = self.embedding(x)
_embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
h_lstm, _ = self.lstm(h_embedding)
avg_pool = torch.mean(h_lstm, 1)
max_pool, _ = torch.max(h_lstm, 1)
conc = torch.cat(( avg_pool, max_pool), 1)
conc = self.relu(self.linear(conc))
conc = self.dropout(conc)
out = self.out(conc)
return out
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean',)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()
# Load train and test in CUDA Memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()
# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)
# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
train_loss = []
valid_loss = []
for epoch in range(n_epochs):
start_time = time.time()
# Set model to train configuration
model.train()
avg_loss = 0.
for i, (x_batch, y_batch) in enumerate(train_loader):
# Predict/Forward Pass
y_pred = model(x_batch)
# Compute loss
loss = loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_loss += loss.item() / len(train_loader)
#acc =n-avg_loss
# Set model to validation configuration
model.eval()
avg_val_loss = 0.
val_preds = np.zeros((len(x_cv),len(le.classes_)))
for i, (x_batch, y_batch) in enumerate(valid_loader):
y_pred = model(x_batch).detach()
avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
#val_accuracy = n- avg_val_loss
# keep/store predictions
val_preds[i * batch_size:(i+1) * batch_size] =F.softmax(y_pred).cpu().numpy()
val_preds=val_preds*5000
# Check Accuracy
val_accuracy = sum(val_preds.argmax(axis=1)==test_y)/len(test_y)
train_loss.append(avg_loss)
valid_loss.append(avg_val_loss)
elapsed_time = time.time() - start_time
print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
epoch + 1, n_epochs , avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy this is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy, The size of the dataset is 3000
[1]: https://i.stack.imgur.com/NbK92.png [output looks like this][1]
[1]: https://i.stack.imgur.com/ll12J.png [Data Looks Like this][1]
I am working on a text classification problem with a binary output 0 or 1. The accuracy, train loss and test loss remains the same. The accuracy is exact the same for all the epochs. All the steps looks very correct. I had tried out several ways to figure out what is going wrong. Nothing actually worked. Please help.
class RNN(nn.Module):
def __init__(self, num_layers, num_classes, input_size, hidden_size,vocab,dropout):
super(RNN,self).__init__()
self.num_layers = num_layers
self.num_classes = num_classes
self.input_size = input_size
self.hidden_size = hidden_size
self.sequence_len = sequence_len
self.embedding = nn.Embedding(len(vocab),input_size)
nn.init.xavier_normal_(self.embedding.weight)
self.rnn = nn.RNN(input_size, hidden_size,num_layers,dropout = dropout, nonlinearity = 'tanh', batch_first=True, bias = True, bidirectional = False)
self.linear = nn.Linear(hidden_size, 1)
nn.init.xavier_normal_(self.linear.weight)
self.sigmoid = nn.Sigmoid()
def forward(self,x):
lens = list(map(len, x))
padded = pad_sequence(x, batch_first=True)
output_embedding=self.embedding(padded)
packed = pack_padded_sequence(input = output_embedding,lengths = lens, batch_first=True, enforce_sorted=False)
input_rnn = packed
h011 = torch.zeros(1,32,3)
output_11, hidden_11 = self.rnn(input_rnn,h011)
output_padded, output_lengths = pad_packed_sequence(output_11, batch_first=True)
final_output_11 = self.linear(hidden_11)
prob_11 = self.sigmoid(final_output_11)
return output_padded, hidden_11, prob_11
num_layers = 1
num_classes = 2
input_size = 5
hidden_size = 3
criterion = nn.BCELoss()
sequence_len = 1
dropout = 0.5
rnn = RNN(num_layers, num_classes, input_size, hidden_size,vocab,dropout)
epochs = 10
lr = 0.01
weight_decay=0.011
def train_loop(model,criterion,optimizer,train_loader,valid_loader,epochs):
train_losses= []
valid_losses= []
for epoch in range(epochs):
train_loss=0
for label,text in train_loader:
output,hidden,prob = rnn.forward(text)
prob = torch.tensor([item.item() for sublist in prob for item in sublist],dtype = torch.float32)
label = torch.tensor(label, dtype = torch.float32)
loss=criterion(prob,label)
optimizer.zero_grad()
loss.requires_grad = True
loss.backward()
optimizer.step()
train_loss += loss.item()
train_loss=train_loss/len(train_loader)
valid_loss=0
with torch.no_grad():
correct=0
total=0
for label,text in valid_loader:
output,hidden,prob = rnn.forward(text)
prob = torch.tensor([item.item() for sublist in prob for item in sublist],dtype = torch.float32)
label = torch.tensor(label, dtype = torch.float32)
loss=criterion(prob,label)
valid_loss += loss.item()
p = torch.tensor([1 if i > 0.5 else 0 for i in prob.data], dtype = torch.float32)
#predicted = torch.max(p, 1)
total += label.size(0)
correct += (p == label).sum().item()
valid_loss=valid_loss/len(valid_loader)
accuracy = 100 * correct / total
print(accuracy)
scheduler.step(accuracy)
train_losses.append(train_loss)
valid_losses.append(valid_loss)
print(f'Epoch {epoch+1:<2d}/{epochs} --> Train Loss: {train_loss:.4f} | Valid Loss: {valid_loss:.4f}')
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.4, patience=5,
verbose=True) # need to change scheduler.step
train_loop(rnn,criterion,optimizer,train_loader,valid_loader,epochs)
Output:
I am trying to create a logistic model by using CIFAR10 data in PyTorch. After running the model for evaluation I run into an error :
RuntimeError: size mismatch, m1: [750 x 4096], m2: [1024 x 10] at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\aten\src\TH/generic/THTensorMath.cpp:136
It seems like input_size is creating a problem, I dont know I am new to this. Please let me know what changes should I make in order to overcome this error.
These are the hyperparameters:
batch_size = 100
learning_rate = 0.001
# Other constants
input_size = 4*4*64
num_classes = 10
This is the cell that downloads and splits the dataset into train, validation and test.
transform = torchvision.transforms.Compose(
[torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
testset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=False,download=False, transform=transform)
trainvalset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=True,download=False, transform=transform)
trainset, valset = torch.utils.data.random_split(trainvalset, [45000, 5000]) # 10% for validation
train_loader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=1000, shuffle=False)
This is the architecture of my model.
class CifarModel(nn.Module):
def __init__(self):
super().__init__()
self.linear = nn.Linear(input_size, num_classes)
def forward(self, xb):
xb = xb.view(-1, 64*8*8)
#xb = xb.reshape(-1, 784)
print(xb.shape)
out = self.linear(xb)
return out
def training_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
return loss
def validation_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
acc = accuracy(out, labels) # Calculate accuracy
return {'val_loss': loss.detach(), 'val_acc': acc.detach()}
def validation_epoch_end(self, outputs):
batch_losses = [x['val_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean() # Combine losses
batch_accs = [x['val_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean() # Combine accuracies
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))
model = CifarModel()
def accuracy(outputs, labels):
_, preds = torch.max(outputs, dim=1)
return torch.tensor(torch.sum(preds == labels).item() / len(preds))
def evaluate(model, val_loader):
outputs = [model.validation_step(batch) for batch in val_loader]
return model.validation_epoch_end(outputs)
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
history = []
optimizer = opt_func(model.parameters(), lr)
for epoch in range(epochs):
# Training Phase
for batch in train_loader:
loss = model.training_step(batch)
loss.backward()
optimizer.step()
optimizer.zero_grad()
# Validation phase
result = evaluate(model, val_loader)
model.epoch_end(epoch, result)
history.append(result)
return history
evaluate(model, val_loader)
Here you are specifying that the number of output classes should be 10:
num_classes = 10
Your forward function does not reflect this:
xb = xb.view(-1, 64*8*8) # you get 750x4096
out = self.linear(xb) # here an input of
# input_size to linear layer = 4*4*64 # 1024
# num_classes = 10
Modify it like this:
xb = xb.view(-1, 64*4*4) # you get 750x1024
out = self.linear(xb) # M1 750x1024 M2 1024x10:
# input_size = 4*4*64 # 1024
# num_classes = 10
I have a time series data looking something like this:
I am trying to model this with a sequence to sequence RNN in pytorch. It trains well and I can see the loss going down. But on testing it gives the same out put irrespective of the input.
My Model:
class RNNModel(nn.Module):
def __init__(self, predictor_size, hidden_size, num_layers, dropout = 0.3, output_size=83):
super(RNNModel, self).__init__()
self.drop = nn.Dropout(dropout)
self.rnn = nn.GRU(predictor_size, hidden_size, num_layers=num_layers, dropout = dropout)
self.decoder = nn.Linear(hidden_size, output_size)
self.init_weights()
self.hidden_size = hidden_size
self.num_layers = num_layers
def init_weights(self):
initrange = 0.1
self.decoder.bias.data.fill_(0)
self.decoder.weight.data.uniform_(-initrange, initrange)
def forward(self, input, hidden):
output, hidden = self.rnn(input, hidden)
output = self.drop(output)
decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
return Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
Train Method:
def train(data_source, lr):
# turn on training mode that enables dropout
model.train()
total_loss = 0
hidden = model.init_hidden(bs_train)
optimizer = optim.Adam(model.parameters(), lr = lr)
for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt_size)):
data, targets = get_batch(data_source, i)
# Starting each batch, we detach the hidden state from how it was previously produced
# so that model doesen't ry to backprop to all the way start of the dataset
# unrolling of the graph will go from the last iteration to the first iteration
hidden = Variable(hidden.data)
if cuda.is_available():
hidden = hidden.cuda()
optimizer.zero_grad()
output, hidden = model(data, hidden)
loss = criterion(output, targets)
loss.backward()
# clip_grad_norm to prevent gradient explosion
torch.nn.utils.clip_grad_norm(model.parameters(), clip)
optimizer.step()
total_loss += len(data) * loss.data
# return accumulated loss for all the iterations
return total_loss[0] / len(data_source)
Evaluation Method:
def evaluate(data_source):
# turn on evaluation to disable dropout
model.eval()
model.train(False)
total_loss = 0
hidden = model.init_hidden(bs_valid)
for i in range(0, data_source.size(0) - 1, bptt_size):
data, targets = get_batch(data_source, i, evaluation = True)
if cuda.is_available():
hidden = hidden.cuda()
output, hidden = model(data, hidden)
total_loss += len(data) * criterion(output, targets).data
hidden = Variable(hidden.data)
return total_loss[0]/len(data_source)
Training Loop:
best_val_loss = None
best_epoch = 0
def run(epochs, lr):
val_losses = []
num_epochs = []
global best_val_loss
global best_epoch
for epoch in range(0, epochs):
train_loss = train(train_set, lr)
val_loss = evaluate(test_set)
num_epochs.append(epoch)
val_losses.append(val_loss)
print("Train Loss: ", train_loss, " Validation Loss: ", val_loss)
if not best_val_loss or val_loss < best_val_loss:
best_val_loss = val_loss
torch.save(model.state_dict(), "./4.model.pth")
best_epoch = epoch
return num_epochs, val_losses
Loss with epochs:
Getting the output:
model = RNNModel(predictor_size, hidden_size, num_layers, dropout_pct, output_size)
model.load_state_dict(torch.load("./4.model.pth"))
if cuda.is_available():
model.cuda()
model.eval()
model.train(False)
hidden = model.init_hidden(1)
inp = torch.Tensor(var[105])
input = Variable(inp.contiguous().view(1,1,predictor_size), volatile=True)
if cuda.is_available():
input.data = input.data.cuda()
output, hidden = model(input, hidden)
op = output.squeeze().data.cpu()
print(op)
Here I always get the same output irrespective of datapoint I give as input. Can somebody please tell me what I am doing wrong.
I am training my own model using Tensorflow. However, I got some trouble when I change my activation function from Relu to Selu.
This is what happened. Learning curve drops accidentally and I have no idea about what's going on.
my learning curve
like this.
For what I have known, Selu can prevent overfitting, so I try to implement it in my model. Is there any tips, or any condition when I want to use Selu?
This is my code:
this is the place where I change my activation function
-----
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
conv_layer = tf.layers.conv2d(x_tensor, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.selu, name = layer_name)
conv_layer = tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)
return conv_layer
-----
graph
tf.reset_default_graph()
#### placeholder ####
input_img = tf.placeholder(dtype=tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_class))
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
lr_in = tf.placeholder(dtype = tf.float32, name = 'learning_rate')
conv_ksize = (3,3)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
n_filters_1 = 32
n_filters_2 = 64
n_filters_3 = 128
n_filters_4 = 256
onebyone_ksize = (1,1)
#CNN
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv1")
# conv_1 = tf.layers.conv2d(conv_1, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.relu)
# conv_1_norm = tf.layers.batch_normalization(conv_1, name = "batch_norm1")
# conv_1_dropout = tf.layers.dropout(conv_1_norm, rate = keep_prob, training = True, name = "dropout1")
conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv2")
# conv_2_norm = tf.layers.batch_normalization(conv_2)
conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv3")
# conv_3_norm = tf.layers.batch_normalization(conv_3, name = "batch_norm3")
# conv_3_dropout = tf.layers.dropout(conv_3_norm, rate = keep_prob, training = True, name = "dropout3")
conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv4")
flatten = tf.layers.flatten(conv_4)
fc1 = tf.layers.dense(flatten, 256, activation = tf.nn.relu)
out = tf.layers.dense(fc1, 6, activation=None, name= "logits") #logit
predict = tf.nn.softmax(out)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = y_true))
optimizer = tf.train.AdamOptimizer(lr).minimize(cost)
##accuracy
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Training
#history/record
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
update_per_epoch = int(np.floor(X_train.shape[0] / batch_size))
## early stopping and learning rate congig
es_patience = 10
es_n = 0
lr_patience = 3
lr_n = 0
save_model_path = './save'
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Initializing the variables
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size = 32)
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size = len(X_valid))
for i in range(epoch):
epoch_loss = 0
epoch_acc = 0
for j in range(update_per_epoch):
image, label = next(batch_gen)
_, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
input_img : image,
y_true : label,
lr_in: lr,
keep_prob : keep_probability
})
epoch_loss += this_loss
epoch_acc += this_acc
## end of epoch
epoch_loss /= update_per_epoch
epoch_acc /= update_per_epoch
train_loss.append(epoch_loss)
train_acc.append(epoch_acc)
print('Epoch {:>2} Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))
valid_image, valid_label = next(val_batch_gen)
valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict = {
input_img: valid_image,
y_true: valid_label,
lr_in: lr,
keep_prob: 1.
})
valid_loss.append(valid_this_loss)
valid_acc.append(valid_this_acc)
print('Epoch {:>2} Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1,valid_this_loss, valid_this_acc))
# early stop
if valid_this_loss > np.min(valid_loss):
es_n += 1
lr_n += 1
else:
es_n = 0
lr_n = 0
saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))
# early stop
if es_n >= es_patience:
print("-----------early stopping-------------")
break
# adaptive learning rate
if lr_n >= lr_patience:
lr *= lr_decay_rate
lr_n = 0
print("-----------adjust learning rate------------")
# Save Model
save_path = saver.save(sess, save_model_path)
print('-----model save ------')
----------- 18/09/07------------
I can always reproduce the same result.
And this is my code, I wrote it in Jupyter. But sorry I can't upload the training data:
https://drive.google.com/open?id=1uUE32KrNmWnhLbV8z-fyHSMu6zGCCG_e