I am training my own model using TensorFlow. However, I ran into trouble when I changed my activation function from ReLU to SELU.
This is what happened: the learning curve suddenly drops, and I have no idea what is going on.
[my learning curve]
From what I know, SELU can prevent overfitting, so I tried to implement it in my model. Are there any tips, or any conditions to be aware of, when using SELU?
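For reference: the SELU paper ties its self-normalizing property to two conditions that a standard ReLU setup does not satisfy: weights initialized with LeCun normal initialization, and alpha dropout instead of regular dropout. Below is a minimal sketch of that setup for a layer like the one in the question, assuming TF 1.x; the initializer and the tf.contrib.nn.alpha_dropout call are my illustration, not the asker's code:

import tensorflow as tf

def conv2d_maxpool_selu(x_tensor, n_filters, conv_ksize, conv_strides,
                        pool_ksize, pool_strides, layer_name):
    # SELU keeps activations self-normalizing only with LeCun-normal init.
    conv_layer = tf.layers.conv2d(
        x_tensor, n_filters, kernel_size=conv_ksize, strides=conv_strides,
        activation=tf.nn.selu,
        kernel_initializer=tf.keras.initializers.lecun_normal(),
        name=layer_name)
    return tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize,
                                   strides=pool_strides)

# If dropout is added, use alpha dropout so self-normalization survives:
# dropped = tf.contrib.nn.alpha_dropout(fc1, keep_prob=0.9)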
This is my code:
This is the place where I changed my activation function:
-----
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
    conv_layer = tf.layers.conv2d(x_tensor, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.selu, name=layer_name)
    conv_layer = tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)
    return conv_layer
-----
Graph
tf.reset_default_graph()
#### placeholder ####
input_img = tf.placeholder(dtype=tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_class))
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
lr_in = tf.placeholder(dtype = tf.float32, name = 'learning_rate')
conv_ksize = (3,3)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
n_filters_1 = 32
n_filters_2 = 64
n_filters_3 = 128
n_filters_4 = 256
onebyone_ksize = (1,1)
#CNN
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv1")
# conv_1 = tf.layers.conv2d(conv_1, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.relu)
# conv_1_norm = tf.layers.batch_normalization(conv_1, name = "batch_norm1")
# conv_1_dropout = tf.layers.dropout(conv_1_norm, rate = keep_prob, training = True, name = "dropout1")
conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv2")
# conv_2_norm = tf.layers.batch_normalization(conv_2)
conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv3")
# conv_3_norm = tf.layers.batch_normalization(conv_3, name = "batch_norm3")
# conv_3_dropout = tf.layers.dropout(conv_3_norm, rate = keep_prob, training = True, name = "dropout3")
conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv4")
flatten = tf.layers.flatten(conv_4)
fc1 = tf.layers.dense(flatten, 256, activation = tf.nn.relu)
out = tf.layers.dense(fc1, 6, activation=None, name= "logits") #logit
predict = tf.nn.softmax(out)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = y_true))
optimizer = tf.train.AdamOptimizer(lr_in).minimize(cost)  # use the lr_in placeholder so the adjusted learning rate below takes effect
##accuracy
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Training
#history/record
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
update_per_epoch = int(np.floor(X_train.shape[0] / batch_size))
## early stopping and learning rate config
es_patience = 10
es_n = 0
lr_patience = 3
lr_n = 0
save_model_path = './save'
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Initializing the variables
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size = 32)
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size = len(X_valid))
for i in range(epoch):
    epoch_loss = 0
    epoch_acc = 0
    for j in range(update_per_epoch):
        image, label = next(batch_gen)
        _, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
            input_img: image,
            y_true: label,
            lr_in: lr,
            keep_prob: keep_probability
        })
        epoch_loss += this_loss
        epoch_acc += this_acc

    ## end of epoch
    epoch_loss /= update_per_epoch
    epoch_acc /= update_per_epoch
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    print('Epoch {:>2} Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))

    valid_image, valid_label = next(val_batch_gen)
    valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict={
        input_img: valid_image,
        y_true: valid_label,
        lr_in: lr,
        keep_prob: 1.
    })
    valid_loss.append(valid_this_loss)
    valid_acc.append(valid_this_acc)
    print('Epoch {:>2} Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1, valid_this_loss, valid_this_acc))

    # early stop
    if valid_this_loss > np.min(valid_loss):
        es_n += 1
        lr_n += 1
    else:
        es_n = 0
        lr_n = 0
        saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))
    if es_n >= es_patience:
        print("-----------early stopping-------------")
        break
    # adaptive learning rate
    if lr_n >= lr_patience:
        lr *= lr_decay_rate
        lr_n = 0
        print("-----------adjust learning rate------------")

# Save Model
save_path = saver.save(sess, save_model_path)
print('-----model save ------')
----------- 18/09/07------------
I can always reproduce the same result.
And this is my code, written in Jupyter. Sorry, but I can't upload the training data:
https://drive.google.com/open?id=1uUE32KrNmWnhLbV8z-fyHSMu6zGCCG_e
-----
This is my code, along with an image of my training accuracy:
# !curl -L -s -o '/content/ESC-50-master-16k.tar' 'https://drive.google.com/uc?id=1hFt-qarD_Ihjb3jW7pd3tje2pcY5tfuJ&confirm=t'
# !tar -xvf "ESC-50-master-16k.tar"
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
import numpy as np
import pandas as pd
from glob import glob
Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load ESC-50 Data & Preprocess
metadata = pd.read_csv("./ESC-50-master-16k/meta/esc50.csv")
wav_list = sorted(glob("./ESC-50-master-16k/resample/*.wav"))
def spec_to_image(spec, eps=1e-6):
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
    spec_scaled = spec_scaled.astype(np.uint8)
    return spec_scaled
class esc50dataset(Dataset):
    def __init__(self, wavlist, metadata):
        self.labels = np.array(metadata.target).astype(int)
        self.audio = []
        for f in wavlist:
            wav, sr = librosa.load(f, sr=None)
            spec = librosa.feature.melspectrogram(wav, sr=sr, n_fft=1024, hop_length=640, n_mels=126)  # [126, 126]
            spec_db = librosa.power_to_db(spec, top_db=80)
            spec_image = np.expand_dims(spec_to_image(spec_db), axis=0)
            self.audio.append(spec_image.tolist())
        self.audio = np.array(self.audio)

    def __len__(self):
        return len(self.audio)

    def __getitem__(self, idx):
        return self.audio[idx], self.labels[idx]
dataset = esc50dataset(wav_list, metadata)
features = dataset[:][0]
labels = dataset[:][1]
Train test split
train_size = 0.7
val_size = 0.5

# make train set
split_id = int(len(features) * train_size)  # 2000 * 0.7 = 1400
train_features, remain_features = features[:split_id], features[split_id:]  # train_features = 1400, remain_features = 600
train_labels, remain_labels = labels[:split_id], labels[split_id:]  # train_labels = 1400, remain_labels = 600

# make val and test set
split_val_id = int(len(remain_features) * val_size)  # 600 * 0.5 = 300
val_features, test_features = remain_features[:split_val_id], remain_features[split_val_id:]  # val_features = 300, test_features = 300
val_labels, test_labels = remain_labels[:split_val_id], remain_labels[split_val_id:]  # val_labels = 300, test_labels = 300

# define batch size
batch_size = 32

# create tensor datasets
train_set = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
valid_set = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_labels))
test_set = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))

# create dataloaders
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)  # 1400 / 32
val_loader = DataLoader(valid_set, shuffle=True, batch_size=batch_size)  # 300 / 32
test_loader = DataLoader(test_set, shuffle=True, batch_size=batch_size)  # 300 / 32

Model Hyperparameters
in_channel = features[0].shape[0]  # in_channel = 1
max_pool_kernel = 3
hidden_size = 256
hidden_layers = 10
output_classes = labels.max() + 1  # output_classes = 50
dropout = 0.25
learning_rate = 0.001
num_epochs = 10
Model
class CRNN(nn.Module):
    def __init__(self, in_channel, hidden_size, hidden_layers, output_classes, dropout):
        super(CRNN, self).__init__()
        self.hidden_layers = hidden_layers
        self.hidden_size = hidden_size
        self.CNN_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=max_pool_kernel),  # 32, 42, 42
        )
        self.CNN_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=max_pool_kernel),  # 64, 14, 14
        )
        self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=hidden_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.float()
        # print("start : {}".format(x))
        x = self.CNN_layer1(x)  # 1, 126, 126 -> 32, 42, 42
        # print("after CNN_1 : {}".format(x))
        x = self.CNN_layer2(x)  # 32, 42, 42 -> 64, 14, 14
        # print("after CNN_2 : {}".format(x))
        x = x.reshape(x.size(0), -1, 64)  # 64, 14, 14 -> (batch, 196, 64); x.size(0) = batch size = 32
        # print("after reshape : {}".format(x))
        h0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device)  # torch.Size([10, 32 (batch_size), 256])
        c0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device)  # torch.Size([10, 32 (batch_size), 256])
        x, _ = self.lstm(x, (h0, c0))
        # print("after lstm : {}".format(x))
        x = self.dropout(x)
        # print("after dropout : {}".format(x))
        x = self.fc(x[:, -1, :])
        # print("after fc : {}".format(x))
        x = self.sigmoid(x)
        # print("after sigmoid : {}".format(x))
        return x
Model initialization
model = CRNN(in_channel, hidden_size, hidden_layers, output_classes, dropout)
Criterion & Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Train
from tqdm import tqdm
tqdm.pandas()

tr_loss = []
tr_acc = []
v_loss = []
v_acc = []
best_valid_loss = torch.inf
best_epoch = 0

model = model.to(device)
epochloop = tqdm(range(num_epochs), position=0, desc='Training', leave=True)
for epoch in epochloop:
    model.train()
    train_loss = 0
    train_acc = 0
    epochloop.set_postfix_str(f'Training epoch {epoch}/{len(epochloop)}')  # visualize
    for idx, (audio, label) in enumerate(train_loader):
        audio, label = audio.to(device), label.to(device)
        out = model(audio)
        _, pred = torch.max(out.data, 1)
        # acc
        train_acc += (pred == label).sum()
        # loss
        optimizer.zero_grad()
        loss = criterion(out, label)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    # validation
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for idx, (audio, label) in enumerate(val_loader):
            epochloop.set_postfix_str(f'Validation batch {idx}/{len(val_loader)}')
            audio, label = audio.to(device), label.to(device)
            # forward pass
            out = model(audio)
            _, pred = torch.max(out.data, 1)
            # acc
            val_acc += (pred == label).sum()
            # loss
            loss = criterion(out, label)
            val_loss += loss.item()
    model.train()

    # save model if validation loss decreases
    if val_loss / len(val_loader) <= best_valid_loss:  # compare per-batch averages on both sides
        best_valid_loss = val_loss / len(val_loader)
        best_epoch = epoch
        torch.save(model.state_dict(), "LSTM_epoch_{}.pth".format(epoch))

    # print epoch loss & accuracy
    print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss / len(train_loader):.3f} Train Acc: {train_acc / len(train_set) * 100}% | Val Loss: {val_loss / len(val_loader):.3f} Val Acc: {val_acc / len(valid_set) * 100}%')
    tr_loss.append(train_loss / len(train_loader))
    tr_acc.append(train_acc.cpu().numpy() / len(train_set) * 100)
    v_loss.append(val_loss / len(val_loader))
    v_acc.append(val_acc.cpu().numpy() / len(valid_set) * 100)
[my training accuracy]
Please tell me why my training accuracy is so low.
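One thing worth double-checking in the code above: nn.CrossEntropyLoss already applies log-softmax to raw logits internally, so ending forward() with nn.Sigmoid() squashes every logit into (0, 1) and flattens the gradients. A minimal sketch of the usual pattern, with hypothetical shapes rather than the asker's exact code:

import torch
import torch.nn as nn

# CrossEntropyLoss expects raw, unbounded logits.
fc = nn.Linear(256, 50)                   # final classifier layer
criterion = nn.CrossEntropyLoss()
features = torch.randn(32, 256)           # e.g. the last LSTM time step
labels = torch.randint(0, 50, (32,))
logits = fc(features)                     # no trailing Sigmoid here
loss = criterion(logits, labels)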
From your dataset, I infer that you are trying to identify sound data and are applying a CRNN. If you go to the repo for this dataset, you will find a table with the accuracy of each method; the CRNN reaches about 60%, which is not great since other methods achieve more than 95%.
You might need to change your model to get better results. In that table you will also find some models with code.
It is a bit difficult to give you an answer without reviewing your whole code and re-running the model. Given this situation, I would suggest you create a GitHub repo with an appropriate requirements.txt file for your code.
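For example, a minimal requirements.txt inferred from the imports above might look like this (the package list is my guess from the code; versions omitted):

torch
librosa
numpy
pandas
tqdm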
Even more helpful, and more likely to be answered, would be a Jupyter notebook where you show your loss curve over the epochs; your results would be easier to understand presented like the output of TensorBoard.
Hope this helps you get some answers.
I am building a model to classify news (AG News dataset). The vocab size is ~33k, with a custom embedding layer. I have run this for 20 epochs, but the loss and accuracy (1.3 and 26% respectively) are almost constant, even at the end of the 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
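For reference, a common way to build a fixed-size feature for the fc layer from a packed bidirectional LSTM is to concatenate the last layer's final forward and backward hidden states, since output_padded[-1] holds zero-padding for every sequence shorter than the longest one in the batch. A minimal, self-contained sketch with made-up sizes (not the asker's code):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

embed_size, rnn_units, n_layers = 300, 512, 2
rnn = nn.LSTM(embed_size, rnn_units, num_layers=n_layers, bidirectional=True)
x = torch.randn(7, 4, embed_size)             # (max_len, batch, embed)
lens = torch.tensor([7, 5, 3, 2])
packed = pack_padded_sequence(x, lens, enforce_sorted=False)
_, (hidden, _) = rnn(packed)                  # hidden: (n_layers * 2, batch, rnn_units)
# hidden[-2] / hidden[-1] are the last layer's final forward / backward states,
# valid for every sequence regardless of its length.
feat = torch.cat((hidden[-2], hidden[-1]), dim=1)   # (batch, 2 * rnn_units)
print(feat.shape)                             # torch.Size([4, 1024])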
Here is my model class:
class NewsClassifier(nn.Module):
    def __init__(self, vocab_weights=None, rnn_type='LSTM', vocab_size=len(vocab.vocab), n_classes=4, embed_size=300, rnn_units=512,
                 n_layers=2, bi_dir=True, rnn_drop=0.0, padding_index=vocab['<unk>']):
        super().__init__()
        self.rnn_units = rnn_units
        self.n_classes = n_classes
        self.rnn_type = rnn_type
        if vocab_weights:
            self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=padding_index)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        else:
            raise NotImplementedError
        self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)

    def forward(self, data, lens):
        x_embed = self.embedding(data)  # (padded_lens, batch_size, embed_dim)
        x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted=False)  # packing sequences and passing to RNN unit
        if self.rnn_type == 'LSTM':
            output_packed, (hidden, cell) = self.rnn(x_packed)  # output is packed and cannot be fed to linear layers
        else:
            output_packed, hidden = self.rnn(x_packed)  # for GRU there is only a hidden state
        # though n layers are stacked, the output always comes from the top layer
        output_padded, _ = pad_packed_sequence(output_packed)  # output is padded to be fed to linear layer (padded_lens, batch_size, hidden_units)
        # picking only the last output --> equivalent to return_sequences=False in Keras
        out_reduced = torch.cat((output_padded[-1, :, :self.rnn_units], output_padded[-1, :, self.rnn_units:]), 1)
        return self.fc(out_reduced)

model = NewsClassifier()
print(f'The total number of trainable parameters is: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
My training and evaluation functions are:
def train(model, iterator=trainDataloader, optimizer=optimizer, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.train()
    for inputs, leng, labels in iterator:
        inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs, leng).squeeze(1)
        loss = loss_fn(preds, labels.long())
        acc = accuracy(preds, labels)
        loss.backward()
        optimizer.step()
        e_loss += loss.item()
        e_acc += acc.item()
        i += 1
    return e_loss / i, e_acc / i

def predict(model, iterator=testDataloader, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.eval()
    with torch.no_grad():
        for inputs, leng, labels in iterator:
            inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
            preds = model(inputs, leng).squeeze(1)
            loss = loss_fn(preds, labels.long())
            acc = accuracy(preds, labels)
            e_loss += loss.item()
            e_acc += acc.item()
            i += 1
    return e_loss / i, e_acc / i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model)
    valid_loss, valid_acc = predict(model)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
This is my model:
class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        self.hidden_size = 128
        drp = 0.2
        n_classes = len(le.classes_)
        self.embedding = nn.Embedding(max_features, embed_size)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(self.hidden_size * 4, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(128, n_classes)

    def forward(self, x):
        # print(x.size())
        h_embedding = self.embedding(x)
        _embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
        h_lstm, _ = self.lstm(h_embedding)
        avg_pool = torch.mean(h_lstm, 1)
        max_pool, _ = torch.max(h_lstm, 1)
        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()

# Load train and test in CUDA memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)  # no shuffling, so val_preds stays aligned with test_y

train_loss = []
valid_loss = []
for epoch in range(n_epochs):
    start_time = time.time()
    # Set model to train configuration
    model.train()
    avg_loss = 0.
    for i, (x_batch, y_batch) in enumerate(train_loader):
        # Predict/forward pass
        y_pred = model(x_batch)
        # Compute loss
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)
        # acc = n - avg_loss

    # Set model to validation configuration
    model.eval()
    avg_val_loss = 0.
    val_preds = np.zeros((len(x_cv), len(le.classes_)))
    for i, (x_batch, y_batch) in enumerate(valid_loader):
        y_pred = model(x_batch).detach()
        avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
        # val_accuracy = n - avg_val_loss
        # keep/store predictions
        val_preds[i * batch_size:(i + 1) * batch_size] = F.softmax(y_pred, dim=1).cpu().numpy()
    val_preds = val_preds * 5000

    # Check accuracy
    val_accuracy = sum(val_preds.argmax(axis=1) == test_y) / len(test_y)
    train_loss.append(avg_loss)
    valid_loss.append(avg_val_loss)
    elapsed_time = time.time() - start_time
    print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
        epoch + 1, n_epochs, avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving. I have used the BiLSTM model. I tried changing the loss function and the dropout value, but it didn't work. I guess the model is overfitting; how can I increase the accuracy? The size of the dataset is 3000.
Output looks like this: https://i.stack.imgur.com/NbK92.png
Data looks like this: https://i.stack.imgur.com/ll12J.png
I am working on a text classification problem with a binary output, 0 or 1. The accuracy, train loss, and test loss remain the same; the accuracy is exactly the same for all epochs. All the steps look correct. I have tried several ways to figure out what is going wrong, but nothing has worked. Please help.
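For context, a plain BCE training step keeps the model's output tensor attached to the autograd graph all the way to the loss; rebuilding a tensor element by element detaches it, and setting loss.requires_grad = True afterwards does not reconnect it. A minimal sketch with a hypothetical stand-in model (not the code below):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(5, 1), nn.Sigmoid())   # stand-in for the RNN
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

x = torch.randn(32, 5)
label = torch.randint(0, 2, (32,)).float()
prob = model(x).squeeze(1)     # keep this tensor as-is; don't rebuild it
loss = criterion(prob, label)
optimizer.zero_grad()
loss.backward()
optimizer.step()

The model in question: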
class RNN(nn.Module):
    def __init__(self, num_layers, num_classes, input_size, hidden_size, vocab, dropout):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.sequence_len = sequence_len
        self.embedding = nn.Embedding(len(vocab), input_size)
        nn.init.xavier_normal_(self.embedding.weight)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, dropout=dropout, nonlinearity='tanh', batch_first=True, bias=True, bidirectional=False)
        self.linear = nn.Linear(hidden_size, 1)
        nn.init.xavier_normal_(self.linear.weight)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lens = list(map(len, x))
        padded = pad_sequence(x, batch_first=True)
        output_embedding = self.embedding(padded)
        packed = pack_padded_sequence(input=output_embedding, lengths=lens, batch_first=True, enforce_sorted=False)
        input_rnn = packed
        h011 = torch.zeros(1, 32, 3)  # (num_layers, batch_size, hidden_size), hard-coded to batch size 32
        output_11, hidden_11 = self.rnn(input_rnn, h011)
        output_padded, output_lengths = pad_packed_sequence(output_11, batch_first=True)
        final_output_11 = self.linear(hidden_11)
        prob_11 = self.sigmoid(final_output_11)
        return output_padded, hidden_11, prob_11
num_layers = 1
num_classes = 2
input_size = 5
hidden_size = 3
criterion = nn.BCELoss()
sequence_len = 1
dropout = 0.5
rnn = RNN(num_layers, num_classes, input_size, hidden_size,vocab,dropout)
epochs = 10
lr = 0.01
weight_decay=0.011
def train_loop(model, criterion, optimizer, train_loader, valid_loader, epochs):
    train_losses = []
    valid_losses = []
    for epoch in range(epochs):
        train_loss = 0
        for label, text in train_loader:
            output, hidden, prob = rnn.forward(text)
            prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)  # note: this rebuilds prob and detaches it from the autograd graph
            label = torch.tensor(label, dtype=torch.float32)
            loss = criterion(prob, label)
            optimizer.zero_grad()
            loss.requires_grad = True
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)

        valid_loss = 0
        with torch.no_grad():
            correct = 0
            total = 0
            for label, text in valid_loader:
                output, hidden, prob = rnn.forward(text)
                prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)
                label = torch.tensor(label, dtype=torch.float32)
                loss = criterion(prob, label)
                valid_loss += loss.item()
                p = torch.tensor([1 if i > 0.5 else 0 for i in prob.data], dtype=torch.float32)
                # predicted = torch.max(p, 1)
                total += label.size(0)
                correct += (p == label).sum().item()
        valid_loss = valid_loss / len(valid_loader)
        accuracy = 100 * correct / total
        print(accuracy)
        scheduler.step(accuracy)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        print(f'Epoch {epoch+1:<2d}/{epochs} --> Train Loss: {train_loss:.4f} | Valid Loss: {valid_loss:.4f}')
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.4, patience=5,
verbose=True) # need to change scheduler.step
train_loop(rnn,criterion,optimizer,train_loader,valid_loader,epochs)
Output:
I started learning TensorFlow one week ago and I am struggling a bit with the possibility of restoring a model.
I am building a class for a CNN (based on Stanford course code) where I want to have a run and a test function (the first to train the model, the second to make predictions on another data set).
The run function works correctly, but I have a hard time restoring the model to make predictions in the test function (I get a shape problem there, and I guess it comes from the way I retrieve the operations).
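For reference, here is a minimal TF 1.x restore sketch (an illustration of the usual pattern, not the asker's code). One detail that matters: ops created inside a variable_scope carry the scope prefix, so the lookup name must match where the op was actually created.

import tensorflow as tf

checkpoint_file = tf.train.latest_checkpoint("results/")
graph = tf.Graph()
with graph.as_default():
    sess = tf.Session()
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)
    x = graph.get_tensor_by_name("X_placeholder:0")
    y = graph.get_tensor_by_name("Y_placeholder:0")
    dropout = graph.get_tensor_by_name("dropout:0")
    logits = graph.get_tensor_by_name("logits:0")  # or "softmax_linear/logits:0" if created inside that scope
    # at test time, feed a dropout keep-probability of 1.0:
    # logits_value = sess.run(logits, feed_dict={x: x_batch, y: y_batch, dropout: 1.0})

The full class: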
import os
import tensorflow as tf
from time import time

class Cnn:
    def __init__(self, batch_size=128, skip_step=10, epochs=1, dropout_ratio=0.75):
        self.batch_size = batch_size
        self.skip_step = skip_step
        self.epochs = epochs
        self.dropout_ratio = dropout_ratio
        self.x = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
        self.images = tf.reshape(self.x, shape=[-1, 28, 28, 1])
        self.y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")
        self.dropout = tf.placeholder(tf.float32, name="dropout")
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.layers = [self.images]
        self.n_layers = 0

    def convolution(self, kernel_size, n_output, strides, scope_name):
        previous_layer = self.layers[self.n_layers]
        with tf.variable_scope(scope_name) as scope:
            kernel_shape = [kernel_size[0], kernel_size[1], previous_layer.get_shape()[3], n_output]
            kernel = tf.get_variable("kernels", kernel_shape,
                                     initializer=tf.truncated_normal_initializer())
            biases = tf.get_variable("biases", [n_output],
                                     initializer=tf.random_normal_initializer())
            convolution_2d = tf.nn.conv2d(previous_layer, kernel, strides=strides,
                                          padding="SAME")
            convolution = tf.nn.relu(convolution_2d + biases, name=scope.name)
        self.layers.append(convolution)
        self.n_layers += 1
        return convolution

    def pool(self, size, strides, scope_name, padding="SAME"):
        previous_layer = self.layers[self.n_layers]
        with tf.variable_scope(scope_name):
            pool = tf.nn.max_pool(previous_layer, ksize=size, strides=strides,
                                  padding=padding)
        self.layers.append(pool)
        self.n_layers += 1
        return pool

    def fully_connected(self, n_input, n_output, scope_name, relu=False, dropout=True):
        previous_layer = self.layers[self.n_layers]
        with tf.variable_scope(scope_name):
            weights = tf.get_variable("weights", [n_input, n_output],
                                      initializer=tf.truncated_normal_initializer())
            biases = tf.get_variable("biases", [n_output],
                                     initializer=tf.random_normal_initializer())
            pool_reshaped = tf.reshape(previous_layer, [-1, n_input])
            logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
            fc = logits
            if relu:
                fc = tf.nn.relu(fc, name="relu")
            if dropout:
                fc = tf.nn.dropout(fc, self.dropout, name="dropout")
        self.layers.append(fc)
        self.n_layers += 1
        if relu is False and dropout is False:
            self.logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
            self.predictions = tf.nn.softmax(logits, name="predictions")
        return fc

    def set_loss(self, scope_name):
        previous_layer = self.layers[self.n_layers]
        with tf.name_scope(scope_name):
            entropy = tf.nn.softmax_cross_entropy_with_logits(logits=previous_layer, labels=self.y)
            loss = tf.reduce_mean(entropy, name='loss')
        self.layers.append(loss)
        self.n_layers += 1
        self.loss = loss

    def set_optimizer(self):
        previous_layer = self.layers[self.n_layers]
        optimizer = tf.train.AdamOptimizer(0.001).minimize(previous_layer, global_step=self.global_step)
        self.layers.append(optimizer)
        self.n_layers += 1
        self.optimizer = optimizer

    def run(self, train_x, train_y):
        sess = tf.Session()
        with sess.as_default():
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            writer = tf.summary.FileWriter('./my_graph/mnist', sess.graph)
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('results/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            initial_step = self.global_step.eval()
            start_time = time()
            n_batches = int(train_x.shape[0] / self.batch_size)
            total_correct_preds = 0
            total_loss = 0.0
            current_position = 0
            for index in range(initial_step, int(n_batches * self.epochs)):  # train the model n_epochs times
                x_batch = train_x[current_position: current_position + self.batch_size, :]
                y_batch = train_y[current_position: current_position + self.batch_size, :]
                feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
                _, loss_batch, logits_batch = sess.run([self.optimizer, self.loss, self.logits],
                                                       feed_dict=feed_dict)
                print(logits_batch.shape)
                total_loss += loss_batch
                preds = tf.nn.softmax(logits_batch)
                correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
                accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
                total_correct_preds += sess.run(accuracy)
                if (index + 1) % self.skip_step == 0:
                    print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / self.skip_step))
                    total_loss = 0.0
                    saver.save(sess, 'results/mnist-convnet', index)
                current_position += self.batch_size
            print("Optimization Finished!")  # should be around 0.35 after 25 epochs
            print("Total time: {0} seconds".format(time() - start_time))
            print("Accuracy {0}".format(total_correct_preds / train_x.shape[0]))

    def test(self, val_x, val_y):
        checkpoint_file = tf.train.latest_checkpoint("results/")
        graph = tf.Graph()
        with graph.as_default():
            sess = tf.Session()
            with sess.as_default():
                saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
                self.x = graph.get_operation_by_name("X_placeholder").outputs[0]
                self.y = graph.get_operation_by_name("Y_placeholder").outputs[0]
                self.dropout = graph.get_operation_by_name("dropout").outputs[0]
                self.global_step = graph.get_operation_by_name("global_step").outputs[0]
                self.logits = graph.get_operation_by_name("logits").outputs[0]
                current_position = 0
                n_batches = int(val_x.shape[0] / self.batch_size)
                total_correct_preds = 0
                for i in range(n_batches):
                    x_batch = val_x[current_position: current_position + self.batch_size]
                    y_batch = val_y[current_position: current_position + self.batch_size]
                    feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
                    logits_batch = sess.run([self.logits], feed_dict=feed_dict)
                    preds = tf.nn.softmax(logits_batch)
                    # correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
                    """
                    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
                    total_correct_preds += sess.run(accuracy)
                    current_position += self.batch_size
                    print("Accuracy {0}".format(total_correct_preds / val_x.shape[0]))
                    """
and I am testing it like this (sorry if there is a lot of code):
from cnn import Cnn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)
N_CLASSES = 10
# load datas
mnist = input_data.read_data_sets("mnist", one_hot=True)
train_x, train_y = mnist.train.images, mnist.train.labels
val_x, val_y = mnist.test.images, mnist.test.labels
model = Cnn(batch_size=128, skip_step=10, epochs=0.25, dropout_ratio=0.75)
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=32, scope_name="conv1")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool1")
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=64, scope_name="conv2")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool2")
input_features = 7 * 7 * 64
model.fully_connected(n_input=input_features, n_output=1024, scope_name="fc", relu=True, dropout=True)
model.fully_connected(n_input=1024, n_output=N_CLASSES, scope_name="softmax_linear", dropout=False)
model.set_loss(scope_name="loss")
model.set_optimizer()
model.run(train_x=train_x, train_y=train_y)
model.test(val_x, val_y)
I would also appreciate any comments on a better way to build a class like this (in particular, the handling of the placeholders and the sessions).
Thanks!
Nicolas