Related
The loss function is MSE, and it barely decreases (from 2.32 to 2.24). What is the problem: the AE architecture or the way I train the AE? After 100 epochs the loss doesn't change. Is it that the input data of shape (200, 1, 52) can't be compressed? Should I increase the size of the compressed representation (200, 16) by changing the encoder architecture?
# Standard Torch Packages
import torch
from sklearn.model_selection import train_test_split
from torch.nn import functional as F
from torch import optim
from os.path import join, exists
from os import mkdir
import numpy as np
import glob
import matplotlib.pyplot as plt
# Import own functions
from ae_model import AE
from learning import EarlyStopping, ReduceLROnPlateau, LSIZE
# Define parameters
num_epochs = 50
rollout_path = "data/rollouts/rollout_*.npz"
logdir = "data/"
X = []
for x in glob.glob(rollout_path):
    data_point = np.load(x, allow_pickle=True)
    X.append(data_point)
train_loader, test_loader = train_test_split(X, test_size=0.2, shuffle=False)
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
print(device)
model = AE(LSIZE).to(device)
#optimizer = optim.Adam(model.parameters())
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-1,
                             weight_decay=1e-8)
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=5)
earlystopping = EarlyStopping("min", patience=3)
# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()
def train(epoch):
    """ One training epoch """
    model.train()
    train_loss = []
    #train_data_points = 0
    for batch_idx, rollout in enumerate(train_loader):
        data = torch.tensor(rollout["data"]).to(device)
        train_data_points = len(data) * len(train_loader)
        #recon_batch, mu, logvar = model(data)
        recon_batch, _ = model(data)
        loss = loss_function(recon_batch, data)
        optimizer.zero_grad()
        loss.backward()
        #train_loss += loss.item()
        optimizer.step()
        plot_train_data.append(loss.item())
        # if batch_idx % 20 == 0:
        #     print(
        #         "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
        #             epoch,
        #             batch_idx * len(data),
        #             train_data_points,
        #             100.0 * batch_idx / len(train_loader),
        #             loss.item() / len(data),
        #         )
        #     )
        # train_loss /= train_data_points
        # print(
        #     "====> Epoch: {} Average loss: {:.4f}".format(
        #         epoch, train_loss / train_data_points
        #     )
        # )
        print(
            "====> Epoch: {} batchId: {} Average loss: {:.4f}".format(
                epoch, batch_idx, loss.item()
            ))
    return train_loss
def test():
    model.eval()
    test_loss = 0
    test_data_points = 0
    with torch.no_grad():
        for rollout in test_loader:
            data = torch.tensor(rollout["data"]).to(device)
            test_data_points = len(data) * len(test_loader)
            #recon_batch, mu, logvar = model(data)
            recon_batch, _ = model(data)
            test_loss += loss_function(recon_batch, data).item()
    test_loss /= test_data_points
    print("====> Test set loss: {:.4f}".format(test_loss))
    return test_loss
def save_checkpoint(state, is_best, filename, best_filename):
    """ Save state in filename. Also save in best_filename if is_best. """
    torch.save(state, filename)
    if is_best:
        torch.save(state, best_filename)
# check vae dir exists, if not, create it
ae_dir = join(logdir, "ae_gpu_run_false")
if not exists(ae_dir):
    mkdir(ae_dir)
reload_file = join(ae_dir, "best.tar")
noreload = False
if not noreload and exists(reload_file):
    state = torch.load(reload_file)
    print(
        "Reloading model at epoch {}"
        ", with test error {}".format(state["epoch"], state["precision"])
    )
    model.load_state_dict(state["state_dict"])
    optimizer.load_state_dict(state["optimizer"])
    scheduler.load_state_dict(state["scheduler"])
    earlystopping.load_state_dict(state["earlystopping"])
cur_best = None
plot_train_data = []
plot_test_data = []
for epoch in range(1, num_epochs + 1):
    #plot_train_data.append(train(epoch))
    train(epoch)
    test_loss = test()
    scheduler.step(test_loss)
    earlystopping.step(test_loss)
    # checkpointing
    best_filename = join(ae_dir, "best.tar")
    filename = join(ae_dir, "checkpoint.tar")
    is_best = not cur_best or test_loss < cur_best
    if is_best:
        cur_best = test_loss
    save_checkpoint(
        {
            "epoch": epoch,
            "state_dict": model.state_dict(),
            "precision": test_loss,
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
            "earlystopping": earlystopping.state_dict(),
        },
        is_best,
        filename,
        best_filename,
    )
    if earlystopping.stop:
        print("End of Training because of early stopping at epoch {}".format(epoch))
        break
test_plot_path = join(ae_dir, "test_fig.png")
# legend_strings = []
plt.title("AE Training and Test")
#plt.xlabel("Epochs")
#plt.ylabel("MSE losses")
#plt.plot(plot_test_data)
# legend_strings.append('Test')
#plt.legend('Test')
#plt.savefig(test_plot_path)
#plt.close()
#train_plot_path = join(ae_dir, "train_fig.png")
#plt.title("AE ")
#plt.xlabel("Epochs")
#plt.ylabel("MSE Loss")
plt.plot(plot_train_data)
# legend_strings.append('Train')
#plt.legend('Train')
plt.xticks(range(0, len(plot_train_data), 75))
plt.savefig(test_plot_path)
plt.close()
Below are the encoder and decoder models:
import torch
import torch.nn as nn
import torch.nn.functional as F
reduced_size = 22
class Decoder(nn.Module):
    """ VAE decoder """
    def __init__(self, latent_size):
        super(Decoder, self).__init__()
        self.latent_size = latent_size
        self.fc1 = nn.Linear(latent_size, reduced_size)
        self.deconv1 = nn.ConvTranspose1d(16, 32, 1, stride=1)
        self.deconv2 = nn.ConvTranspose1d(32, 52, 1, stride=1)

    def forward(self, x):  # pylint: disable=arguments-differ
        x = x.unsqueeze(2)
        x = F.relu(self.deconv1(x))
        x = torch.sigmoid(self.deconv2(x))
        x = x.view(x.size(0), x.size(2), x.size(1))
        return x
class Encoder(nn.Module):  # pylint: disable=too-many-instance-attributes
    """ VAE encoder """
    def __init__(self, latent_size):
        super(Encoder, self).__init__()
        # input shape (200, 1, 52)
        # batch_size, in_channel, len_channel
        self.latent_size = latent_size
        self.conv1 = nn.Conv1d(52, 32, 1, stride=1)
        self.conv2 = nn.Conv1d(32, 16, 1, stride=1)
        # output shape (200, 1, x)
        self.fc_mu = nn.Linear(reduced_size, latent_size)

    def forward(self, x):  # pylint: disable=arguments-differ
        x = x.view(x.size(0), x.size(2), x.size(1))
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)
        return x
class AE(nn.Module):
    """ Variational Autoencoder """
    def __init__(self, latent_size):
        super(AE, self).__init__()
        self.encoder = Encoder(latent_size)
        self.decoder = Decoder(latent_size)

    def forward(self, x):  # pylint: disable=arguments-differ
        x = x.unsqueeze(1)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        decoded = decoded.squeeze(1)
        return decoded, encoded
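A quick way to see the sizes involved is to push a dummy rollout through the model and print the shapes of the encoded and reconstructed tensors. This is only a sketch: it assumes the AE class above is importable from ae_model, and that LSIZE is 16 (the real value comes from learning.LSIZE in the training script).
# Minimal shape check (a sketch; LSIZE = 16 is an assumption)
import torch
from ae_model import AE

check_model = AE(16)
dummy = torch.randn(200, 52)   # one rollout: 200 samples, 52 features
recon, code = check_model(dummy)
print(code.shape)    # expected: torch.Size([200, 16]) -> the compressed representation
print(recon.shape)   # expected: torch.Size([200, 52]) -> same shape as the input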
This is my code; an image of the training accuracy is below.
# !curl -L -s -o '/content/ESC-50-master-16k.tar' 'https://drive.google.com/uc?id=1hFt-qarD_Ihjb3jW7pd3tje2pcY5tfuJ&confirm=t'
# !tar -xvf "ESC-50-master-16k.tar"
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
import numpy as np
import pandas as pd
from glob import glob
Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load ESC-50 Data & Preprocess
metadata = pd.read_csv("./ESC-50-master-16k/meta/esc50.csv")
wav_list = sorted(glob("./ESC-50-master-16k/resample/*.wav"))
def spec_to_image(spec, eps=1e-6):
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
    spec_scaled = spec_scaled.astype(np.uint8)
    return spec_scaled
class esc50dataset(Dataset):
    def __init__(self, wavlist, metadata):
        self.labels = np.array(metadata.target).astype(int)
        self.audio = []
        for f in wavlist:
            wav, sr = librosa.load(f, sr=None)
            spec = librosa.feature.melspectrogram(wav, sr=sr, n_fft=1024, hop_length=640, n_mels=126)  # [126, 126]
            spec_db = librosa.power_to_db(spec, top_db=80)
            spec_image = np.expand_dims(spec_to_image(spec_db), axis=0)
            self.audio.append(spec_image.tolist())
        self.audio = np.array(self.audio)

    def __len__(self):
        return len(self.audio)

    def __getitem__(self, idx):
        return self.audio[idx], self.labels[idx]
dataset = esc50dataset(wav_list, metadata)
features = dataset[:][0]
labels = dataset[:][1]
train test split
train_size = 0.7
val_size = 0.5
make train set
split_id = int(len(features) * train_size)  # 2000 * 0.7 = 1400
train_features, remain_features = features[:split_id], features[split_id:]  # train_features = 1400, remain_features = 600
train_labels, remain_labels = labels[:split_id], labels[split_id:]  # train_labels = 1400, remain_labels = 600
make val and test set
split_val_id = int(len(remain_features) * val_size)  # 600 * 0.5 = 300
val_features, test_features = remain_features[:split_val_id], remain_features[split_val_id:]  # val_features = 300, test_features = 300
val_labels, test_labels = remain_labels[:split_val_id], remain_labels[split_val_id:]  # val_labels = 300, test_labels = 300
define batch size
batch_size = 32
create tensor datasets
train_set = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
valid_set = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_labels))
test_set = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))
create dataloaders
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)  # 1400 / 32
val_loader = DataLoader(valid_set, shuffle=True, batch_size=batch_size)  # 300 / 32
test_loader = DataLoader(test_set, shuffle=True, batch_size=batch_size)  # 300 / 32
Model Hyperparameters
in_channel = features[0].shape[0]  # in_channel = 1
max_pool_kernel = 3
hidden_size = 256
hidden_layers = 10
output_classes = labels.max() + 1 # output_classes = 50
dropout = 0.25
learning_rate = 0.001
num_epochs = 10
Model
class CRNN(nn.Module):
    def __init__(self, in_channel, hidden_size, hidden_layers, output_classes, dropout):
        super(CRNN, self).__init__()
        self.hidden_layers = hidden_layers
        self.hidden_size = hidden_size
        self.CNN_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=max_pool_kernel),  # 32, 42, 42
        )
        self.CNN_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=max_pool_kernel),  # 64, 14, 14
        )
        self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=hidden_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.float()
        # print("start : {}".format(x))
        x = self.CNN_layer1(x)  # 1, 126, 126 -> 32, 42, 42
        # print("after CNN_1 : {}".format(x))
        x = self.CNN_layer2(x)  # 32, 42, 42 -> 64, 14, 14
        # print("after CNN_2 : {}".format(x))
        x = x.reshape(x.size(0), -1, 64)  # (batch, 64, 14, 14) -> (batch, 196, 64); x.size(0) = batch_size = 32
        # print("after reshape : {}".format(x))
        h0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device)  # torch.Size([10, 32 (batch_size), 256])
        c0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device)  # torch.Size([10, 32 (batch_size), 256])
        x, _ = self.lstm(x, (h0, c0))
        # print("after lstm : {}".format(x))
        x = self.dropout(x)
        # print("after dropout : {}".format(x))
        x = self.fc(x[:, -1, :])
        # print("after fc : {}".format(x))
        x = self.sigmoid(x)
        # print("after sigmoid : {}".format(x))
        return x
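To see the shapes the comments above refer to, a dummy forward pass can be run; this is only a sketch, assuming a batch of 32 spectrogram images of size (1, 126, 126) and the hyperparameters and device defined earlier.
# Shape check with a random batch (a sketch; uses the globals defined above)
check_model = CRNN(in_channel=1, hidden_size=256, hidden_layers=10,
                   output_classes=50, dropout=0.25).to(device)
dummy = torch.randn(32, 1, 126, 126).to(device)   # batch of 32 spectrogram "images"
out = check_model(dummy)
print(out.shape)   # expected: torch.Size([32, 50]) -> one sigmoid score per class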
model initialization
model = CRNN(in_channel, hidden_size, hidden_layers, output_classes, dropout)
Criterion & Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Train
from tqdm import tqdm
tqdm.pandas()
tr_loss = []
tr_acc = []
v_loss = []
v_acc = []
best_valid_loss = torch.inf
best_epoch = 0
model = model.to(device)
epochloop = tqdm(range(num_epochs), position=0, desc='Training', leave=True)
for epoch in epochloop:
    model.train()
    train_loss = 0
    train_acc = 0
    epochloop.set_postfix_str(f'Training epoch {epoch}/{len(epochloop)}')  # visualize
    for idx, (audio, label) in enumerate(train_loader):
        audio, label = audio.to(device), label.to(device)
        out = model(audio)
        _, pred = torch.max(out.data, 1)
        # acc
        train_acc += (pred == label).sum()
        # loss
        optimizer.zero_grad()
        loss = criterion(out, label)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    # Validation
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for idx, (audio, label) in enumerate(val_loader):
            epochloop.set_postfix_str(f'Validation batch {idx}/{len(val_loader)}')
            audio, label = audio.to(device), label.to(device)
            # forward pass
            out = model(audio)
            _, pred = torch.max(out.data, 1)
            # acc
            val_acc += (pred == label).sum()
            # loss
            loss = criterion(out, label)
            val_loss += loss.item()
    model.train()
    # save model if validation loss decreased
    if val_loss / len(valid_set) <= best_valid_loss:
        best_valid_loss = val_loss / len(val_loader)
        best_epoch = epoch
        torch.save(model.state_dict(), "LSTM_epoch_{}.pth".format(epoch))
    # print epoch loss & accuracy
    print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss / len(train_loader):.3f} Train Acc: {train_acc / len(train_set) * 100}% | Val Loss: {val_loss / len(val_loader):.3f} Val Acc: {val_acc / len(valid_set) * 100}%')
    tr_loss.append(train_loss / len(train_loader))
    tr_acc.append(train_acc.cpu().numpy() / len(train_set) * 100)
    v_loss.append(val_loss / len(val_loader))
    v_acc.append(val_acc.cpu().numpy() / len(valid_set) * 100)
[image: my training accuracy]
Please tell me why my training accuracy is so low.
From your dataset, I infer that you are trying to classify sound data and that you are applying a CRNN. If you go to the repo for this dataset, you will find a table with the accuracy of each method; the CRNN reaches about 60% accuracy, which is not great given that other methods achieve more than 95%.
You might need to change your model to get better results. Check the table; you will also find some models with code.
It is a bit difficult to give you an answer without reviewing your whole code and re-running the model. Given this situation, I would suggest you create a GitHub repo with an appropriate requirements.txt file for your code.
Even more helpful, and more likely to be answered, would be to put your code in a Jupyter notebook and show your loss curve over the epochs. Your results would be easier to understand if presented like the output of TensorBoard, for example with a minimal logging sketch like the one below.
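This is only a sketch: the log directory name is arbitrary, and the add_scalar calls would go inside your existing epoch loop after validation.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/crnn_esc50")  # arbitrary log directory
# inside the epoch loop, after validation, something like:
#     writer.add_scalar("Loss/train", train_loss / len(train_loader), epoch)
#     writer.add_scalar("Loss/valid", val_loss / len(val_loader), epoch)
#     writer.add_scalar("Acc/valid", val_acc / len(valid_set) * 100, epoch)
writer.close()
# then inspect the curves with: tensorboard --logdir runs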
Hope this helps you to get some answers
I am building a model to classify news (AG News dataset). The vocab size is ~33k with a custom embedding layer. I have run this for 20 epochs, but the loss and accuracy (1.3 and 26% respectively) are almost constant, even at the end of the 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class NewsClassifier(nn.Module):
    def __init__(self, vocab_weights=None, rnn_type='LSTM', vocab_size=len(vocab.vocab), n_classes=4, embed_size=300, rnn_units=512,
                 n_layers=2, bi_dir=True, rnn_drop=0.0, padding_index=vocab['<unk>']):
        super().__init__()
        self.rnn_units = rnn_units
        self.n_classes = n_classes
        self.rnn_type = rnn_type
        if vocab_weights:
            self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=padding_index)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        else:
            raise NotImplementedError
        self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)

    def forward(self, data, lens):
        x_embed = self.embedding(data)  # (padded_len, batch_size, embed_dim)
        x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted=False)  # packing sequences and passing to the RNN unit
        if self.rnn_type == 'LSTM':
            output_packed, (hidden, cell) = self.rnn(x_packed)  # output is packed and cannot be fed to linear layers
        else:
            output_packed, hidden = self.rnn(x_packed)  # for GRU there is only a hidden state
        # though n layers are stacked, the output is always from the last layer
        output_padded, _ = pad_packed_sequence(output_packed)  # output is padded to be fed to the linear layer (padded_len, batch_size, hidden_units)
        # picking only the last output --> equivalent to return_sequences=False in Keras
        out_reduced = torch.cat((output_padded[-1, :, :self.rnn_units], output_padded[-1, :, self.rnn_units:]), 1)
        return self.fc(out_reduced)
model = NewsClassifier()
print(f'The total number of trainable parameters are : {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
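One way to check what is actually fed to the fc layer is to run a dummy batch through the model and look at the output shape; this is only a sketch, with made-up token ids and lengths (the real inputs come from trainDataloader).
# Shape check with a fake batch (a sketch; token ids and lengths are made up)
dummy_tokens = torch.randint(0, 100, (35, 8))   # (padded_len = 35, batch_size = 8)
dummy_lens = torch.randint(5, 36, (8,))         # one length per sequence, all <= 35
logits = model(dummy_tokens, dummy_lens)
print(logits.shape)   # expected: torch.Size([8, 4]) -> raw logits for CrossEntropyLoss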
My training function is:
def train(model, iterator=trainDataloader, optimizer=optimizer, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.train()
    for inputs, leng, labels in iterator:
        inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs, leng).squeeze(1)
        loss = loss_fn(preds, labels.long())
        acc = accuracy(preds, labels)
        loss.backward()
        optimizer.step()
        e_loss += loss.item()
        e_acc += acc.item()
        i += 1
    return e_loss / i, e_acc / i
def predict(model, iterator=testDataloader, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.eval()
    with torch.no_grad():
        for inputs, leng, labels in iterator:
            inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
            preds = model(inputs, leng).squeeze(1)
            loss = loss_fn(preds, labels.long())
            acc = accuracy(preds, labels)
            e_loss += loss.item()
            e_acc += acc.item()
            i += 1
    return e_loss / i, e_acc / i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model)
    valid_loss, valid_acc = predict(model)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
I am trying to build a BERT model for text classification with the help of this code [https://towardsdatascience.com/bert-text-classification-using-pytorch-723dfb8b6b5b]. My dataset contains two columns (label, text).
The labels can take three values (0, 1, 2). The code runs without any error, but all the values of the confusion matrix are 0. Is there something wrong with my code?
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torchtext.data import Field, TabularDataset, BucketIterator, Iterator
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
torch.manual_seed(42)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True, fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('label', label_field), ('text', text_field)]
CLASSIFICATION_REPORT = "classification_report.jsonl"
train, valid, test = TabularDataset.splits(path='', train='train.csv', validation='validate.csv', test='test.csv', format='CSV', fields=fields, skip_header=True)
train_iter = BucketIterator(train, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=16, device=device, train=False, shuffle=False, sort=False)
class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name, num_labels=3)

    def forward(self, text, label):
        loss, text_fea = self.encoder(text, labels=label)[:2]
        return loss, text_fea
def train(model, optimizer, criterion=nn.BCELoss(), train_loader=train_iter, valid_loader=valid_iter,
          num_epochs=5, eval_every=len(train_iter) // 2, file_path=None, best_valid_loss=float("Inf")):
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []
    model.train()
    for epoch in range(num_epochs):
        for (label, text), _ in train_loader:
            label = label.type(torch.LongTensor)
            label = label.to(device)
            text = text.type(torch.LongTensor)
            text = text.to(device)
            output = model(text, label)
            loss, _ = output
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            global_step += 1
            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():
                    for (label, text), _ in valid_loader:
                        label = label.type(torch.LongTensor)
                        label = label.to(device)
                        text = text.type(torch.LongTensor)
                        text = text.to(device)
                        output = model(text, label)
                        loss, _ = output
                        valid_running_loss += loss.item()
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)
                # resetting running values
                running_loss = 0.0
                valid_running_loss = 0.0
                model.train()
                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, global_step, num_epochs * len(train_loader),
                              average_train_loss, average_valid_loss))
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
    print('Finished Training!')
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model=model, optimizer=optimizer)
def evaluate(model, test_loader):
    y_pred = []
    y_true = []
    model.eval()
    with torch.no_grad():
        for (label, text), _ in test_loader:
            label = label.type(torch.LongTensor)
            label = label.to(device)
            text = text.type(torch.LongTensor)
            text = text.to(device)
            output = model(text, label)
            _, output = output
            y_pred.extend(torch.argmax(output, 2).tolist())
            y_true.extend(label.tolist())
    print('Classification Report:')
    print(classification_report(y_true, y_pred, labels=[0, 1, 2], digits=4))
best_model = BERT().to(device)
evaluate(best_model, test_iter)
You are using criterion = nn.BCELoss(), i.e. binary cross entropy, for a multi-class classification problem ("the labels can have three values of (0,1,2)"). Use a loss function suited to multi-class classification.
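For three classes with integer labels 0, 1 and 2, nn.CrossEntropyLoss applied to the raw logits is the usual choice; a minimal sketch:
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(16, 3)            # raw, unnormalised scores for 3 classes
labels = torch.randint(0, 3, (16,))    # integer class labels 0, 1 or 2
loss = criterion(logits, labels)
print(loss.item())
Note also that in the posted train function the criterion argument is never actually used: the loss comes from BertForSequenceClassification itself, which already applies a multi-class cross entropy internally when it is given integer labels and num_labels=3.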
I am working on a text classification problem with a binary output, 0 or 1. The accuracy, train loss, and test loss remain the same; the accuracy is exactly the same for all epochs. All the steps look correct to me. I have tried several ways to figure out what is going wrong, but nothing has worked. Please help.
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

class RNN(nn.Module):
    def __init__(self, num_layers, num_classes, input_size, hidden_size, vocab, dropout):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.sequence_len = sequence_len
        self.embedding = nn.Embedding(len(vocab), input_size)
        nn.init.xavier_normal_(self.embedding.weight)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, dropout=dropout, nonlinearity='tanh', batch_first=True, bias=True, bidirectional=False)
        self.linear = nn.Linear(hidden_size, 1)
        nn.init.xavier_normal_(self.linear.weight)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lens = list(map(len, x))
        padded = pad_sequence(x, batch_first=True)
        output_embedding = self.embedding(padded)
        packed = pack_padded_sequence(input=output_embedding, lengths=lens, batch_first=True, enforce_sorted=False)
        input_rnn = packed
        h011 = torch.zeros(1, 32, 3)
        output_11, hidden_11 = self.rnn(input_rnn, h011)
        output_padded, output_lengths = pad_packed_sequence(output_11, batch_first=True)
        final_output_11 = self.linear(hidden_11)
        prob_11 = self.sigmoid(final_output_11)
        return output_padded, hidden_11, prob_11
num_layers = 1
num_classes = 2
input_size = 5
hidden_size = 3
criterion = nn.BCELoss()
sequence_len = 1
dropout = 0.5
rnn = RNN(num_layers, num_classes, input_size, hidden_size,vocab,dropout)
epochs = 10
lr = 0.01
weight_decay=0.011
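Before the training loop, the shapes of the three tensors returned by forward can be checked with a toy batch; this is only a sketch that samples random token ids from the existing vocab and uses exactly 32 sequences, since h011 is hard-coded to torch.zeros(1, 32, 3).
# Shape check with a toy batch of 32 variable-length sequences (a sketch)
toy_batch = [torch.randint(0, len(vocab), (torch.randint(3, 10, (1,)).item(),)) for _ in range(32)]
out_padded, hidden, prob = rnn(toy_batch)
print(out_padded.shape)   # (32, max_len, 3) padded per-timestep outputs
print(hidden.shape)       # (1, 32, 3)       final hidden state
print(prob.shape)         # (1, 32, 1)       sigmoid output that goes into BCELoss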
def train_loop(model, criterion, optimizer, train_loader, valid_loader, epochs):
    train_losses = []
    valid_losses = []
    for epoch in range(epochs):
        train_loss = 0
        for label, text in train_loader:
            output, hidden, prob = rnn.forward(text)
            prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)
            label = torch.tensor(label, dtype=torch.float32)
            loss = criterion(prob, label)
            optimizer.zero_grad()
            loss.requires_grad = True
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        valid_loss = 0
        with torch.no_grad():
            correct = 0
            total = 0
            for label, text in valid_loader:
                output, hidden, prob = rnn.forward(text)
                prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)
                label = torch.tensor(label, dtype=torch.float32)
                loss = criterion(prob, label)
                valid_loss += loss.item()
                p = torch.tensor([1 if i > 0.5 else 0 for i in prob.data], dtype=torch.float32)
                #predicted = torch.max(p, 1)
                total += label.size(0)
                correct += (p == label).sum().item()
        valid_loss = valid_loss / len(valid_loader)
        accuracy = 100 * correct / total
        print(accuracy)
        scheduler.step(accuracy)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        print(f'Epoch {epoch+1:<2d}/{epochs} --> Train Loss: {train_loss:.4f} | Valid Loss: {valid_loss:.4f}')
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.4, patience=5,
verbose=True) # need to change scheduler.step
train_loop(rnn,criterion,optimizer,train_loader,valid_loader,epochs)
Output: