I am trying to build a BERT model for text classification with the help of this code [https://towardsdatascience.com/bert-text-classification-using-pytorch-723dfb8b6b5b]. My dataset contains two columns (label, text).
The labels can take three values (0, 1, 2). The code runs without any error, but all values of the confusion matrix are 0. Is there something wrong with my code?
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torchtext.data import Field, TabularDataset, BucketIterator, Iterator
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
torch.manual_seed(42)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True, fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('label', label_field), ('text', text_field)]
CLASSIFICATION_REPORT = "classification_report.jsonl"
train, valid, test = TabularDataset.splits(path='', train='train.csv', validation='validate.csv', test='test.csv', format='CSV', fields=fields, skip_header=True)
train_iter = BucketIterator(train, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=16, device=device, train=False, shuffle=False, sort=False)
class BERT(nn.Module):
def __init__(self):
super(BERT, self).__init__()
options_name = "bert-base-uncased"
self.encoder = BertForSequenceClassification.from_pretrained(options_name, num_labels = 3)
def forward(self, text, label):
loss, text_fea = self.encoder(text, labels=label)[:2]
return loss, text_fea
def train(model, optimizer, criterion = nn.BCELoss(), train_loader = train_iter, valid_loader = valid_iter, num_epochs = 5, eval_every = len(train_iter) // 2, file_path = None, best_valid_loss = float("Inf")):
running_loss = 0.0
valid_running_loss = 0.0
global_step = 0
train_loss_list = []
valid_loss_list = []
global_steps_list = []
model.train()
for epoch in range(num_epochs):
for (label, text), _ in train_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
loss, _ = output
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
global_step += 1
if global_step % eval_every == 0:
model.eval()
with torch.no_grad():
for (label, text), _ in valid_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
loss, _ = output
valid_running_loss += loss.item()
average_train_loss = running_loss / eval_every
average_valid_loss = valid_running_loss / len(valid_loader)
train_loss_list.append(average_train_loss)
valid_loss_list.append(average_valid_loss)
global_steps_list.append(global_step)
# resetting running values
running_loss = 0.0
valid_running_loss = 0.0
model.train()
# print progress
print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'.format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader), average_train_loss, average_valid_loss))
if best_valid_loss > average_valid_loss:
best_valid_loss = average_valid_loss
print('Finished Training!')
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model=model, optimizer=optimizer)
def evaluate(model, test_loader):
y_pred = []
y_true = []
model.eval()
with torch.no_grad():
for (label, text), _ in test_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
_, output = output
y_pred.extend(torch.argmax(output, 2).tolist())
y_true.extend(label.tolist())
print('Classification Report:')
print(classification_report(y_true, y_pred, labels=[0,1,2], digits=4))
best_model = BERT().to(device)
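# note: best_model here is a freshly initialized network; no trained weights are loaded before evaluate() is called, so the reported metrics reflect an untrained model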
evaluate(best_model, test_iter)
You are using criterion = nn.BCELoss(), i.e. binary cross-entropy, for a multi-class classification problem ("the labels can have three values (0, 1, 2)"). Use a loss function suited to multi-class classification, such as nn.CrossEntropyLoss.
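For reference, a minimal sketch of a multi-class criterion applied to raw logits (the dummy tensors and shapes here are assumptions for illustration, not taken from the question):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()     # expects raw logits and integer class indices
logits = torch.randn(16, 3)           # (batch_size, num_labels) dummy logits
labels = torch.randint(0, 3, (16,))   # integer labels in {0, 1, 2}
loss = criterion(logits, labels)

Note also that BertForSequenceClassification already applies cross-entropy internally when it is given integer labels and num_labels > 1, which is the loss your forward() returns; the criterion argument of your train() function is never actually used.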
Related
The loss function is MSE, and it is barely decreasing (from 2.32 to 2.24). Is the problem the AE architecture or the way I train the AE? After 100 epochs the loss doesn't change. Is it that the input data (200, 1, 52) can't be compressed? Should I increase the compressed representation size (200, 16) by changing the encoder architecture?
# Standard Torch Packages
import torch
from sklearn.model_selection import train_test_split
from torch.nn import functional as F
from torch import optim
from os.path import join, exists
from os import mkdir
import numpy as np
import glob
import matplotlib.pyplot as plt
# Import own functions
from ae_model import AE
from learning import EarlyStopping, ReduceLROnPlateau, LSIZE
# Define parameters
num_epochs = 50
rollout_path = "data/rollouts/rollout_*.npz"
logdir = "data/"
X = []
for x in glob.glob(rollout_path):
data_point = np.load(x, allow_pickle=True)
X.append(data_point)
train_loader, test_loader = train_test_split(X, test_size=0.2, shuffle= False)
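# note: train_test_split here returns two plain lists of loaded npz objects, not DataLoaders, so each "rollout" below is one whole file rather than a mini-batch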
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
print(device)
model = AE(LSIZE).to(device)
#optimizer = optim.Adam(model.parameters())
optimizer = torch.optim.Adam(model.parameters(),
lr = 1e-1,
weight_decay = 1e-8)
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=5)
earlystopping = EarlyStopping("min", patience=3)
# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()
def train(epoch):
""" One training epoch """
model.train()
train_loss = []
#train_data_points = 0
for batch_idx, rollout in enumerate(train_loader):
data = torch.tensor(rollout["data"]).to(device)
train_data_points = len(data) * len(train_loader)
#recon_batch, mu, logvar = model(data)
recon_batch, _ = model(data)
loss = loss_function(recon_batch, data)
optimizer.zero_grad()
loss.backward()
#train_loss += loss.item()
optimizer.step()
plot_train_data.append(loss.item())
# if batch_idx % 20 == 0:
# print(
# "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
# epoch,
# batch_idx * len(data),
# train_data_points,
# 100.0 * batch_idx / len(train_loader),
# loss.item() / len(data),
# )
# )
# train_loss /= train_data_points
# print(
# "====> Epoch: {} Average loss: {:.4f}".format(
# epoch, train_loss / train_data_points
# )
print(
"====> Epoch: {} batchId: {} Average loss: {:.4f}".format(
epoch, batch_idx, loss.item()
))
# )
return train_loss
def test():
model.eval()
test_loss = 0
test_data_points = 0
with torch.no_grad():
for rollout in test_loader:
data = torch.tensor(rollout["data"]).to(device)
test_data_points = len(data) * len(test_loader)
#recon_batch, mu, logvar = model(data)
recon_batch, _ = model(data)
test_loss += loss_function(recon_batch, data).item()
test_loss /= test_data_points
print("====> Test set loss: {:.4f}".format(test_loss))
return test_loss
def save_checkpoint(state, is_best, filename, best_filename):
""" Save state in filename. Also save in best_filename if is_best. """
torch.save(state, filename)
if is_best:
torch.save(state, best_filename)
# check vae dir exists, if not, create it
ae_dir = join(logdir, "ae_gpu_run_false")
if not exists(ae_dir):
mkdir(ae_dir)
reload_file = join(ae_dir, "best.tar")
noreload = False
if not noreload and exists(reload_file):
state = torch.load(reload_file)
print(
"Reloading model at epoch {}"
", with test error {}".format(state["epoch"], state["precision"])
)
model.load_state_dict(state["state_dict"])
optimizer.load_state_dict(state["optimizer"])
scheduler.load_state_dict(state["scheduler"])
earlystopping.load_state_dict(state["earlystopping"])
cur_best = None
plot_train_data = []
plot_test_data = []
for epoch in range(1, num_epochs + 1):
#plot_train_data.append(train(epoch))
train(epoch)
test_loss = test()
scheduler.step(test_loss)
earlystopping.step(test_loss)
# checkpointing
best_filename = join(ae_dir, "best.tar")
filename = join(ae_dir, "checkpoint.tar")
is_best = not cur_best or test_loss < cur_best
if is_best:
cur_best = test_loss
save_checkpoint(
{
"epoch": epoch,
"state_dict": model.state_dict(),
"precision": test_loss,
"optimizer": optimizer.state_dict(),
"scheduler": scheduler.state_dict(),
"earlystopping": earlystopping.state_dict(),
},
is_best,
filename,
best_filename,
)
if earlystopping.stop:
print("End of Training because of early stopping at epoch {}".format(epoch))
break
test_plot_path = join(ae_dir, "test_fig.png")
# legend_strings = []
plt.title("AE Training and Test")
#plt.xlabel("Epochs")
#plt.ylabel("MSE losses")
#plt.plot(plot_test_data)
# legend_strings.append('Test')
#plt.legend('Test')
#plt.savefig(test_plot_path)
#plt.close()
#train_plot_path = join(ae_dir, "train_fig.png")
#plt.title("AE ")
#plt.xlabel("Epochs")
#plt.ylabel("MSE Loss")
plt.plot(plot_train_data)
# legend_strings.append('Train')
#plt.legend('Train')
plt.xticks(range(0, len(plot_train_data), 75))
plt.savefig(test_plot_path)
plt.close()
Below are the encoder and decoder models:
import torch
import torch.nn as nn
import torch.nn.functional as F
reduced_size = 22
class Decoder(nn.Module):
""" VAE decoder """
def __init__(self, latent_size):
super(Decoder, self).__init__()
self.latent_size = latent_size
self.fc1 = nn.Linear(latent_size, reduced_size)
self.deconv1 = nn.ConvTranspose1d(16, 32, 1, stride=1)
self.deconv2 = nn.ConvTranspose1d(32, 52, 1, stride=1)
def forward(self, x): # pylint: disable=arguments-differ
x = x.unsqueeze(2)
x = F.relu(self.deconv1(x))
x = torch.sigmoid(self.deconv2(x))
x = x.view(x.size(0), x.size(2), x.size(1))
return x
class Encoder(nn.Module): # pylint: disable=too-many-instance-attributes
""" VAE encoder """
def __init__(self, latent_size):
super(Encoder, self).__init__()
# input shape (200, 1, 52)
# batch_size, in_channel, len_channel
self.latent_size = latent_size
self.conv1 = nn.Conv1d(52, 32, 1, stride=1)
self.conv2 = nn.Conv1d(32, 16, 1, stride=1)
# output shape (200, 1, x)
self.fc_mu = nn.Linear(reduced_size, latent_size)
def forward(self, x): # pylint: disable=arguments-differ
x = x.view(x.size(0), x.size(2), x.size(1))
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = x.view(x.size(0), -1)
return x
class AE(nn.Module):
""" Variational Autoencoder """
def __init__(self, latent_size):
super(AE, self).__init__()
self.encoder = Encoder(latent_size)
self.decoder = Decoder(latent_size)
def forward(self, x): # pylint: disable=arguments-differ
x = x.unsqueeze(1)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
decoded = decoded.squeeze(1)
return decoded, encoded
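One way to sanity-check how much the encoder actually compresses is to trace tensor shapes with a dummy batch. This is just a sketch: it assumes the Encoder/Decoder/AE classes above are in scope and that each rollout batch has shape (200, 52) before AE.forward unsqueezes it to the (200, 1, 52) described in the question:

import torch

dummy = torch.randn(200, 52)     # dummy rollout batch (assumed shape)
model = AE(latent_size=32)       # latent_size has no effect on the forward pass shown
decoded, encoded = model(dummy)
print(encoded.shape)             # torch.Size([200, 16]) -- the compressed representation
print(decoded.shape)             # torch.Size([200, 52]) -- must match the input for MSELoss

Note that fc_mu in the encoder and fc1 in the decoder are defined but never called in forward, so latent_size and reduced_size do not influence the shapes above.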
I am trying to train a model with distributed data parallel in PyTorch on a Linux machine. Below is the code:
import argparse
import time
import os
import torch
import torchvision
from torch import distributed as dist
from torchvision.models import resnet18
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
def reduce_loss(tensor, rank, world_size):
with torch.no_grad():
dist.reduce(tensor, dst=0)
if rank == 0:
tensor /= world_size
batch_size = 128
epochs = 5
lr = 0.001
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '4321'
rank=1
world_size=1
dist.init_process_group(backend='gloo', init_method='env://',rank=1,world_size=1)
torch.cuda.set_device(args.local_rank)
global_rank = dist.get_rank()
net = resnet18()
net.cuda()
net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
net = DDP(net, device_ids=[1], output_device=1)
data_root = 'dataset'
trainset = MNIST(root=data_root,
download=True,
train=True,
transform=ToTensor())
valset = MNIST(root=data_root,
download=True,
train=False,
transform=ToTensor())
sampler = DistributedSampler(trainset)
train_loader = DataLoader(trainset,
batch_size=batch_size,
shuffle=False,
pin_memory=True,
sampler=sampler)
val_loader = DataLoader(valset,
batch_size=batch_size,
shuffle=False,
pin_memory=True)
criterion = torch.nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), lr=lr)
net.train()
for e in range(epochs):
# DistributedSampler deterministically shuffles the data
# by seeding with the current epoch number,
# so if set_epoch is not called at the start of each epoch,
# the shuffled order will always be the same
sampler.set_epoch(e)
for idx, (imgs, labels) in enumerate(train_loader):
imgs = imgs.cuda()
labels = labels.cuda()
output = net(imgs)
loss = criterion(output, labels)
opt.zero_grad()
loss.backward()
opt.step()
reduce_loss(loss, 1, 1)
if idx % 10 == 0 and global_rank == 0:
print('Epoch: {} step: {} loss: {}'.format(e, idx, loss.item()))
net.eval()
with torch.no_grad():
cnt = 0
total = len(val_loader.dataset)
for imgs, labels in val_loader:
imgs, labels = imgs.cuda(), labels.cuda()
output = net(imgs)
predict = torch.argmax(output, dim=1)
cnt += (predict == labels).sum().item()
if global_rank == 0:
print('eval accuracy: {}'.format(cnt / total))
TimeoutError: The client socket has timed out after 1800s while trying to connect to (localhost, 4321).
I have defined the necessary variables, i.e. master_address, master_port, rank, and world_size, but I am unable to connect to the server with the environment variables defined above and end up with a timeout error. Is there a way to resolve this issue?
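For what it's worth, ranks in torch.distributed are zero-based and must satisfy 0 <= rank < world_size. With world_size=1 the only valid rank is 0; rank=1 leaves the process waiting for a rank-0 peer that never joins, which matches the 1800s connect timeout above. A minimal single-process sketch, assuming the same address and port:

import os
import torch.distributed as dist

os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '4321'

# With world_size=1 the only process must be rank 0.
dist.init_process_group(backend='gloo', init_method='env://', rank=0, world_size=1)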
I am trying to train my BERT model on the CONLL2003 dataset, and so I wrote the following classes:
class DataSequence(torch.utils.data.Dataset):
def __init__(self, df):
lb = [i.split() for i in df['labels'].values.tolist()]
txt = df['text'].values.tolist()
self.texts = [tokenizer(str(i),
padding='max_length', max_length = 512, truncation=True, return_tensors="pt") for i in txt]
self.labels = [align_label(i,j) for i,j in zip(txt, lb)]
def __len__(self):
return len(self.labels)
def get_batch_data(self, idx):
return self.texts[idx]
def get_batch_labels(self, idx):
return torch.LongTensor(self.labels[idx])
def __getitem__(self, idx):
batch_data = self.get_batch_data(idx)
batch_labels = self.get_batch_labels(idx)
return batch_data, batch_labels
from transformers import BertForTokenClassification
class BertModel(torch.nn.Module):
def __init__(self):
super(BertModel, self).__init__()
self.bert = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=len(unique_labels))
def forward(self, input_id, mask, label):
output = self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)
return output
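Since the model is built with return_dict=False, the forward call returns a plain (loss, logits) tuple when labels are passed, which is what the training loop below unpacks. A standalone sketch (num_labels=9 is a placeholder for len(unique_labels), and the tensors are dummies):

import torch
from transformers import BertForTokenClassification

bert = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=9)
input_id = torch.randint(0, 1000, (1, 512))     # dummy token ids
mask = torch.ones(1, 512, dtype=torch.long)     # dummy attention mask
labels = torch.zeros(1, 512, dtype=torch.long)  # dummy per-token labels
loss, logits = bert(input_ids=input_id, attention_mask=mask, labels=labels, return_dict=False)
print(loss.item(), logits.shape)                # logits: (1, 512, num_labels)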
And for training
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD
from tqdm import tqdm
def train_loop(model, df_train, df_val):
train_dataset = DataSequence(df_train)
val_dataset = DataSequence(df_val)
train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, num_workers=4, batch_size=1)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
optimizer = SGD(model.parameters(), lr=LEARNING_RATE)
if use_cuda:
model = model.cuda()
best_acc = 0
best_loss = 1000
for epoch_num in range(EPOCHS):
total_acc_train = 0
total_loss_train = 0
model.train()
for train_data, train_label in tqdm(train_dataloader):
train_label = train_label[0].to(device)
mask = train_data['attention_mask'][0].to(device)
input_id = train_data['input_ids'][0].to(device)
optimizer.zero_grad()
loss, logits = model(input_id, mask, train_label)
logits_clean = logits[0][train_label != -100]
label_clean = train_label[train_label != -100]
predictions = logits_clean.argmax(dim=1)
acc = (predictions == label_clean).float().mean()
total_acc_train += acc
total_loss_train += loss.item()
loss.backward()
optimizer.step()
model.eval()
total_acc_val = 0
total_loss_val = 0
for val_data, val_label in val_dataloader:
val_label = val_label[0].to(device)
mask = val_data['attention_mask'][0].to(device)
input_id = val_data['input_ids'][0].to(device)
loss, logits = model(input_id, mask, val_label)
logits_clean = logits[0][val_label != -100]
label_clean = val_label[val_label != -100]
predictions = logits_clean.argmax(dim=1)
acc = (predictions == label_clean).float().mean()
total_acc_val += acc
total_loss_val += loss.item()
val_accuracy = total_acc_val / len(df_val)
val_loss = total_loss_val / len(df_val)
print(
f'Epochs: {epoch_num + 1} | Loss: {total_loss_train / len(df_train): .3f} | Accuracy: {total_acc_train / len(df_train): .3f} | Val_Loss: {total_loss_val / len(df_val): .3f} | Accuracy: {total_acc_val / len(df_val): .3f}')
LEARNING_RATE = 1e-2
EPOCHS = 5
model = BertModel()
train_loop(model, df_train, df_val)
The training therefore does not start, and this error persists while running the code in Jupyter.
This is my model:
class BiLSTM(nn.Module):
def __init__(self):
super(BiLSTM, self).__init__()
self.hidden_size = 128
drp = 0.2
n_classes = len(le.classes_)
self.embedding = nn.Embedding(max_features, embed_size)
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
self.linear = nn.Linear(self.hidden_size*4 , 128)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(drp)
self.out = nn.Linear(128, n_classes)
def forward(self, x):
#rint(x.size())
h_embedding = self.embedding(x)
_embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
h_lstm, _ = self.lstm(h_embedding)
avg_pool = torch.mean(h_lstm, 1)
max_pool, _ = torch.max(h_lstm, 1)
conc = torch.cat(( avg_pool, max_pool), 1)
conc = self.relu(self.linear(conc))
conc = self.dropout(conc)
out = self.out(conc)
return out
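As a side note, the hidden_size*4 input to self.linear comes from the bidirectional LSTM producing 2*hidden_size features per timestep, doubled again by concatenating the mean- and max-pooled vectors. A quick shape check with dummy dimensions (all values here are placeholders):

import torch
import torch.nn as nn

embed_size, hidden_size, seq_len, batch = 300, 128, 50, 4  # dummy sizes
lstm = nn.LSTM(embed_size, hidden_size, bidirectional=True, batch_first=True)
h_lstm, _ = lstm(torch.randn(batch, seq_len, embed_size))
avg_pool = torch.mean(h_lstm, 1)            # (batch, 2*hidden_size)
max_pool, _ = torch.max(h_lstm, 1)          # (batch, 2*hidden_size)
conc = torch.cat((avg_pool, max_pool), 1)
print(conc.shape)                           # torch.Size([4, 512]) == (batch, 4*hidden_size)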
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean',)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()
# Load train and test in CUDA Memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()
# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)
# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
train_loss = []
valid_loss = []
for epoch in range(n_epochs):
start_time = time.time()
# Set model to train configuration
model.train()
avg_loss = 0.
for i, (x_batch, y_batch) in enumerate(train_loader):
# Predict/Forward Pass
y_pred = model(x_batch)
# Compute loss
loss = loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_loss += loss.item() / len(train_loader)
#acc =n-avg_loss
# Set model to validation configuration
model.eval()
avg_val_loss = 0.
val_preds = np.zeros((len(x_cv),len(le.classes_)))
for i, (x_batch, y_batch) in enumerate(valid_loader):
y_pred = model(x_batch).detach()
avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
#val_accuracy = n- avg_val_loss
# keep/store predictions
val_preds[i * batch_size:(i+1) * batch_size] =F.softmax(y_pred).cpu().numpy()
val_preds=val_preds*5000
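# note: scaling val_preds by a positive constant does not change the argmax used for accuracy below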
# Check Accuracy
val_accuracy = sum(val_preds.argmax(axis=1)==test_y)/len(test_y)
train_loss.append(avg_loss)
valid_loss.append(avg_val_loss)
elapsed_time = time.time() - start_time
print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
epoch + 1, n_epochs , avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving. I have used the BiLSTM model. I tried changing the loss function and the dropout value, but it didn't work. I guess the issue is that the model is overfitting; how can I increase the accuracy? The size of the dataset is 3000.
[Output looks like this][1]
[Data looks like this][2]
[1]: https://i.stack.imgur.com/NbK92.png
[2]: https://i.stack.imgur.com/ll12J.png
I have written the following code to train a BERT model on my dataset. I used from tqdm.notebook import tqdm for tqdm and used it in the loops, but when I run the program the bar stays at 0% even after the entire code has run. How do I fix this?
Code
Model
TRANSFORMERS = {
"bert-multi-cased": (BertModel, BertTokenizer, "bert-base-uncased"),
}
class Transformer(nn.Module):
def __init__(self, model, num_classes=1):
"""
Constructor
Arguments:
model {string} -- Transformer to build the model on. Expects "camembert-base".
num_classes {int} -- Number of classes (default: {1})
"""
super().__init__()
self.name = model
model_class, tokenizer_class, pretrained_weights = TRANSFORMERS[model]
bert_config = BertConfig.from_json_file(MODEL_PATHS[model] + 'bert_config.json')
bert_config.output_hidden_states = True
self.transformer = BertModel(bert_config)
self.nb_features = self.transformer.pooler.dense.out_features
self.pooler = nn.Sequential(
nn.Linear(self.nb_features, self.nb_features),
nn.Tanh(),
)
self.logit = nn.Linear(self.nb_features, num_classes)
def forward(self, tokens):
"""
Usual torch forward function
Arguments:
tokens {torch tensor} -- Sentence tokens
Returns:
torch tensor -- Class logits
"""
_, _, hidden_states = self.transformer(
tokens, attention_mask=(tokens > 0).long()
)
hidden_states = hidden_states[-1][:, 0] # Use the representation of the first token of the last layer
ft = self.pooler(hidden_states)
return self.logit(ft)
Training
def fit(model, train_dataset, val_dataset, epochs=1, batch_size=8, warmup_prop=0, lr=5e-4):
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
optimizer = AdamW(model.parameters(), lr=lr)
num_warmup_steps = int(warmup_prop * epochs * len(train_loader))
num_training_steps = epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)
loss_fct = nn.BCEWithLogitsLoss(reduction='mean').cuda()
for epoch in range(epochs):
model.train()
start_time = time.time()
optimizer.zero_grad()
avg_loss = 0
for step, (x, y_batch) in tqdm(enumerate(train_loader), total=len(train_loader)):
y_pred = model(x.to(device))
loss = loss_fct(y_pred.view(-1).float(), y_batch.float().to(device))
loss.backward()
avg_loss += loss.item() / len(train_loader)
xm.optimizer_step(optimizer, barrier=True)
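# note: xm.optimizer_step is a torch_xla (TPU) call; on a CUDA device the usual call is the commented-out optimizer.step() below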
#optimizer.step()
scheduler.step()
model.zero_grad()
optimizer.zero_grad()
model.eval()
preds = []
truths = []
avg_val_loss = 0.
with torch.no_grad():
for x, y_batch in tqdm(val_loader):
y_pred = model(x.to(device))
loss = loss_fct(y_pred.detach().view(-1).float(), y_batch.float().to(device))
avg_val_loss += loss.item() / len(val_loader)
probs = torch.sigmoid(y_pred).detach().cpu().numpy()
preds += list(probs.flatten())
truths += list(y_batch.numpy().flatten())
score = roc_auc_score(truths, preds)
dt = time.time() - start_time
lr = scheduler.get_last_lr()[0]
print(f'Epoch {epoch + 1}/{epochs} \t lr={lr:.1e} \t t={dt:.0f}s \t loss={avg_loss:.4f} \t val_loss={avg_val_loss:.4f} \t val_auc={score:.4f}')
model = Transformer("bert-multi-cased")
device = torch.device('cuda:2')
model = model.to(device)
epochs = 3
batch_size = 32
warmup_prop = 0.1
lr = 1e-4
train_dataset = JigsawDataset(df_train)
val_dataset = JigsawDataset(df_val)
test_dataset = JigsawDataset(df_test)
fit(model, train_dataset, val_dataset, epochs=epochs, batch_size=batch_size, warmup_prop=warmup_prop, lr=lr)
Output
0%| | 0/6986 [00:00<?, ?it/s]
How to fix this?
The import should be:
from tqdm import tqdm
The error is in the training function; correct this loop:
for x, y_batch in tqdm(val_loader, total = len(val_loader)):
Contrary to Ishan Dutta's answer, tqdm.notebook.tqdm (and not tqdm.tqdm) is the correct function to use for both Jupyter Notebook and JupyterLab.
This problem can happen if you don't have ipywidgets installed or if you have installed ipywidgets before installing JupyterLab.
What fixed it for me was reinstalling ipywidgets:
pip3 uninstall ipywidgets --yes
pip3 install --upgrade ipywidgets