I am using BERT-BiLSTM-CRF to implement named entity recognition, and I have this code:
def train_epoch(train_loader, model, optimizer, scheduler, epoch):
    # set model to training mode
    model.train()
    # step number in one epoch: 336
    train_losses = 0
    for idx, batch_samples in enumerate(tqdm(train_loader)):
        batch_data, batch_token_starts, batch_labels = batch_samples
        batch_masks = batch_data.gt(0)  # get padding mask
        # compute model output and loss
        loss = model((batch_data, batch_token_starts),
                     token_type_ids=None, attention_mask=batch_masks, labels=batch_labels)[0]
        train_losses += loss.item()
        # clear previous gradients, compute gradients of all variables wrt loss
        model.zero_grad()
        loss.backward()
        # gradient clipping
        nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=config.clip_grad)
        # performs updates using calculated gradients
        optimizer.step()
        scheduler.step()
    train_loss = float(train_losses) / len(train_loader)
    logging.info("Epoch: {}, train loss: {}".format(epoch, train_loss))
I am getting the error:
train_losses += loss.item()
ValueError: only one element tensors can be converted to Python scalars
What should I do?
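A minimal illustration of when this error appears, assuming loss is not a single-element tensor (for example, nn.DataParallel returning one loss per GPU, or a model returning per-token losses), and the usual reduction before calling .item():

import torch

loss = torch.tensor([0.7, 0.9])      # a multi-element loss tensor, e.g. one value per GPU
# loss.item()                        # raises: only one element tensors can be converted to Python scalars
train_losses = 0.0
train_losses += loss.mean().item()   # reduce to a single element first, then convert

If loss is already a scalar in your setup, the cause lies elsewhere and this sketch does not apply.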
I'm currently building an LSTM model for predicting stock prices in PyTorch. I now want to implement a walk-forward validation method, but I couldn't find any resource on how to do that.
This is my current training loop:
#%%
lstm1 = LSTM1(num_classes, input_size, hidden_dim, num_layers, X_train_tensors_final.shape[1])
criterion = torch.nn.L1Loss()
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    outputs = lstm1.forward(X_train_tensors_final)
    optimizer.zero_grad()  # clear gradients
    loss = criterion(outputs, y_train_tensors)
    loss.backward()  # compute gradients of the loss w.r.t. the parameters
    optimizer.step()  # update the parameters from the gradients
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
df_X_ss = ss.transform(df.iloc[:, 0:-1])
df_y_mm = ss.transform(df.iloc[:, 0:1])
df_X_ss = Variable(torch.Tensor(df_X_ss))
df_y_mm = Variable(torch.Tensor(df_y_mm))
df_X_ss = torch.reshape(df_X_ss, (df_X_ss.shape[0], 1, df_X_ss.shape[1]))
train_predict = lstm1(df_X_ss)
data_predict = train_predict.data.numpy()
The model should now predict one step into the future and then calculate the absolute percentage error. For the next step, the model should use the actual y value instead of the predicted yhat to make its next prediction. What would be the best way of implementing this? Or is there some built-in function in PyTorch that would do this?
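For reference, a minimal sketch of one-step-ahead walk-forward evaluation written by hand (there is no built-in PyTorch helper for this). It reuses df_X_ss and df_y_mm from the snippet above, assumes the model produces a single value per step, and uses a hypothetical split index marking the start of the hold-out range:

import numpy as np
import torch

lstm1.eval()
split = int(len(df_X_ss) * 0.8)                  # hypothetical train/test boundary
ape = []                                         # absolute percentage error per step

with torch.no_grad():
    for t in range(split, len(df_X_ss) - 1):
        x_t = df_X_ss[t].unsqueeze(0)            # one step, shaped (1, 1, n_features)
        y_hat = lstm1(x_t).item()                # one-step-ahead prediction
        y_true = df_y_mm[t + 1].item()           # next actual value, never the previous prediction
        ape.append(abs((y_true - y_hat) / (y_true + 1e-8)))

print("MAPE over the walk-forward window:", 100 * np.mean(ape))

A stricter walk-forward scheme would also refit the model inside the loop on the data observed up to step t; the sketch above only rolls the evaluation window forward while feeding the model actual values.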
I am using PyTorch, and I am trying to do validation on my dataset to obtain the optimal number of channels in my neural network. I have the following code:
def train_during_validation():
    for epoch in range(1, 201):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.val_mask], data.y[data.val_mask])
        loss.backward()
        optimizer.step()
    return loss

def validation():
    loss_val = np.zeros(50, dtype=float)
    model = GCN(hidden_channels=1)
    loss_val = train_during_validation()
    print(loss_val)

validation()
In the code above I train the previously defined model with 16 channels and obtain a loss of 0.33. But as soon as I start doing validation over hidden_channels (see code below), the loss no longer goes down (it stays at 1.95). I do not understand why. Can somebody explain?
def train_during_validation(model):
    print(f'Model:{model}')
    for epoch in range(1, 201):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.val_mask], data.y[data.val_mask])
        loss.backward()
        optimizer.step()
    return loss

def validation():
    loss_val = np.zeros(50, dtype=float)
    model = GCN(hidden_channels=1)
    for i in range(50):
        model = GCN(hidden_channels=i)
        #print(model)
        loss_val[i] = train_during_validation(model)
        print(loss_val[i])

validation()
Eventually I found the error: the optimizer and criterion must be defined inside def train_during_validation(model), so that they are created freshly for each new model instance.
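A minimal sketch of that fix, assuming the same GCN, data, and masks as above; the optimizer type, learning rate, and loss function here are placeholders:

def train_during_validation(model):
    # build a fresh optimizer (and criterion) bound to this model's parameters;
    # an optimizer created for an earlier model instance would keep updating the old tensors
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)   # placeholder lr
    criterion = torch.nn.CrossEntropyLoss()                     # placeholder loss
    for epoch in range(1, 201):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.val_mask], data.y[data.val_mask])
        loss.backward()
        optimizer.step()
    return loss.item()   # return a plain float for the numpy array

This way each GCN(hidden_channels=i) gets its own optimizer, instead of reusing one that still points at the parameters of the first model.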
train_losses = []
val_losses = []
for epoch in range(EPOCH):
    train_loss = 0
    valid_loss = 0
    # train steps
    net.train()
    for batch_index, (data, target) in enumerate(train_loader):
        target = target.reshape(-1, 1)
        # clears gradients
        optimizer.zero_grad()
        # forward pass
        output, dense2_output = net(data)
        eps = 1e-6
        # loss in batch
        loss = torch.sqrt(criterion(output, target.double()) + eps)
        # backward pass for loss gradient
        loss.backward()
        # update parameters/weights
        optimizer.step()
        # update training loss
        #train_loss += math.pow(loss.item(),2)*data.size(0)
        train_loss += loss.item()*data.size(0)
    # average loss calculations
    #train_loss = math.sqrt(train_loss/len(train_loader.sampler))
    train_loss = train_loss/len(train_loader.sampler)
    # Display loss statistics
    print(f'Current Epoch: {epoch}\n Training Loss: {round(train_loss, 6)}')
    train_losses.append(train_loss)
I am calculating the average RMSE loss in a PyTorch neural-network regression model. Is this the correct way to do it? Please help me understand whether there is any bug in my approach.
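For comparison, a minimal sketch of the variant hinted at by the commented-out lines above: accumulate the summed squared error over the epoch and take the square root once at the end, which gives the RMSE over the whole epoch rather than an average of per-batch RMSE values (the two are not identical in general). It assumes the same net, criterion (nn.MSELoss), optimizer, and train_loader as in the snippet:

import math

eps = 1e-6
train_loss = 0.0
net.train()
for batch_index, (data, target) in enumerate(train_loader):
    target = target.reshape(-1, 1)
    optimizer.zero_grad()
    output, dense2_output = net(data)
    mse = criterion(output, target.double())   # mean squared error for this batch
    loss = torch.sqrt(mse + eps)               # per-batch RMSE used for backpropagation
    loss.backward()
    optimizer.step()
    train_loss += mse.item() * data.size(0)    # accumulate summed squared error
epoch_rmse = math.sqrt(train_loss / len(train_loader.sampler))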
I am very new to PyTorch and am implementing my own image-classifier network. However, for each epoch the training accuracy is very good, but the validation accuracy is 0 (I checked up to the 5th epoch). I am using the Adam optimizer with a learning rate of 0.001, and I resample the whole data set into training and validation sets after each epoch. Please help me find where I am going wrong.
Here is my code:
### where is data?
data_dir_train = '/home/sup/PycharmProjects/deep_learning/CNN_Data/training_set'
data_dir_test = '/home/sup/PycharmProjects/deep_learning/CNN_Data/test_set'
# Define your batch_size
batch_size = 64
allData = datasets.ImageFolder(root=data_dir_train, transform=transformArr)

# We need to further split our training dataset into training and validation sets.
def split_train_validation():
    # Define the indices
    num_train = len(allData)
    indices = list(range(num_train))  # start with all the indices in training set
    split = int(np.floor(0.2 * num_train))  # define the split size
    #train_idx, valid_idx = indices[split:], indices[:split]
    # Random, non-contiguous split
    validation_idx = np.random.choice(indices, size=split, replace=False)
    train_idx = list(set(indices) - set(validation_idx))
    # define our samplers -- we use a SubsetRandomSampler because it will return
    # a random subset of the split defined by the given indices without replacement
    train_sampler = SubsetRandomSampler(train_idx)
    validation_sampler = SubsetRandomSampler(validation_idx)
    #train_loader = DataLoader(allData,batch_size=batch_size,sampler=train_sampler,shuffle=False,num_workers=4)
    #validation_loader = DataLoader(dataset=allData,batch_size=1, sampler=validation_sampler)
    return (train_sampler, validation_sampler)
Training
from torch.optim import Adam
import torch
import createNN
import torch.nn as nn
import loadData as ld
from torch.autograd import Variable
from torch.utils.data import DataLoader

# check if cuda - GPU support available
cuda = torch.cuda.is_available()

#create model, optimizer and loss function
model = createNN.ConvNet(class_num=2)
optimizer = Adam(model.parameters(), lr=.001, weight_decay=.0001)
loss_func = nn.CrossEntropyLoss()
if cuda:
    model.cuda()

# function to save model
def save_model(epoch):
    torch.save(model.load_state_dict(), 'imageClassifier_{}.model'.format(epoch))
    print('saved model at epoch', epoch)

def exp_lr_scheduler(epoch, init_lr=args.lr, weight_decay=args.weight_decay, lr_decay_epoch=cf.lr_decay_epoch):
    lr = init_lr * (0.5 ** (epoch // lr_decay_epoch))
def train(num_epochs):
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('\n\nEpoch {}'.format(epoch))
        train_sampler, validation_sampler = ld.split_train_validation()
        train_loader = DataLoader(ld.allData, batch_size=30, sampler=train_sampler, shuffle=False)
        validation_loader = DataLoader(dataset=ld.allData, batch_size=1, sampler=validation_sampler)
        model.train()
        acc = 0.0
        loss = 0.0
        total = 0
        # train model with training data
        for i, (images, labels) in enumerate(train_loader):
            # if cuda then move to GPU
            if cuda:
                images = images.cuda()
                labels = labels.cuda()
            # Variable class wraps a tensor and we can calculate grad
            images = Variable(images)
            labels = Variable(labels)
            # reset accumulated gradients for each batch
            optimizer.zero_grad()
            # pass images to model which returns prediction
            output = model(images)
            # calculate the loss based on prediction and actual
            loss = loss_func(output, labels)
            # backpropagate the loss and compute gradient
            loss.backward()
            # update weights as per the computed gradients
            optimizer.step()
            # prediction class
            predVal, predClass = torch.max(output.data, 1)
            acc += torch.sum(predClass == labels.data)
            loss += loss.cpu().data[0]
            total += labels.size(0)
        # print the statistics
        train_acc = acc/total
        train_loss = loss / total
        print('Mean train acc = {} over epoch = {}'.format(epoch, acc))
        print('Mean train loss = {} over epoch = {}'.format(epoch, loss))

        # Validate model with validation data
        model.eval()
        acc = 0.0
        loss = 0.0
        total = 0
        for i, (images, labels) in enumerate(validation_loader):
            # if cuda then move to GPU
            if cuda:
                images = images.cuda()
                labels = labels.cuda()
            # Variable class wraps a tensor and we can calculate grad
            images = Variable(images)
            labels = Variable(labels)
            # reset accumulated gradients for each batch
            optimizer.zero_grad()
            # pass images to model which returns prediction
            output = model(images)
            # calculate the loss based on prediction and actual
            loss = loss_func(output, labels)
            # backpropagate the loss and compute gradient
            loss.backward()
            # update weights as per the computed gradients
            optimizer.step()
            # prediction class
            predVal, predClass = torch.max(output.data, 1)
            acc += torch.sum(predClass == labels.data)
            loss += loss.cpu().data[0]
            total += labels.size(0)
        # print the statistics
        valid_acc = acc / total
        valid_loss = loss / total
        print('Mean train acc = {} over epoch = {}'.format(epoch, valid_acc))
        print('Mean train loss = {} over epoch = {}'.format(epoch, valid_loss))
        if (best_acc < valid_acc):
            best_acc = valid_acc
            save_model(epoch)
        # at 30th epoch we save the model
        if (epoch == 30):
            save_model(epoch)

train(20)
I think you did not take into account that acc += torch.sum(predClass == labels.data) returns a tensor instead of a float value. Depending on the version of PyTorch you are using, you should change it to:
acc += torch.sum(predClass == labels.data).cpu().data[0] #pytorch 0.3
acc += torch.sum(predClass == labels.data).item() #pytorch 0.4
Although your code seems to work for an old PyTorch version, I would recommend upgrading to version 0.4.
Also, I noticed some other problems/typos in your code.
You are loading the dataset for every epoch:
for epoch in range(num_epochs):
    print('\n\nEpoch {}'.format(epoch))
    train_sampler, validation_sampler = ld.split_train_validation()
    train_loader = DataLoader(ld.allData, batch_size=30, sampler=train_sampler, shuffle=False)
    validation_loader = DataLoader(dataset=ld.allData, batch_size=1, sampler=validation_sampler)
    ...
That should not happen; loading it once is enough:
train_sampler, validation_sampler = ld.split_train_validation()
train_loader = DataLoader(ld.allData, batch_size=30, sampler=train_sampler, shuffle=False)
validation_loader = DataLoader(dataset=ld.allData, batch_size=1, sampler=validation_sampler)
for epoch in range(num_epochs):
    print('\n\nEpoch {}'.format(epoch))
    ...
In the training part you have the following (this does not happen in the validation part):
train_acc = acc/total
train_loss = loss / total
print('Mean train acc = {} over epoch = {}'.format(epoch,acc))
print('Mean train loss = {} over epoch = {}'.format(epoch, loss))
Here you are printing acc instead of train_acc (and loss instead of train_loss).
Also, in the validation part, you are printing print('Mean train acc = {} over epoch = {}'.format(epoch, valid_acc)) when the label should be something like 'Mean val acc'.
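For example, the corrected statistics lines might look like this, using the variables already computed above and matching the argument order to the placeholders:

# training statistics
print('Mean train acc = {} over epoch = {}'.format(train_acc, epoch))
print('Mean train loss = {} over epoch = {}'.format(train_loss, epoch))
# validation statistics
print('Mean val acc = {} over epoch = {}'.format(valid_acc, epoch))
print('Mean val loss = {} over epoch = {}'.format(valid_loss, epoch))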
After changing these lines of code and using a standard model I created with the CIFAR dataset, training seems to converge: accuracy increases at every epoch while the mean loss value decreases.
I hope I could help you!