Separation of training data in PyTorch - Python

I have code with which I wanted to train a neural network and save the finished model to a file. But I am getting an error from an incorrect split of the training and validation data, and I can't understand why:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ChatBot(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
                  weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
        return hidden

class ChatDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

def train(model, train_loader, loss_fn, optimizer, device):
    model.train()
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        hidden = model.init_hidden(inputs.size(0))
        hidden = tuple([each.data for each in hidden])
        optimizer.zero_grad()
        outputs, _ = model(inputs, hidden)
        loss = loss_fn(outputs.view(-1), targets.view(-1))
        loss.backward()
        optimizer.step()

def evaluate(model, val_loader, loss_fn, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            hidden = model.init_hidden(inputs.size(0))
            hidden = tuple([each.data for each in hidden])
            outputs, _ = model(inputs, hidden)
            total_loss += loss_fn(outputs, targets).item()
    return total_loss / len(val_loader)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = 500
hidden_size = 128
num_layers = 2
output_size = 500

model = ChatBot(input_size, hidden_size, num_layers, output_size)
model = model.to(device)

data = [("Hi, how are you?", "I'm doing well, thank you for asking."),
        ("What's your name?", "I'm a chatbot, I don't have a name."),
        ("What's the weather like?", "I'm not sure, I don't have access to current weather information."),
        ("What's the time?", "I'm not sure, I don't have access to the current time.")]

dataset = ChatDataset(data)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), int(0.2 * len(dataset))])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
for epoch in range(num_epochs):
    train(model, train_loader, loss_fn, optimizer, device)
    val_loss = evaluate(model, val_loader, loss_fn, device)
    print("Epoch [{}/{}], Validation Loss: {:.4f}".format(epoch + 1, num_epochs, val_loss))

torch.save(model.state_dict(), 'chatbot_model.pt')
But when I run this code, I get an error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-ae2a6dd1bc7c> in <module>
     78 dataset = ChatDataset(data)
     79 
---> 80 train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), int(0.2 * len(dataset))])
     81 
     82 train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataset.py in random_split(dataset, lengths, generator)
    345     # Cannot verify that dataset is Sized
    346     if sum(lengths) != len(dataset):    # type: ignore[arg-type]
--> 347         raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
    348 
    349     indices = randperm(sum(lengths), generator=generator).tolist()  # type: ignore[call-overload]

ValueError: Sum of input lengths does not equal the length of the input dataset!
I don't know why this error occurs; everything seems to be correct.

The typecasting of the values to an integer causes the sum of the two split lengths to differ from the total number of samples in the dataset. Not the most ideal code, but replacing the split with the following will work:

num_train = int(0.8 * len(dataset))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [num_train, len(dataset) - num_train])

I suspect there is a loss of precision in this calculation,

[int(0.8 * len(dataset)), int(0.2 * len(dataset))]

so the number of records in the dataset is not fully accounted for. For example:

int(.8 * 56) + int(.2 * 56) = 44 + 11 = 55

With your dataset of 4 pairs the same thing happens: int(0.8 * 4) + int(0.2 * 4) = 3 + 0 = 3, which does not equal len(dataset) = 4, hence the ValueError.
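
As an aside (not part of the original answers): if you are on PyTorch 1.13 or newer, random_split can also take fractions that sum to 1 and will compute the integer lengths itself, distributing any remainder, which sidesteps the truncation problem. A minimal sketch:

import torch
from torch.utils.data import random_split

# Assumes PyTorch >= 1.13; the fractional form computes the lengths internally.
generator = torch.Generator().manual_seed(42)  # optional, for a reproducible split
train_dataset, val_dataset = random_split(dataset, [0.8, 0.2], generator=generator)
print(len(train_dataset) + len(val_dataset) == len(dataset))  # True, no samples lost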


How to replace an LSTM with BERT in a PyTorch classification model

Context
I have an LSTM model with pre-trained word embeddings for a multiclass classification problem with 10 output classes.
# LSTM model
hidden_dim = 64
n_layers = 1
embed_len = 100

class LSTMClassifier(nn.Module):
    def __init__(self, embeddings, n_layers):
        super(LSTMClassifier, self).__init__()
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.n_layers = n_layers
        self.weights = torch.FloatTensor(embeddings)
        self.embedding_layer = nn.Embedding.from_pretrained(self.weights)
        self.lstm = nn.LSTM(input_size=embed_len, hidden_size=hidden_dim,
                            num_layers=self.n_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 10)
        self.activation = nn.Tanh()
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        embeddings = self.dropout(embeddings)
        output, (hidden, cell) = self.lstm(embeddings, (torch.randn(self.n_layers, len(X_batch), hidden_dim),
                                                        torch.randn(self.n_layers, len(X_batch), hidden_dim)))
        output = self.activation(output)
        output = self.dropout(output)
        return self.linear(output[:, -1])
Problem
Now I would like to replace the LSTM layer with BERT. I tried it the following way, but when I train the BERT model I get the error ValueError: too many values to unpack (expected 2). This probably means that the embeddings passed to self.bert(embeddings) do not have the right shape; they have the shape torch.Size([256, 64, 100]). What do I need to change so that my BERT model works?
class BERTClassifier(nn.Module):
    def __init__(self, embeddings):
        super(BERTClassifier, self).__init__()
        # Load the BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Embeddings
        self.weights = torch.FloatTensor(embeddings)
        self.embedding_layer = nn.Embedding.from_pretrained(self.weights)
        # Final layer for classification
        self.linear = nn.Linear(self.bert.config.hidden_size, 10)
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        X_batch = X_batch.to(device)
        embeddings = self.embedding_layer(X_batch)
        output, (hidden, cell) = self.bert(embeddings)
        hidden = self.dropout(hidden[:, 0, :])
        logits = self.linear(hidden)
        return logits
Training lines
epochs = 50
learning_rate = 1e-4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bert_classifier = BERTClassifier(word_embeddings_with_stopwords)
bert_classifier = bert_classifier.to(device)

# Freeze the BERT layer
for param in bert_classifier.bert.parameters():
    param.requires_grad = False

# Freeze the embedding layer
for param in bert_classifier.embedding_layer.parameters():
    param.requires_grad = False

loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(bert_classifier.parameters(), lr=learning_rate, weight_decay=1e-5)

results_bert_classifier = train_model(bert_classifier, loss_fn, optimizer, train_loader_with_stopwords, val_loader_with_stopwords, epochs)
Training functions
# Create validation evaluation function
def calc_val_loss_accuracy(model, loss_fn, val_loader, val_losses_list, val_accuracy_list):
    """
    Calculates the validation loss and accuracy during training.
    Adds the calculated validation loss and accuracy to a predefined list.
    """
    model.eval()
    with torch.no_grad():
        Y_shuffled = []
        Y_preds = []
        losses = []
        for X, Y in val_loader:
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())
            Y_shuffled.append(Y)
            Y_preds.append(preds.argmax(dim=-1))
        Y_shuffled = torch.cat(Y_shuffled)
        Y_preds = torch.cat(Y_preds)
        val_accuracy = accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())
        print("Valid Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Valid Acc  : {:.3f}".format(val_accuracy))
        val_losses_list.append(np.array(losses).mean())
        val_accuracy_list.append(val_accuracy)
# Create training function
def train_model(model, loss_fn, optimizer, train_loader, val_loader, epochs=10):
    """
    Trains a model for a given set of epochs, with a given loss function and optimizer.
    Returns a dictionary consisting of the train loss and accuracy per epoch
    and val loss and accuracy per epoch.
    """
    model.train()
    train_losses = []
    train_accuracy = []
    val_losses = []
    val_accuracy = []
    Y_shuffled_list = []
    Y_preds_list = []
    for i in range(1, epochs + 1):
        print(f"EPOCH {i}")
        losses = []
        for X, Y in tqdm(train_loader):
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())
            Y_shuffled_list.append(Y)
            Y_preds_list.append(preds.argmax(dim=-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Y_shuffled = torch.cat(Y_shuffled_list)
        Y_preds = torch.cat(Y_preds_list)
        train_accuracy_score = accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())
        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Train Acc  : {:.3f}".format(train_accuracy_score))
        calc_val_loss_accuracy(model, loss_fn, val_loader, val_losses, val_accuracy)
        # Save training history
        train_losses.append(np.array(losses).mean())
        train_accuracy.append(train_accuracy_score)
    # Create results
    results = {'train_loss': train_losses,
               'train_acc': train_accuracy,
               'val_loss': val_losses,
               'val_acc': val_accuracy}
    return results
Full Traceback
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-113-c162e15ac6eb> in <module>
     17 optimizer = Adam(bert_classifier.parameters(), lr=learning_rate, weight_decay=1e-5)
     18 
---> 19 results_bert_classifier = train_model(bert_classifier, loss_fn, optimizer, train_loader_with_stopwords, val_loader_with_stopwords, epochs)

4 frames
<ipython-input-76-af9148b7ec20> in train_model(model, loss_fn, optimizer, train_loader, val_loader, epochs)
     21     for X, Y in tqdm(train_loader):
     22 
---> 23         preds = model(X)
     24 
     25         loss = loss_fn(preds, Y)

/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1188         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1189                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190             return forward_call(*input, **kwargs)
   1191         # Do not call functions when jit is used
   1192         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-112-cbe2b66bfaf1> in forward(self, X_batch)
     25         embeddings = embeddings.reshape(batch_size, sequence_length, hidden_size)
     26 
---> 27         output, hidden = self.bert(embeddings)
     28 
     29         hidden = self.dropout(hidden[:, 0, :])

/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1188         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1189                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190             return forward_call(*input, **kwargs)
   1191         # Do not call functions when jit is used
   1192         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.8/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    973             raise ValueError("You have to specify either input_ids or inputs_embeds")
    974 
--> 975         batch_size, seq_length = input_shape
    976         device = input_ids.device if input_ids is not None else inputs_embeds.device
    977 

ValueError: too many values to unpack (expected 2)
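
No accepted fix is recorded for this question, but one hedged direction follows from the traceback: BertModel interprets its first positional argument as input_ids, so the 3-D embedding tensor lands where a 2-D id tensor is expected and the batch_size, seq_length = input_shape unpacking fails. Hugging Face's BertModel does accept pre-computed embeddings via the inputs_embeds keyword, but their last dimension must equal the model's hidden size (768 for bert-base-uncased), so the 100-dimensional embeddings would first need a projection. In the sketch below, embed_to_bert is an assumed extra layer, not part of the original model:

import torch
import torch.nn as nn
from transformers import BertModel

class BERTClassifier(nn.Module):
    def __init__(self, embeddings, embed_len=100):
        super(BERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.embedding_layer = nn.Embedding.from_pretrained(torch.FloatTensor(embeddings))
        # Assumed addition: project the 100-dim word vectors up to BERT's hidden size (768)
        self.embed_to_bert = nn.Linear(embed_len, self.bert.config.hidden_size)
        self.linear = nn.Linear(self.bert.config.hidden_size, 10)
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)     # (batch, seq, 100)
        embeddings = self.embed_to_bert(embeddings)    # (batch, seq, 768)
        outputs = self.bert(inputs_embeds=embeddings)  # keyword argument, not positional
        hidden = outputs.last_hidden_state             # (batch, seq, 768)
        hidden = self.dropout(hidden[:, 0, :])         # first-token representation
        return self.linear(hidden)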

Conflict between shape of (-1, 784) with input size of 200

There is an error because the shape of a tensor conflicts with the model's input size. I have absolutely no idea what to do at all. I am still new to this topic, which is why you may spot lines of code that are not needed at all; just a heads up. If needed, contact me personally and I will send you the .ipynb file.
If it matters, I am doing computer vision with the MNIST hand-drawn digits dataset.
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch.utils.data import SubsetRandomSampler, DataLoader

dataset = MNIST(root='data/', download=True)

def split_indices(dataset, rate):
    eval = int(dataset * rate / 100)
    index = np.random.permutation(dataset)
    return index[eval:], index[:eval]

train_index, eval_index = split_indices(len(dataset), rate=20)

dataset = MNIST(root='data/', train=True, transform=transforms.ToTensor())

train_sampler = SubsetRandomSampler(train_index)
train_dl = DataLoader(dataset, batch_size=200, sampler=train_sampler)
val_sampler = SubsetRandomSampler(eval_index)
val_dl = DataLoader(dataset, batch_size=200, sampler=val_sampler)

inputs = 28 * 28
nums = 10
model = nn.Linear(inputs, nums)

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(inputs, nums)

    def forward(self, xb):
        xb = xb.reshape(-1, 784)
        outputs = self.linear(xb)
        return outputs

def accuracy(x, y):
    print(torch.sum(x == y).item() / len(x))

model = MnistModel()

for images, labels in train_dl:
    outputs = model(images)
    break

loss_fn = F.cross_entropy
loss = loss_fn(outputs, labels)
opt = torch.optim.Adam(model.parameters(), lr=7)

def loss_batch(model, loss_fn, xb, yb, opt=None, metric=None):
    preds = model(xb)
    loss = loss_fn(preds, yb)
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)
    return loss.item(), len(xb), metric_result

def evaluate(model, loss_fn, valid_dl, metric=None):
    with torch.no_grad():
        results = [loss_batch(model, loss_fn, xb, yb, metric=metric) for xb, yb in valid_dl]
        losses, nums, metrics = zip(*results)
        total = np.sum(nums)
        avg_loss = np.sum(np.multiply(losses, nums)) / total
        avg_metric = None
        if metric is not None:
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric

def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.sum(preds == labels).item() / len(preds)

eval_loss, total, eval_acc = evaluate(model, loss_fn, val_dl, metric=accuracy)
print(f"loss: {eval_loss}, accuracy: {eval_acc*100}")

def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, xb, yb, opt)
        result = evaluate(model, loss_fn, valid_dl, metric)
        eval_loss, total, eval_metric = result
        if metric is None:
            print(f"Epoch: {epoch+1}, loss: {loss.item()}")
        else:
            print(f"Epoch: {epoch+1}, loss: {loss.item()}, metric: {metric.__name__} {eval_metric}")

model = MnistModel()
opt = torch.optim.Adam(model.parameters(), lr=7)
fit(5, model, loss_fn, opt, train_dl, eval_dl, accuracy)  # Error line
Error output:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-55-90c5585d3b40> in <module>()
      1 opt = torch.optim.Adam(model.parameters(), lr=7)
----> 2 fit(5, model, loss_fn, opt, train_dl, eval_dl, accuracy)

3 frames
<ipython-input-49-afd130f584e4> in forward(self, xb)
     18 
     19     def forward(self, xb):
---> 20         xb = xb.reshape(-1, 784)
     21         outputs = self.linear(xb)
     22         return outputs

RuntimeError: shape '[-1, 784]' is invalid for input of size 200
Do not use reshape on the torch.Tensor here. Use torch.nn.Flatten() to flatten your image; this will look consistent throughout your program.
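
A minimal sketch of the suggested change (assuming the same MNIST setup as in the question):

import torch
import torch.nn as nn

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()           # collapses (N, 1, 28, 28) to (N, 784)
        self.linear = nn.Linear(28 * 28, 10)

    def forward(self, xb):
        return self.linear(self.flatten(xb))

model = MnistModel()
dummy_batch = torch.randn(200, 1, 28, 28)  # an MNIST-shaped batch of 200 images
print(model(dummy_batch).shape)            # torch.Size([200, 10])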

How to solve size mismatch error in pytorch?

I am trying to create a logistic model using the CIFAR10 data in PyTorch. After running the model for evaluation I run into an error:
RuntimeError: size mismatch, m1: [750 x 4096], m2: [1024 x 10] at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\aten\src\TH/generic/THTensorMath.cpp:136
It seems like input_size is creating a problem; I don't know, I am new to this. Please let me know what changes I should make in order to overcome this error.
These are the hyperparameters:
batch_size = 100
learning_rate = 0.001
# Other constants
input_size = 4*4*64
num_classes = 10
This is the cell that downloads and splits the dataset into train, validation and test.
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

testset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=False, download=False, transform=transform)
trainvalset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=True, download=False, transform=transform)
trainset, valset = torch.utils.data.random_split(trainvalset, [45000, 5000])  # 10% for validation

train_loader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=1000, shuffle=False)
This is the architecture of my model.
class CifarModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.view(-1, 64*8*8)
        # xb = xb.reshape(-1, 784)
        print(xb.shape)
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        return loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        acc = accuracy(out, labels)          # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc.detach()}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()     # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

model = CifarModel()

def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

evaluate(model, val_loader)
Here you are specifying that the linear layer takes input_size = 4*4*64 = 1024 features and outputs 10 classes:

input_size = 4*4*64
num_classes = 10

Your forward function does not reflect this: it flattens each batch into rows of 64*8*8 = 4096 features, which is exactly the m1: [750 x 4096], m2: [1024 x 10] mismatch in the error:

xb = xb.view(-1, 64*8*8)  # rows of 4096 features
out = self.linear(xb)     # but self.linear expects input_size = 4*4*64 = 1024 features per row

Modify it like this:

xb = xb.view(-1, 64*4*4)  # rows of 1024 features, matching input_size
out = self.linear(xb)     # m1: [N x 1024], m2: [1024 x 10] -- shapes now match
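
As a further aside (my assumption, not part of the original answer): since this model has no convolutional layers, the 64-channel feature-map sizes are arbitrary here; a raw CIFAR-10 image flattens to 3*32*32 = 3072 values, so a plain logistic regression on pixels would use that size on both sides. CifarLogistic below is a hypothetical name:

import torch.nn as nn

input_size = 3 * 32 * 32  # 3072 raw pixel values per CIFAR-10 image
num_classes = 10

class CifarLogistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.view(xb.size(0), -1)  # (N, 3, 32, 32) -> (N, 3072), batch size preserved
        return self.linear(xb)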

PyTorch LSTM has nan for MSELoss

My model is:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

class BaselineModel(nn.Module):
    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)
Training looks like:
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=BATCH_SIZE, shuffle=True, **params)

model = BaselineModel(batch_size=BATCH_SIZE)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

for epoch in range(250):
    # hidden = (torch.zeros(2, 13, 5),
    #           torch.zeros(2, 13, 5))
    # model.hidden = hidden
    for i, data in enumerate(train_loader):
        hidden = model.init_hidden(13)
        inputs = data[0]
        outputs = data[1]
        print('inputs', inputs.size())
        # print('outputs', outputs.size())
        # optimizer.zero_grad()
        model.zero_grad()
        # print('inputs', inputs)
        pred, hidden = model(inputs, hidden)
        loss = loss_fn(pred, outputs)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        print('Epoch: ', epoch, '\ti: ', i, '\tLoss: ', loss)
I have gradient clipping set already, which seems to be the recommended solution. But even after the first step, I get:
Epoch: 0 i: 0 Loss: tensor(nan, grad_fn=)
I suspect your issue has to do with your outputs / data[1] (it would help if you showed examples of your train_set). Running the following piece of code gives no nan, but note that I forced the shape of the output by hand before calling loss_fn(pred, outputs):
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

class BaselineModel(nn.Module):
    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)

model = BaselineModel(batch_size=32)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

hidden = model.init_hidden(10)
model.zero_grad()
pred, hidden = model(torch.randn(2, 10, 5), hidden)
pred.size()  # torch.Size([2, 10, 5])
outputs = torch.zeros(2, 10, 5)
loss = loss_fn(pred, outputs)
loss
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
print(loss)
Please note that a common reason for nan values is numerical instability in the learning phase, but in that case you usually see finite values for the first steps before the divergence happens, which is apparently not the case here.
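
In the same spirit (a debugging sketch, assuming the question's train_loader yields (inputs, targets) pairs), it is worth scanning the data itself for nan or inf before blaming the optimization:

import torch

# Hypothetical check over one pass of the loader; names follow the question's code.
for i, data in enumerate(train_loader):
    inputs, targets = data[0], data[1]
    if torch.isnan(inputs).any() or torch.isnan(targets).any():
        print(f"nan found in batch {i}")
    if torch.isinf(inputs).any() or torch.isinf(targets).any():
        print(f"inf found in batch {i}")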

Pytorch RNN always gives the same output for multivariate time series

I have multivariate time series data. I am trying to model it with a sequence-to-sequence RNN in PyTorch. It trains well and I can see the loss going down, but on testing it gives the same output irrespective of the input.
My Model:
class RNNModel(nn.Module):
    def __init__(self, predictor_size, hidden_size, num_layers, dropout=0.3, output_size=83):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.rnn = nn.GRU(predictor_size, hidden_size, num_layers=num_layers, dropout=dropout)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.init_weights()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        output, hidden = self.rnn(input, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        return Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
Train Method:
def train(data_source, lr):
    # turn on training mode that enables dropout
    model.train()
    total_loss = 0
    hidden = model.init_hidden(bs_train)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt_size)):
        data, targets = get_batch(data_source, i)
        # Starting each batch, we detach the hidden state from how it was previously produced,
        # so that the model doesn't try to backprop all the way to the start of the dataset:
        # unrolling of the graph will go from the last iteration to the first iteration
        hidden = Variable(hidden.data)
        if cuda.is_available():
            hidden = hidden.cuda()
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()
        # clip_grad_norm to prevent gradient explosion
        torch.nn.utils.clip_grad_norm(model.parameters(), clip)
        optimizer.step()
        total_loss += len(data) * loss.data
    # return accumulated loss for all the iterations
    return total_loss[0] / len(data_source)
Evaluation Method:
def evaluate(data_source):
    # turn on evaluation mode to disable dropout
    model.eval()
    model.train(False)
    total_loss = 0
    hidden = model.init_hidden(bs_valid)
    for i in range(0, data_source.size(0) - 1, bptt_size):
        data, targets = get_batch(data_source, i, evaluation=True)
        if cuda.is_available():
            hidden = hidden.cuda()
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(output, targets).data
        hidden = Variable(hidden.data)
    return total_loss[0] / len(data_source)
Training Loop:
best_val_loss = None
best_epoch = 0

def run(epochs, lr):
    val_losses = []
    num_epochs = []
    global best_val_loss
    global best_epoch
    for epoch in range(0, epochs):
        train_loss = train(train_set, lr)
        val_loss = evaluate(test_set)
        num_epochs.append(epoch)
        val_losses.append(val_loss)
        print("Train Loss: ", train_loss, " Validation Loss: ", val_loss)
        if not best_val_loss or val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "./4.model.pth")
            best_epoch = epoch
    return num_epochs, val_losses
Loss with epochs (plot omitted):
Getting the output:
model = RNNModel(predictor_size, hidden_size, num_layers, dropout_pct, output_size)
model.load_state_dict(torch.load("./4.model.pth"))
if cuda.is_available():
    model.cuda()
model.eval()
model.train(False)

hidden = model.init_hidden(1)
inp = torch.Tensor(var[105])
input = Variable(inp.contiguous().view(1, 1, predictor_size), volatile=True)
if cuda.is_available():
    input.data = input.data.cuda()
output, hidden = model(input, hidden)
op = output.squeeze().data.cpu()
print(op)
Here I always get the same output irrespective of the datapoint I give as input. Can somebody please tell me what I am doing wrong?
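
No answer is recorded for this question, so here is only a neutral first debugging step (a sketch, reusing the question's var, predictor_size, and model): compare outputs for several distinct datapoints, each with a freshly initialized hidden state; if they are numerically identical, the inference path itself is collapsing the inputs.

import torch

model.eval()
with torch.no_grad():  # modern replacement for volatile=True
    outs = []
    for idx in [0, 50, 105]:  # three arbitrary, distinct datapoints
        hidden = model.init_hidden(1)  # fresh hidden state per sample
        inp = torch.Tensor(var[idx]).contiguous().view(1, 1, predictor_size)
        out, _ = model(inp, hidden)
        outs.append(out.squeeze())
    # True here would mean the model really does ignore its input
    print(torch.allclose(outs[0], outs[1]), torch.allclose(outs[1], outs[2]))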
