PyTorch LSTM has nan for MSELoss

My model is:

class BaselineModel(nn.Module):
    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        # Note: .new(shape) allocates uninitialized memory, so these hidden
        # and cell states are not zeroed.
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)
Training looks like:

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=BATCH_SIZE, shuffle=True, **params)

model = BaselineModel(batch_size=BATCH_SIZE)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

for epoch in range(250):
    # hidden = (torch.zeros(2, 13, 5),
    #           torch.zeros(2, 13, 5))
    # model.hidden = hidden
    for i, data in enumerate(train_loader):
        hidden = model.init_hidden(13)
        inputs = data[0]
        outputs = data[1]
        print('inputs', inputs.size())
        # print('outputs', outputs.size())
        # optimizer.zero_grad()
        model.zero_grad()
        # print('inputs', inputs)
        pred, hidden = model(inputs, hidden)
        loss = loss_fn(pred, outputs)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        print('Epoch: ', epoch, '\ti: ', i, '\tLoss: ', loss)
I already have gradient clipping set, which seems to be the recommended solution. But even after the first step, I get:
Epoch: 0 i: 0 Loss: tensor(nan, grad_fn=)

I suspect your issue has to do with your outputs / data[1] (it would help if you showed examples of your train_set). Running the following piece of code gives no nan, but I forced the shape of the output by hand before calling loss_fn(pred, outputs):
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

class BaselineModel(nn.Module):
    def __init__(self, feature_dim=5, hidden_size=5, num_layers=2, batch_size=32):
        super(BaselineModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=feature_dim,
                            hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        return lstm_out, hidden

    def init_hidden(self, batch_size):
        hidden = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        cell = Variable(next(self.parameters()).data.new(
            self.num_layers, batch_size, self.hidden_size))
        return (hidden, cell)

model = BaselineModel(batch_size=32)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
loss_fn = torch.nn.MSELoss(reduction='sum')

hidden = model.init_hidden(10)
model.zero_grad()
pred, hidden = model(torch.randn(2, 10, 5), hidden)
pred.size()  # torch.Size([2, 10, 5])
outputs = torch.zeros(2, 10, 5)
loss = loss_fn(pred, outputs)
loss
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
print(loss)
Please note that a common reason for nan values is numerical instability in the learning phase, but usually you see finite values for the first steps before the divergence happens, which is apparently not the case here.
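A quick way to check this is to verify the data before the loss is ever computed, and to let autograd point at the first operation that produces a nan during the backward pass. A minimal diagnostic sketch (not a fix; it assumes the loader yields (inputs, targets) pairs as in the question):

import torch

# Raises an error with a traceback at the op that first produced a nan
torch.autograd.set_detect_anomaly(True)

for i, data in enumerate(train_loader):
    inputs, outputs = data[0], data[1]
    assert not torch.isnan(inputs).any(), f"nan in inputs at batch {i}"
    assert not torch.isnan(outputs).any(), f"nan in targets at batch {i}"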

Related

separation of training data pyTorch

I have some code with which I wanted to train a neural network and save the finished model to a file. But I am getting an error due to an incorrect split of the training and validation data. I can't understand why:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ChatBot(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
                  weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
        return hidden

class ChatDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

def train(model, train_loader, loss_fn, optimizer, device):
    model.train()
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        hidden = model.init_hidden(inputs.size(0))
        hidden = tuple([each.data for each in hidden])
        optimizer.zero_grad()
        outputs, _ = model(inputs, hidden)
        loss = loss_fn(outputs.view(-1), targets.view(-1))
        loss.backward()
        optimizer.step()

def evaluate(model, val_loader, loss_fn, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            hidden = model.init_hidden(inputs.size(0))
            hidden = tuple([each.data for each in hidden])
            outputs, _ = model(inputs, hidden)
            total_loss += loss_fn(outputs, targets).item()
    return total_loss / len(val_loader)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = 500
hidden_size = 128
num_layers = 2
output_size = 500

model = ChatBot(input_size, hidden_size, num_layers, output_size)
model = model.to(device)

data = [("Hi, how are you?", "I'm doing well, thank you for asking."),
        ("What's your name?", "I'm a chatbot, I don't have a name."),
        ("What's the weather like?", "I'm not sure, I don't have access to current weather information."),
        ("What's the time?", "I'm not sure, I don't have access to the current time.")]

dataset = ChatDataset(data)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), int(0.2 * len(dataset))])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
for epoch in range(num_epochs):
    train(model, train_loader, loss_fn, optimizer, device)
    val_loss = evaluate(model, val_loader, loss_fn, device)
    print("Epoch [{}/{}], Validation Loss: {:.4f}".format(epoch+1, num_epochs, val_loss))

torch.save(model.state_dict(), 'chatbot_model.pt')
But when I run this code, I get an error:
ValueError                                Traceback (most recent call last)
<ipython-input-8-ae2a6dd1bc7c> in <module>
     78 dataset = ChatDataset(data)
     79
---> 80 train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), int(0.2 * len(dataset))])
     81
     82 train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataset.py in random_split(dataset, lengths, generator)
    345     # Cannot verify that dataset is Sized
    346     if sum(lengths) != len(dataset):    # type: ignore[arg-type]
--> 347         raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
    348
    349     indices = randperm(sum(lengths), generator=generator).tolist()  # type: ignore[call-overload]

ValueError: Sum of input lengths does not equal the length of the input dataset!
I don't know why this error occurs. Everything seems to be correct.
The typecasting of the values to an integer causes the sum of the train and test split sizes to differ from the total number of samples in the dataset.
Not the most ideal code, but replacing it with the following will work:
num_train_images = int(0.8 * len(dataset))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [num_train_images, len(dataset) - num_train_images])
I suspect there could be a loss of precision in this calculation,
[int(0.8 * len(dataset)), int(0.2 * len(dataset))]
so the number of records in the dataset is not fully accounted for. For example:
int(.8 * 56) + int(.2 * 56) = 55
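The same thing happens with the 4-item dataset above: int(0.8 * 4) + int(0.2 * 4) = 3 + 0 = 3, not 4, hence the error. As an aside, recent PyTorch versions (1.13+, if available to you) also accept fractional lengths directly and handle the rounding internally; a minimal sketch, assuming such a version:

# Sketch only: fractional lengths that sum to 1 are supported by
# random_split in PyTorch 1.13+; any rounding remainder is distributed
# automatically, so every sample is accounted for.
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [0.8, 0.2])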

How to replace an LSTM with BERT in a PyTorch classification model

Context
I have an LSTM model with pre-trained word embeddings for a multiclass classification problem with 10 output classes.
# LSTM model
hidden_dim = 64
n_layers = 1
embed_len = 100

class LSTMClassifier(nn.Module):
    def __init__(self, embeddings, n_layers):
        super(LSTMClassifier, self).__init__()
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.n_layers = n_layers
        self.weights = torch.FloatTensor(embeddings)
        self.embedding_layer = nn.Embedding.from_pretrained(self.weights)
        self.lstm = nn.LSTM(input_size=embed_len, hidden_size=hidden_dim,
                            num_layers=self.n_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 10)
        self.activation = nn.Tanh()
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        embeddings = self.dropout(embeddings)
        output, (hidden, cell) = self.lstm(embeddings, (torch.randn(self.n_layers, len(X_batch), hidden_dim),
                                                        torch.randn(self.n_layers, len(X_batch), hidden_dim)))
        output = self.activation(output)
        output = self.dropout(output)
        return self.linear(output[:, -1])
Problem
Now I would like to change the LSTM layer to BERT. I tried it the following way, but when I try to train the BERT model, I get ValueError: too many values to unpack (expected 2). This probably means that the shape of the embeddings passed to self.bert(embeddings) is not right. The embeddings passed to self.bert() have shape torch.Size([256, 64, 100]). What do I need to change so that my BERT model works?
class BERTClassifier(nn.Module):
    def __init__(self, embeddings):
        super(BERTClassifier, self).__init__()
        # Load the BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Embeddings
        self.weights = torch.FloatTensor(embeddings)
        self.embedding_layer = nn.Embedding.from_pretrained(self.weights)
        # Final layer for classification
        self.linear = nn.Linear(self.bert.config.hidden_size, 10)
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        X_batch = X_batch.to(device)
        embeddings = self.embedding_layer(X_batch)
        output, (hidden, cell) = self.bert(embeddings)
        hidden = self.dropout(hidden[:, 0, :])
        logits = self.linear(hidden)
        return logits
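One thing the traceback below shows: BertModel's forward takes input_ids or inputs_embeds as keyword arguments, and a 3-D tensor passed positionally is interpreted as input_ids, whose shape cannot be unpacked into batch_size, seq_length. In addition, inputs_embeds must match BERT's hidden size (768 for bert-base-uncased), and recent transformers versions return a model-output object rather than a tuple. A minimal sketch along those lines; the projection layer self.project is hypothetical, not part of the original model:

import torch
import torch.nn as nn
from transformers import BertModel

class BERTClassifierSketch(nn.Module):
    def __init__(self, embeddings):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.embedding_layer = nn.Embedding.from_pretrained(torch.FloatTensor(embeddings))
        # Hypothetical projection from the 100-dim pretrained embeddings up
        # to BERT's hidden size, so inputs_embeds has the width BERT expects.
        self.project = nn.Linear(self.embedding_layer.embedding_dim,
                                 self.bert.config.hidden_size)
        self.linear = nn.Linear(self.bert.config.hidden_size, 10)
        self.dropout = nn.Dropout(0.4)

    def forward(self, X_batch):
        embeddings = self.project(self.embedding_layer(X_batch))  # (B, T, 768)
        outputs = self.bert(inputs_embeds=embeddings)  # keyword argument, not positional
        hidden = outputs.last_hidden_state             # (B, T, 768)
        hidden = self.dropout(hidden[:, 0, :])         # first-token representation
        return self.linear(hidden)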
Training lines
epochs = 50
learning_rate = 1e-4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bert_classifier = BERTClassifier(word_embeddings_with_stopwords)
bert_classifier = bert_classifier.to(device)

# Freeze the BERT layer
for param in bert_classifier.bert.parameters():
    param.requires_grad = False

# Freeze the embedding layer
for param in bert_classifier.embedding_layer.parameters():
    param.requires_grad = False

loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(bert_classifier.parameters(), lr=learning_rate, weight_decay=1e-5)

results_bert_classifier = train_model(bert_classifier, loss_fn, optimizer, train_loader_with_stopwords, val_loader_with_stopwords, epochs)
Training functions
# Create validation evaluation function
def calc_val_loss_accuracy(model, loss_fn, val_loader, val_losses_list, val_accuracy_list):
    """
    Calculates the validation loss and accuracy during training.
    Adds the calculated validation loss and accuracy to a predefined list.
    """
    model.eval()
    with torch.no_grad():
        Y_shuffled = []
        Y_preds = []
        losses = []
        for X, Y in val_loader:
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())
            Y_shuffled.append(Y)
            Y_preds.append(preds.argmax(dim=-1))
        Y_shuffled = torch.cat(Y_shuffled)
        Y_preds = torch.cat(Y_preds)
        val_accuracy = accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())
        print("Valid Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Valid Acc : {:.3f}".format(val_accuracy))
        val_losses_list.append(np.array(losses).mean())
        val_accuracy_list.append(val_accuracy)

# Create training function
def train_model(model, loss_fn, optimizer, train_loader, val_loader, epochs=10):
    """
    Trains a model for a given set of epochs, with a given loss function and optimizer.
    Returns a dictionary consisting of the train loss and accuracy per epoch
    and val loss and accuracy per epoch.
    """
    model.train()
    train_losses = []
    train_accuracy = []
    val_losses = []
    val_accuracy = []
    # Note: these two lists are never reset between epochs, so the train
    # accuracy printed below is computed over all epochs seen so far.
    Y_shuffled_list = []
    Y_preds_list = []
    for i in range(1, epochs + 1):
        print(f"EPOCH {i}")
        losses = []
        for X, Y in tqdm(train_loader):
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())
            Y_shuffled_list.append(Y)
            Y_preds_list.append(preds.argmax(dim=-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Y_shuffled = torch.cat(Y_shuffled_list)
        Y_preds = torch.cat(Y_preds_list)
        train_accuracy_score = accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())
        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Train Acc : {:.3f}".format(train_accuracy_score))
        calc_val_loss_accuracy(model, loss_fn, val_loader, val_losses, val_accuracy)
        # Save training history
        train_losses.append(np.array(losses).mean())
        train_accuracy.append(train_accuracy_score)
    # Create results
    results = {'train_loss': train_losses,
               'train_acc': train_accuracy,
               'val_loss': val_losses,
               'val_acc': val_accuracy}
    return results
Full Traceback
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-113-c162e15ac6eb> in <module>
17 optimizer = Adam(bert_classifier.parameters(), lr=learning_rate, weight_decay=1e-5)
18
---> 19 results_bert_classifier = train_model(bert_classifier, loss_fn, optimizer, train_loader_with_stopwords, val_loader_with_stopwords, epochs)
4 frames
<ipython-input-76-af9148b7ec20> in train_model(model, loss_fn, optimizer, train_loader, val_loader, epochs)
21 for X, Y in tqdm(train_loader):
22
---> 23 preds = model(X)
24
25 loss = loss_fn(preds, Y)
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-112-cbe2b66bfaf1> in forward(self, X_batch)
25 embeddings = embeddings.reshape(batch_size, sequence_length, hidden_size)
26
---> 27 output, hidden = self.bert(embeddings)
28
29 hidden = self.dropout(hidden[:, 0, :])
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.8/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
973 raise ValueError("You have to specify either input_ids or inputs_embeds")
974
--> 975 batch_size, seq_length = input_shape
976 device = input_ids.device if input_ids is not None else inputs_embeds.device
977
ValueError: too many values to unpack (expected 2)

How to make prediction on pytorch emotion detection model

I made a CNN model for emotion recognition on 5 emotions, and I wanted to test it on a single image to get the individual class predictions for each emotion.
Evaluating the model works, but I can't seem to find how to make a prediction with a single image. How can I do that?
The Model
def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ELU(inplace=True)]
    if pool:
        layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 128)
        self.conv2 = conv_block(128, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.drop1 = nn.Dropout(0.5)
        self.conv3 = conv_block(128, 256)
        self.conv4 = conv_block(256, 256, pool=True)
        self.res2 = nn.Sequential(conv_block(256, 256), conv_block(256, 256))
        self.drop2 = nn.Dropout(0.5)
        self.conv5 = conv_block(256, 512)
        self.conv6 = conv_block(512, 512, pool=True)
        self.res3 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.drop3 = nn.Dropout(0.5)
        self.classifier = nn.Sequential(nn.MaxPool2d(6),
                                        nn.Flatten(),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.drop1(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.drop2(out)
        out = self.conv5(out)
        out = self.conv6(out)
        out = self.res3(out) + out
        out = self.drop3(out)
        out = self.classifier(out)
        return out
The fit_one_cycle function is called to train the model:

@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    torch.cuda.empty_cache()
    history = []
    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
                                                steps_per_epoch=len(train_loader))
    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()
            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history
This returns the accuracy and loss; I want to change it so that it returns prediction percentages for each class.
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        return loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss, 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))
Evaluating the model works, but I can't seem to find how to make a prediction with a single image. How can I do that?

Simply, if you have a single image, make sure to:
- add an extra dimension of size 1 at the beginning (the batch dimension)
- use CHW format instead of HWC (or specify that within pytorch)

For example:

my_model = CNN(...)
random_image = torch.randn(1, 3, 100, 100)  # 3 channels, 100x100 img
BTW, your accuracy could be written a little more simply like this:

def accuracy(outputs, labels):
    preds = torch.argmax(outputs, dim=1)
    return torch.sum(preds == labels) / len(preds)
Getting class probability

Similar to argmax, you can use softmax, which transforms logits (the unnormalized scores output by your network) into probabilities:

def probability(outputs):
    return torch.nn.functional.softmax(outputs, dim=1)
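Putting the two together, a minimal single-image prediction sketch using the ResNet and ImageClassificationBase classes defined above (it assumes 48x48 single-channel inputs, which make the MaxPool2d(6) head produce a 1x1 feature map; adjust channels and size to your data):

import torch
import torch.nn.functional as F

model = ResNet(in_channels=1, num_classes=5)
model.eval()                        # disable dropout for inference

image = torch.randn(1, 48, 48)      # one CHW image (stand-in for real data)
batch = image.unsqueeze(0)          # add batch dim -> (1, 1, 48, 48)

with torch.no_grad():
    logits = model(batch)                       # shape (1, 5)
    probs = F.softmax(logits, dim=1).squeeze(0) # per-class probabilities

for i, p in enumerate(probs):
    print(f"class {i}: {p.item():.3f}")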

How to solve size mismatch error in pytorch?

I am trying to create a logistic regression model using the CIFAR10 data in PyTorch. After running the model for evaluation, I run into this error:
RuntimeError: size mismatch, m1: [750 x 4096], m2: [1024 x 10] at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\aten\src\TH/generic/THTensorMath.cpp:136
It seems like input_size is creating the problem; I don't know, I am new to this. Please let me know what changes I should make in order to overcome this error.
These are the hyperparameters:
batch_size = 100
learning_rate = 0.001
# Other constants
input_size = 4*4*64
num_classes = 10
This is the cell that downloads and splits the dataset into train, validation and test.
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

testset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=False, download=False, transform=transform)
trainvalset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=True, download=False, transform=transform)
trainset, valset = torch.utils.data.random_split(trainvalset, [45000, 5000])  # 10% for validation

train_loader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=1000, shuffle=False)
This is the architecture of my model.
class CifarModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.view(-1, 64*8*8)
        # xb = xb.reshape(-1, 784)
        print(xb.shape)
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        return loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        acc = accuracy(out, labels)          # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc.detach()}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()     # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

model = CifarModel()

def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

evaluate(model, val_loader)
Here you are specifying that the linear layer takes input_size = 4*4*64 = 1024 input features and produces num_classes = 10 outputs:

input_size = 4*4*64
num_classes = 10

Your forward function does not reflect this:

xb = xb.view(-1, 64*8*8)  # you get 750 x 4096
out = self.linear(xb)     # but the linear layer's weight is 1024 x 10
                          # (input_size = 4*4*64 = 1024, num_classes = 10)

Modify it like this, so the number of columns matches the layer's input size:

xb = xb.view(-1, 64*4*4)  # now every row has 1024 features
out = self.linear(xb)     # m1: N x 1024, m2: 1024 x 10 -> shapes line up
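Note that with this fix each 3x32x32 CIFAR10 image (3072 values) ends up spread over three rows of 1024, so rows no longer correspond one-to-one to images. A minimal sketch of the more conventional setup for a per-image logistic regression, assuming you want exactly one prediction per image:

import torch
import torch.nn as nn

input_size = 3 * 32 * 32   # 3072 features: one flattened CIFAR10 image
num_classes = 10

class CifarLogistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.view(xb.size(0), -1)  # (batch, 3072): one row per image
        return self.linear(xb)

logreg = CifarLogistic()
print(logreg(torch.randn(5, 3, 32, 32)).shape)  # torch.Size([5, 10])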

Pytorch RNN always gives the same output for multivariate time series

I have time series data that looks something like this (plot omitted).
I am trying to model it with a sequence-to-sequence RNN in PyTorch. It trains well and I can see the loss going down, but on testing it gives the same output irrespective of the input.
My Model:
class RNNModel(nn.Module):
    def __init__(self, predictor_size, hidden_size, num_layers, dropout=0.3, output_size=83):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.rnn = nn.GRU(predictor_size, hidden_size, num_layers=num_layers, dropout=dropout)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.init_weights()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        output, hidden = self.rnn(input, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        return Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
Train Method:
def train(data_source, lr):
    # turn on training mode that enables dropout
    model.train()
    total_loss = 0
    hidden = model.init_hidden(bs_train)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt_size)):
        data, targets = get_batch(data_source, i)
        # Starting each batch, we detach the hidden state from how it was previously
        # produced, so that the model doesn't try to backprop all the way to the start
        # of the dataset; unrolling of the graph will go from the last iteration to
        # the first iteration
        hidden = Variable(hidden.data)
        if cuda.is_available():
            hidden = hidden.cuda()
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()
        # clip_grad_norm to prevent gradient explosion
        torch.nn.utils.clip_grad_norm(model.parameters(), clip)
        optimizer.step()
        total_loss += len(data) * loss.data
    # return accumulated loss for all the iterations
    return total_loss[0] / len(data_source)
Evaluation Method:
def evaluate(data_source):
    # turn on evaluation mode to disable dropout
    model.eval()
    model.train(False)
    total_loss = 0
    hidden = model.init_hidden(bs_valid)
    for i in range(0, data_source.size(0) - 1, bptt_size):
        data, targets = get_batch(data_source, i, evaluation=True)
        if cuda.is_available():
            hidden = hidden.cuda()
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(output, targets).data
        hidden = Variable(hidden.data)
    return total_loss[0] / len(data_source)
Training Loop:
best_val_loss = None
best_epoch = 0

def run(epochs, lr):
    val_losses = []
    num_epochs = []
    global best_val_loss
    global best_epoch
    for epoch in range(0, epochs):
        train_loss = train(train_set, lr)
        val_loss = evaluate(test_set)
        num_epochs.append(epoch)
        val_losses.append(val_loss)
        print("Train Loss: ", train_loss, " Validation Loss: ", val_loss)
        if not best_val_loss or val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "./4.model.pth")
            best_epoch = epoch
    return num_epochs, val_losses
Loss with epochs (plot omitted).
Getting the output:
model = RNNModel(predictor_size, hidden_size, num_layers, dropout_pct, output_size)
model.load_state_dict(torch.load("./4.model.pth"))
if cuda.is_available():
    model.cuda()
model.eval()
model.train(False)

hidden = model.init_hidden(1)
inp = torch.Tensor(var[105])
input = Variable(inp.contiguous().view(1, 1, predictor_size), volatile=True)
if cuda.is_available():
    input.data = input.data.cuda()
output, hidden = model(input, hidden)
op = output.squeeze().data.cpu()
print(op)
Here I always get the same output irrespective of the datapoint I give as input. Can somebody please tell me what I am doing wrong?
