So I have been making an A.I. program with 0 experience in coding in python just as a hobby. While making the training file, I faced an error and I am not able to solve it.
This is my Training file-
import numpy as np
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from NeuralNetwork import bag_of_words , tokenize , stem
from Brain import NeuralNet
# Load the intent definitions (each has a tag and example patterns).
with open('intents.json', 'r') as f:
    intents = json.load(f)

# Tokenise every pattern and remember which tag each token list belongs to.
all_words = []
tags = []
xy = []
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words += w
        xy.append((w, tag))

# Vocabulary: stemmed tokens without punctuation, de-duplicated and sorted.
ignore_words = [',', '?', '/', '.', '!']
all_words = sorted({stem(w) for w in all_words if w not in ignore_words})
tags = sorted(set(tags))

# One bag-of-words vector per pattern; the label is the index of its tag.
x_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])
y_train = np.array([tags.index(tag_name) for _, tag_name in xy])

# Hyper-parameters; the layer sizes are derived from the prepared data.
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(x_train[0])
hidden_size = 8
output_size = len(tags)
print("Training the model...")
class ChatDataset(Dataset):
    """Map-style dataset over the module-level ``x_train`` / ``y_train`` arrays."""

    def __init__(self):
        self.x_data, self.y_data = x_train, y_train
        self.n_samples = len(x_train)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label

    def __len__(self):
        """Return the number of training samples."""
        return self.n_samples
dataset = ChatDataset()
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        # CrossEntropyLoss expects integer (long) class indices.
        labels = labels.to(dtype=torch.long).to(device)

        loss = criterion(model(words), labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch every 100 epochs.
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'Final Loss : {loss.item():.4f}')

# Save the weights together with everything needed to rebuild the model.
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags,
}
FILE = "TrainData.pth"
torch.save(data, FILE)
print(f"Training Completed, File Saved to {FILE}")
This is the program which contains all the layers of the neural network which I have named as Brain.py-
import torch.nn as nn
class NeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with a ReLU after the first two."""

    def __init__(self,input_size,hidden_size,num_classes):
        """Create the three linear layers and the shared ReLU activation."""
        super(NeuralNet,self).__init__()
        self.l1 = nn.Linear(input_size,hidden_size)
        # NOTE(review): forward() passes l1's output (hidden_size features) into l2,
        # but l2 is declared with input_size input features. The two only agree when
        # input_size == hidden_size; this is the shape mismatch in the error below.
        self.l2 = nn.Linear(input_size,hidden_size)
        self.l3 = nn.Linear(hidden_size,num_classes)
        self.relu = nn.ReLU()

    def forward(self,x):
        """Apply l1 -> ReLU -> l2 -> ReLU -> l3 and return the raw output of l3."""
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        # No activation after l3: its output is returned unchanged.
        out = self.l3(out)
        return out
And this is the error which VSCode is sending me-
File "f:/Aryav files/J.A.R.V.I.S/J.A.R.V.I.S. Mark III/Train.py", line 81, in <module>
outputs = model(words)
File "C:\Users\user\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\nn\modules\linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\user\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\nn\functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x8 and 15x8)
I am a very beginner in programming so please tell me the exact line no. and the line which I need to replace it with. Thank You
Refer to PyTorch documentation, nn.Linear has input size and output size as arguments.
So, in your network here:
def __init__(self,input_size,hidden_size,num_classes):
    """Build the layers (quoted from the question; l2's input size is the problem)."""
    super(NeuralNet,self).__init__()
    self.l1 = nn.Linear(input_size,hidden_size)
    # l2 is declared with input_size input features, the same as l1.
    self.l2 = nn.Linear(input_size,hidden_size)
    self.l3 = nn.Linear(hidden_size,num_classes)
    self.relu = nn.ReLU()
l1 takes input_size of features and outputs hidden_size of features.
l2 also takes input_size of features and outputs hidden_size of features.
It will only work if input_size == hidden_size, which I think, in this case, is not the same.
What you want to achieve is to specify all the hidden layers with hidden_size nodes, right?
Then, a simple fix would be
self.l1 = nn.Linear(input_size,hidden_size)
self.l2 = nn.Linear(hidden_size,hidden_size)
self.l3 = nn.Linear(hidden_size,num_classes)
Then, the inputs and outputs between layers should match.
Side note: I think you should use a separate ReLU instance for each activation. Reusing a single instance is considered bad practice; the reason is explained in the linked post.
In your second layer you are passing input_size as the input dimension instead of hidden_size (which is the output size of the previous layer).
Use the following lines in your model:
self.l1 = nn.Linear(input_size,hidden_size)
self.l2 = nn.Linear(hidden_size,hidden_size)
self.l3 = nn.Linear(hidden_size,num_classes)
Related
I am working on a multiclass image classification problem that has 4k labeled images. Currently, I am using cross-validation. However, I want to try nested cross-validation for the problem in PyTorch, but I couldn't find an implementation.
Is it possible to use nested cross-validation in PyTorch? If so, how?
I built a simple pipeline below. Is it correct to implement nested cross-validation like this?
import torch
from torch.utils.data import DataLoader,SubsetRandomSampler
from sklearn.model_selection import KFold
from torchvision import datasets
input_size = (256,3,224,244)
target_size = (256,)


class Dataset(datasets.VisionDataset):
    """Synthetic image dataset: random images with random labels in {0, 1, 2}."""

    def __init__(self):
        # The data is generated in memory, so there is no root directory.
        # (The original passed the instance itself as ``root``.)
        super().__init__(root=None)
        self.images = torch.rand(input_size).float()
        self.targets = torch.randint(0, 3, target_size)

    def __getitem__(self, index: int) -> tuple:
        """Return the ``(image, target)`` pair stored at ``index``."""
        return self.images[index], self.targets[index]

    def __len__(self) -> int:
        """Return the number of generated images."""
        return len(self.images)
class BasicModel(torch.nn.Module):
    """Tiny CNN: 5x5 convolution, global average pooling, 3-way linear head."""

    def __init__(self) -> None:
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 16, kernel_size=(5, 5))
        self.adp = torch.nn.AdaptiveAvgPool2d(1)
        self.linear = torch.nn.Linear(16, 3)

    def forward(self, x):
        """Return one row of 3 logits per input image."""
        pooled = self.adp(self.conv(x))
        # Collapse the pooled 1x1 spatial dimensions into a flat feature vector.
        features = torch.flatten(pooled, 1)
        return self.linear(features)
data_ids = [*range(input_size[0])]
data = Dataset()
criterion = torch.nn.CrossEntropyLoss()
k_fold = 5
kfold_test = KFold(n_splits=k_fold, shuffle=True)
num_epochs = 2

# Outer loop: hold out one test fold, use the remaining ids for train/validation.
for test_fold, (remain_ids, test_ids) in enumerate(kfold_test.split(data_ids)):
    test_sampler = SubsetRandomSampler(test_ids)
    testLoader = DataLoader(data, sampler=test_sampler, batch_size=2)

    # Inner loop: split the remaining ids once per outer fold. Splitting inside
    # the epoch loop would reshuffle every epoch and leak validation samples
    # into training.
    kfold_val = KFold(n_splits=k_fold - 1, shuffle=True)
    for val_fold, (train_pos, val_pos) in enumerate(kfold_val.split(remain_ids)):
        # KFold.split returns positions into ``remain_ids``, not dataset ids.
        # Map them back, otherwise the inner folds can overlap the test fold.
        train_ids = remain_ids[train_pos]
        val_ids = remain_ids[val_pos]
        train_sampler = SubsetRandomSampler(train_ids)
        trainLoader = DataLoader(data, sampler=train_sampler, batch_size=2)
        val_sampler = SubsetRandomSampler(val_ids)
        valLoader = DataLoader(data, sampler=val_sampler, batch_size=2)

        # Start every inner fold from fresh weights so folds stay independent.
        model = BasicModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        for epoch in range(num_epochs):
            model.train()
            for image, target in trainLoader:
                # Only the forward pass and the loss run under autocast.
                with torch.cuda.amp.autocast():
                    output = model(image)
                    loss = criterion(output, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            print(f"train loss:{loss.item()}")

            model.eval()
            with torch.no_grad():
                for image, target in valLoader:
                    output = model(image)
                    loss = criterion(output, target)
            print(f"val loss:{loss.item()}")

    # NOTE(review): this evaluates the model of the last inner fold. A complete
    # nested CV would pick the best configuration from the inner folds, retrain
    # it on all of ``remain_ids`` and evaluate that model here.
    model.eval()
    with torch.no_grad():
        for image, target in testLoader:
            output = model(image)
            loss = criterion(output, target)
    print(f"test loss:{loss.item()}")
I have written the line "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')", but I am still getting the following error. I have provided all the code that I have written so far; please help me with this.
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_addmm)
Train.py File
import json
from nltk_utils import tokenize, stem, bag_of_words
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
# Load the intent definitions (each has a tag and example patterns).
with open('intents.json', 'r') as f:
    intents = json.load(f)

all_words = []  # every token from every pattern
tags = []       # one entry per intent
xy = []         # (token list, tag) pairs, one per pattern
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

# Drop punctuation tokens, stem the rest, then de-duplicate and sort.
ignore_words = ['?', '!', '.', ',']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))

# One bag-of-words vector per pattern; the label is the index of its tag.
x_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    bag = bag_of_words(pattern_sentence, all_words)
    x_train.append(bag)
    label = tags.index(tag)
    y_train.append(label)  # class index, used with CrossEntropyLoss below
x_train = np.array(x_train)
y_train = np.array(y_train)
class ChatDataset(Dataset):
    """Map-style dataset over the module-level x_train / y_train arrays."""

    def __init__(self):
        self.n_samples = len(x_train)
        self.x_data = x_train
        self.y_data = y_train

    # Supports dataset[idx].
    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples
# Hyper-parameters; the layer sizes are derived from the prepared data.
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(x_train[0])
learning_rate = 0.001
num_epochs = 1000
print(input_size, len(all_words))
print(output_size, tags)

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# The model and each batch are moved to the same device before the forward pass.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for(words, labels) in train_loader:
        words = words.to(device)
        # Cast the labels to long before moving them to the device.
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)
        # forward
        outputs = model(words)
        loss = criterion(outputs, labels)
        # backward and optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Report the loss of the last mini-batch every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch + 1}/{num_epochs}, loss={loss.item():.4f}')
print(f'final loss: {loss.item():.4f}')

# Save the weights together with the sizes and vocabulary chat.py reads back.
data = {
    "model_state" : model.state_dict(),
    "input_size" : input_size,
    "output_size" : output_size,
    "hidden_size" : hidden_size,
    "all_words" : all_words,
    "tags" : tags,
}
FILE = "data.pth"
torch.save(data, FILE)
print(f'training complete , file saved to {FILE}')
chat.py file
import random
import json
import torch
from model import NeuralNet
from nltk_utils import bag_of_words, tokenize
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cuda')
with open('intents.json', 'r') as f:
    intents = json.load(f)

# Restore the sizes, vocabulary and weights written by the training script.
FILE = 'data.pth'
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]

# The model is moved to `device` here.
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

bot_name = 'Sam'
print("Let's chat: type 'quit' to exit")
while True:
    sentence = input('You: ')
    # NOTE(review): the prompt says 'quit' but only the exact string "Quit" exits.
    if sentence == "Quit":
        break
    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    # NOTE(review): X is built from a NumPy array and is never moved to `device`,
    # while the model was moved with .to(device) above. On a CUDA machine the two
    # are on different devices, which matches the reported RuntimeError;
    # torch.from_numpy(X).to(device) would put them on the same one.
    X = torch.from_numpy(X)
    output = model(X)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    # Softmax probability of the predicted class.
    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    # Only answer when the probability is above 0.75.
    if prob.item() > 0.75:
        for intent in intents["intents"]:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...:(")
After running chat.py, the runtime error is raised when the chatbot tries to send its response.
I should get a response from the chatbot, but I am getting the following error:
Error Image
I want to know how much the fine-tuned model improves compared to the model without fine-tuning. I want to compare the performance of the pre-trained model (BERT) and the fine-tuned model (fine-tuned BERT), obtained by fine-tuning the pre-trained model on text classification. I know how to fine-tune BERT for text classification, but I am not very clear on how to use BERT directly for classification. What should I do? The following is the code for fine-tuning the model; how can I rewrite it to use the pre-trained model directly?
<!-- language: python -->
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.utils.data as Data
import torch.optim as optim
from sklearn.metrics import accuracy_score,matthews_corrcoef
from sklearn.model_selection import train_test_split
tokenizer_model = BertTokenizer.from_pretrained('bert-base-uncased')
pretrained_model = BertModel.from_pretrained("bert-base-uncased")
class MyDataSet(Data.Dataset):
    """Pairs each text with its label and tokenises the text on access."""

    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.tokenizer = tokenizer_model

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, label)`` for sample ``idx``."""
        text = self.data[idx]
        label = self.label[idx]
        # Pad or truncate every text to exactly 256 tokens.
        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            padding='max_length',
            max_length=256,
            truncation=True,
        )
        # Drop the leading dimension of size 1; token_type_ids are not returned.
        return encoded.input_ids.squeeze(0), encoded.attention_mask.squeeze(0), label

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)
# Each line of the input file is "<LEVEL>\t<text>".
data, label = [], []
with open(path) as f:
    for line in f:
        a, b = line.strip().split('\t')
        data.append(b)
        # LOW -> 0, MEDIUM -> 1, anything else -> 2.
        label.append(0 if a == 'LOW' else 1 if a == 'MEDIUM' else 2)

# Stratified 85/15 split with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(
    data, label, test_size=0.15, random_state=32, stratify=label
)
dataset_train = MyDataSet(train_x, train_y)
dataset_test = MyDataSet(test_x, test_y)
dataloader_train = Data.DataLoader(
    dataset_train, batch_size=128, shuffle=True, num_workers=32, pin_memory=True
)
dataloader_test = Data.DataLoader(
    dataset_test, batch_size=128, shuffle=True, num_workers=32, pin_memory=True
)
class MyModel(nn.Module):
    """The shared pretrained BERT encoder followed by a linear 3-class head."""

    def __init__(self):
        super().__init__()
        self.bert = pretrained_model
        self.linear = nn.Linear(768, 3)

    def forward(self, input_ids, attention_mask):
        """Return the 3 class logits computed from BERT's pooled output."""
        pooled = self.bert(input_ids, attention_mask).pooler_output
        print(pooled.shape) # torch.Size([1, 768])
        return self.linear(pooled)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Always build the model; only wrap it in DataParallel when several GPUs exist.
# (Building it only inside the multi-GPU branch leaves `model` undefined otherwise.)
model = MyModel()
if torch.cuda.device_count() > 1:
    print("Use", torch.cuda.device_count(), 'gpus')
    model = nn.DataParallel(model)
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# The log file names do not depend on the epoch, so build them once.
savename_train = str(path) +'_' + str(name) + '_train' + '.txt'
savename_eval = str(path) +'_' + str(name) + '_val' + '.txt'

for epoch in range(10):
    # ---- training ----
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for input_ids, attention_mask, label in dataloader_train:
        train_input_ids = input_ids.to(device)
        train_attention_mask = attention_mask.to(device)
        train_label = label.to(device)

        optimizer.zero_grad()
        pred = model(train_input_ids, train_attention_mask)
        loss = loss_fn(pred, train_label)
        loss.backward()
        optimizer.step()

        # Accumulate over the whole epoch instead of keeping only the last batch.
        n_batch = train_label.size(0)
        batch_correct = (torch.argmax(pred, dim=1) == train_label).sum().item()
        loss_sum += loss.item() * n_batch
        n_correct += batch_correct
        n_seen += n_batch
        print('epoch:', epoch)
        print('Loss:', loss.item())
        print('acc:', batch_correct / n_batch)

    n_seen = max(n_seen, 1)  # guard against an empty loader
    with open(savename_train, 'a') as f:
        f.write(str(epoch)+'\t'+str(loss_sum / n_seen)+'\t'+str(n_correct / n_seen)+'\n')

    # ---- evaluation on the held-out split ----
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for input_ids, attention_mask, label in dataloader_test:
            validation_input_ids = input_ids.to(device)
            validation_attention_mask = attention_mask.to(device)
            validation_label = label.to(device)

            pred = model(validation_input_ids, validation_attention_mask)
            loss = loss_fn(pred, validation_label)

            n_batch = validation_label.size(0)
            loss_sum += loss.item() * n_batch
            n_correct += (torch.argmax(pred, dim=1) == validation_label).sum().item()
            n_seen += n_batch

    n_seen = max(n_seen, 1)
    print('acc:', n_correct / n_seen)
    with open(savename_eval, 'a') as f:
        f.write(str(epoch)+'\t'+str(loss_sum / n_seen)+'\t'+str(n_correct / n_seen)+'\n')
What you are trying to do does not make sense. The plain BERT model was pretrained using a combination of a masked language modelling objective and next sentence prediction. So all it can do is predict masked tokens and predict whether a given pair of sentences can follow each other in a text. Most importantly, it can provide embeddings.
To use it for classification you have to add a classification head to the end of the model. Initially, the weights of that layer are randomly initialised. If you do not fine-tune the last layer, what do you really expect from random weights?
If you really want to compare the fine-tuned model to a baseline, take the embedding vector from BERT and use a traditional ML model such as an SVM or a tree-based classifier.
I'm a beginner with LSTMs and PyTorch. I am trying to create a model for a bursty traffic prediction scenario. It is deliberately a kind of overfit model. First, it learns to match all the next values, using x_data as both inputs and targets. You can identify the shape of x_data by looking at the main block. It then tries to predict the whole traffic shape from the first 100 seed values. But this model gives me an error when running "outputs, hidden = model(inputs)". Please help me solve this error.
Here is my complete code,
import numpy as np
import torch
import torch,torch.nn as nn
from torch import Tensor
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from scipy.signal import savgol_filter
# Choose the compute device once and report the choice.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")
class FPredRNN(nn.Module):
    """Stacked LSTM followed by dropout and a linear layer with one output feature."""

    def __init__(self, input_size, hidden_size, num_layers, dropout_val = 0.1):
        super(FPredRNN, self).__init__()
        self.input_size = input_size
        self.nh = hidden_size  # LSTM hidden size
        self.nl = num_layers   # number of stacked LSTM layers
        self.lstm = nn.LSTM(self.input_size, self.nh, self.nl, dropout = dropout_val)
        self.dropout = nn.Dropout(dropout_val)
        self.linear = nn.Linear(self.nh, 1)

    # NOTE(review): `hidden` has no default value, so calling model(inputs) with a
    # single argument raises the TypeError shown below.
    def forward(self, x, hidden, steps = 1000, eval = False):
        """Run the LSTM over x; with eval=True, also generate `steps` further values."""
        predictions = []
        batch_size = x.size(0)
        # NOTE(review): the value returned by init_hidden() is discarded here, and
        # init_hidden() returns a tuple, which has no .size() method.
        if(hidden.size(0) != batch_size):
            self.init_hidden(batch_size)
        l_out, hidden = self.lstm(x, hidden)
        # Flatten to one row per LSTM output vector before the linear layer.
        l_out = l_out.contiguous().view(-1, self.nh)
        out = self.dropout(l_out)
        out = self.linear(out)
        #out = out.view(batch_size, -1)
        #out = out[:,-1]
        if(eval):
            # Feed the last output back in as the next input, `steps` times.
            eval_input = out[-1:]
            for i in range(steps):
                lstm_out, hidden = self.lstm(eval_input, hidden)
                linear_out = self.linear(lstm_out)
                predictions += [linear_out]
                eval_input = linear_out
            out = torch.stack(predictions).squeeze()
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a pair of zero tensors of shape (num_layers, batch_size, hidden_size)."""
        weight = next(self.parameters()).data
        hidden = (weight.new(self.nl, batch_size, self.nh).zero_().to(device), weight.new(self.nl, batch_size, self.nh).zero_().to(device))
        #hidden = ((self.nl, batch_size, self.nh).zero_().to(device), (self.nl, batch_size, self.nh).zero_().to(device))
        return hidden
if __name__ == "__main__":
    # One row holding a 2000-sample traffic trace.
    x_data = np.empty((1, 2000))
    y_data = np.empty((1, 1))
    for n in [30000]:
        # GenerateTraffic is not defined in the code shown here.
        traffic_generator = GenerateTraffic()
        bursty_traffic, a_t = traffic_generator.create_bursty_traffic(n_d=n)
        detected, attempted = traffic_generator.simulate_bursty_traffic_arrivals(bursty_traffic, backoff_bool= True)
        # Smooth the detected-arrivals series before storing it.
        smooth_x = savgol_filter(detected, 97, 2)
        x_data[(n//10000)-3] = smooth_x
    # Next-value prediction: the targets are the inputs shifted by one sample.
    inputs = x_data[:, :1999]
    targets = x_data[:, 1:2000]
    inputs = torch.from_numpy(inputs)
    targets = torch.from_numpy(targets)
    print(inputs.size(1))
    print(inputs.size(0))
    #print(hidden.size(0))
    model = FPredRNN(input_size = inputs.size(1), hidden_size = 1100, num_layers = 2, dropout_val = 0.1)
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)
    # Train Model
    n_epochs = 2
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()
        # NOTE(review): the result of .to(device) is not assigned, so `inputs`
        # itself is unchanged by this line.
        inputs.to(device)
        # NOTE(review): forward() requires a `hidden` argument that is not passed
        # here; this is the call that fails in the traceback below.
        outputs, hidden = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    # Test Model
    seed_lenght = 100
    seed = inputs[:seed_lenght]
    # NOTE(review): `hidden` is missing here as well, and forward() returns a
    # (out, hidden) tuple that is passed to torch.cat below.
    outt = model(seed, steps=1000, eval = True)
    test_out = torch.cat((seed.squeeze(), outt))
Error code
TypeError Traceback (most recent call last)
<ipython-input-2-0e63f8c64103> in <module>()
101 optimizer.zero_grad()
102 inputs.to(device)
--> 103 outputs, hidden = model(inputs)
104 loss = criterion(outputs, targets)
105 loss.backward()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
TypeError: forward() missing 1 required positional argument: 'hidden'
The forward method needs another value inputted, hidden. I think what you want to do is:
# NOTE(review): the question declares init_hidden(self, batch_size), so this call
# needs a batch_size argument. init_hidden also returns a tuple, while the
# question's forward() calls hidden.size(0) on it.
hidden = model.init_hidden()
outputs, hidden = model(inputs, hidden)
This way the first hidden input is just a tensor full of zeros, and each subsequent call receives the hidden state returned by the previous call.
You don't have to provide hidden for recurrent layers.
hidden should only be passed to recurrent layers if element from next batch is a continuation of previous element
It is almost never the case
hidden is implicitly initialized with zeros, documentation here, please read it before using
Given that, your model can be (and probably can be simplified further, what are you trying to achieve with nn.Linear?):
class FPredRNN(nn.Module):
    """Stacked LSTM with a linear head that predicts one value per time step.

    In training mode ``forward`` returns the per-step predictions. In
    evaluation mode (after ``model.eval()``) it continues from the last
    prediction and returns ``steps`` generated values instead.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout_val=0.1):
        super(FPredRNN, self).__init__()
        self.input_size = input_size
        self.nh = hidden_size
        self.nl = num_layers
        self.lstm = nn.LSTM(self.input_size, self.nh, self.nl, dropout=dropout_val)
        self.dropout = nn.Dropout(dropout_val)
        self.linear = nn.Linear(self.nh, 1)

    def forward(self, x, steps=1000):
        """Run the LSTM over ``x`` and, in evaluation mode, generate ``steps`` values.

        Args:
            x: input of shape ``(seq_len, input_size)`` or
                ``(seq_len, batch, input_size)``.
            steps: number of values to generate in evaluation mode.

        Returns:
            Training mode: a ``(seq_len * batch, 1)`` tensor of predictions.
            Evaluation mode: the stacked generated values with size-1
            dimensions squeezed out.
        """
        # No initial state is passed, so nn.LSTM starts from zeros.
        l_out, hidden = self.lstm(x)
        step_out = self.linear(self.dropout(l_out))

        # ``self.eval`` is a bound method and therefore always truthy; the mode
        # flag that model.train() / model.eval() toggle is ``self.training``.
        if self.training:
            return step_out.reshape(-1, 1)

        # The rollout feeds the 1-feature linear output back into the LSTM, so
        # it only works when input_size == 1.
        predictions = []
        eval_input = step_out[-1:]
        for _ in range(steps):
            lstm_out, hidden = self.lstm(eval_input, hidden)
            linear_out = self.linear(lstm_out)
            predictions.append(linear_out)
            eval_input = linear_out
        return torch.stack(predictions).squeeze()
Please read nn.LSTM documentation in-depth, it will help with recurrent layers.
I am trying to make a LSTM VAE as a learning stage for future work with pytorch.
I managed to get it to work on some small tester data but now that I want to run it on my actual data I am continuously getting this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [10, 40]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Any help on how to solve this error and generally just make my code more efficient would be great!
Here is the appropriate section of the traceback:
File "<ipython-input-16-7fe0e9e30e5d>", line 190, in <module>
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
File "<ipython-input-16-7fe0e9e30e5d>", line 166, in train_batch
reconstruction, hidden, kld = model(x, G_inp, None, None)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 93, in forward
mu, logvar, z = self.encoder(x)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 37, in forward
out1, self.hidden = self.lstm(x, self.hidden)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py", line 582, in forward
self.dropout, self.training, self.bidirectional, self.batch_first)
(Triggered internally at /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
allow_unreachable=True) # allow_unreachable flag
The code is below: (excuse all the .clone(), I read that this could be a solution so I was testing it out everywhere with no help)
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from keras.preprocessing.text import Tokenizer
from keras import preprocessing
import torch.nn.functional as F
import math
import random
#encoder
class Encoder(nn.Module):
    """Bidirectional LSTM encoder that maps a sequence to a latent Gaussian.

    ``forward`` returns ``(mu, logvar, z)``, where ``z`` is a reparameterised
    sample from N(mu, exp(logvar)).
    """

    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size):
        super(Encoder, self).__init__()
        self.n_layers_E = n_layers_E
        self.n_hidden_E = n_hidden_E
        self.batch_size = batch_size
        self.dim_z = dim_z
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=n_hidden_E,
                            num_layers=n_layers_E, batch_first=True, bidirectional=True)
        # Both heads read the encoder's own state (2 directions * n_hidden_E).
        # The log-variance head was sized with the generator's n_hidden_G.
        self.hidden_to_mu = nn.Linear(2 * n_hidden_E, dim_z)
        self.hidden_to_logvar = nn.Linear(2 * n_hidden_E, dim_z)
        self.hidden = (torch.zeros(2 * n_layers_E, batch_size, n_hidden_E),
                       torch.zeros(2 * n_layers_E, batch_size, n_hidden_E))

    def forward(self, x):
        """Encode ``x`` of shape ``(batch, seq_len, embedding_dim)``."""
        batch_size, n_seq, n_embed = x.size()
        # NOTE(review): the state is kept on ``self`` and fed into the next call,
        # so consecutive batches share one autograd graph. This is the in-place
        # RuntimeError the question reports; the training loop has to reset (or
        # detach) ``self.hidden`` between batches.
        out1, self.hidden = self.lstm(x, self.hidden)
        # h_n is (num_layers * 2, batch, hidden). Take the last layer's two
        # directions and put the batch axis first before flattening; a plain
        # .view(batch, -1) would mix states of different samples.
        h_n = self.hidden[0]
        e_hidden = h_n[-2:].transpose(0, 1).reshape(batch_size, 2 * self.n_hidden_E)
        mu = self.hidden_to_mu(e_hidden)
        logvar = self.hidden_to_logvar(e_hidden)
        # Reparameterisation trick; the noise is created on mu's device.
        epsilon = torch.randn_like(mu)
        z = mu + torch.exp(logvar * 0.5) * epsilon
        return mu, logvar, z
class Generator(nn.Module):
    """LSTM decoder that sees the latent vector ``z`` at every time step."""

    def __init__(self, n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size):
        super(Generator, self).__init__()
        self.n_hidden_G = n_hidden_G
        self.n_layers_G = n_layers_G
        self.n_z = dim_z
        self.batch_size = batch_size
        self.LSTM = nn.LSTM(input_size=embedding_dim + dim_z, hidden_size=n_hidden_G,
                            num_layers=n_layers_G, batch_first=True)
        self.fc = nn.Linear(n_hidden_G, vocab_size)
        # Placeholder only: forward() overwrites this attribute on every call.
        self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G),
                       torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))

    def forward(self, x, z, g_hidden=None):
        """Decode the embedded inputs ``x`` conditioned on ``z``.

        Args:
            x: embedded tokens of shape ``(batch, seq_len, embedding_dim)``.
            z: latent vectors of shape ``(batch, dim_z)``.
            g_hidden: optional ``(h, c)`` state to continue from. ``None``
                starts from zeros (used for a full training sequence); passing
                a state lets step-by-step generation carry it along.

        Returns:
            ``(logits, (h, c))`` with logits of shape ``(batch, seq_len, vocab_size)``.
        """
        batch_size, n_seq, n_embed = x.size()
        # Repeat z along the time axis and append it to every input step.
        z = z.unsqueeze(1).expand(batch_size, n_seq, self.n_z)
        x = torch.cat([x, z], dim=2)
        # With g_hidden=None, nn.LSTM creates the zero state itself, on the
        # input's device and dtype.
        output, self.hidden = self.LSTM(x, g_hidden)
        output = self.fc(output)
        return output, self.hidden
class VAE(nn.Module):
    """Sequence VAE: a shared embedding, an LSTM encoder and an LSTM generator."""

    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G ):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = Encoder(
            embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size
        )
        self.generator = Generator(
            n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size
        )
        self.n_z = dim_z

    def forward(self, x, G_inp, z, G_hidden):
        """Return ``(logit, G_hidden, kld)``.

        When ``z`` is None, ``x`` is encoded to obtain ``z`` and the KL term.
        When ``z`` is given, the encoder is skipped and ``kld`` is None.
        """
        kld = None
        if z is None:
            # Unpacking two values also requires x to be 2-D.
            batch_size, n_seq = x.size()
            mu, logvar, z = self.encoder(self.embedding(x))
            kld = -0.5 * torch.sum(logvar - mu.pow(2) - logvar.exp() + 1).mean()
        logit, G_hidden = self.generator(self.embedding(G_inp), z, G_hidden)
        return logit, G_hidden, kld
# Read the first 500 rows of each CSV; the files have no header row.
train_df = pd.read_csv("train.csv", header =None)[0:500]
test_df = pd.read_csv("test.csv",header =None)[0:500]
# Column 0 is the text the tokenizer is fitted on.
train = train_df.iloc[:,0]
max_words = 2000
max_len = 25
# Turn the texts into integer sequences padded to max_len.
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(train)
sequences = tok.texts_to_sequences(train)
sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen = max_len)
#tok.sequences_to_texts(sequences)
# Model sizes (E = encoder, G = generator) and training hyper-parameters.
n_hidden_E = 10
n_layers_E = 1
embedding_dim = 10
vocab_size = max_words
n_hidden_G = 10
n_layers_G = 2
dim_z = 10
train_size = len(train)
batch_size = 100
# Weight of the reconstruction loss in train_batch.
rec_coef = 7
lr = 0.01
epochs = 100
def create_generator_input(x, train):
    """Return a copy of ``x`` without its last column.

    The generator input excludes the last word of each sequence. ``train`` is
    accepted but not used: the word-dropout step it would control is not
    implemented.
    """
    return x[:, :max_len - 1].clone()
def producebatches(x, batch_size):
    """Split ``x`` into consecutive full batches along its first axis.

    Rows that do not fill a final batch are dropped. Any trailing dimensions
    of ``x`` are preserved.

    Args:
        x: array of shape ``(n_rows, ...)``.
        batch_size: number of rows per batch.

    Returns:
        A new array of shape ``(n_rows // batch_size, batch_size, ...)``.
    """
    k = x.shape[0] // batch_size
    total = k * batch_size
    # flatten() copies, so the returned batches never alias ``x``.
    return x[:total].flatten().reshape((k, batch_size) + tuple(x.shape[1:]))
# Group the padded sequences into full batches of batch_size rows.
batches = producebatches(sequences_matrix, batch_size)
model = VAE(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
# NOTE(review): `criterion` is not used by train_batch, which calls
# F.cross_entropy directly.
criterion = nn.BCELoss(reduction = 'sum')
model.train()
def train_batch(x,G_inp,step,train =True):
    """Run the model on one batch and, when ``train`` is true, take an optimizer step.

    Returns the reconstruction loss and the KL term as Python floats.
    """
    reconstruction, hidden, kld = model(x, G_inp, None, None)
    reconstruction2 = reconstruction.view(-1, vocab_size).clone() #converting into shape (batch_size*(n_seq-1), n_vocab) to facilitate performing F.cross_entropy()
    #y = x[:, 1:x.size(1)].clone() #target for generator should exclude first word of sequence
    #y = y.contiguous().view(-1)
    G_inp2 = G_inp.contiguous().view(-1) #converting into shape (batch_size*(n_seq-1),1) to facilitate performing F.cross_entropy()
    # NOTE(review): the target here is the generator input itself; the shifted
    # target `y` above is commented out.
    rec_loss = F.cross_entropy(reconstruction2,G_inp2)
    # KL weight follows a tanh schedule centred on step 15000.
    # NOTE(review): the caller passes the epoch number as `step` and epochs is 100,
    # so the weight presumably stays close to 0 for the whole run - confirm intent.
    kld_coef = (math.tanh((step - 15000)/1000) + 1) / 2
    #kld_coef = min(1,step/(200000.0))
    loss = rec_coef*rec_loss + kld_coef*kld
    # The optimisation step below is skipped when validating.
    if train == True:
        torch.autograd.set_detect_anomaly(True)
        optimizer.zero_grad()
        # NOTE(review): retain_graph=True keeps this batch's graph alive after backward.
        loss.backward(retain_graph=True)
        optimizer.step()
    return rec_loss.item(), kld.item()
for epoch in range(epochs):
    # Per-batch losses collected over this epoch.
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)
    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    # Report the epoch means; the original printed the last batch's values and
    # left the means unused.
    print("No.", epoch, "T_rec:", '%.2f'%train_rec_loss, "T_kld:", '%.2f'%train_kl_loss)
def generate_sentences(n):
    """Sample ``n`` token-index sequences of length 49 from the generator.

    Each sequence starts from a random latent vector and the token index 1,
    then repeatedly samples the next token from the softmax distribution.

    Returns:
        A list of ``n`` lists of token indices.
    """
    model.eval()
    sentences = []
    for _ in range(n):
        z = torch.randn([1, dim_z])
        hidden = (torch.zeros(n_layers_G, 1, n_hidden_G),
                  torch.zeros(n_layers_G, 1, n_hidden_G))
        G_inp = torch.LongTensor(1, 1).fill_(1)
        str_ind = []
        with torch.no_grad():
            while len(str_ind) < 49:
                # Carry the generator state forward. The original discarded the
                # returned state, so every step restarted from zeros.
                logit, hidden, _ = model(None, G_inp, z, hidden)
                probs = F.softmax(logit[0], dim=1)
                G_inp = torch.multinomial(probs, 1)
                str_ind.append(G_inp[0][0].item())
        sentences.append(str_ind)
    return sentences
t = generate_sentences(1)
First, you can re-initialize your hidden layer after each epoch. This will overcome the error that you are facing without any major changes:
for epoch in range(epochs):
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        # Give the encoder a fresh zero state so this batch does not
        # backpropagate into the graph of an earlier batch. The cached state
        # lives on model.encoder (not on model) and has the encoder's shape:
        # 2 directions * n_layers_E layers.
        model.encoder.hidden = (torch.zeros(2 * n_layers_E, batch_size, n_hidden_E),
                                torch.zeros(2 * n_layers_E, batch_size, n_hidden_E))
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)
    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    # Report the epoch means rather than the last batch's values.
    print("No.", epoch, "T_rec:", '%.2f'%train_rec_loss, "T_kld:", '%.2f'%train_kl_loss)
Furthermore, you can avoid in-place operations in the activation functions and dropout layers (inplace=False), although I don't think that is the cause in your case.