Pytorch network not training

Pytorch network not training - python

I am trying to train an activity recognition system using PyTorch, but the network is not training and loss is not dropping, even though I have a similar model working perfectly on keras. I have provided code for the training loop, model class, and dataset class here. Can you help me why the loss is not dropping (accuracy is not increasing)
main training loop
# create dataset
dataset = IMU_dataset()
train_loader = DataLoader(dataset=dataset,
batch_size=40,
shuffle=True,
num_workers=2)
num_epochs = 100
total_samples = len(dataset)
n_iterations = math.ceil(total_samples/4)
print(total_samples, n_iterations)
input_shape = 3
output_index = 6
device = torch.device('cpu')
model = HARmodel(input_shape, output_index).to(device)
model.float()
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for epoch in range(num_epochs):
for i, (inputs, labels) in enumerate(train_loader):
# origin shape: [40, 3, 400]
labels = labels.to(device)
# Forward pass
outputs = model(inputs.to(device).float())
loss = criterion(outputs, labels.long())
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
# if (i+1) % 5 == 0:
# print(f'loss: {loss.item()}')
print(model.calculate_accuracy(dataset.x_data, dataset.y_data), model.calculate_loss(dataset.x_data, dataset.y_data, criterion))
Here is the model class.
class HARmodel(nn.Module):
"""Model for human-activity-recognition."""
def __init__(self, input_size, num_classes):
super().__init__()
# Extract features, 1D conv layers
self.layer_1 = nn.Conv1d(input_size, 100, 10, stride=1)
self.activation_relu = nn.ReLU()
self.layer_2 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_3 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_4 = nn.MaxPool1d(2, stride=3)
self.layer_5 = nn.Dropout(p=0.2)
self.layer_6 = nn.Conv1d(100, 160, 10, stride=1)
self.layer_7 = nn.Conv1d(160, 160, 10, stride=1)
self.layer_8 = nn.Conv1d(160, 160, 10, stride=1)
# self.layer_9 = nn.AvgPool1d(97)
self.layer_10 = nn.Dropout(p=0.5)
self.layer_11 = nn.Linear(160, 6)
self.activation_softmax = nn.Softmax()
def forward(self, x):
x = self.layer_1(x)
x = self.activation_relu(x)
x = self.layer_2(x)
x = self.activation_relu(x)
x = self.layer_3(x)
x = self.activation_relu(x)
x = self.layer_4(x)
x = self.layer_5(x)
x = self.layer_6(x)
x = self.activation_relu(x)
x = self.layer_7(x)
x = self.activation_relu(x)
x = self.layer_8(x)
x = self.activation_relu(x)
self.layer_9 = nn.AvgPool1d(x.shape[2])
x = self.layer_9(x)
x = self.layer_10(x)
y = self.layer_11(x.view(x.shape[0],x.shape[1]))
# y = self.activation_softmax(y)
return y
def calculate_accuracy(self, X,y):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
result = (max_index == true_output).sum()/y.shape[0]
return result.detach().numpy()
def calculate_loss(self, X,y, crit):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
return crit(output, true_output).item()
Here is the dataset class:
class IMU_dataset(Dataset):
def __init__(self):
self.n = X.shape[0]
self.x_data = torch.from_numpy(X.reshape(-1,3,400))
self.y_data = torch.from_numpy(y)
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n
EDIT 1:
I got to know that I need to remove the softmax layer
I have tried with a lower learning rate and still have the same problem.

Related

How to get activation values of a layer in pytorch

I have a pytorch-lightning model that has a dense layer like so:
def __init__(...)
...
self.dense = nn.Linear(channels[-1], 64, bias=True)
...
for my project, I need to get the activation values of this layer as a list
I have tried this code which I found on the pytorch discussion forum:
activation = {}
def get_activation(name):
def hook(model, input, output):
activation[name] = output.detach()
return hook
test_img = cv.imread(f'digimage/100.jpg')
test_img = cv.resize(test_img, (128, 128))
test_img = np.moveaxis(test_img, 2, 0)
modelftr = load_feature_model(**model_dict)
num_ftrs = modelftr.fc.in_features
modelftr.fc = torch.nn.Linear(num_ftrs, 228)
modelftr.load_state_dict(torch.load('...'))
modelftr.dense.register_forward_hook(get_activation('dense'))
with torch.no_grad():
modelatt.to('cpu')
modelatt.eval()
test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
output = modelcat(test_img)
print(activation['dense'])
But this gives a keyerror:
8 test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
9 output = modelcat(test_img)
---> 10 print(activation['dense'])
KeyError: 'dense'
Update:
This is my full model code.
As you can see there is a linear layer named dense
class FAtNet(pl.LightningModule):
def __init__(self, image_size, in_channels, num_blocks, channels,
num_classes=20, block_types=['C', 'C', 'T', 'T'], lr=0.0001, loss_function=nn.CrossEntropyLoss()):
super().__init__()
self.lr = lr
self.loss_function = loss_function
ih, iw = image_size
block = {'C': MBConv, 'T': Transformer}
self.s0 = self._make_layer(
conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
self.s1 = self._make_layer(
block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
self.s2 = self._make_layer(
block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
self.s3 = self._make_layer(
block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
self.s4 = self._make_layer(
block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))
self.pool = nn.AvgPool2d(ih // 32, 1)
self.dense = nn.Linear(channels[-1], 64, bias=True)
self.fc = nn.Linear(64, num_classes, bias=False)
def forward(self, x):
x = self.s0(x)
x = self.s1(x)
x = self.s2(x)
x = self.s3(x)
x = self.s4(x)
x = self.pool(x).view(-1, x.shape[1])
x = self.dense(x)
x = self.fc(x)
return x
def _make_layer(self, block, inp, oup, depth, image_size):
layers = nn.ModuleList([])
for i in range(depth):
if i == 0:
layers.append(block(inp, oup, image_size, downsample=True))
else:
layers.append(block(oup, oup, image_size))
return nn.Sequential(*layers)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=self.lr)
def training_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('train_loss', loss)
return loss
def test_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('test_loss', loss)
return loss
### custom prediction function ###
def predict(self, dm):
X_test = dm.X_test
self.eval()
X_test = torch.tensor(X_test).float()
self.to(device='cuda')
pred = []
with torch.no_grad():
for data in X_test:
output = self(data)
pred.append(output)
pred = pred[0].detach()
pred = pred.cpu()
self.to(device='cpu')
self.train()
return pred
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)

It seems like you model does not have 'dense' layer, only 'fc'.
Try:
modelftr.fc.register_forward_hook(get_activation('fc'))

How to plot Receptive Fields, for a CNN/fashionMNIST?

I created my CNN with PyTorch Lightning, and I am actually looking for plotting the Receptive Fields.
Do you have any suggestions about it?
I look for different solutions here and there, but I actually can't make them synergize with PyTorch Lightning.
Is it possible to visualize the Receptive fields directly inside Tensorboard?
I'll share with you my Dataset:
train_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=True, download=True, transform=transforms.ToTensor())
train, val = train_test_split(train_dataset, test_size = .2)
train_loader = DataLoader(train, batch_size = 32)
val_loader = DataLoader(train, batch_size = 32)
test_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=False, download=True, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size = 32)
and CNN:
def __init__(self, dropout, learn_rate, momentum, weight_decay, optimizer):
#def __init__(self, dropout, learn_rate, weight_decay, optimizer):
super().__init__()
self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12 , kernel_size = 5)
self.fc1 = nn.Linear(in_features = 12*4*4, out_features = 120)
self.fc2 = nn.Linear(in_features = 120, out_features = 60)
self.out = nn.Linear(in_features = 60, out_features = 10)
self.do = nn.Dropout(dropout) #for overfitting issues
self.loss = nn.CrossEntropyLoss()
self.accuracy = torchmetrics.Accuracy()
self.learn_rate = learn_rate
self.momentum = momentum #with Adam we don't have momentum. To Check best Optimizer with Optune, please comment this line.
self.weight_decay = weight_decay
self.optimizer = optimizer
self.train_loss = []
self.val_loss = []
self.train_acc = []
self.test_acc = []
#plot into tensorboard
log_dir = pathlib.Path.cwd() / "lightning_logs"
self.writer = SummaryWriter(log_dir)
#forward step
#I add each layer to the histogram. It's plotted into tensorboard
def forward(self, x, additional_out=False):
#conv1
x = self.conv1(x)
self.writer.add_histogram("First convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
#conv2
x = self.conv2(x)
self.writer.add_histogram("Second convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
#fuly connected 1
x = x.reshape(-1, 12*4*4)
x = self.fc1(x)
self.writer.add_histogram("First linear layer CNN", x)
x = F.relu(x)
x = self.do(x)
#fully connected 2
x=self.fc2(x)
self.writer.add_histogram("Second linear layer CNN", x)
x = F.relu(x)
x = self.do(x)
#output
x = self.out(x)
self.writer.add_histogram("Output layer CNN", x)
return x
#optimizer
def configure_optimizers(self):
#optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, momentum = self.momentum, weight_decay = self.weight_decay)
optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, weight_decay = self.weight_decay)
return optimizer
#training step
def training_step(self, batch, batch_idx):
x, y = batch
b = x.size(0)
x = x.view(b, -1, 28, 28)
logit = self(x)
J = self.loss(logit, y) #loss
#self.train_loss.append(J) #no need to append
acc = self.accuracy(logit, y) #accuracy
#self.train_acc.append(acc) #no need to append
self.log("train_loss_cnn", J.item())
self.log("train_acc_cnn", acc.item())
return {'loss': J}
#Since I used Tensorboard, it don't have to append to loss
def test_step(self, batch, batch_idx):
p, q = batch
b = p.size(0)
p = p.view(b, -1, 28, 28)
logit = self(p)
J = self.loss(logit, q) #loss
acc_test = self.accuracy(logit, q) #accuracy
#self.train_acc.append(acc_test) #no need to append
#self.train_loss.append(J) #no need to append
self.log("test_acc_cnn", acc_test.item())
self.log("test_loss_cnn", J.item())
def validation_step(self, batch, batch_idx=None):
u, v = batch
b = u.size(0)
u = u.view(b, -1, 28, 28)
logit = self(u)
J = self.loss(logit, v) #loss
#self.val_loss.append(J) #no need to append
acc_val = self.accuracy(logit, v) #accuracy
#self.train_acc.append(acc_val) #no need to append
self.log("val_loss_cnn", J.item())
self.log("val_acc_cnn", acc_val.item())
return {"loss": J, "pred": logit, "target": v}
#Once saves from validation step, I take with me the returned elements, and I can plot the Confusion Matrix inside Tensorboard
def validation_epoch_end(self, outputs):
preds = torch.cat([tmp['pred'] for tmp in outputs])
targets = torch.cat([tmp['target'] for tmp in outputs])
conf_mat = confusion_matrix(preds, targets, num_classes=10)
df_cm = pd.DataFrame(conf_mat.numpy(), index = range(10), columns=range(10))
plt.figure(figsize = (10,7))
fig_ = sns.heatmap(df_cm, annot=True, cmap='Spectral').get_figure()
plt.close(fig_)
self.logger.experiment.add_figure("Confusion matrix CNN", fig_, self.current_epoch)

Pytorch:RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

I set my model and data to the same device,
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
and I also do this:
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
but the error still exists. When it comes to 5000 times or more, the error will take place.
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
The following is the whole training code, I hope you can answer it. Thanks!
import torch
import os
import torchvision.transforms as transforms
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader
captcha_list = list('0123456789abcdefghijklmnopqrstuvwxyz_')
captcha_length = 6
# 验证码文本转为向量
def text2vec(text):
vector = torch.zeros((captcha_length, len(captcha_list)))
text_len = len(text)
if text_len > captcha_length:
raise ValueError("验证码超过6位啦！")
for i in range(text_len):
vector[i,captcha_list.index(text[i])] = 1
return vector
# 验证码向量转为文本
def vec2text(vec):
label = torch.nn.functional.softmax(vec, dim =1)
vec = torch.argmax(label, dim=1)
for v in vec:
text_list = [captcha_list[v] for v in vec]
return ''.join(text_list)
# 加载所有图片，并将验证码向量化
def make_dataset(data_path):
img_names = os.listdir(data_path)
samples = []
for img_name in img_names:
img_path = data_path+img_name
target_str = img_name.split('_')[0].lower()
samples.append((img_path, target_str))
return samples
class CaptchaData(Dataset):
def __init__(self, data_path, transform=None):
super(Dataset, self).__init__()
self.transform = transform
self.samples = make_dataset(data_path)
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
img_path, target = self.samples[index]
target = text2vec(target)
target = target.view(1, -1)[0]
img = Image.open(img_path)
img = img.resize((140,44))
img = img.convert('RGB') # img转成向量
if self.transform is not None:
img = self.transform(img)
return img, target
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# 第一层神经网络
# nn.Sequential: 将里面的模块依次加入到神经网络中
self.layer1 = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1), # 3通道变成16通道，图片：44*140
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2) # 图片：22*70
)
# 第2层神经网络
self.layer2 = nn.Sequential(
nn.Conv2d(16, 64, kernel_size=3), # 16通道变成64通道，图片：20*68
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2) # 图片：10*34
)
# 第3层神经网络
self.layer3 = nn.Sequential(
nn.Conv2d(64, 128, kernel_size=3), # 16通道变成64通道，图片：8*32
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2) # 图片：4*16
)
# 第4层神经网络
self.fc1 = nn.Sequential(
nn.Linear(4*16*128, 1024),
nn.Dropout(0.2), # drop 20% of the neuron
nn.ReLU()
)
# 第5层神经网络
self.fc2 = nn.Linear(1024, 6*37) # 6:验证码的长度， 37: 字母列表的长度
#前向传播
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
net = Net()
def calculat_acc(output, target):
output, target = output.view(-1, len(captcha_list)), target.view(-1, len(captcha_list)) # 每37个就是一个字符
output = nn.functional.softmax(output, dim=1)
output = torch.argmax(output, dim=1)
target = torch.argmax(target, dim=1)
output, target = output.view(-1, captcha_length), target.view(-1, captcha_length) #每6个字符是一个验证码
c = 0
for i, j in zip(target, output):
if torch.equal(i, j):
c += 1
acc = c / output.size()[0] * 100
return acc
def train(epoch_nums):
# 数据准备
transform = transforms.Compose([transforms.ToTensor()]) # 不做数据增强和标准化了
train_dataset = CaptchaData('./sougou_com_Trains/', transform=transform)
train_data_loader = DataLoader(train_dataset, batch_size=32, num_workers=0, shuffle=True, drop_last=True)
test_data = CaptchaData('./sougou_com_Trains/', transform=transform)
test_data_loader = DataLoader(test_data, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
# 更换设备
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('当前设备是:',device)
net.to(device)
criterion = nn.MultiLabelSoftMarginLoss() # 损失函数
optimizer = torch.optim.Adam(net.parameters(), lr=0.001) # 优化器
# 加载模型
model_path = './module_build/model.pth'
if os.path.exists(model_path):
print('开始加载模型')
checkpoint = torch.load(model_path)
net.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# 开始训练
i = 1
for epoch in range(epoch_nums):
running_loss = 0.0
net.train() # 神经网络开启训练模式
for data in train_data_loader:
if i%100==0:
print(i)
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device) #数据发送到指定设备
#每次迭代都要把梯度置零
optimizer.zero_grad()
# 关键步骤
# 前向传播
outputs = net(inputs)
# 计算误差
loss = criterion(outputs, labels)
# 后向传播
loss.backward()
# 优化参数
optimizer.step()
running_loss += loss.item()
if i % 2000 == 0:
acc = calculat_acc(outputs, labels)
print('第%s次训练正确率: %.3f %%, loss: %.3f' % (i,acc,running_loss/2000))
running_loss = 0
# 保存模型
torch.save({
'model_state_dict':net.state_dict(),
'optimizer_state_dict':optimizer.state_dict(),
},model_path)
i += 1
# 结束一个epoch,计算测试集的正确率
net.eval() #测试模式
with torch.no_grad():
for inputs, labels in test_data_loader:
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('测试集正确率: %.3f %%' % (acc))
break # 只测试一个batch
# 每5个epoch 更新学习率
if epoch % 5 == 4:
for p in optimizer.param_groups:
p['lr'] *= 0.9
train(10)

In evaluation part: Do this
net.eval() #测试模式
with torch.no_grad():
for inputs, labels in test_data_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('测试集正确率: %.3f %%' % (acc))
break # 只测试一个batch
This will work since you dint move your data from test loader to device.

Pytorch LSTM- VAE Sentence Generator: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

I am trying to make a LSTM VAE as a learning stage for future work with pytorch.
I managed to get it to work on some small tester data but now that I want to run it on my actual data I am continuously getting this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [10, 40]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Any help on how to solve this error and generally just make my code more efficient would be great!
Here is the appropriate section of the traceback:
File "<ipython-input-16-7fe0e9e30e5d>", line 190, in <module>
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
File "<ipython-input-16-7fe0e9e30e5d>", line 166, in train_batch
reconstruction, hidden, kld = model(x, G_inp, None, None)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 93, in forward
mu, logvar, z = self.encoder(x)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 37, in forward
out1, self.hidden = self.lstm(x, self.hidden)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py", line 582, in forward
self.dropout, self.training, self.bidirectional, self.batch_first)
(Triggered internally at /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
allow_unreachable=True) # allow_unreachable flag
The code is below: (excuse all the .clone(), I read that this could be a solution so I was testing it out everywhere with no help)
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from keras.preprocessing.text import Tokenizer
from keras import preprocessing
import torch.nn.functional as F
import math
import random
#encoder
class Encoder(nn.Module):
def __init__(self,embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size):
super(Encoder,self).__init__()
self.n_layers_E = n_layers_E
self.n_hidden_E = n_hidden_E
self.batch_size = batch_size
self.dim_z = dim_z
self.lstm = nn.LSTM(input_size = embedding_dim, hidden_size = n_hidden_E, num_layers = n_layers_E, batch_first=True, bidirectional = True)
self.hidden_to_mu = nn.Linear(2*n_hidden_E,dim_z)
self.hidden_to_logvar = nn.Linear(2*n_hidden_G, dim_z)
self.hidden = (torch.zeros(2*n_layers_E, batch_size, n_hidden_E),torch.zeros(2*n_layers_E, batch_size, n_hidden_E))
def forward(self,x):
batch_size, n_seq, n_embed = x.size()
#batch_size, n_seq = x.size()
out1, self.hidden = self.lstm(x, self.hidden)
e_hidden = self.hidden[0].view(batch_size, 2 * self.n_hidden_E).clone()
#e_hidden = torch.cat(list(hidden),dim = 0)
mu = self.hidden_to_mu(e_hidden)
logvar = self.hidden_to_logvar(e_hidden)
epsilon = torch.randn([batch_size, self.dim_z])
z = mu + torch.exp(logvar*0.5)*epsilon
return mu, logvar, z
class Generator(nn.Module):
def __init__(self,n_hidden_G,n_layers_G, embedding_dim, dim_z, vocab_size , batch_size):
super(Generator,self).__init__()
self.n_hidden_G = n_hidden_G
self.n_layers_G = n_layers_G
self.n_z = dim_z
self.batch_size = batch_size
self.LSTM = nn.LSTM(input_size = embedding_dim + dim_z, hidden_size = n_hidden_G, num_layers = n_layers_G, batch_first = True)
self.fc = nn.Linear(n_hidden_G, vocab_size)
self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
,torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))
def forward(self,x,z, g_hidden = None):
batch_size,n_seq, n_embed = x.size()
#batch_size, n_seq= x.size()
z = torch.cat([z]*n_seq,1).view(batch_size, n_seq, self.n_z)
x = torch.cat([x,z], dim = 2)
if g_hidden is None: #if we are validating
self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
,torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))
else: #if we are training
self.hidden = g_hidden
output, self.hidden = self.LSTM(x, self.hidden)
output = self.fc(output)
return output, self.hidden
class VAE(nn.Module):
def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G ):
super(VAE, self).__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.encoder = Encoder(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size)
self.generator = Generator(n_hidden_G,n_layers_G, embedding_dim, dim_z, vocab_size, batch_size )
self.n_z = dim_z
def forward(self, x, G_inp, z, G_hidden):
if z is None:
batch_size, n_seq = x.size()
x = self.embedding(x)
mu, logvar, z = self.encoder(x)
kld = -0.5*torch.sum(logvar-mu.pow(2)-logvar.exp()+1).mean()
else:
kld = None
G_inp = self.embedding(G_inp)
logit, G_hidden = self.generator(G_inp,z, G_hidden)
return logit, G_hidden, kld
train_df = pd.read_csv("train.csv", header =None)[0:500]
test_df = pd.read_csv("test.csv",header =None)[0:500]
train = train_df.iloc[:,0]
max_words = 2000
max_len = 25
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(train)
sequences = tok.texts_to_sequences(train)
sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen = max_len)
#tok.sequences_to_texts(sequences)
n_hidden_E = 10
n_layers_E = 1
embedding_dim = 10
vocab_size = max_words
n_hidden_G = 10
n_layers_G = 2
dim_z = 10
train_size = len(train)
batch_size = 100
rec_coef = 7
lr = 0.01
epochs = 100
def create_generator_input(x, train):
G_inp = x[:, 0:max_len-1].clone() #input for generator should exclude last word of sequence
# if train == False:
# return G_inp
# r = np.random.rand(G_inp.size(0), G_inp.size(1))
# #Perform word_dropout according to random values (r) generated for each word
# for i in range(len(G_inp)):
# for j in range(1,G_inp.size(1)):
# if r[i, j] < opt.word_dropout and G_inp[i, j] not in [vocab.stoi[opt.pad_token], vocab.stoi[opt.end_token]]:
# G_inp[i, j] = vocab.stoi[opt.unk_token]
return G_inp
def producebatches(x,batch_size):
k = math.floor(x.shape[0]/batch_size)
total = (k)*batch_size
flatten = x[0:total].flatten()
batches = flatten.reshape((k,batch_size,x.shape[1]))
return batches
batches = producebatches(sequences_matrix, batch_size)
model = VAE(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.BCELoss(reduction = 'sum')
model.train()
def train_batch(x,G_inp,step,train =True):
reconstruction, hidden, kld = model(x, G_inp, None, None)
reconstruction2 = reconstruction.view(-1, vocab_size).clone() #converting into shape (batch_size*(n_seq-1), n_vocab) to facilitate performing F.cross_entropy()
#y = x[:, 1:x.size(1)].clone() #target for generator should exclude first word of sequence
#y = y.contiguous().view(-1)
G_inp2 = G_inp.contiguous().view(-1) #converting into shape (batch_size*(n_seq-1),1) to facilitate performing F.cross_entropy()
rec_loss = F.cross_entropy(reconstruction2,G_inp2)
kld_coef = (math.tanh((step - 15000)/1000) + 1) / 2
#kld_coef = min(1,step/(200000.0))
loss = rec_coef*rec_loss + kld_coef*kld
if train == True:
torch.autograd.set_detect_anomaly(True) #skip below step if we are performing validation
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
return rec_loss.item(), kld.item()
for epoch in range(epochs):
train_rec_loss = []
train_kl_loss = []
for i in range(batches.shape[0]):
x = torch.tensor(batches[i], dtype = torch.long)
G_inp = create_generator_input(x, train = True)
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
train_rec_loss.append(rec_loss)
train_kl_loss.append(kl_loss)
train_rec_loss = np.mean(train_rec_loss)
train_kl_loss = np.mean(train_kl_loss)
print("No.", epoch, "T_rec:", '%.2f'%rec_loss, "T_kld:", '%.2f'%kl_loss)
def generate_sentences(n):
model.eval()
sentences = []
for i in range(n):
z = torch.randn([1, dim_z])
hidden = (torch.zeros(n_layers_G,1, n_hidden_G)
,torch.zeros(n_layers_G, 1, n_hidden_G))
G_inp = torch.LongTensor(1,1).fill_(1)
str_ind = []
while len(str_ind)<49:
with torch.autograd.no_grad():
logit, G_hidden, _ = model(None, G_inp, z, hidden)
probs = F.softmax(logit[0],dim=1)
G_inp = torch.multinomial(probs,1)
str_ind.append(G_inp[0][0].item())
sentences.append(str_ind)
return sentences
t = generate_sentences(1)

First, you can re-initialize your hidden layer after each epoch. This will overcome the error that you are facing without any major changes:
for epoch in range(epochs):
train_rec_loss = []
train_kl_loss = []
for i in range(batches.shape[0]):
x = torch.tensor(batches[i], dtype = torch.long)
G_inp = create_generator_input(x, train = True)
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
train_rec_loss.append(rec_loss)
train_kl_loss.append(kl_loss)
model.hidden = (torch.zeros(n_layers_G, batch_size, n_hidden_G)
,torch.zeros(n_layers_G, batch_size, n_hidden_G))
train_rec_loss = np.mean(train_rec_loss)
train_kl_loss = np.mean(train_kl_loss)
print("No.", epoch, "T_rec:", '%.2f'%rec_loss, "T_kld:", '%.2f'%kl_loss)
Furthermore, you can avoid inplace operations on the activation functions and dropout operations (inplace = False)(I think it is not your case).

Loss does not reduce with an auto-encoder like setup with PyTorch v1.4

I am new to PyTorch and have written the following code. I am using one neural network for the encoding part followed by some processing of the encoded data and then using another network for the decoding this data.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import numpy as np
import itertools
import datetime
class Encoder(nn.Module):
def __init__(self):
super(Encoder, self).__init__()
self.fc1 = nn.Linear(4, 32, bias=False)
self.fc2 = nn.Linear(32, 16, bias=False)
self.fc3 = nn.Linear(16, 7, bias=False)
def forward(self, x):
x = self.fc1(x)
x = torch.tanh(x)
x = self.fc2(x)
x = torch.tanh(x)
x = self.fc3(x)
output = torch.tanh(x)
return output
def channel(codeword, snr_db, device):
snr_value = 10 ** (snr_db / 10)
h_real = torch.normal(mean=0, std=1, size=(codeword.shape[0], 1)) * torch.sqrt(torch.as_tensor(1/2))
h_imag = torch.normal(mean=0, std=1, size=(codeword.shape[0], 1)) * torch.sqrt(torch.as_tensor(1/2))
h_real_t = h_real.repeat(1, codeword.shape[1]).to(device)
h_imag_t = h_imag.repeat(1, codeword.shape[1]).to(device)
noise_real = torch.normal(mean=0, std=1, size=codeword.shape) * torch.sqrt(torch.as_tensor(1/(2*snr_value)))
noise_imag = torch.normal(mean=0, std=1, size=codeword.shape) * torch.sqrt(torch.as_tensor(1/(2*snr_value)))
noise_real = noise_real.to(device)
noise_imag = noise_imag.to(device)
faded_cw_real = torch.mul(h_real_t, codeword) + noise_real
faded_cw_imag = torch.mul(h_imag_t, codeword) + noise_imag
return torch.cat([faded_cw_real[:, :, None], faded_cw_imag[:, :, None]], dim=2), h_real, h_imag
class Decoder(nn.Module):
def __init__(self):
super(Decoder, self).__init__()
self.bigru = nn.GRU(input_size=7, hidden_size=200, num_layers=20, bidirectional=True)
self.fc0 = nn.Linear(4, 1)
self.fc1 = nn.Linear(400, 4)
def forward(self, x):
x, states = self.bigru(x)
output = torch.squeeze(self.fc0(torch.transpose(x, 2, 1)))
output = torch.tanh(output)
output = self.fc1(output)
output = torch.tanh(output)
# output = torch.softmax(output, dim=0)
return output
def train(args, model1, model2, device, optimizer, epoch, snr):
model1.train()
model2.train()
count = 1000
for i in range(count):
data = np.array([list(i) for i in itertools.product([-1, 1], repeat=4)])
p = np.random.permutation(16)
# p = np.random.randint(low=0, high=16, size=(16,))
train_data = data[p]
data_one_hot = np.eye(16)
truth = data_one_hot[p]
# truth = torch.as_tensor(truth).to(device).float() # Uncomment this for BCE loss
train_data = torch.as_tensor(train_data).float()
train_data = train_data.to(device)
# optimizer1.zero_grad()
optimizer.zero_grad()
output = model1(train_data)
output = output.to(device)
ch_out, h_r, h_i = channel(output, snr, device)
h_r = torch.as_tensor(h_r[:, :, None].repeat(1, 7, 1)).to(device)
h_i = torch.as_tensor(h_i[:, :, None].repeat(1, 7, 1)).to(device)
dec_ip = torch.cat([ch_out, h_r, h_i], 2)
dec_ip = torch.transpose(dec_ip, 2, 1)
hat = model2(torch.as_tensor(dec_ip).float())
loss_d = F.mse_loss(hat, train_data)
# loss_d = F.binary_cross_entropy(hat, truth)
loss_d.backward()
optimizer.step()
if i % 10 == 0:
# print(f"Train epoch: {epoch}, Batch: {i}, Encoder Loss: {loss_e.item()}, SNR: {snr}")
print(f"Train epoch: {epoch}, Batch: {i}, Decoder Loss: {loss_d.item()}, SNR: {snr}")
def main():
epochs = 14
learning_rate = 1
learning_rate_step = 0.7
no_cuda = False
log_interval = 10
use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
enc_model = Encoder().to(device)
dec_model = Decoder().to(device)
optimizer = optim.Adam(list(dec_model.parameters())+list(enc_model.parameters()), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=1, gamma=learning_rate_step)
for epoch in range(1, epochs+1):
snr = 20 - 20 * epoch / epochs
train(log_interval, enc_model, dec_model, device, optimizer, epoch, snr)
scheduler.step()
if __name__ == "__main__":
main()
However when I run this, the output is:
Train epoch: x, Batch: y, Decoder Loss: 2.0, SNR: z
x, y and z values are dependent on the iteration. The decoder loss is stuck at 2.0. In fact, the loss starts at lesser than 2 and then gets stuck at 2.0.
Is there something simply wrong with the code?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Pytorch network not training - python

Related

How to get activation values of a layer in pytorch

How to plot Receptive Fields, for a CNN/fashionMNIST?

Pytorch:RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

Pytorch LSTM- VAE Sentence Generator: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

Loss does not reduce with an auto-encoder like setup with PyTorch v1.4

Categories

Resources