Why does GraphRNN pack_padded after linear transform? - python

I am a beginner in machine learning. I'm currently reading GraphRNN's code:
class GRU_plain(nn.Module):
    """GRU with an optional input embedding and an optional MLP output head.

    Args:
        input_size: Feature size of the raw input.
        embedding_size: Width of the input embedding (used when ``has_input``)
            and of the hidden layer of the output head (used when
            ``has_output``).
        hidden_size: Hidden size of the GRU.
        num_layers: Number of stacked GRU layers.
        has_input: If true, the raw input goes through ``Linear + ReLU``
            before it reaches the GRU.
        has_output: If true, the GRU output goes through
            ``Linear -> ReLU -> Linear`` before being returned.
        output_size: Output width of the head; required when ``has_output``.

    ``self.hidden`` must be set (e.g. from :meth:`init_hidden`) before the
    first call to :meth:`forward`; it is overwritten on every forward pass.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, has_input=True, has_output=False,
                 output_size=None):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output

        if has_input:
            self.input = nn.Linear(input_size, embedding_size)
            self.rnn = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
                              batch_first=True)
        else:
            self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              batch_first=True)
        if has_output:
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size)
            )

        self.relu = nn.ReLU()
        # Needs to be initialised by the caller before the forward run.
        self.hidden = None

        # The trailing-underscore initialisers work in place; the variants
        # without the underscore are deprecated.
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param, gain=nn.init.calculate_gain('sigmoid'))
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self, batch_size):
        """Return an all-zero hidden state of shape (num_layers, batch_size, hidden_size).

        The tensor is created with the dtype and device of the module's own
        parameters, so it matches the model wherever the model lives.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.num_layers, batch_size, self.hidden_size)

    def forward(self, input_raw, pack=False, input_len=None):
        """Run the GRU over a batch-first sequence and return the per-step output.

        Args:
            input_raw: Padded input batch, batch dimension first.
            pack: If true, pack the (embedded) input with ``input_len`` before
                the GRU and pad the output back afterwards.
            input_len: Sequence lengths handed to ``pack_padded_sequence``;
                required when ``pack`` is true.

        Returns:
            The GRU output at every time step, passed through the output head
            when ``has_output`` is set.
        """
        if self.has_input:
            # nn.Linear and ReLU act on each time step independently, so the
            # padded steps only produce values in their own positions. Those
            # positions are dropped by the packing below, which is why
            # embedding first and packing second is safe.
            embedded = self.relu(self.input(input_raw))
        else:
            embedded = input_raw

        if pack:
            embedded = pack_padded_sequence(embedded, input_len, batch_first=True)
        output_raw, self.hidden = self.rnn(embedded, self.hidden)
        if pack:
            output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]

        if self.has_output:
            output_raw = self.output(output_raw)
        # Hidden state at each time step (after the optional output head).
        return output_raw
In forward, why does GraphRNN call pack_padded_sequence after the linear transform done by self.input?
Don't the non-zero rows pick up information from the padded zeros?
I think it may just be convenient for coding and has little effect on the result.
But I can't find another RNN example that does the same thing.

Related

LSTM + Linear layer Encoder/Decoder always predict blank value

I am trying to use Seq2Seq to convert an analog signal into a discrete time series.
The raw time-series signal contains the firing wave sound, the echo from the marker (in short, "marker"), and the end of the tube.
So far my Encoder/Decoder + Linear layer + MSELoss() only predicts blank. It does not convert the bumps into a discrete signal.
0 = blank
1 = end of tube(wall)
2 = noise echo
3 = echo from marker
4 = firing kick
class PLMixin:
    """Mixin supplying the optimizer and the train/validation/test data loaders.

    ``configure_optimizers`` calls ``self.parameters()``, so the class it is
    mixed into has to provide that method.
    """

    @staticmethod
    def _make_loader(directory: str, shuffle: bool) -> DataLoader:
        """Build a single-sample, single-process loader over ``directory``."""
        dataset = TimeseriesLiquidLevelDataset(text_files_dir=Path(directory))
        return DataLoader(dataset, batch_size=1, shuffle=shuffle, num_workers=0)

    def configure_optimizers(self) -> torch.optim.Adam:
        """Return an Adam optimizer over all parameters with a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def train_loader(self) -> DataLoader:
        """Return the shuffled loader over the training files."""
        return self._make_loader("digitized_dataset/training", shuffle=True)

    def val_loader(self) -> DataLoader:
        """Return the validation loader.

        NOTE(review): this reads the *training* directory, so validation
        metrics are computed on data the model was fitted on. Confirm whether
        a separate validation split exists.
        """
        return self._make_loader("digitized_dataset/training", shuffle=False)

    def test_loader(self) -> DataLoader:
        """Return the loader over the held-out testing files."""
        return self._make_loader("digitized_dataset/testing", shuffle=False)
"""Time series experiment with embedding function. On freeze because it predict variety of token."""
import multiprocessing
import random
import typing as typ
from os.path import exists
from pathlib import Path
import joblib
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning.loggers import TensorBoardLogger
import seq2seq_gadgets as gadgets
from lstm_compare_solution import clean, conclude_correctness
# CPU count as reported by multiprocessing.cpu_count().
NUM_PROCS = multiprocessing.cpu_count()
# One fixed seed is fed to torch, random and numpy so that runs are repeatable.
seed = 181993
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
# "cuda" when torch reports a usable GPU, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
class Encoder(nn.Module):
    """Bidirectional, batch-first LSTM encoder."""

    def __init__(self, input_size: int, hidden_size: int, num_layers: int):
        """Build the encoder.

        Args:
            input_size: Feature size of each input step.
            hidden_size: Hidden size of the LSTM (per direction).
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True)

    def forward(self, x: torch.Tensor) -> typ.Tuple[torch.Tensor, typ.Tuple[torch.Tensor, torch.Tensor]]:
        """Encode ``x`` and return ``(output, (hn, cn))`` exactly as the LSTM produces them.

        Because the LSTM is bidirectional and batch-first, ``output`` has
        ``2 * hidden_size`` features in its last dimension, and ``hn`` / ``cn``
        have ``2 * num_layers`` entries in their first dimension.
        """
        # The sequence output is returned to the caller, so give it a real
        # name instead of the throwaway "_".
        output, (hn, cn) = self.lstm(x)
        return output, (hn, cn)
class Decoder(nn.Module):
    """Bidirectional, batch-first LSTM decoder followed by an MLP head."""

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        output_size: int,
        num_layers: int,
        dropout: float = 0.5,
        final_relu: bool = True,
    ) -> None:
        """Build the decoder.

        Args:
            input_size: Feature size of each input step.
            hidden_size: Hidden size of the LSTM (per direction).
            output_size: Feature size of each output step.
            num_layers: Number of stacked LSTM layers.
            dropout: Drop probability of both dropout layers in the head.
            final_relu: Keep the ReLU after the last linear layer. With it the
                head can only emit values >= 0, and a unit whose
                pre-activation goes negative emits exactly 0 with zero
                gradient. Pass ``False`` to get an unconstrained linear
                output.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True)

        # Layer positions are kept as in the original stack so that the
        # state-dict keys ("layers.1.*", "layers.4.*") do not move.
        head = [
            nn.Dropout(p=dropout),
            nn.Linear(2 * hidden_size, hidden_size),  # *2 because of bidirectional=True
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_size, output_size),
        ]
        if final_relu:
            head.append(nn.ReLU())
        self.layers = nn.Sequential(*head)

    def forward(self, x: torch.Tensor, cell: typ.Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
        """Decode ``x`` starting from the ``(hidden, cell)`` state in ``cell``.

        Returns the head's output for every time step; the LSTM's final state
        is discarded.
        """
        hn, cn = cell
        output, _ = self.lstm(x, (hn, cn))
        return self.layers(output)
class EncoderDecoder(gadgets.PLMixin, pl.LightningModule):
    """Lightning module that chains an encoder and a decoder and trains with MSE loss."""

    def __init__(self, encoder: Encoder, decoder: Decoder, n_classes: int = 4):
        """Store the two sub-modules, the class count and the loss.

        Args:
            encoder: Module whose call returns ``(output, (hidden, cell))``.
            decoder: Module called as ``decoder(x, (hidden, cell))``.
            n_classes: Number of target classes; only stored on the instance.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.n_classes = n_classes
        self._loss = nn.MSELoss()

    def forward(
        self,
        x: torch.Tensor
    ) -> torch.Tensor:
        """Encode ``x``, then decode the same ``x`` from the encoder's final state."""
        # Call the modules themselves rather than ``.forward`` so that
        # registered hooks run.
        _, (hidden, cell) = self.encoder(x)
        return self.decoder(x, (hidden, cell))

    def _predict_and_score(
        self,
        batch: typ.Tuple[torch.Tensor, torch.Tensor]
    ) -> typ.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return ``(x, y, prediction, loss)`` for one ``(x, y)`` batch."""
        x, y = batch
        x_hat = self.forward(x)
        loss = self._loss(x_hat, y.float())
        return x, y, x_hat, loss

    def training_step(
        self,
        train_batch: typ.Tuple[torch.Tensor, torch.Tensor],
        batch_idx: int
    ) -> typ.Dict:
        """Compute and log the training loss for one batch."""
        _, _, _, loss = self._predict_and_score(train_batch)
        self.log("train_loss", loss)
        return {"loss": loss}

    def validation_step(self, val_batch: typ.Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> typ.Dict:
        """Compute and log the validation loss, and dump the batch to joblib files.

        Three files are written per batch into the working directory: the
        input, the prediction and the target.
        """
        x, y, x_hat, loss = self._predict_and_score(val_batch)
        # Logged under its own key; it used to be written to "train_loss",
        # which mixed validation values into the training curve.
        self.log("val_loss", loss)
        analog_file = f"my_val_{batch_idx}.joblib"
        predicted_file = f"my_val_{batch_idx}_predicted.joblib"
        solution_file = f"my_val_{batch_idx}_solution.joblib"
        joblib.dump(x, analog_file)
        joblib.dump(x_hat, predicted_file)
        joblib.dump(y, solution_file)
        return {"val_loss": loss}
def main() -> None:
    """Train the encoder/decoder (or load a checkpoint), then validate and summarise."""
    logger = TensorBoardLogger("lightning_logs", name="digitized")
    trainer = pl.Trainer(fast_dev_run=False, max_epochs=5, logger=logger)

    encoder_input_size: int = 20000
    encoder_hidden_size: int = 1000
    encoder_num_layers: int = 1
    decoder_n_classes: int = 5
    decoder_input_size: int = encoder_input_size
    decoder_hidden_size: int = 1000
    decoder_output_size: int = encoder_input_size
    decoder_num_layers: int = encoder_num_layers

    encoder = Encoder(
        encoder_input_size, encoder_hidden_size, encoder_num_layers
    )
    decoder = Decoder(
        decoder_input_size,
        decoder_hidden_size,
        decoder_output_size,
        decoder_num_layers,
    )

    chk_point = Path("./lightning_logs/digitized/version_1/checkpoints/epoch=9-step=570.ckpt")
    if chk_point.exists():
        # Pass n_classes here as well; otherwise the restored model silently
        # falls back to the constructor default instead of decoder_n_classes.
        model = EncoderDecoder.load_from_checkpoint(
            chk_point, encoder=encoder, decoder=decoder, n_classes=decoder_n_classes
        )
        model.eval()
    else:
        model = EncoderDecoder(encoder, decoder, decoder_n_classes)
        trainer.fit(model, train_dataloaders=model.train_loader())

    clean()
    # NOTE(review): validation runs on the training loader — confirm this is
    # intended rather than model.val_loader().
    trainer.validate(model, dataloaders=model.train_loader())
    # trainer.test(model, dataloaders=model.test_loader())
    # Keep pytorch lightning clean
    conclude_correctness("my_val")
    # conclude_correctness("my_test")


if __name__ == "__main__":
    main()
Question:
Is an Encoder/Decoder capable of doing analog-to-discrete conversion?

How to add Individual LSTM layers for each task in multi-task learning with Pytorch

For example, I define a model for 2 tasks in a multi-task way.
class BertMy(nn.Module):
    """Two-head model on top of the flattened masked-LM logits of a Japanese BERT."""

    def __init__(self, segment_size, output_size, dropout):
        """Build the shared trunk and one linear head per task.

        Args:
            segment_size: Number of token positions per input; the flattened
                feature size is ``segment_size * vocab_size``.
            output_size: Output width of each head.
            dropout: Drop probability applied before each head.
        """
        super().__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        # Read the vocabulary size from the loaded model instead of
        # hard-coding it, so it always matches the width of the logits.
        self.bert_vocab_size = self.bert.config.vocab_size
        flat_features = segment_size * self.bert_vocab_size
        self.bn = nn.BatchNorm1d(flat_features)
        self.fc1 = nn.Linear(flat_features, output_size)
        self.fc2 = nn.Linear(flat_features, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``[task1_output, task2_output]`` for the token-id batch ``x``."""
        logits = self.bert(x).logits
        flat = logits.view(logits.shape[0], -1)
        # Normalise once and share the result: running batch norm twice on
        # the same tensor repeats the work and updates the running statistics
        # twice per step in training mode.
        normed = self.bn(flat)
        # Dropout is still applied separately, so each head gets its own mask.
        x1 = self.fc1(self.dropout(normed))
        x2 = self.fc2(self.dropout(normed))
        return [x1, x2]
I want to add 2 individual LSTM layers, one for each task, in order to predict the tasks individually.
How should I define this in PyTorch?

Debugging neural network dropout problem for the probability not lying inside [0,1]

I tried to add a dropout rate to my neural network (NN) using torch and I got a strange error at the end. How can I fix it?
The idea is that I wrote the NN inside a function to make it easier to call. The function is the following:
(I personally think the problem lies inside the NN class, but for the sake of having a working example I'm including everything.)
def train_neural_network(data_train_X, data_train_Y, batch_size, learning_rate, graph=True, dropout=0.0,
                         hidden_size=200, num_classes=4, num_epochs=120):
    """Train a three-layer classifier with SGD and cross-entropy, and return it.

    Args:
        data_train_X: Feature table exposing ``.columns`` and ``.values``
            (e.g. a pandas DataFrame); one row per sample.
        data_train_Y: Integer class labels exposing ``.values``; one label per
            sample, as a single column or a flat series.
        batch_size: Mini-batch size.
        learning_rate: SGD learning rate.
        graph: Not used by the code in this function; kept so existing calls
            keep working.
        dropout: Drop probability applied to the second hidden layer.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output classes.
        num_epochs: The loop runs ``num_epochs + 1`` passes over the data.

    Returns:
        The trained network (left in training mode).
    """
    # Size the input layer from the data actually passed in, not from an
    # unrelated global.
    input_size = len(data_train_X.columns)

    class NeuralNet(nn.Module):
        """Two hidden ReLU layers, dropout, then a linear classification layer."""

        def __init__(self, input_size, hidden_size, num_classes, p=dropout):
            super().__init__()
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.fc2 = nn.Linear(hidden_size, hidden_size)
            self.fc3 = nn.Linear(hidden_size, num_classes)
            # nn.Dropout is a module: build it once with the probability and
            # call it on tensors later. Passing the tensor to the constructor
            # is what triggered the "bool value of Tensor" error.
            self.dropout = nn.Dropout(p=p)

        def forward(self, x):
            out = F.relu(self.fc1(x))
            out = F.relu(self.fc2(out))
            out = self.dropout(out)
            return self.fc3(out)

    # Prepare data. Labels are flattened to shape (N,) once, which also keeps
    # a final batch of size one valid for CrossEntropyLoss.
    X_train = torch.from_numpy(data_train_X.values).float()
    Y_train = torch.from_numpy(data_train_Y.values).long().reshape(-1)

    train = torch.utils.data.TensorDataset(X_train, Y_train)
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size)

    net = NeuralNet(input_size, hidden_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimiser = torch.optim.SGD(net.parameters(), lr=learning_rate)

    total_step = len(train_loader)
    loss_values = []
    # NOTE(review): "+ 1" is kept from the original, so the default performs
    # 121 passes — confirm whether that is intended.
    for epoch in range(num_epochs + 1):
        net.train()
        train_loss = 0.0
        for predictors, results in train_loader:
            outputs = net(predictors)
            loss = criterion(outputs, results)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

            train_loss += loss.item()
        # Mean of the per-batch losses: divide by the number of batches, not
        # by the batch size.
        loss_values.append(train_loss / max(total_step, 1))

    print('Finished Training')
    return net
And when I call the function:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
The error is the following:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/dropout.py in __init__(self, p, inplace)
8 def __init__(self, p=0.5, inplace=False):
9 super(_DropoutNd, self).__init__()
---> 10 if p < 0 or p > 1:
11 raise ValueError("dropout probability has to be between 0 and 1, "
12 "but got {}".format(p))
RuntimeError: bool value of Tensor with more than one value is ambiguous
Why do you think there is an error?
Before adding the dropout rate, everything was working. Additional points for you if you know how to
implement a bias inside my network, for example on the hidden layer! I can't find any example online.
Change your architecture for this:
class NeuralNet(nn.Module):
    """Two hidden ReLU layers, dropout, then a linear classification layer.

    ``p`` defaults to the ``dropout`` variable of the enclosing scope, so this
    class is meant to be defined where that name exists.
    """

    def __init__(self, input_size, hidden_size, num_classes, p=dropout):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        # Build the dropout module once; it is called on tensors in forward.
        self.dropout = nn.Dropout(p=p)

    def forward(self, x):
        """Return the class scores for the batch ``x``."""
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        # Drop hidden activations, not the output: dropout after fc3 would
        # zero (and rescale) the class scores themselves during training.
        out = self.dropout(out)
        out = self.fc3(out)
        return out
Let me know if it works.

Understanding the code in pyTorch

I am having problems with understanding the following part of the code from ResNet architecture. The full code is available at https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/deep_residual_network/main-gpu.py . I am not very familiar with Python.
# Residual Block
class ResidualBlock(nn.Module):
    """Two conv-batchnorm stages with a skip connection added before the final ReLU."""

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        """Build the block.

        Args:
            in_channels: Channels of the incoming feature map.
            out_channels: Channels produced by both convolutions.
            stride: Stride of the first convolution.
            downsample: Optional module applied to the input so that it can be
                added to the block's output; ``None`` uses the input as is.
        """
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Test for presence explicitly; truth-testing a module depends on
        # whether it happens to define __len__.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
# ResNet Module
class ResNet(nn.Module):
    """Three-stage residual network that is generic over the block type."""

    def __init__(self, block, layers, num_classes=10):
        """Build the network.

        Args:
            block: Block class, called as
                ``block(in_channels, out_channels, stride, downsample)``.
            layers: Sequence of three block counts, one per stage.
            num_classes: Width of the final linear layer.
        """
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Each stage reads its own entry of ``layers``. The indices used to be
        # 0, 0, 1, which ignored layers[2]; with equal counts such as
        # [3, 3, 3] the resulting network is the same.
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        # NOTE(review): an 8x8 pool followed by Linear(64, ...) presumably
        # expects an 8x8 feature map here (e.g. 32x32 inputs) — confirm.
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Return a Sequential of ``blocks`` blocks producing ``out_channels`` channels.

        Only the first block changes stride and channel count; it receives a
        conv + batch-norm shortcut whenever the stride is not 1 or the channel
        count changes. ``self.in_channels`` is updated to ``out_channels``.
        """
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        layers.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


resnet = ResNet(ResidualBlock, [3, 3, 3])
My main question is: why should we pass 'block' every time? In the function
def make_layer(self, block, out_channels, blocks, stride=1):
instead of passing 'block', why can't we create an instance of 'ResidualBlock' and append it to layers as follows?
block = ResidualBlock(self.in_channels, out_channels, stride, downsample)
layers.append(block)
The ResNet module is designed to be generic, so that it can create networks with arbitrary blocks. So, if you do not pass the block which you want to create you'll have to write the name of the block explicitly like below.
# Residual Block
class ResidualBlock(nn.Module):
    """Two conv-batchnorm stages with a skip connection added before the final ReLU."""

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        """Build the block.

        Args:
            in_channels: Channels of the incoming feature map.
            out_channels: Channels produced by both convolutions.
            stride: Stride of the first convolution.
            downsample: Optional module applied to the input so that it can be
                added to the block's output; ``None`` uses the input as is.
        """
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Test for presence explicitly; truth-testing a module depends on
        # whether it happens to define __len__.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
# ResNet Module
class ResNet(nn.Module):
    """Three-stage residual network hard-wired to ``ResidualBlock``."""

    def __init__(self, layers, num_classes=10):
        """Build the network.

        Args:
            layers: Sequence of three block counts, one per stage.
            num_classes: Width of the final linear layer.
        """
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Each stage reads its own entry of ``layers``. The indices used to be
        # 0, 0, 1, which ignored layers[2]; with equal counts such as
        # [3, 3, 3] the resulting network is the same.
        self.layer1 = self.make_layer(16, layers[0])
        self.layer2 = self.make_layer(32, layers[1], 2)
        self.layer3 = self.make_layer(64, layers[2], 2)
        # NOTE(review): an 8x8 pool followed by Linear(64, ...) presumably
        # expects an 8x8 feature map here (e.g. 32x32 inputs) — confirm.
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, out_channels, blocks, stride=1):
        """Return a Sequential of ``blocks`` ResidualBlocks producing ``out_channels`` channels.

        Only the first block changes stride and channel count; it receives a
        conv + batch-norm shortcut whenever the stride is not 1 or the channel
        count changes. ``self.in_channels`` is updated to ``out_channels``.
        """
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        # The block type is named explicitly here instead of being passed in.
        layers = [ResidualBlock(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        layers.extend(ResidualBlock(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


resnet = ResNet([3, 3, 3])
This reduces the capability of your ResNet module and binds it with only the ResidualBlock. Now, if you create some other type of block (say ResidualBlock2), you will need to create another Resnet2 module specifically for that. So, it's better to create a generic ResNet module which takes in the block parameter, so that it can be used with different types of blocks.
A trivial python example to clarify
Suppose you want to create a function that can apply a mathematical operation on a list and returns its output. So, you might create something like below
def exp(inp_list):
    """Return a new list holding ``math.exp`` of every number in ``inp_list``."""
    return [math.exp(num) for num in inp_list]
def floor(inp_list):
    """Return a new list holding ``math.floor`` of every number in ``inp_list``."""
    return [math.floor(num) for num in inp_list]
Here, we are doing an exponent and a floor operation on some input list. But, we can do a better job by defining a generic function to do the same as
def apply_func(fn, inp_list):
    """Return a new list holding ``fn(item)`` for every item of ``inp_list``, in order."""
    return [fn(num) for num in inp_list]
and now call apply_func as apply_func(math.exp, inp_list) for the exponential and as apply_func(math.floor, inp_list) for the floor function. This also opens up the possibility of applying any kind of operation.
Note: It's not a practical example, as you can always use map or a list comprehension to achieve the same thing. But it demonstrates the idea clearly.

How to do fully connected batch norm in PyTorch?

torch.nn has classes BatchNorm1d, BatchNorm2d, BatchNorm3d, but it doesn't have a fully connected BatchNorm class? What is the standard way of doing normal Batch Norm in PyTorch?
Ok. I figured it out. BatchNorm1d can also handle Rank-2 tensors, thus it is possible to use BatchNorm1d for the normal fully-connected case.
So for example:
import torch.nn as nn
import torch.nn.functional as F  # forward() uses F.relu


class Policy(nn.Module):
    """Three-layer MLP with BatchNorm1d applied after each hidden ReLU."""

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        """Build the network.

        Args:
            num_inputs: Feature size of the input.
            action_space: Number of outputs.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
        """
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space
        self.linear1 = nn.Linear(num_inputs, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        # BatchNorm1d accepts rank-2 (batch, features) input, so it serves as
        # the batch norm for fully connected layers.
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Return the ``action_space`` outputs for a (batch, num_inputs) tensor."""
        x = inputs
        x = self.bn1(F.relu(self.linear1(x)))
        x = self.bn2(F.relu(self.linear2(x)))
        out = self.linear3(x)
        return out
The BatchNorm1d normally comes before the ReLU, and the bias is redundant, so
import torch.nn as nn
import torch.nn.functional as F  # forward() uses F.relu


class Policy(nn.Module):
    """Three-layer MLP with BatchNorm1d applied before each hidden ReLU."""

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        """Build the network.

        Args:
            num_inputs: Feature size of the input.
            action_space: Number of outputs.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
        """
        # The zero-argument form avoids naming the class; the previous call
        # referred to "Policy2", which does not exist.
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space
        # The layers feeding a batch norm carry no bias: the norm's own shift
        # parameter makes it redundant.
        self.linear1 = nn.Linear(num_inputs, hidden_size1, bias=False)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Return the ``action_space`` outputs for a (batch, num_inputs) tensor."""
        x = inputs
        x = F.relu(self.bn1(self.linear1(x)))
        x = F.relu(self.bn2(self.linear2(x)))
        out = self.linear3(x)
        return out

Categories