LSTM + Linear layer Encoder/Decoder always predict blank value

LSTM + Linear layer Encoder/Decoder always predict blank value - python

I am trying to use Seq2Seq to covert analog signal to discrete time series.
The raw timeseries signal contains firing wave sound, echo from marker(in short marker), and end of the tube.
The current progress of my Encoder/Decoder + Linear layer + MSEloss() is blank prediction. It does not convert the bumps in to discrete signal.
0 = blank
1 = end of tube(wall)
2 = noise echo
3 = echo from marker
4 = firing kick
class PLMixin:
def configure_optimizers(self) -> torch.optim.Adam:
return torch.optim.Adam(self.parameters(), lr=1e-3)
# return torch.optim.SGD(self.parameters(), lr=1e-3)
# return torch.optim.RMSprop(self.parameters(), lr=1e-3)
def train_loader(self) -> DataLoader:
"""Return train_loader."""
training_dataset = TimeseriesLiquidLevelDataset(
text_files_dir=Path("digitized_dataset/training")
)
training_dataloader = DataLoader(training_dataset, batch_size=1, shuffle=True, num_workers=0)
return training_dataloader
def val_loader(self) -> DataLoader:
"""Return validation loader."""
val_dataset = TimeseriesLiquidLevelDataset(
text_files_dir=Path("digitized_dataset/training")
)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=0)
return val_dataloader
def test_loader(self) -> DataLoader:
"""Return validation loader."""
val_dataset = TimeseriesLiquidLevelDataset(
text_files_dir=Path("digitized_dataset/testing")
)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=0)
return val_dataloader
"""Time series experiment with embedding function. On freeze because it predict variety of token."""
import multiprocessing
import random
import typing as typ
from os.path import exists
from pathlib import Path
import joblib
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning.loggers import TensorBoardLogger
import seq2seq_gadgets as gadgets
from lstm_compare_solution import clean, conclude_correctness
NUM_PROCS = multiprocessing.cpu_count()
seed = 181993
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
device = "cuda" if torch.cuda.is_available() else "cpu"
class Encoder(nn.Module):
"""Simple encoder class."""
def __init__(self, input_size: int, hidden_size: int, num_layers: int):
"""Init the instance."""
super().__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True)
def forward(self, x: torch.Tensor) -> typ.Tuple[torch.Tensor, typ.Tuple[torch.Tensor, torch.Tensor]]:
"""Run forward computing."""
_, (hn, cn) = self.lstm(x)
return _, (hn, cn)
class Decoder(nn.Module):
def __init__(
self,
input_size: int,
hidden_size: int,
output_size: int,
num_layers: int
) -> None:
super().__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True)
# Convert the tensor dimension
self.layers = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(2 * hidden_size, hidden_size), # *2 because of bidirectional=True
nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(hidden_size, output_size),
nn.ReLU(),
)
def forward(self, x: torch.Tensor, cell: typ.Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
"""Run forward computing."""
hn, cn = cell
output, (hn, cn) = self.lstm(x, (hn, cn))
output = self.layers(output)
return output
class EncoderDecoder(gadgets.PLMixin, pl.LightningModule):
"""Lightning of simple LSTM."""
def __init__(self, encoder: Encoder, decoder: Decoder, n_classes: int = 4):
"""Init the instance."""
super().__init__()
self.encoder = encoder
self.decoder = decoder
self.n_classes = n_classes
self._loss = nn.MSELoss()
def forward(
self,
x: torch.Tensor
) -> torch.Tensor:
"""Run training function."""
_, (hidden, cell) = self.encoder.forward(x)
output = self.decoder.forward(x, (hidden, cell))
return output
def training_step(
self,
train_batch: typ.Tuple[torch.Tensor, torch.Tensor],
batch_idx: int
) -> typ.Dict:
"""Training step."""
x, y = train_batch
x_hat = self.forward(x)
loss = self._loss(x_hat, y.float())
self.log("train_loss", loss)
return {"loss": loss}
def validation_step(self, val_batch: typ.Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> typ.Dict:
"""Validate the batch."""
x, y = val_batch
x_hat = self.forward(x)
loss = self._loss(x_hat, y.float())
self.log("train_loss", loss)
analog_file = f"my_val_{batch_idx}.joblib"
predicted_file = f"my_val_{batch_idx}_predicted.joblib"
solution_file = f"my_val_{batch_idx}_solution.joblib"
joblib.dump(x, analog_file)
joblib.dump(x_hat, predicted_file)
joblib.dump(y, solution_file)
return {"val_loss": loss}
def main() -> None:
"""Run main function."""
logger = TensorBoardLogger("lightning_logs", name="digitized")
trainer = pl.Trainer(fast_dev_run=False, max_epochs=5, logger=logger)
encoder_input_size: int = 20000
encoder_hidden_size: int = 1000
encoder_num_layers: int = 1
decoder_n_classes: int = 5
decoder_input_size: int = encoder_input_size
decoder_hidden_size: int = 1000
decoder_output_size: int = encoder_input_size
decoder_num_layers: int = encoder_num_layers
encoder = Encoder(
encoder_input_size, encoder_hidden_size, encoder_num_layers
)
decoder = Decoder(
decoder_input_size,
decoder_hidden_size,
decoder_output_size,
decoder_num_layers,
)
chk_point = Path("./lightning_logs/digitized/version_1/checkpoints/epoch=9-step=570.ckpt")
if exists(chk_point):
model = EncoderDecoder.load_from_checkpoint(chk_point, encoder=encoder, decoder=decoder)
model.eval()
else:
model = EncoderDecoder(encoder, decoder, decoder_n_classes)
trainer.fit(model, train_dataloaders=model.train_loader())
clean()
trainer.validate(model, dataloaders=model.train_loader())
# trainer.test(model, dataloaders=model.test_loader())
# Keep pytorch lightning clean
conclude_correctness("my_val")
# conclude_correctness("my_test")
if __name__ == "__main__":
main()
Question:
Is Encoder/Decoder capable to do analog to discrete conversion?

Related

Why does GraphRNN pack_padded after linear transform?

I am a beginner of machine learning. I'm reading GraphRNN's code now
class GRU_plain(nn.Module):
def init(self, input_size, embedding_size, hidden_size, num_layers, has_input=True, has_output=False, output_size=None):
super(GRU_plain, self).init()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.has_input = has_input
self.has_output = has_output
if has_input:
self.input = nn.Linear(input_size, embedding_size)
self.rnn = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
batch_first=True)
else:
self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
if has_output:
self.output = nn.Sequential(
nn.Linear(hidden_size, embedding_size),
nn.ReLU(),
nn.Linear(embedding_size, output_size)
)
self.relu = nn.ReLU()
# initialize
self.hidden = None # need initialize before forward run
for name, param in self.rnn.named_parameters():
if 'bias' in name:
nn.init.constant(param, 0.25)
elif 'weight' in name:
nn.init.xavier_uniform(param,gain=nn.init.calculate_gain('sigmoid'))
for m in self.modules():
if isinstance(m, nn.Linear):
m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
def init_hidden(self, batch_size):
return Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size)).cuda()
def forward(self, input_raw, pack=False, input_len=None):
if self.has_input:
input = self.input(input_raw)
input = self.relu(input)
else:
input = input_raw
if pack:
input = pack_padded_sequence(input, input_len, batch_first=True)
output_raw, self.hidden = self.rnn(input, self.hidden)
if pack:
output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]
if self.has_output:
output_raw = self.output(output_raw)
# return hidden state at each time step
return output_raw
At forward, why does GraphRNN pack_padded_sequence after linear transform by self.input?
Doesn't the non-zero raw get the information of padded 0?
I think it may be convenient for coding, which has little effect on the result.
But I can't find a rnn example doing the same thing.

making GRU/LSTM states trainable in Tensorflow/Keras and add some random noise

I train the following model based on GRU, note that I am passing the argument stateful=True to the GRU builder.
class LearningToSurpriseModel(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, rnn_units):
super().__init__(self)
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(rnn_units,
stateful=True,
return_sequences=True,
return_state=True,
reset_after=True
)
self.dense = tf.keras.layers.Dense(vocab_size)
def call(self, inputs, states=None, return_state=False, training=False):
x = inputs
x = self.embedding(x, training=training)
if states is None:
states = self.gru.get_initial_state(x)
x, states = self.gru(x, initial_state=states, training=training)
x = self.dense(x, training=training)
if return_state:
return x, states
else:
return x
#tf.function
def train_step(self, inputs):
[defining here my training step]
I instantiate my model
model = LearningToSurpriseModel(
vocab_size=len(ids_from_chars.get_vocabulary()),
embedding_dim=embedding_dim,
rnn_units=rnn_units
)
[compile and do stuff]
the custom callback below reset states manually at the end of each epoch.
gru_layer = model.layers[1]
class CustomCallback(tf.keras.callbacks.Callback):
def __init__(self, gru_layer):
self.gru_layer = gru_layer
def on_epoch_end(self, epoch, logs=None):
self.gru_layer.reset_states()
model.fit(train_dataset, validation_data=validation_dataset, \
epochs=EPOCHS, callbacks = [EarlyS, CustomCallback(gru_layer)], verbose=1)
States will be reset to zero. I would like to follow ideas in https://r2rt.com/non-zero-initial-states-for-recurrent-neural-networks.html to make states trainable. Implementation in this post seems based on tensorflow, and overwrites native functions, maybe there is a more elegant way in Keras.
(1) how do I make states trainable ?
(2) how do I combine trainable states and random initialization ?

Based on the very good answer above, the full code for solving the case
class CustomGRULayer(tf.keras.layers.Layer):
def __init__(self, rnn_units, batch_size):
super(CustomGRULayer, self).__init__()
self.rnn_units = rnn_units
self.batch_size = batch_size
self.gru = tf.keras.layers.GRU(self.rnn_units,
stateful=True,
return_sequences=True,
return_state=True,
reset_after=True,
)
self.w=None
def build(self, input_shape):
w_init = tf.random_normal_initializer(mean=0.0, stddev=0.2)
self.w = tf.Variable(
initial_value=w_init(shape=(self.batch_size, self.rnn_units),
dtype='float32'), trainable=True)
def call(self, inputs):
return self.gru(inputs, initial_state = self.w)
class LearningToSurpriseModel(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, rnn_units, batch_size):
super().__init__(self)
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru_layer = CustomGRULayer(rnn_units = rnn_units, batch_size = batch_size)
self.dense = tf.keras.layers.Dense(vocab_size)
def call(self, inputs, states=None, return_state=False, training=False):
x = inputs
x = self.embedding(x, training=training)
if states is None:
states = self.gru_layer.gru.get_initial_state(x)
x, states = self.gru_layer.gru(x, initial_state=states, training=training)
x = self.dense(x, training=training)
if return_state:
return x, states
else:
return x
model = LearningToSurpriseModel(
vocab_size=len(ids_from_chars.get_vocabulary()),
embedding_dim=embedding_dim,
rnn_units=rnn_units,
batch_size=BATCH_SIZE
)
model.compile(optimizer='adam', loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[
tf.keras.metrics.SparseCategoricalAccuracy()]
)
EarlyS = EarlyStopping(monitor = 'val_loss', mode = 'min', restore_best_weights=True, patience=10, verbose = 1)
# defining a custom callback for resetting states at the end of period only
gru_layer = model.layers[1]
class CustomCallback(tf.keras.callbacks.Callback):
def __init__(self, gru_layer):
self.gru_layer = gru_layer
def on_epoch_end(self, epoch, logs=None):
self.gru_layer.gru.reset_states(self.gru_layer.w)
model.fit(train_dataset, validation_data=validation_dataset, epochs=EPOCHS, callbacks = [EarlyS, CustomCallback(gru_layer)], verbose=1)

Using captum with nn.Embedding getting RuntimeError

I am using captum library and getting following error. Here is the complete code to reproduce the error.
RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vocab_size = 1024
embedding_dim = 32
seq_len = 128
num_classes = 5
hidden_dim = 256
class predictor(nn.Module):
def __init__(self):
super().__init__()
self.seq_len = seq_len
self.num_classes = num_classes
self.hidden_dim = hidden_dim
self.vocab_size, self.embedding_dim = vocab_size, embedding_dim
self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
self.linear = nn.Linear(self.seq_len*self.embedding_dim, self.num_classes)
def forward(self, x):
x = self.embedding(x.long())
x = x.reshape(-1, self.seq_len*self.embedding_dim)
x = F.relu(self.linear(x))
return x
class wrapper_predictor(nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
def forward(self, x):
x = self.model(x)
x = F.softmax(x, dim=1)
return x
indexes = torch.Tensor(np.random.randint(0, vocab_size, (seq_len))).to(device)
model = predictor().to(device)
wrapper_model = wrapper_predictor(model).to(device)
ig = IntegratedGradients(wrapper_model)
attributions, delta = ig.attribute(inputs=indexes, target=0, n_steps=1, return_convergence_delta=True)

I resolved the issue with LayerIntegratedGradients.
Here is the link to read more to know other possible solutions. https://captum.ai/tutorials/IMDB_TorchText_Interpret
This is using an instance of LayerIntegratedGradients using forward function of model and the embedding layer as the example given in the link.
Here is sample code which using LayerIntegratedGradients with nn.Embedding
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients, LayerIntegratedGradients
from torchsummary import summary
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vocab_size = 1024
embedding_dim = 1
seq_len = 128
num_classes = 5
hidden_dim = 256
class predictor(nn.Module):
def __init__(self):
super(predictor, self).__init__()
self.seq_len = seq_len
self.num_classes = num_classes
self.hidden_dim = hidden_dim
self.vocab_size, self.embedding_dim = vocab_size, embedding_dim
self.embedding = nn.Sequential(
nn.Embedding(self.vocab_size, self.embedding_dim),
)
self.embedding.weight = torch.randn((self.vocab_size, self.embedding_dim), requires_grad=True)
self.fc = nn.Sequential(
nn.Linear(self.seq_len*self.embedding_dim, self.hidden_dim, device=device, bias=False),
nn.Linear(self.hidden_dim, self.num_classes, device=device, bias=False),
)
def forward(self, x):
x = self.embedding(x.long())
x = x.view(-1, self.seq_len*self.embedding_dim)
x = self.fc(x)
return x
class wrapper_predictor(nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
def forward(self, x):
x = self.model(x)
x = F.softmax(x, dim=1) #keep softmax out of forward if attribution score is too low.
return x
model = predictor().to(device)
indexes = torch.Tensor(np.random.randint(0, vocab_size, (seq_len))).to(device)
input_size = indexes.shape
summary(model=model, input_size=input_size, batch_size=-1, device='cuda')
wrapper_model = wrapper_predictor(model).to(device)
lig = LayerIntegratedGradients(model, model.embedding)
attributions, delta = lig.attribute(inputs=indexes, target=0, n_steps=1, return_convergence_delta=True)

Get tensor element as int

I am writing a custom model with tf.keras and in a recurrent node I need to get the value of my 1D input as an int.
That recurrent node needs to build a 1xN tensor which elements are the result of N iterations of a function f(x).
So I created a numpy array of size N which is filled by one element at each iteration, then I convert the numpy array to a tensor.
The problem is I can't get the value of my 1D tensor as an int.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
def f(x):
return 3*x
class myLayer(layers.Layer):
def __init__(self, units=1, input_dim=1):
super(myLayer, self).__init__()
self.units = units
w_init = tf.random_normal_initializer()
self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
dtype='float32'),
trainable=False)
def build(self, input_shape):
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer="random_normal",
trainable=False,
)
####### IMPORTANT PART HERE ######
def call(self, inputs):
# In this example N = 20
# Define numpy array
x = np.zeros(20)
# Set its first value to my 1D input # ERROR HERE
x[0] = inputs[0]
# Assign the other element of x
for i in range(1,20):
x[i] = f(x[i-1])
# Cast to tensor
return tf.constant(x, shape=(1,20))
class Linear(layers.Layer):
def __init__(self, units=1, input_dim=20):
super(Linear, self).__init__()
self.units = units
w_init = tf.random_normal_initializer()
self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
dtype='float32'),
trainable=True)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(units,),
dtype='float32'),
trainable=True)
def build(self, input_shape):
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer="random_normal",
trainable=True,
)
def call(self, inputs):
return tf.matmul(inputs, self.w) + self.b
def build_model():
model = tf.keras.Sequential([
myLayer(),
Linear()
])
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
#model.build([1])
return model
class PrintDot(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs):
if epoch % 100 == 0: print(epoch)
print('.', end='')
train_X = np.linspace(0,99, num=100)
train_y = 2*train_X
train_X = train_X / np.linalg.norm(train_X)
model = build_model()
#print(model.summary())
epochs = 10
history = model.fit(train_X, train_y, epochs=epochs, validation_split=0.2, verbose=0, callbacks=[PrintDot()])
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
# x = tf.ones((1,1)) * 0.21
# horse_layer = Horseshoe()
# y = horse_layer(x)
# print(y)

Debugging neural network dropout problem for the probability not lying inside [0,1]

I tried to put a droprate to my neural network (NN) using torch and I got a strange error at the end. How can I fix it?
So the idea is that I wrote a NN inside a function to make it easier to call. The function is the following:
(I personally think the problem lies inside the class of the NN, but for the sake of having a working example I'm putting everything).
def train_neural_network(data_train_X, data_train_Y, batch_size, learning_rate, graph = True, dropout = 0.0 ):
input_size = len(data_test_X.columns)
hidden_size = 200
num_classes = 4
num_epochs = 120
batch_size = batch_size
learning_rate = learning_rate
# The class of NN
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes, p = dropout):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, num_classes)
def forward(self, x, p = dropout):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
out = nn.Dropout(out, p) #drop
out = self.fc3(out)
return out
# Prepare data
X_train = torch.from_numpy(data_train_X.values).float()
Y_train = torch.from_numpy(data_train_Y.values).float()
# Loading data
train = torch.utils.data.TensorDataset(X_train, Y_train)
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size)
net = NeuralNet(input_size, hidden_size, num_classes)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimiser
optimiser = torch.optim.SGD(net.parameters(), lr=learning_rate)
# Proper training
total_step = len(train_loader)
loss_values = []
for epoch in range(num_epochs+1):
net.train()
train_loss = 0.0
for i, (predictors, results) in enumerate(train_loader, 0):
# Forward pass
outputs = net(predictors)
results = results.long()
results = results.squeeze_()
loss = criterion(outputs, results)
# Backward and optimise
optimiser.zero_grad()
loss.backward()
optimiser.step()
# Update loss
train_loss += loss.item()
loss_values.append(train_loss / batch_size )
print('Finished Training')
return net
And when I call the function:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
The error is the following:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/dropout.py in __init__(self, p, inplace)
8 def __init__(self, p=0.5, inplace=False):
9 super(_DropoutNd, self).__init__()
---> 10 if p < 0 or p > 1:
11 raise ValueError("dropout probability has to be between 0 and 1, "
12 "but got {}".format(p))
RuntimeError: bool value of Tensor with more than one value is ambiguous
Why do you think there is an error?
Before putting the droprate, everything was working. Additional points for you if you know how to
implement a bias inside my network! For example, on the hidden layer. I can't find any example online.

Change your architecture for this:
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes, p=dropout):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, num_classes)
self.dropout = nn.Dropout(p=p)
def forward(self, x):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
out = self.dropout(self.fc3(out))
return out
Let me know if it works.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

LSTM + Linear layer Encoder/Decoder always predict blank value - python

Related

Why does GraphRNN pack_padded after linear transform?

making GRU/LSTM states trainable in Tensorflow/Keras and add some random noise

Using captum with nn.Embedding getting RuntimeError

Get tensor element as int

Debugging neural network dropout problem for the probability not lying inside [0,1]

Categories

Resources