Moving tensorflow 1D convolution code to pytorch - python

I am completely new to PyTorch, I would like to move my TF code to PyTorch, and I think I am missing something.
I have X as input and Y as output. X is a time series data, on which I would like to do 1D convolution. Y is just a plain number.
X has a shape of (1050589, 81, 21). I have 1050589 experiments, each experiment has 81 timestamps and each timestamp has 21 points of data. This is the required format for TF, but as far as I was able to get out in PyTorch the time dimension should be the last one.
I have my data in a numpy array, so first I transformed the data to fit PyTorch, and also transformed into a list.
# Pair every experiment with its target. Each sample is transposed so the
# time axis comes last (81 timestamps x 21 points -> 21 x 81).
a = [[sample.T, Y[n]] for n, sample in enumerate(X)]
train_data = DataLoader(a, batch_size=128)
My model looks like this:
class NeuralNetwork(nn.Module):
    """Conv1d -> ReLU -> Flatten -> Linear -> ReLU -> Linear regressor.

    The convolution reads ``EMBED_SIZE`` input channels and emits 32 channels
    with a kernel of 7 and 'same' padding; the first dense layer therefore
    expects 81 * 32 flattened features.
    """

    def __init__(self):
        super().__init__()
        conv = nn.Conv1d(EMBED_SIZE, 32, 7, padding='same')
        hidden = nn.Linear(81*32, 32)
        head = nn.Linear(32, 1)
        self.linear_relu_stack = nn.Sequential(
            conv,
            nn.ReLU(),
            nn.Flatten(),
            hidden,
            nn.ReLU(),
            head,
        )

    def forward(self, x):
        """Run the stack on ``x`` and return the result cast to float64."""
        return self.linear_relu_stack(x).double()
The architecture is simple, as I want to keep it the same as I have in Tensorflow. One convolution with a kernel of 7 and 32 channels, followed by a dense layer and a single output layer.
Same network in Tensorflow:
def conv_1d_model():
    """Build the Keras reference network: Conv1D -> Flatten -> Dense(32) -> Dense(1).

    Returns:
        An uncompiled ``Sequential`` model named "model_conv1D" whose input
        shape is (81, 21).
    """
    layers = [
        Conv1D(filters=32, kernel_size=7, activation='relu', input_shape=(81, 21), padding="same"),
        Flatten(),
        Dense(32, activation='relu'),
        Dense(1),
    ]
    return Sequential(layers, name="model_conv1D")
Now when I try to optimize this network in PyTorch my losses are all over the place, not decreasing at all, while in TensorFlow it runs perfectly well.
I am sure I am missing something, can anyone point me in the right direction?
My optimization function in PyTorch:
# Build the network, a mean-squared-error criterion, and an Adam optimizer
# over all of the network's parameters (learning_rate is defined elsewhere).
model = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; must expose ``.dataset``.
        model: Module whose output has a trailing dimension of size 1.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer bound to ``model``'s parameters.

    Prints the loss and the number of samples seen every 10 batches.
    """
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Drop only the trailing output dimension. A bare torch.squeeze()
        # also removes the batch dimension when a batch holds one sample
        # (e.g. the final partial batch), leaving a 0-d prediction whose
        # shape no longer matches y.
        pred = model(X).squeeze(-1)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
Optimization in Tensorflow
# Keras counterpart: compile with MSE loss (tracking MAE) and fit on the raw
# arrays, holding out the last 20% of the data for validation.
model = conv_1d_model()
opt = Adam(learning_rate=learning_rate)
model.compile(loss='mse', optimizer=opt, metrics=['mae'])
model_history = model.fit(X, Y, validation_split=0.2, epochs=epochs, batch_size=batch_size, verbose=1)

Related

How do I fix this size of tensor error for my NN classifier PyTorch

I'm having trouble understanding why this is throwing an error. This code is pulled directly from the PyTorch documentation for a NN classifier for the fashion MNIST dataset. However when I try to flip this to the MNIST handwritten digits data set it comes up with the following error:
RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1
This occurs when using the loss function during the training loop function. Can anyone help me understand why this is happening. Thanks!
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import torchvision.models as models
import matplotlib.pyplot as plt
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
print(f"Using {device} device")
# MNIST training split, downloaded into ./data and converted to tensors.
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
# MNIST test split with the same storage location and transform.
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
# Batches of 64 samples for both splits.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 512 -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the 10 raw logits."""
        return self.linear_relu_stack(self.flatten(x))
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch, logging the loss every 100 batches.

    Each batch is moved to the module-level ``device`` before the forward
    pass; the optimizer is stepped once per batch.
    """
    size = len(dataloader.dataset)
    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss.
        loss = loss_fn(model(inputs), targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        loss_value, current = loss.item(), step * len(inputs)
        print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Accuracy is the fraction of samples whose arg-max prediction equals the
    target; the loss is averaged over batches. Gradients are disabled.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    loss_total = 0
    hits_total = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            loss_total += loss_fn(logits, targets).item()
            hits = (logits.argmax(1) == targets).type(torch.float)
            hits_total += hits.sum().item()

    test_loss = loss_total / n_batches
    correct = hits_total / n_samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
def save_checkpoint(state, filename="checkpoint.pth.tar"):
    """Announce the save on stdout, then serialize ``state`` to ``filename``."""
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)
# Hyperparameters (batch_size is not read by the statements in this block).
learning_rate, batch_size, epochs = 1e-3, 64, 10

model = NeuralNetwork().to(device)

# Initialize the loss function and optimizer.
# MSELoss needs predictions and targets of the same shape; the model emits
# 10 values per sample while the dataset yields one integer label per sample.
loss_fn = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimiser)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
torch.nn.MSELoss is an implementation of mean squared error. You can't measure the difference between two tensors if they're different sizes (MSELoss does not allow for broadcasting). So if you're using MSELoss, then the predictions and the targets must be the same shape. In your case, pred is a tensor of shape [64, 10], and y is a tensor of shape [64].
The reason y is of shape [64] rather than [64, 10] is that most classification dataset implementations represent targets as integer labels rather than one-hot encoded vectors. In theory, you could convert these integer label targets to one-hot encoded targets.
But in reality, since this is a classification problem, you should probably be using something like nn.CrossEntropyLoss rather than nn.MSELoss. The former is a conventional classification loss function, and it allows the targets to be integer labels rather than one-hot labels (so just swapping out MSELoss for CrossEntropyLoss should solve your problem). MSELoss is better suited for regression tasks and such.

How to reset the state of an LSTM RNN after each epoch within Keras?

I have defined a stateful LSTM RNN, and I want to reset the state of the RNN after each epoch. I have found that one way to do this would be:
n_epochs = 50
for _ in range(n_epochs):
    # Train exactly one epoch per call so the recurrent state can be
    # cleared before the next epoch starts.
    lstm.fit(X, y, epochs=1, batch_size=64)
    lstm.reset_states()
Is there any other more elegant way to implement this in the model specification or when training that is supported by Keras?
You should be able to solve this with a Keras callback, which is probably a bit more elegant:
import tensorflow as tf
class CustomCallback(tf.keras.callbacks.Callback):
    """Keras callback that resets the LSTM layer's state at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # lstm_layer is a module-level name bound to the model's LSTM layer
        # after the model has been built.
        lstm_layer.reset_states()
# Functional model with a stateful LSTM; the batch size (10) is fixed in
# batch_shape and matched by batch_size in fit() below.
inputs = tf.keras.layers.Input(batch_shape = (10, 5, 2))
x = tf.keras.layers.LSTM(10, stateful=True)(inputs)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)
model = tf.keras.Model(inputs, outputs)
# Keep a handle on the LSTM layer (index 1, after the input layer) so the
# callback can reset it.
lstm_layer = model.layers[1]
model.compile(optimizer='adam', loss='mse')
# Random data: 200 sequences of 5 steps with 2 features, one target each.
x = tf.random.normal((200, 5, 2))
y = tf.random.normal((200, 1))
model.fit(x, y, epochs=5, callbacks=[CustomCallback()], batch_size=10)
For experiments only: when running for multiple steps, you can also set all input values in the batch back to 0 for enough steps (a long enough sequence, or the same length as the input); this resets all of the LSTM's memories.
That is the behavior of an LSTM: it is sensitive to its input because it contains comparison units and summation units.

Custom loss function with Gradient Tape, TF2.6

I am trying to use a custom loss function in my Keras sequential model (TensorFlow 2.6.0). This custom loss (ideally) will calculate the data loss plus the residual of a physical equation (say, diffusion equation, Navier Stokes, etc.). This residual error is based on the model output derivative wrt its inputs and I want to use GradientTape.
In this MWE, I removed the data loss term and other equation losses, and just used the derivative of the output wrt its input. The dataset can be found here.
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf #tf.__version__ = '2.6.0'
# Load the CSV dataset into a 2-D float array.
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=',')
# Split into input features (first 8 columns) and the target (column 8).
X = dataset[:,0:8] #X.shape = (768, 8)
y = dataset[:,8]
# Convert both to float32 tensors.
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)
def customLoss(y_true,y_pred):
    """Mean squared derivative of the model output w.r.t. input column 5.

    ``y_true`` and ``y_pred`` are accepted for the Keras loss signature but
    are not used; the loss re-evaluates the module-level ``model`` on
    ``model.input`` under a gradient tape.
    """
    x_tensor = tf.convert_to_tensor(model.input, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x_tensor)
        output = model(x_tensor)
    input_grads = tape.gradient(output, x_tensor)
    # Residual term: derivative with respect to the sixth input feature only.
    residual = input_grads[:, 5:6]
    return tf.reduce_mean(tf.square(residual))
# Three hidden ReLU layers of 12 units on 8 input features, sigmoid output.
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Train with the custom derivative-based loss defined above.
model.compile(loss=customLoss, optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=15)
After execution, I get this ValueError:
ValueError: Passed in object of type <class 'keras.engine.keras_tensor.KerasTensor'>, not tf.Tensor
When I change loss=customLoss to loss='mse', the model starts training, but using that customLoss is the whole point. Any ideas?
The problem seems to come from model.input in the loss function. If I understand your code correctly, you can use this loss:
def custom_loss_pass(model, x_tensor):
    """Return a Keras-compatible loss closed over ``model`` and ``x_tensor``.

    The returned function ignores ``y_true`` / ``y_pred`` and yields the mean
    squared derivative of ``model(x_tensor)`` with respect to column 5 of
    ``x_tensor``.
    """

    def custom_loss(y_true,y_pred):
        with tf.GradientTape() as tape:
            tape.watch(x_tensor)
            output = model(x_tensor)
        input_grads = tape.gradient(output, x_tensor)
        residual = input_grads[:, 5:6]
        return tf.reduce_mean(tf.square(residual))

    return custom_loss
And then:
# Bind the model and the full input tensor X into the loss before compiling.
model.compile(loss=custom_loss_pass(model, X), optimizer='adam', metrics=['accuracy'])
I am not sure it does what you want but at least it works!

The application of self-attention layer raised index error

So I am doing a classification machine learning with the input of (batch, step, features).
In order to improve the accuracy of this model, I intended to apply a self-attention layer to it.
I am unfamiliar with how to use it for my case since most examples online are concerned with embedding NLP models.
def opt_select(optimizer):
    """Return a configured Keras optimizer for the given name.

    Args:
        optimizer: 'Adam' or 'RMS'.

    Returns:
        The optimizer instance, or None (after printing a notice) when the
        name is not recognised. The step size is read from the module-level
        ``learning_rate``.
    """
    # `learning_rate=` is the supported keyword; `lr=` is a deprecated alias.
    if optimizer == 'Adam':
        return tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    if optimizer == 'RMS':
        return tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, epsilon=1e-6)
    print('undefined optimizer')
    return None
def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss,
                         batch_size=68, units=128, learning_rate=0.005,
                         epochs=20, dropout=0.2, recurrent_dropout=0.2,
                         optimizer='Adam'):
    """Build, train and evaluate a masked BiLSTM + self-attention classifier.

    Args:
        X_train, y_train: Training inputs (batch, step, features) and labels.
        X_test, y_test: Held-out data used for validation and evaluation.
        num_classes: Width of the softmax output layer.
        loss: Loss passed to ``model.compile``.
        batch_size, epochs: Training schedule.
        units, dropout, recurrent_dropout: LSTM configuration.
        learning_rate: Accepted for interface compatibility; ``opt_select``
            reads the module-level ``learning_rate`` instead.
        optimizer: Name understood by ``opt_select``.

    Returns:
        ``(history, yhat)``: the Keras training history and the predictions
        for ``X_test``.
    """

    class myCallback(tf.keras.callbacks.Callback):
        """Stop training once the training accuracy exceeds 90%."""

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            # The metric key is 'accuracy' or 'acc' depending on the Keras
            # version; skip the check when neither is reported.
            acc = logs.get('accuracy', logs.get('acc'))
            if acc is not None and acc > 0.90:
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()
    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    # NOTE: the LSTM is created without return_sequences=True, so it emits
    # only its final features rather than the per-step sequence.
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))
    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])
    # Evaluation is run for its console report; the values are not returned.
    model.evaluate(X_test, y_test,
                   batch_size=batch_size)
    yhat = model.predict(X_test)
    return history, yhat
This led to IndexError: list index out of range
What is the correct way to apply this layer to my model?
As requested, one may use the following codes to simulate a set of the dataset.
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout,Bidirectional,Masking,LSTM
from keras_self_attention import SeqSelfAttention
# Simulated data: 700 training and 100 test sequences of 50 steps x 34
# features, with binary labels.
# NOTE(review): `np` is not imported by the import lines of this snippet.
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], 700)
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], 100)
# Training configuration. `dropout` is not passed to the call below, which
# uses a literal 0.5 instead.
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
# NOTE(review): y_train is created 1-D above, so shape[1] does not exist.
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(X_train,y_train,X_test,y_test,loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer)
Here is how I would rewrite the code -
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM, Reshape
from keras_self_attention import SeqSelfAttention
import numpy as np
def opt_select(optimizer):
    """Return a configured Keras optimizer for the given name.

    Args:
        optimizer: 'Adam' or 'RMS'.

    Returns:
        The optimizer instance, or None (after printing a notice) when the
        name is not recognised. The step size is read from the module-level
        ``learning_rate``.
    """
    # `learning_rate=` is the supported keyword; `lr=` is a deprecated alias.
    if optimizer == 'Adam':
        return tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    if optimizer == 'RMS':
        return tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, epsilon=1e-6)
    print('undefined optimizer')
    return None
def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    """Build, train and evaluate a masked BiLSTM + self-attention classifier.

    The LSTM returns its full sequence so the attention layer receives 3-D
    input; the attention output is then reshaped so each sample's step and
    feature axes are merged before the softmax layer.

    Args:
        X_train, y_train: Training inputs (batch, step, features) and labels.
        X_test, y_test: Held-out data used for validation and evaluation.
        num_classes: Width of the softmax output layer.
        loss: Loss passed to ``model.compile``.
        batch_size, epochs: Training schedule.
        units, dropout, recurrent_dropout: LSTM configuration.
        learning_rate: Accepted for interface compatibility; ``opt_select``
            reads the module-level ``learning_rate`` instead.
        optimizer: Name understood by ``opt_select``.

    Returns:
        ``(history, yhat)``: the Keras training history and the predictions
        for ``X_test``.
    """

    class myCallback(tf.keras.callbacks.Callback):
        """Stop training once the training accuracy exceeds 90%."""

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            # Fall back to the older 'acc' key; skip the check when the
            # metric is not reported at all.
            acc = logs.get('accuracy', logs.get('acc'))
            if acc is not None and acc > 0.90:
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()
    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    # Merge the (steps, features) axes of the attention output.
    model.add(Reshape((-1, model.output.shape[1]*model.output.shape[2])))
    model.add(Dense(num_classes, activation='softmax'))
    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])
    # Evaluation is run for its console report; the values are not returned.
    model.evaluate(X_test, y_test,
                   batch_size=batch_size)
    yhat = model.predict(X_test)
    return history, yhat
# Simulated data: 700 training and 100 test sequences of 50 steps x 34
# features; labels are 2-D column vectors of 0/1.
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], (700, 1))
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], (100, 1))
# Training configuration. `dropout` is not passed to the call below, which
# uses a literal 0.5 instead.
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
# y_train has shape (700, 1), so this evaluates to 1.
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(
    X_train,y_train,X_test,y_test,
    loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,
    learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer
)
These are the changes I had to make to get this to start training -
The original issue was caused by the LSTM layer outputting the wrong dimensions. The SeqSelfAttention layer needs a 3D input (one dimension corresponding to the sequence of the data), which was missing from the output of the LSTM layer. As mentioned by @today in the comments, this can be solved by adding return_sequences=True to the LSTM layer.
But even with that modification, the code still gives an error when trying to compute the cost function. The issue is that the output of the self-attention layer is (None, 50, 64); when this is passed directly into the Dense layer, the final output of the network becomes (None, 50, 1). This doesn't make sense for what we are trying to do, because the final output should just contain a single label for each datapoint (it should have the shape (None, 1)). The issue is the output from the self-attention layer, which is 3-dimensional (each data point has a (50, 64) feature vector). This needs to be reshaped into a single-dimensional feature vector for the computation to make sense. So I added a reshape layer model.add(Reshape((-1, ))) between the attention layer and the Dense layer.
In addition, the myCallback class is testing if logs.get('acc') is > 0.9, but I think it should be logs.get('accuracy').
To comment on OP's question in the comments about what kind of column should be added: in this case, it was just a matter of extracting the full sequential data from the LSTM layer. Without the return_sequences flag, the output from the LSTM layer is (None, 64). This is simply the final features of the LSTM, without the intermediate sequential data.

recurrent neural network ValueError: Found array with dim 3. Estimator expected <= 2

I am running an LSTM, GRU and bilstm model using the following code
# Create BiLSTM model
def create_model_bilstm(units):
    """Build and compile a bidirectional-LSTM regressor.

    The LSTM returns its full sequence, followed by a single-unit Dense
    layer. The input shape is taken from the module-level ``X_train``.
    Compiled with MSE loss and the Adam optimizer.
    """
    recurrent = Bidirectional(
        LSTM(units=units, return_sequences=True),
        input_shape=(X_train.shape[1], X_train.shape[2]),
    )
    model = Sequential([recurrent, Dense(1)])
    model.compile(loss='mse', optimizer='adam')
    return model
# Create LSTM or GRU model
def create_model(units, m):
    """Build and compile a single-layer recurrent regressor.

    Args:
        units: Number of recurrent units.
        m: Recurrent layer class to instantiate (called as ``m(units=...)``).

    The recurrent layer returns its full sequence and is followed by 10%
    dropout and a single-unit Dense layer. The input shape is taken from the
    module-level ``X_train``. Compiled with MSE loss and Adam.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    model = Sequential([
        m(units=units, return_sequences=True, input_shape=[timesteps, n_features]),
        Dropout(0.1),
        Dense(units=1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model
# BiLSTM
# Bidirectional LSTM with 20 units.
model_bilstm = create_model_bilstm(20)
# GRU with 50 units and plain LSTM with 20 units, built by the shared factory.
model_gru = create_model(50, GRU)
model_lstm = create_model(20, LSTM)
# Fit BiLSTM, LSTM and GRU
def fit_model(model):
    """Fit ``model`` on the module-level training data and return the history.

    Trains for up to 700 epochs in batches of 32 without shuffling, holding
    out 20% for validation and stopping early once ``val_loss`` has not
    improved for 100 epochs.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=100)
    return model.fit(
        X_train,
        y_train,
        epochs=700,
        validation_split=0.2,
        batch_size=32,
        shuffle=False,
        callbacks=[stopper],
    )
# Train each of the three models and keep its training history.
history_bilstm = fit_model(model_bilstm)
history_lstm = fit_model(model_lstm)
history_gru = fit_model(model_gru)
This all runs smoothly and prints out my loss graphs. but when it comes to predictions i run the following code
# Make prediction
def prediction(model):
    """Predict on the module-level ``X_test`` and undo the target scaling.

    Returns the output of ``scaler_y.inverse_transform`` applied to the raw
    model predictions.
    """
    scaled = model.predict(X_test)
    return scaler_y.inverse_transform(scaled)
# Rescaled test-set predictions for each trained model.
prediction_bilstm = prediction(model_bilstm)
prediction_lstm = prediction(model_lstm)
prediction_gru = prediction(model_gru)
and i get the following error
ValueError Traceback (most recent call last)
<ipython-input-387-9d45f01ae2a2> in <module>
5 return prediction
6
----> 7 prediction_bilstm = prediction(model_bilstm)
8 prediction_lstm = prediction(model_lstm)
9 prediction_gru = prediction(model_gru)
<ipython-input-387-9d45f01ae2a2> in prediction(model)
2 def prediction(model):
3 prediction = model.predict(X_test)
----> 4 prediction = scaler_y.inverse_transform(prediction)
5 return prediction
...
ValueError: Found array with dim 3. Estimator expected <= 2.
I am assuming this has something to do with my X_test shape based on other posts i have read so i tried to reshape it to 2d but got another error telling me "expected bidirectional_3_input to have 3 dimensions, but got array with shape (62, 36)" on line 7 again.
What am i doing wrong and how can i fix it?
Data Explanation:
So I am trying to predict discharge rates (target variable) using groundwater levels (34 features), precipitation and temperature as input which gives me a total of 36 features. My data is in monthly resolution. I am using 63 observation for my test (5 year pred) and the rest for my train.
What are you doing wrong? Let's assume your input data has shape X_train.shape = [d0,d1,d2], then after setting up your BiLSTM-model like
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional,LSTM,Dense
# Built with the names imported above (no `tf` alias is in scope here).
# d1 and d2 stand for X_train.shape[1] and X_train.shape[2].
model = Sequential()
model.add(
    Bidirectional(
        LSTM(
            units=10,
            return_sequences=True),
        input_shape=(d1, d2)
    )
)
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
we can check the input- and output-shapes your model expects by
>>model.input.shape
TensorShape([None, d1, d2])
>>model.output.shape
TensorShape([None, d1, 1])
So your model expects input of shape (n_batch,d1,d2), where n_batch is the batch size of the data, and returns a shape (n_batch,d1,1), thus a 3d-tensor.
Now if you provide a 3d-tensor to your model, the model.predict method will successfully return a 3d-tensor; however, sklearn.preprocessing.StandardScaler.inverse_transform only works for 2d-data, and that's why it says
ValueError: Found array with dim 3. Estimator expected <= 2.
On the other hand, if you first reshape your data to be 2d, then model.predict complains, because the model is set up to expect a 3d-tensor.
How can you fix it? For further help on how to fix your code, you will need to provide us with more detailed information on what you expect your model to do, especially what output shape you want your BiLSTM model to have. I assume you actually want your BiLSTM model to return a scalar for each sample, so an additional Flatten layer might do the trick:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional,LSTM,Dense,Flatten
# Built with the names imported above (no `tf` alias is in scope here).
# d1 and d2 stand for X_train.shape[1] and X_train.shape[2].
model = Sequential()
model.add(
    Bidirectional(
        LSTM(
            units=10,
            return_sequences=True),
        input_shape=(d1, d2)
    )
)
# Collapse the per-timestep outputs so Dense(1) yields one value per sample.
model.add(Flatten()) #<-- additional flatten-layer
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')

Categories