RuntimeError: 1D target tensor expected, multi-target not supported Pytorch - python

I recently shifted to PyTorch from Keras and I am still trying to understand how all of this works. Below is the code I have implemented to classify the MNIST dataset using a simple MLP. Just like I used to do in Keras, I have flattened each 28x28 image into a vector of 784, and I have also created a one-hot representation of my labels.
I was hoping that, given a vector of 784 values, the model would output a one-hot vector of probabilities, but as soon as my code reaches the loss computation I get the following error:
RuntimeError: 1D target tensor expected, multi-target not supported
Below is my code:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
from torch import nn, optim
from keras.datasets import mnist
from torch.utils.data import Dataset, DataLoader
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# ----------------------------------------------------
class MnistDataset(Dataset):
    def __init__(self, data_size=0):
        (x, y), (_, _) = mnist.load_data()
        x = [i.flatten() for i in x]
        x = np.array(x, dtype=np.float32)
        if data_size < 0 or data_size > len(y):
            assert ("Data size should be between 0 to number of files in the dataset")
        if data_size == 0:
            data_size = len(y)
        self.data_size = data_size
        # picking 'data_size' random samples
        self.x = x[:data_size]
        self.y = y[:data_size]
        # scaling between 0-1
        self.x = (self.x / 255)
        # Creating one-hot representation of target
        y_encoded = []
        for label in y:
            encoded = np.zeros(10)
            encoded[label] = 1
            y_encoded.append(encoded)
        self.y = np.array(y_encoded)

    def __len__(self):
        return self.data_size

    def __getitem__(self, index):
        x_sample = self.x[index]
        label = self.y[index]
        return x_sample, label
# ----------------------------------------------------
num_train_samples = 10000
num_test_samples = 2000
# Each generator returns a single
# sample & its label on each iteration.
mnist_train = MnistDataset(data_size=num_train_samples)
mnist_test = MnistDataset(data_size=num_test_samples)
# Each generator returns a batch of samples on each iteration.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True) # 79 batches
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=True) # 16 batches
# ----------------------------------------------------
# Defining the Model Architecture
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 100)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(100, 50)
        self.act2 = nn.ReLU()
        self.fc3 = nn.Linear(50, 10)
        self.act3 = nn.Sigmoid()

    def forward(self, x):
        x = self.act1(self.fc1(x))
        x = self.act2(self.fc2(x))
        output = self.act3(self.fc3(x))
        return output
# ----------------------------------------------------
model = MLP()
# Defining optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# ----------------------------------------------------
# Training the model
epochs = 10
print("Training Started...")
for epoch in range(epochs):
    for batch_index, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()               # Zero the gradients
        outputs = model(inputs)             # Forward pass
        loss = criterion(outputs, targets)  # Compute the Loss
        loss.backward()                     # Compute the Gradients
        optimizer.step()                    # Update the parameters

    # Evaluating the model
    total = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
    print('Epoch : {} Test Acc : {}'.format(epoch, (100. * correct / total)))
print("Training Completed Sucessfully")
# ----------------------------------------------------
I also read some other posts related to the same problem, and most of them said that for CrossEntropy loss the target has to be a single number, which goes completely over my head. Can someone please explain a solution? Thank you.

For nn.CrossEntropyLoss, you don't need a one-hot representation of the label; you just need to pass the prediction logits, of shape (batch_size, n_class), and a target vector of shape (batch_size,).
So just pass in the label index vector y instead of the one-hot vector.
Here is a fixed version of your code:
class MnistDataset(Dataset):
    def __init__(self, data_size=0):
        (x, y), (_, _) = mnist.load_data()
        x = [i.flatten() for i in x]
        x = np.array(x, dtype=np.float32)
        if data_size < 0 or data_size > len(y):
            assert ("Data size should be between 0 to number of files in the dataset")
        if data_size == 0:
            data_size = len(y)
        self.data_size = data_size
        # picking 'data_size' random samples
        self.x = x[:data_size]
        # scaling between 0-1
        self.x = (self.x / 255)
        self.y = y[:data_size].astype(np.int64)  # <-- keep the integer class labels, no one-hot

    def __len__(self):
        return self.data_size

    def __getitem__(self, index):
        x_sample = self.x[index]
        label = self.y[index]
        return x_sample, label
Take a look at the PyTorch documentation for more detail:
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
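To make the expected shapes concrete, here is a minimal, self-contained sketch (not taken from the question's code): nn.CrossEntropyLoss applies log-softmax internally, so it takes raw logits of shape (batch_size, n_class) together with integer class indices of shape (batch_size,):
import torch
from torch import nn

criterion = nn.CrossEntropyLoss()

logits = torch.randn(4, 10)           # (batch_size, n_class): raw scores from the last Linear layer
targets = torch.tensor([3, 7, 0, 1])  # (batch_size,): integer class indices, NOT one-hot
loss = criterion(logits, targets)     # works; a one-hot / 2D target is what triggers the 1D-target error
print(loss.item())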

Related

Multihead attention model - IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

I'm trying to train a multiclass classification model (with 3 classes) using a multihead attention layer and two linear layers with some tabular data, and I'm getting this error:
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
I have copied my model/dataset classes and my training loop below; it seems like the error is associated with the data I am passing into my loss function (criterion), which looks like this:
y_pred: tensor([-115.7523, -113.5820, 37.0307], dtype=torch.float64, grad_fn=<SqueezeBackward0>)
and
y: tensor(0).
I am unable to resolve this error, so any help with this would be greatly appreciated.
Here is the dataset and model classes:
class GeneExpressionDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data
        self.features = self.data.iloc[:, 2:].values
        self.labels = self.data.iloc[:, 1].values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features = torch.tensor(self.features[idx], dtype=torch.double)
        labels = torch.tensor(self.labels[idx], dtype=torch.long)
        return features, labels


class MultiheadAttention(nn.Module):
    def __init__(self, input_dim, num_heads, dropout_rate):
        super(MultiheadAttention, self).__init__()
        self.input_dim = input_dim
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate
        self.q_linear = nn.Linear(input_dim, input_dim)
        self.k_linear = nn.Linear(input_dim, input_dim)
        self.v_linear = nn.Linear(input_dim, input_dim)
        self.dropout = nn.Dropout(dropout_rate)
        self.out_linear = nn.Linear(input_dim, input_dim)

    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)
        # Apply linear transformations to obtain query, key, and value representations
        q = self.q_linear(query).view(batch_size, -1, self.num_heads)
        k = self.k_linear(key).view(batch_size, -1, self.num_heads)
        v = self.v_linear(value).view(batch_size, -1, self.num_heads)
        # Compute scaled dot-product attention scores
        scores = torch.matmul(q, k.transpose(1, 2)) / (self.input_dim ** 0.5)
        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, -1e9)
        # Apply softmax to obtain attention weights
        attn_weights = torch.softmax(scores, dim=-1)
        # Apply dropout to the attention weights
        attn_weights = self.dropout(attn_weights)
        # Compute the attention output
        attn_output = torch.matmul(attn_weights, v)
        # Concatenate the attention output from different heads
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, -1, self.num_heads * (self.input_dim // self.num_heads))
        # Apply linear transformation to obtain the final attention output
        out = self.out_linear(attn_output)
        return out


class geneGPT(nn.Module):
    def __init__(self, input_dim, hid_dim, output_dim, num_heads, dropout_rate):
        super().__init__()
        self.attention = MultiheadAttention(input_dim, num_heads, dropout_rate)
        self.fc1 = nn.Linear(num_heads * (input_dim // num_heads), hid_dim)
        self.relu = nn.ReLU()
        self.out = nn.Linear(hid_dim, output_dim)

    def forward(self, x, mask=None):
        x = self.attention(x, x, x, mask)
        x = self.relu(self.fc1(x))
        x = self.out(x)
        return x
and here is the training loop:
print('Training...')
model = geneGPT(INPUT_DIM, HID_DIM, OUTPUT_DIM, NUM_HEADS, DROPOUT_RATE).double().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(NUM_EPOCHS):
    train_losses = 0.0
    valid_losses = 0.0
    train_accs = 0.0
    valid_accs = 0.0
    for i, (x, y) in enumerate(train_dl):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        y_pred = model(x).squeeze()
        y = y.squeeze()
        print(y_pred, y)
        train_loss = criterion(y_pred, y)
        train_acc = multi_acc(y_pred, y)
        train_loss.backward()
        optimizer.step()
        train_losses += train_loss.item()
        train_accs += train_acc.item()
    for i, (x, y) in enumerate(val_dl):
        x, y = x.to(device), y.to(device)
        y_pred = model(x).squeeze()
        y = y.squeeze()
        valid_loss = criterion(y_pred, y)
        valid_acc = multi_acc(y_pred, y)
        valid_losses += valid_loss.item()
        valid_accs += valid_acc.item()
    print("Epoch {}/{} | Loss: {:.4f} | Train Loss:{:.4f} | Valid Loss".format(epoch + 1, NUM_EPOCHS, train_loss / len(train_dl), valid_loss / len(val_dl)))
    print("Training Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format(train_accs / len(train_dl), valid_accs / len(val_dl)))

test_accs = 0.0
for i, (x, y) in enumerate(test_dl):
    x, y = x.to(device), y.to(device)
    y_pred = model(x).squeeze()
    y = y.squeeze()
    test_acc = multi_acc(y_pred, y)
    test_accs += test_acc.item()
print("Testing Accuracy: {:.4f}".format(test_accs / len(test_dl)))
torch.save(model.state_dict(), "model.pth")
In your training loop,
y_pred = model(x).squeeze()
y = y.squeeze()
you changed the dimensions of both tensors, and in a later step
train_loss = criterion(y_pred.unsqueeze(0), y)
you changed the y_pred dimension again while keeping the y dimension the same. So I assume the relative difference in the dimensions of y and y_pred is what results in the error "Expected input batch_size (1) to match target batch_size (0)".
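One way to avoid the mismatch (a minimal sketch with hypothetical shapes, since I don't know your exact batch layout) is to keep the batch dimension on both tensors instead of squeezing it away, so that CrossEntropyLoss sees (N, C) logits and (N,) integer targets:
# hypothetical shapes, for illustration only:
#   y_pred: (batch_size, n_classes), e.g. (1, 3)
#   y:      (batch_size,),           e.g. (1,)
y_pred = model(x)                  # no .squeeze(): keep the batch dimension
y = y.view(-1)                     # targets as a 1D vector of class indices
train_loss = criterion(y_pred, y)  # (N, C) logits vs. (N,) targets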

Calculate Accuracy of Pytorch Model

I am new to PyTorch and have compiled the code below from different articles and code snippets. The code basically takes in a sequence of products and predicts the next product in the sequence.
I am trying to find the accuracy of this model but am not sure how to do it. Any help or suggestion would be appreciated.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(50)
prod_list = ['AA105045091',
'C2106264154',
'B2106691381',
'AA105045091',
'B2106691381',
'X3106692282',
'V2106350393',
'C2106264154',
'V6104504285',
'A2106329636',
'M6M100936257',
'N2101433968',
'X2M200042701',
'V3M200052002',
'K5101434063',
'B1106334744',
'P1103790575',
'K1106031596',
'E3D227124S6',
'D1105834415',
'M4102794084',
'B4101250283',
'C2102794082',
'D1106816721',
'B5106788450',
'A3106805351',
'C2106788452',
'C2106805373',
'B2106788454',
'A1104146375']
prod_list
sequences = []
for i in range(3, len(prod_list)):
    words = prod_list[i-3:i+1]
    sequences.append(words)
# split the sequence to input list and output list
X = []
y= []
for i in sequences:
    X.append(i[0:3])
    y.append(i[3])
# create integer-to-token mapping
int2token = {}
cnt = 0
for w in set(" ".join(prod_list).split()):
    int2token[cnt] = w
    cnt += 1
# create token-to-integer mapping
token2int = {t: i for i, t in int2token.items()}
def get_integer_seq_train(seq):
    new_list = []
    for i in seq:
        new_list.append(token2int[i])
    return new_list
# convert text sequences to integer sequences
x_int = [get_integer_seq_train(i) for i in X]
# convert lists to numpy arrays
x_int = np.array(x_int)
vocab_size = len(int2token)
vocab_size
def get_integer_seq_test(seq):
    return [token2int[w] for w in seq.split()]
    #return [token2int[w] for w in seq.split()]
# convert text sequences to integer sequences
y_int = [get_integer_seq_test(i) for i in y]
# convert lists to numpy arrays
y_int = np.array(y_int)
def get_batches(arr_x, arr_y, batch_size):
    # iterate through the arrays
    prv = 0
    for n in range(batch_size, arr_x.shape[0], batch_size):
        x = arr_x[prv:n, :]
        y = arr_y[prv:n, :]
        prv = n
        yield x, y
class WordLSTM(nn.Module):
    def __init__(self, n_hidden=256, n_layers=4, drop_prob=0.3, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        self.emb_layer = nn.Embedding(vocab_size, 200)
        ## define the LSTM
        self.lstm = nn.LSTM(200, n_hidden, n_layers,
                            dropout=drop_prob, batch_first=True)
        ## define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        ## define the fully-connected layer
        self.fc = nn.Linear(3 * n_hidden, vocab_size)
        torch.manual_seed(50)

    def forward(self, x, hidden):
        ''' Forward pass through the network.
            These inputs are x, and the hidden/cell state `hidden`. '''
        ## pass input through embedding layer
        embedded = self.emb_layer(x)
        ## Get the outputs and the new hidden state from the lstm
        lstm_output, hidden = self.lstm(embedded, hidden)
        ## pass through a dropout layer
        out = self.dropout(lstm_output)
        # out = out.contiguous().view(-1, self.n_hidden)
        out = out.reshape(x.shape[0], -1)
        ## put "out" through the fully-connected layer
        out = self.fc(out)
        # return the final output and the hidden state
        return out, hidden

    def init_hidden(self, batch_size):
        ''' initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        # if GPU is available
        if (torch.cuda.is_available()):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        # if GPU is not available
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        return hidden
net = WordLSTM()
def train(net, epochs=10, batch_size=32, lr=0.001, clip=1, print_every=32):
    torch.manual_seed(50)
    # optimizer
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    # loss
    criterion = nn.CrossEntropyLoss()
    counter = 0
    net.train()
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        for x, y in get_batches(x_int, y_int, batch_size):
            counter += 1
            # convert numpy arrays to PyTorch tensors
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            # detach hidden states
            h = tuple([each.data for each in h])
            # zero accumulated gradients
            net.zero_grad()
            # get the output from the model
            output, h = net(inputs, h)
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(-1))
            # back-propagate error
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            # update weights
            opt.step()
        print("Epoch: {}/{}...".format(e+1, epochs),
              "Step: {}...".format(counter),
              "Loss: {}...".format(loss))
train(net, batch_size = 32, epochs=20, print_every=256)
def predict(net, tkn, h=None):
    # tensor inputs
    new_inp = []
    for t1 in tkn:
        x = np.array([token2int[t1]])
        new_inp.append(x)
    new_inp = np.asarray(new_inp).reshape(1, -1)
    inputs = torch.from_numpy(new_inp)
    # detach hidden state from history
    h = tuple([each.data for each in h])
    # get the output of the model
    out, h = net(inputs, h)
    # get the token probabilities
    p = F.softmax(out, dim=1).data
    p = p.cpu()
    p = p.numpy()
    p = p.reshape(p.shape[1],)
    # get indices of top 3 values
    top_n_idx = p.argsort()[-1:][::-1]
    # randomly select one of the three indices
    # sampled_token_index = top_n_idx[random.sample([0],1)[0]]
    sampled_token_index = top_n_idx[0]
    # return the encoded value of the predicted char and the hidden state
    return int2token[sampled_token_index]
# function to generate text
def sample(net, prime):
    net.eval()
    # batch size is 1
    h = net.init_hidden(1)
    token = predict(net, prime, h)
    return token
sample(net, prime=['AA105045091', 'C2106264154', 'B2106691381'])
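One common way to measure accuracy for a next-product classifier like this is to compare the argmax of the output logits with the integer targets. Here is a minimal, hedged sketch that assumes the net, get_batches, x_int and y_int defined above (the batch_size may need adjusting to the size of your data):
def evaluate_accuracy(net, arr_x, arr_y, batch_size=8):
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        h = net.init_hidden(batch_size)
        for x, y in get_batches(arr_x, arr_y, batch_size):
            inputs = torch.from_numpy(x)
            targets = torch.from_numpy(y).view(-1)
            h = tuple(each.data for each in h)
            output, h = net(inputs, h)
            preds = output.argmax(dim=1)               # most likely next product per sample
            correct += (preds == targets).sum().item()
            total += targets.size(0)
    net.train()
    return correct / total if total else 0.0

# e.g. accuracy on the training sequences used above:
# print("Accuracy: {:.2%}".format(evaluate_accuracy(net, x_int, y_int)))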

PyTorch LSTM with multivariate time series (Many-to-Many)

Given 5 features of a time series, we want to predict the following values using an LSTM recurrent neural network in PyTorch. The problem is that the loss value starts very low (e.g. 0.04) and increases a bit as the computation runs (it seems to converge to a slightly higher value, but it never decreases).
Moreover, the dataset is normalized, and we tried different values of learning rate, epochs, batch size, etc.
An example of loss during training:
step : 0 loss : 0.0016425768844783306
step : 1 loss : 0.0028163508977741003
step : 2 loss : 0.009786984883248806
This is the class:
class MV_LSTM(torch.nn.Module):
    def __init__(self, n_features, seq_length):
        super(MV_LSTM, self).__init__()
        self.n_features = n_features
        self.seq_len = seq_length
        self.n_hidden = 40  # number of hidden states
        self.n_layers = 1   # number of LSTM layers (stacked)
        self.l_lstm = torch.nn.LSTM(input_size=n_features,
                                    hidden_size=self.n_hidden,
                                    num_layers=self.n_layers,
                                    batch_first=True)
        # according to pytorch docs LSTM output is
        # (batch_size, seq_len, num_directions * hidden_size)
        # when considering batch_first = True
        self.l_linear = torch.nn.Linear(self.n_hidden * self.seq_len, 5)

    def init_hidden(self, batch_size):
        hidden_state = torch.randn(self.n_layers, batch_size, self.n_hidden)
        cell_state = torch.randn(self.n_layers, batch_size, self.n_hidden)
        self.hidden = (hidden_state, cell_state)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        lstm_out, self.hidden = self.l_lstm(x, self.hidden)
        x = lstm_out.contiguous().view(batch_size, -1)
        return self.l_linear(x)
This is the main code:
n_features = 5 # this is number of parallel inputs
n_timesteps = 24 # this is number of timesteps
# convert dataset into input/output
X, y = split_sequences(dataset, n_timesteps)
print(X.shape, y.shape)
X
y
# create NN
mv_net = MV_LSTM(n_features,n_timesteps)
criterion = torch.nn.MSELoss() # reduction='sum' created huge loss value
optimizer = torch.optim.Adam(mv_net.parameters(), lr=1e-4)
train_episodes = 50
batch_size = 16
This is the training:
mv_net.train()
for t in range(train_episodes):
    X, y = sklearn.utils.shuffle(X, y)
    for b in range(0, len(X), batch_size):
        inpt = X[b:b+batch_size, :, :]
        target = y[b:b+batch_size, :]
        x_batch = torch.tensor(inpt, dtype=torch.float32)
        y_batch = torch.tensor(target, dtype=torch.float32)
        mv_net.init_hidden(x_batch.size(0))
        output = mv_net(x_batch)
        loss = criterion(output.view(-1, 5), y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print('step : ', t, 'loss : ', loss.item())
Thank you for your time, and sorry for our inexperience (this is our first RNN).

Data augmentation with ImageDataGenerator for videos (4D tensors) in Keras

I have an ImageDataGenerator in Keras that I would like to apply during training to every frame in short video clips which are represented as 4D numpy arrays with shape (num_frames, width, height, 3).
In the case of a standard dataset consisting of images each with shape (width, height, 3), I would normally do something like:
aug = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    zoom_range=0.15)

model.fit_generator(
    aug.flow(X_train, y_train),
    epochs=100)
How can I apply these same data augmentations to a dataset of 4D numpy arrays representing sequences of images?
I figured it out. I created a custom class which inherits from tensorflow.keras.utils.Sequence that performs the augmentations using scipy for each image.
import random

import numpy as np
import tensorflow as tf
from scipy import ndimage
from sklearn.preprocessing import LabelBinarizer


class CustomDataset(tf.keras.utils.Sequence):
    def __init__(self, batch_size, *args, **kwargs):
        self.batch_size = batch_size
        self.X_train = args[0]
        self.Y_train = args[1]

    def __len__(self):
        # returns the number of batches
        return int(self.X_train.shape[0] / self.batch_size)

    def __getitem__(self, index):
        # returns one batch
        X = []
        y = []
        for i in range(self.batch_size):
            r = random.randint(0, self.X_train.shape[0] - 1)
            next_x = self.X_train[r]
            next_y = self.Y_train[r]
            augmented_next_x = []
            ###
            ### Augmentation parameters for this clip.
            ###
            rotation_amt = random.randint(-45, 45)
            for j in range(self.X_train.shape[1]):
                transformed_img = ndimage.rotate(next_x[j], rotation_amt, reshape=False)
                transformed_img[transformed_img == 0] = 255
                augmented_next_x.append(transformed_img)
            X.append(augmented_next_x)
            y.append(next_y)
        X = np.array(X).astype('uint8')
        y = np.array(y)
        encoder = LabelBinarizer()
        y = encoder.fit_transform(y)
        return X, y

    def on_epoch_end(self):
        # optional method to run some logic at the end of each epoch: e.g. reshuffling
        pass
I then pass this in to the fit_generator method:
training_data_augmentation = CustomDataset(BS, X_train_L, y_train_L)
model.fit_generator(
    training_data_augmentation,
    epochs=300)

Tensorflow 2 LSTM model doesn't learn using a Sequence

I'm currently using an LSTM model to make time series predictions with TensorFlow 2.2.0.
I've been using a large dataset and everything works nicely.
However, the dataset creation takes a lot of RAM and I wanted to use a tensorflow.keras.utils.Sequence to solve the issue. My problem is the following:
When using a Sequence, my model doesn't learn anymore (it predicts the average of the real signal over the whole dataset).
My dataset is created from two Python lists, x_train_flights and y_train_flights, each containing pandas DataFrames. For each pair (x_train_flight, y_train_flight) from these lists:
x_train_flight, of shape (-1, features), contains the feature signals
y_train_flight, of shape (-1, 1), contains one signal aligned in time with the ones from x_train_flights
The system looks as follows (I am not allowed to share the real data; I've recreated the graph using pseudo-random signals instead):
Here, features=2 (the blue and orange lines), and look_back=5. That is to say, the 10 points (from x_train_flights) in the rectangle are used to predict the golden point (which is compared to the corresponding point in y_train_flights during the training phase). The gray points are previous predictions.
To create my dataset, I've been using these functions:
def lstm_shapify(sequence, look_back, features):
    res = np.empty((look_back, len(sequence), features), dtype=np.float32)
    for i in range(look_back):
        res[i] = np.roll(sequence, -i * features)
    return np.transpose(res, axes=(1, 0, 2))[:-look_back + 1]


def make_dataset(x_flights, y_flights, look_back, features):
    x = np.empty((0, look_back, features), dtype=np.float32)
    y = np.empty((0, 1), dtype=np.float32)
    for i in range(len(x_flights)):
        x_sample = x_flights[i].values
        y_sample = y_flights[i].values[look_back - 1:]
        x = np.concatenate([x, lstm_shapify(x_sample, look_back, features)])
        y = np.concatenate([y, y_sample])
    return x, y
And I fit my network with the following:
model.fit(
    x_train,
    y_train,
    epochs=7,
    batch_size=batch_size
)
So, I've created this custom Sequence:
class LSTMGenerator(Sequence):
    def __init__(
        self,
        x_flights: List[DataFrame],
        y_flights: List[DataFrame],
        look_back: int,
        batch_size: int,
        features: int
    ):
        self.x_flights = x_flights
        self.y_flights = []
        self.look_back = look_back
        self.batch_size = batch_size
        self.features = features
        self.length = 0
        for y_flight in y_flights:
            y = y_flight.iloc[look_back - 1:].to_numpy()
            self.y_flights.append(y)
            self.length += len(y) // batch_size

    def __getitem__(self, index):
        flight_index = 0
        while True:
            n = len(self.y_flights[flight_index]) // self.batch_size
            if index < n:
                break
            flight_index += 1
            index = index - n
        start_index = index * self.batch_size
        x_batch = lstm_shapify(
            self.x_flights[flight_index]
            .iloc[start_index:start_index + self.batch_size + self.look_back - 1]
            .to_numpy(),
            self.look_back,
            self.features
        )
        y_batch = self.y_flights[flight_index][start_index:start_index + self.batch_size]
        return x_batch, y_batch

    def __len__(self):
        return self.length
Each tuple (x, y) it returns consists of two numpy arrays of shape (batch_size, look_back, features) and (batch_size, 1), respectively.
And now I'm trying to fit it with:
model.fit(
    LSTMGenerator(x_train_flights, y_train_flights, look_back, batch_size, features),
    epochs=epochs
)
Here is my model:
model = Sequential()
model.add(LSTM(
    100,
    input_shape=(look_back, features),
    kernel_regularizer=regularizers.l2(1e-3),
    bias_regularizer=regularizers.l2(1e-4)
))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1, activation='tanh'))
model.compile(optimizer='adam', loss='mse')
Hope you can help me.
EDIT: more details about the datasets.
I solved it by taking a break and looking at the code once again (and I realized it was a silly mistake): the issue with my Sequence comes from the samples in each batch being consecutive samples in time, whereas the batches of my compute-everything dataset were nicely shuffled.
My Sequence was problematic because the batches were selected at a random index from a random dataset, but the samples inside each batch stayed consecutive. Now I select each sample at a random index from a random dataset to create a single batch.
Here is a working example:
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.utils import *
import numpy as np
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)
features = 3
lookback = 7
model = Sequential()
model.add(LSTM(500, input_shape = (lookback, features)))
model.add(Dense(1, activation='tanh'))
XS = np.random.randn(200, features)
YS = np.random.randn(200)
class LookbackSeq(Sequence):
    def __init__(self, XS, YS, batch_size, lookback):
        self.XS = XS
        self.YS = YS
        self.batch_size = batch_size
        self.lookback = lookback

    def __len__(self):
        n_windows = self.XS.shape[0] - self.lookback
        return int(np.ceil(n_windows / self.batch_size))

    def __getitem__(self, i):
        base = i * self.batch_size
        n_windows = self.XS.shape[0] - self.lookback
        batch_size = min(n_windows - base, self.batch_size)
        X = np.zeros((batch_size, self.lookback, self.XS.shape[1]))
        Y = np.zeros((batch_size, 1))
        for i in range(batch_size):
            for j in range(self.lookback):
                X[i, j] = self.XS[base + i + j]
            Y[i] = self.YS[base + i + self.lookback]
        return X, Y
model.compile(optimizer='adam', loss='mse')
# ALL SAMPLES IN MEMORY
X, Y = [], []
for i in range(len(XS) - lookback):
    X.append(XS[i:i+lookback])
    Y.append(YS[i+lookback])
X, Y = np.array(X), np.array(Y)
model.fit(X, Y, epochs = 10, batch_size = 4, shuffle = False)
# GENERATED ON THE FLY
# gen = LookbackSeq(XS, YS, 4, lookback)
# model.fit(x = gen,
# steps_per_epoch = len(gen),
# shuffle = False,
# epochs = 10)
I'm assuming your input data has the shape X = (n_points, n_features) and Y = (n_points,). LookbackSeq does the batching and windowing (lookback) for you.
You can comment and uncomment the relevant lines to either train with samples generated on the fly or with them all stored in memory. You should get identical results.
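If you also want the on-the-fly variant to see shuffled samples (the fix described in the edit above), one possible sketch, assuming the same XS/YS arrays and the LookbackSeq class, is to draw each batch from a shuffled list of window start indices and reshuffle it every epoch:
class ShuffledLookbackSeq(LookbackSeq):
    # Same windowing as LookbackSeq, but every batch draws its windows from a
    # shuffled list of start indices, reshuffled at the end of each epoch.
    def __init__(self, XS, YS, batch_size, lookback):
        super().__init__(XS, YS, batch_size, lookback)
        self.starts = np.arange(self.XS.shape[0] - self.lookback)
        np.random.shuffle(self.starts)

    def __getitem__(self, i):
        idx = self.starts[i * self.batch_size:(i + 1) * self.batch_size]
        X = np.stack([self.XS[s:s + self.lookback] for s in idx])  # (batch, lookback, features)
        Y = self.YS[idx + self.lookback].reshape(-1, 1)            # (batch, 1)
        return X, Y

    def on_epoch_end(self):
        np.random.shuffle(self.starts)

It can be passed to model.fit exactly like LookbackSeq, e.g. gen = ShuffledLookbackSeq(XS, YS, 4, lookback).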
