I'm currently using an LSTM model to make time-series predictions with TensorFlow 2.2.0.
I've been using a large dataset and everything works nicely.
However, the dataset creation takes a lot of RAM, so I wanted to use a tensorflow.keras.utils.Sequence to solve the issue. My problem is the following:
When using a Sequence, my model doesn't learn anymore (it predicts the average of the real signal over the whole dataset).
My dataset is created from two Python lists, x_train_flights and y_train_flights, each containing pandas DataFrames. For each pair (x_train_flight, y_train_flight) from these lists:
x_train_flight has shape (-1, features) and contains the feature signals
y_train_flight has shape (-1, 1) and contains one signal aligned in time with the ones from x_train_flights
The system looks as follows (I am not allowed to share the real data, so I've recreated the graph using pseudo-random signals instead):
Here, features=2 (the blue and orange lines), and look_back=5. That is to say, the 10 points (from x_train_flights) in the rectangle are used to predict the golden point (which is compared to the corresponding point in y_train_flights during the training phase). The gray points are previous predictions.
To create my dataset, I've been using these functions:
def lstm_shapify(sequence, look_back, features):
    res = np.empty((look_back, len(sequence), features), dtype=np.float32)
    for i in range(look_back):
        res[i] = np.roll(sequence, -i * features)
    return np.transpose(res, axes=(1, 0, 2))[:-look_back + 1]

def make_dataset(x_flights, y_flights, look_back, features):
    x = np.empty((0, look_back, features), dtype=np.float32)
    y = np.empty((0, 1), dtype=np.float32)
    for i in range(len(x_flights)):
        x_sample = x_flights[i].values
        y_sample = y_flights[i].values[look_back - 1:]
        x = np.concatenate([x, lstm_shapify(x_sample, look_back, features)])
        y = np.concatenate([y, y_sample])
    return x, y
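For reference, this is a minimal usage sketch of how the in-memory dataset is built (assuming the x_train_flights / y_train_flights lists described above):

x_train, y_train = make_dataset(x_train_flights, y_train_flights, look_back, features)
print(x_train.shape)  # (n_samples, look_back, features)
print(y_train.shape)  # (n_samples, 1)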
And I fit my network with the following:
model.fit(
    x_train,
    y_train,
    epochs=7,
    batch_size=batch_size
)
So, I've created this custom Sequence:
class LSTMGenerator(Sequence):
    def __init__(
        self,
        x_flights: List[DataFrame],
        y_flights: List[DataFrame],
        look_back: int,
        batch_size: int,
        features: int
    ):
        self.x_flights = x_flights
        self.y_flights = []
        self.look_back = look_back
        self.batch_size = batch_size
        self.features = features
        self.length = 0
        for y_flight in y_flights:
            y = y_flight.iloc[look_back - 1:].to_numpy()
            self.y_flights.append(y)
            self.length += len(y) // batch_size

    def __getitem__(self, index):
        flight_index = 0
        while True:
            n = len(self.y_flights[flight_index]) // self.batch_size
            if index < n:
                break
            flight_index += 1
            index = index - n
        start_index = index * self.batch_size
        x_batch = lstm_shapify(
            self.x_flights[flight_index]
            .iloc[start_index:start_index + self.batch_size + self.look_back - 1]
            .to_numpy(),
            self.look_back,
            self.features
        )
        y_batch = self.y_flights[flight_index][start_index:start_index + self.batch_size]
        return x_batch, y_batch

    def __len__(self):
        return self.length
Each tuple (x, y) it returns consists of two numpy arrays of shape (batch_size, look_back, features) and (batch_size, 1) respectively.
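As a quick sanity check (a sketch only, not part of the original script), the shapes of one generated batch can be inspected directly:

gen = LSTMGenerator(x_train_flights, y_train_flights, look_back, batch_size, features)
x_batch, y_batch = gen[0]
print(x_batch.shape)  # expected: (batch_size, look_back, features)
print(y_batch.shape)  # expected: (batch_size, 1)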
And now I'm trying to fit it with:
model.fit(
    LSTMGenerator(x_train_flights, y_train_flights, look_back, batch_size, features),
    epochs=epochs
)
Here is my model:
model = Sequential()
model.add(LSTM(
    100,
    input_shape=(look_back, features),
    kernel_regularizer=regularizers.l2(1e-3),
    bias_regularizer=regularizers.l2(1e-4)
))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1, activation='tanh'))
model.compile(optimizer='adam', loss='mse')
Hope you can help me
EDIT: more details about the datasets
I solved it by taking a break and looking at the code once again (and I realized it was a silly mistake): the issue with my Sequence comes from the samples in each batch being consecutive in time, whereas the batches of my compute-everything dataset were nicely shuffled.
My Sequence was problematic because each batch was taken at a random index from a random dataset. Now I select each sample at a random index from a random dataset to create a single batch.
Here is a working example:
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.utils import *
import numpy as np
import tensorflow as tf
np.random.seed(1234)
tf.random.set_seed(1234)
features = 3
lookback = 7
model = Sequential()
model.add(LSTM(500, input_shape = (lookback, features)))
model.add(Dense(1, activation='tanh'))
XS = np.random.randn(200, features)
YS = np.random.randn(200)
class LookbackSeq(Sequence):
    def __init__(self, XS, YS, batch_size, lookback):
        self.XS = XS
        self.YS = YS
        self.batch_size = batch_size
        self.lookback = lookback

    def __len__(self):
        n_windows = self.XS.shape[0] - self.lookback
        return int(np.ceil(n_windows / self.batch_size))

    def __getitem__(self, i):
        base = i * self.batch_size
        n_windows = self.XS.shape[0] - self.lookback
        batch_size = min(n_windows - base, self.batch_size)
        X = np.zeros((batch_size, self.lookback, self.XS.shape[1]))
        Y = np.zeros((batch_size, 1))
        for i in range(batch_size):
            for j in range(self.lookback):
                X[i, j] = self.XS[base + i + j]
            Y[i] = self.YS[base + i + self.lookback]
        return X, Y
model.compile(optimizer='adam', loss='mse')
# ALL SAMPLES IN MEMORY
X, Y = [], []
for i in range(len(XS) - lookback):
X.append(XS[i:i+lookback])
Y.append(YS[i+lookback])
X, Y = np.array(X), np.array(Y)
model.fit(X, Y, epochs = 10, batch_size = 4, shuffle = False)
# GENERATED ON THE FLY
# gen = LookbackSeq(XS, YS, 4, lookback)
# model.fit(x = gen,
#           steps_per_epoch = len(gen),
#           shuffle = False,
#           epochs = 10)
I'm assuming your input data has the shape X = (n_points, n_features) and Y = (n_points,). LookbackSeq does the batching and windowing (lookback) for you.
You can comment and uncomment the relevant lines to either train with samples generated on the fly or with them all stored in memory. You should get identical results.
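If you also want the per-sample shuffling described in the fix above, one possible variant (a sketch only; ShuffledLookbackSeq is a hypothetical name, not part of the code above) draws every window of a batch from a random position and re-shuffles after each epoch:

# (np and Sequence are already imported above)
class ShuffledLookbackSeq(Sequence):
    def __init__(self, XS, YS, batch_size, lookback):
        self.XS, self.YS = XS, YS
        self.batch_size = batch_size
        self.lookback = lookback
        # Window start indices, shuffled so consecutive samples don't share a batch.
        self.indices = np.arange(XS.shape[0] - lookback)
        np.random.shuffle(self.indices)

    def __len__(self):
        return int(np.ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, i):
        idx = self.indices[i * self.batch_size:(i + 1) * self.batch_size]
        X = np.stack([self.XS[k:k + self.lookback] for k in idx])
        Y = self.YS[idx + self.lookback].reshape(-1, 1)
        return X, Y

    def on_epoch_end(self):
        # Re-shuffle the window indices so batches differ between epochs.
        np.random.shuffle(self.indices)

# Usage, analogous to LookbackSeq above:
# model.fit(ShuffledLookbackSeq(XS, YS, 4, lookback), epochs=10)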
Related
I recently shifted to PyTorch from Keras and I am still trying to understand how all of this works. Below is the code I have implemented to classify the MNIST dataset using a simple MLP. Just like I used to do in Keras, I have flattened each 28x28 image into a vector of 784, and I have also created a one-hot representation for my labels.
In the model I was hoping that, given a vector of 784, the model would output a one-hot vector with probabilities, but as soon as my code reaches the loss computation I get the following error:
RuntimeError: 1D target tensor expected, multi-target not supported
Below is my code:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
from torch import nn, optim
from keras.datasets import mnist
from torch.utils.data import Dataset, DataLoader
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# ----------------------------------------------------
class MnistDataset(Dataset):
    def __init__(self, data_size=0):
        (x, y), (_, _) = mnist.load_data()
        x = [i.flatten() for i in x]
        x = np.array(x, dtype=np.float32)
        if data_size < 0 or data_size > len(y):
            assert ("Data size should be between 0 to number of files in the dataset")
        if data_size == 0:
            data_size = len(y)
        self.data_size = data_size
        # picking 'data_size' random samples
        self.x = x[:data_size]
        self.y = y[:data_size]
        # scaling between 0-1
        self.x = (self.x / 255)
        # Creating one-hot representation of target
        y_encoded = []
        for label in y:
            encoded = np.zeros(10)
            encoded[label] = 1
            y_encoded.append(encoded)
        self.y = np.array(y_encoded)

    def __len__(self):
        return self.data_size

    def __getitem__(self, index):
        x_sample = self.x[index]
        label = self.y[index]
        return x_sample, label
# ----------------------------------------------------
num_train_samples = 10000
num_test_samples = 2000
# Each generator returns a single
# sample & its label on each iteration.
mnist_train = MnistDataset(data_size=num_train_samples)
mnist_test = MnistDataset(data_size=num_test_samples)
# Each generator returns a batch of samples on each iteration.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True) # 79 batches
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=True) # 16 batches
# ----------------------------------------------------
# Defining the Model Architecture
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 100)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(100, 50)
        self.act2 = nn.ReLU()
        self.fc3 = nn.Linear(50, 10)
        self.act3 = nn.Sigmoid()

    def forward(self, x):
        x = self.act1(self.fc1(x))
        x = self.act2(self.fc2(x))
        output = self.act3(self.fc3(x))
        return output
# ----------------------------------------------------
model = MLP()
# Defining optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# ----------------------------------------------------
# Training the model
epochs = 10
print("Training Started...")
for epoch in range(epochs):
    for batch_index, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()               # Zero the gradients
        outputs = model(inputs)             # Forward pass
        loss = criterion(outputs, targets)  # Compute the Loss
        loss.backward()                     # Compute the Gradients
        optimizer.step()                    # Update the parameters

    # Evaluating the model
    total = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
    print('Epoch : {} Test Acc : {}'.format(epoch, (100. * correct / total)))

print("Training Completed Successfully")
# ----------------------------------------------------
I also read some other posts related to the same problem, and most of them said that for the CrossEntropy loss the target has to be a single number, which totally goes over my head. Can someone please explain a solution? Thank you.
For nn.CrossEntropyLoss, you don't need a one-hot representation of the label. You just need to pass the prediction's logits, whose shape is (batch_size, n_class), and a target vector of shape (batch_size,).
So just pass in the label index vector y instead of the one-hot vector.
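For illustration, a minimal standalone sketch of the shapes nn.CrossEntropyLoss expects (random tensors, not your data):

import torch
from torch import nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(128, 10)            # (batch_size, n_class) raw model outputs
targets = torch.randint(0, 10, (128,))   # (batch_size,) integer class indices, not one-hot
loss = criterion(logits, targets)        # one-hot targets would raise the error above
print(loss.item())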
Fixed version of your code:
class MnistDataset(Dataset):
    def __init__(self, data_size=0):
        (x, y), (_, _) = mnist.load_data()
        x = [i.flatten() for i in x]
        x = np.array(x, dtype=np.float32)
        if data_size < 0 or data_size > len(y):
            assert ("Data size should be between 0 to number of files in the dataset")
        if data_size == 0:
            data_size = len(y)
        self.data_size = data_size
        # picking 'data_size' random samples
        self.x = x[:data_size]
        self.y = y[:data_size]
        # scaling between 0-1
        self.x = (self.x / 255)
        self.y = y  # <--

    def __len__(self):
        return self.data_size

    def __getitem__(self, index):
        x_sample = self.x[index]
        label = self.y[index]
        return x_sample, label
Take a look at Pytorch example for more detail:
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
I currently have my neural network training with batch_size=1. To run it across multiple GPUs I need to increase the batch size to be larger than the number of GPUs, so I want batch_size=16. However, with the way I have my data set up, I am not sure how to change that.
The data is read from a csv file
raw_data = pd.read_csv("final.csv")
train_data = raw_data[:750]
test_data = raw_data[750:]
Then the data is normalized and turned into tensors
# normalize features
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_train = scaler.fit_transform(train_data)
scaled_test = scaler.transform(test_data)
# Turn into PyTorch tensors
train_data_normalized = torch.FloatTensor(scaled_train).view(-1)
test_data_normalized = torch.FloatTensor(scaled_test).view(-1)
Then the data is turned into a list of tensor tuples in [input sequence, output] format,
e.g. (tensor([1, 3, 56, 63, 3]), tensor([34]))
# Convert to tensor tuples
def input_series_sequence(input_data, tw):
    inout_seq = []
    L = len(input_data)
    i = 0
    for index in range(L - tw):
        train_seq = input_data[i:i + tw]
        train_label = input_data[i + tw:i + tw + 1]
        inout_seq.append((train_seq, train_label))
        i = i + tw
    return inout_seq
train_inout_seq = input_series_sequence(train_data_normalized, train_window)
test_input_seq = input_series_sequence(test_data_normalized, train_window)
And then the model is trained like so
for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.module.hidden_cell = model.module.init_hidden()
        seq = seq.to(device)
        labels = labels.to(device)
        y_pred = model(seq)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
So I want to know how exactly to change the batch_size from 1 to 16. Do I need to use Dataset and DataLoader? And if so, how exactly would it fit in with my current code? Thanks!
Edit: The model is defined like this; I might have to change the forward function?
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_layer_size),
                            torch.zeros(1, 1, self.hidden_layer_size))

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_layer_size),
                torch.zeros(1, 1, self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]
You can do this by wrapping your model in the nn.DataParallel class.
model = nn.DataParallel(model)
Since I don't have access to multiple GPUs and your data right now to test, I'll direct you here
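Since you also asked about Dataset and DataLoader: here is a rough, untested sketch of how the (seq, label) tuples from input_series_sequence could be batched with batch_size=16 and fed to the DataParallel-wrapped model. The names model, device, epochs, optimizer and loss_function are the ones from your code; SequenceDataset is a hypothetical helper, and your forward() would still need adjusting for batched input.

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

class SequenceDataset(Dataset):
    # Hypothetical wrapper around the (seq, label) tuples built by input_series_sequence.
    def __init__(self, inout_seq):
        self.inout_seq = inout_seq

    def __len__(self):
        return len(self.inout_seq)

    def __getitem__(self, index):
        seq, label = self.inout_seq[index]
        return seq, label

train_loader = DataLoader(SequenceDataset(train_inout_seq), batch_size=16, shuffle=True)

model = nn.DataParallel(model).to(device)

for epoch in range(epochs):
    for seqs, labels in train_loader:
        # seqs: (16, train_window), labels: (16, 1). Your current forward()
        # reshapes for a batch of 1, so it would need to keep the batch
        # dimension instead (e.g. batch_first=True on the nn.LSTM).
        seqs, labels = seqs.to(device), labels.to(device)
        optimizer.zero_grad()
        y_pred = model(seqs)
        loss = loss_function(y_pred, labels)
        loss.backward()
        optimizer.step()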
I'm trying to implement a network with Keras and a TensorFlow back-end, using a transfer learning model (VGG16). My dataset is a medical images dataset, so instead of having only one image I have a series of slices; the dataset is organized in a folder, and each series is an np.array() of size (nb_slices, 512, 512, 3).
My dataset is composed of 1130 train samples and 120 valid samples, so I don't think the data is the problem.
I tried to create a DataGenerator to load my image series into my model without a batch-size problem (I used this Training a Keras model from batches of .npy files using generator? to build my generator class), and I reshaped my volumes to size (nb_slices, 224, 224, 3).
Then I tried to use transfer learning and customize a VGG16 network with one more convolution layer, MaxPooling, Flatten, Dense, Dropout and a final Dense layer.
When I start training, it seems there is no problem, but at some point it returns IndexError: list index out of range, and I saw that the DataGenerator iterates more than the size of the dataset, but I don't know why...
Which part could cause it?
Here is my DataGenerator
INPUT_DIM = 224
MAX_PIXEL_VAL = 255
MEAN = 58.09
STDDEV = 49.73
class DataGenerator(keras.utils.Sequence):
    def __init__(self, file_list, labels, data_loc):
        self.listIDs = file_list
        self.labels = labels
        self.data_loc = data_loc
        self.on_epoch_end()

    def __len__(self):
        return int(len(self.listIDs))

    def __getitem__(self, index):
        indexes = self.indexes[index:(index + 1)]
        list_IDS_temp = [self.listIDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDS_temp)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.listIDs))

    def __data_generation(self, list_IDS_temp):
        for ID in list_IDS_temp:
            vol = np.load(self.data_loc + ID + '.npy')
            nb_slices = vol.shape[0]
            pad = int((vol.shape[2] - INPUT_DIM) / 2)
            vol = vol[:, pad:-pad, pad:-pad]
            # standardize
            vol = (vol - np.min(vol)) / (np.max(vol) - np.min(vol)) * MAX_PIXEL_VAL
            # normalize
            vol = (vol - MEAN) / STDDEV
            # convert to RGB
            vol = np.stack((vol,) * 3, axis=3)
            y = np.empty(nb_slices, dtype=int)
            # y = self.labels[ID]
            for i in range(nb_slices):
                y[i] = self.labels[int(ID)]
        return vol, keras.utils.to_categorical(y, num_classes=2)
and my model:
train_set = DataGenerator(df_train['exams'].tolist(), df_train['labels'].tolist(), all_file_loc_train)
valid_set = DataGenerator(df_val['exams'].tolist(), df_val['labels'].tolist(), all_file_loc_val)
model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3)) # , input_shape=(224, 224, 3)
layer_dict = dict([(layer.name, layer) for layer in model.layers])
x = layer_dict['block2_pool'].output
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(2, activation='softmax')(x)
custom_model = Model(inputs=model.input, outputs=x)
for layer in custom_model.layers[:7]:
    layer.trainable = False

custom_model.compile(loss='categorical_crossentropy',
                     optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                     metrics=["accuracy"])
results = custom_model.fit_generator(generator=train_set, validation_data=valid_set, epochs=50, verbose=2)
I expected accuracy around 80-90%, but it seems that something goes wrong in my DataGenerator and I don't know what. Please, I need help...
I have an autoencoder set up in Keras. I want to be able to weight the features of the input vector according to a predetermined 'precision' vector. This continuous valued vector has the same length as the input, and each element lies in the range [0, 1], corresponding to the confidence in the corresponding input element, where 1 is completely confident and 0 is no confidence.
I have a precision vector for every example.
I have defined a loss that takes into account this precision vector. Here, reconstructions of low-confidence features are down-weighted.
def MAEpw_wrapper(y_prec):
    def MAEpw(y_true, y_pred):
        return K.mean(K.square(y_prec * (y_pred - y_true)))
    return MAEpw
My issue is that the precision tensor y_prec depends on the batch. I want to be able to update y_prec according to the current batch so that each precision vector is correctly associated with its observation.
I have done the following:
global y_prec
y_prec = K.variable(P[:32])
Here P is a numpy array containing all precision vectors with the indices corresponding to the examples. I initialize y_prec to have the correct shape for a batch size of 32. I then define the following DataGenerator:
class DataGenerator(Sequence):
    def __init__(self, batch_size, y, shuffle=True):
        self.batch_size = batch_size
        self.y = y
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.y))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.floor(len(self.y) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size: (index+1) * self.batch_size]
        # Set precision vector.
        global y_prec
        new_y_prec = K.variable(P[indexes])
        y_prec = K.update(y_prec, new_y_prec)
        # Get training examples.
        y = self.y[indexes]
        return y, y
Here I am aiming to update y_prec in the same function that generates the batch. This seems to be updating y_prec as expected. I then define my model architecture:
dims = [40, 20, 2]
model2 = Sequential()
model2.add(Dense(dims[0], input_dim=64, activation='relu'))
model2.add(Dense(dims[1], input_dim=dims[0], activation='relu'))
model2.add(Dense(dims[2], input_dim=dims[1], activation='relu', name='bottleneck'))
model2.add(Dense(dims[1], input_dim=dims[2], activation='relu'))
model2.add(Dense(dims[0], input_dim=dims[1], activation='relu'))
model2.add(Dense(64, input_dim=dims[0], activation='linear'))
And finally, I compile and run:
model2.compile(optimizer='adam', loss=MAEpw_wrapper(y_prec))
model2.fit_generator(DataGenerator(32, digits.data), epochs=100)
Where digits.data is a numpy array of observations.
However, this ends up defining separate graphs:
StopIteration: Tensor("Variable:0", shape=(32, 64), dtype=float32_ref) must be from the same graph as Tensor("Variable_4:0", shape=(32, 64), dtype=float32_ref).
I've scoured SO for a solution to my problem but nothing I've found works. Any help on how to do this properly is appreciated.
This autoencoder can be easily implemented using the Keras functional API. This will allow you to have an additional input placeholder y_prec_input, which will be fed the "precision" vector. The full source code can be found here.
Data generator
First, let's reimplement your data generator as follows:
class DataGenerator(Sequence):
    def __init__(self, batch_size, y, prec, shuffle=True):
        self.batch_size = batch_size
        self.y = y
        self.shuffle = shuffle
        self.prec = prec
        self.on_epoch_end()

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.y))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.floor(len(self.y) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]
        y = self.y[indexes]
        y_prec = self.prec[indexes]
        return [y, y_prec], y
Note that I got rid of the global variable. Now, instead, the precision vector P is provided as input argument (prec), and the generator yields an additional input that will be fed to the precision placeholder y_prec_input (see model definition).
Model
Finally, your model can be defined and trained as follows:
y_input = Input(shape=(input_dim,))
y_prec_input = Input(shape=(input_dim,))
h_enc = Dense(dims[0], activation='relu')(y_input)
h_enc = Dense(dims[1], activation='relu')(h_enc)
h_enc = Dense(dims[2], activation='relu', name='bottleneck')(h_enc)
h_dec = Dense(dims[1], activation='relu')(h_enc)
h_dec = Dense(input_dim, activation='relu')(h_dec)
model2 = Model(inputs=[y_input, y_prec_input], outputs=h_dec)
model2.compile(optimizer='adam', loss=MAEpw_wrapper(y_prec_input))
# Train model
model2.fit_generator(DataGenerator(32, digits.data, P), epochs=100)
where input_dim = digits.data.shape[1]. Note that I also changed the output dimension of the decoder to input_dim, since it must match the input dimension.
Try to test your code with workers=0 when you call fit_generator. If it works normally, then threading is your problem.
If threading is the cause, try this:
# In the code that executes on the main thread
graph = tf.get_default_graph()

# In code that executes in other threads (e.g. your generator)
with graph.as_default():
    ...
    ...
    new_y_prec = K.variable(P[indexes])
    y_prec = K.update(y_prec, new_y_prec)
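For example, a minimal sketch of that first check, reusing the fit_generator call from the question:

# Run the generator on the main thread to rule out threading as the cause.
model2.fit_generator(DataGenerator(32, digits.data), epochs=100, workers=0)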
Despite going through multiple examples, I still don't understand how to classify sequences of varying length using Keras, similar to this question. I can train a network that detects the frequency of sinusoids of varying length by using masking:
from keras import models
from keras.layers.recurrent import LSTM
from keras.layers import Dense, Masking
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.preprocessing.sequence import pad_sequences
import numpy as np
def gen_noise(noise_len, mag):
    return np.random.uniform(size=noise_len) * mag

def gen_sin(t_val, freq):
    return 2 * np.sin(2 * np.pi * t_val * freq)

def train_rnn(x_train, y_train, max_len, mask, number_of_categories):
    epochs = 3
    batch_size = 500

    # three hidden layers of 256 each
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)

    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(number_of_categories, input_shape=(number_of_categories,),
                    activation='softmax', name='output'))
    model.compile(loss=categorical_crossentropy, optimizer=RMSprop())

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)
    return model

def gen_sig_cls_pair(freqs, t_stops, num_examples, noise_magnitude):
    x = []
    y = []
    num_cat = len(freqs)
    dt = 0.01
    max_t = int(np.max(t_stops) / dt)
    for f_i, f in enumerate(freqs):
        for t_stop in t_stops:
            t_range = np.arange(0, t_stop, dt)
            t_len = t_range.size
            for _ in range(num_examples):
                sig = gen_sin(f, t_range) + gen_noise(t_len, noise_magnitude)
                x.append(sig)
                one_hot = np.zeros(num_cat, dtype=np.bool)
                one_hot[f_i] = 1
                y.append(one_hot)
    pad_kwargs = dict(padding='post', maxlen=max_t, value=np.NaN, dtype=np.float32)
    return pad_sequences(x, **pad_kwargs), np.array(y)

if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    frequencies = (5, 7, 10)
    signal_lengths = (0.8, 0.9, 1)
    x_in, y_in = gen_sig_cls_pair(frequencies, signal_lengths, 50, noise_mag)
    mod = train_rnn(x_in[:, :, None], y_in, 100, mask_val, len(frequencies))
However, I don't understand how I'm supposed to tell Keras about the other sequences. I thought I could mask them too, but when I try, they just output NaN.
testing_dat, expected = gen_sig_cls_pair(frequencies, signal_lengths, 1, 0)
res = mod.predict(testing_dat[:, :, None])
fig, axes = plt.subplots(3)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(np.argmax(res, axis=1), "ro", label="result", alpha=0.2)
axes[1].plot(np.argmax(expected, axis=1), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
axes[2].plot(res)
plt.show()
How do I make a network that can evaluate inputs of varying lengths?
You can pad the input sequences (usually with zeros) or you can use batches of size 1 with varying input size, as outlined in fchollet's answer on the Keras github:
for seq, label in zip(sequences, y):
    model.train(np.array([seq]), [label])
Alternatively, if your type of problem allows it, you can extract subsequences of the original time series, with length less than the length of the shortest sequence (a sketch of this is shown below). This third option also lets you add redundancy to the dataset if you have few samples, and reduces the chances of overfitting.
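For that subsequence option, a rough sketch (extract_subsequences is a hypothetical helper, not from the original post) could look like this:

import numpy as np

def extract_subsequences(sequences, labels, window):
    # Cut every sequence into fixed-length windows no longer than the shortest
    # sequence, so all inputs share one shape and no padding/masking is needed.
    x, y = [], []
    for seq, label in zip(sequences, labels):
        for start in range(0, len(seq) - window + 1, window):
            x.append(seq[start:start + window])
            y.append(label)
    return np.array(x), np.array(y)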
EDIT:
Seanny123 (OP) pointed out that fchollet's lines above contain model.train, which is not valid code.
He solved the problem using batches of size 1 and the following code:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
    one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)

    x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
    x_val[one_indices, 0] = 1

    y_val = np.zeros(num_samples, dtype=np.bool)
    y_val[one_indices] = 1

    return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_train, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
    mean_tr_acc = []
    mean_tr_loss = []
    for seq_idx in range(X_train.shape[0]):
        start_val = X_train[seq_idx, 0]
        assert y_train[seq_idx] == start_val
        assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val

        y_in = np.array([y_train[seq_idx]], dtype=np.bool)

        for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
            x_in = np.array([[[X_train[seq_idx][j]]]])
            tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
            mean_tr_acc.append(tr_acc)
            mean_tr_loss.append(tr_loss)
        model.reset_states()

    print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
    print('loss training = {}'.format(np.mean(mean_tr_loss)))
    print('___________________________________')

    mean_te_acc = []
    mean_te_loss = []
    for seq_idx in range(X_test.shape[0]):
        start_val = X_test[seq_idx, 0]
        assert y_test[seq_idx] == start_val
        assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val

        y_in = np.array([y_test[seq_idx]], dtype=np.bool)

        for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
            te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))
    print('___________________________________')