I'm trying to build a NARX NN with Keras. I'm still not 100% sure about the use of the argument return_sequences=True in the LSTM layers but, before I can check that, I need to make the code work. When I try to run it I get the following message:
ValueError: Error when checking input: expected lstm_84_input to have 3 dimensions, but got array with shape (6686, 3)
See my code below. The error is raised while running the model.fit command. My data is of shape 40101 time steps x 6 features (3 exogenous inputs, 3 system responses).
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf
from tensorflow.keras import initializers
# --- main
data = pd.read_excel('example.xlsx',usecols=['wave','wind','current','X','Y','RZ'])
data.plot(subplots=True, figsize=[15,10])
x_data = np.array(data.loc[:,['wave','wind','current']])
y_data = np.array(data.loc[:,['X','Y','RZ']])
timeSeriesCrossValidation = TimeSeriesSplit(n_splits=5)
for train, validation in timeSeriesCrossValidation.split(x_data, y_data):
    # create model
    model = tf.keras.models.Sequential()

    # input layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   input_shape=(40101, 3),
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences=True))

    # 1st hidden layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences=True))

    # 2nd hidden layer
    model.add(tf.keras.layers.LSTM(units=50,
                                   dropout=0.01,
                                   recurrent_dropout=0.2,
                                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                                   bias_initializer=initializers.Zeros(),
                                   return_sequences=False))

    # output layer
    model.add(tf.keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='nadam', metrics=['accuracy'])

    model.fit(x_data[train], y_data[train],
              verbose=2,
              batch_size=None,
              epochs=10,
              validation_data=(x_data[validation], y_data[validation])
              # callbacks=early_stop
              )

    prediction = model.predict(x_data[validation])
    y_validation = y_data[validation]
LSTM layers need input in 3 dimensions:
(n_samples, time_steps, features)
You passed data with this format:
(n_samples, features)
Since you don't have a function to create time steps, the easiest solution would be to change your input to shape:
(40101, 1, 3)
Bogus data:
x_data = np.random.rand(40101, 1, 3)
y_data = np.random.rand(40101, 3)
Also, you shouldn't pass the number of samples in the input_shape argument of a Keras layer. Just use this:
input_shape=(1, 3)
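If you would rather keep your real data than switch to bogus data, a single reshape adds the missing time-step axis (a minimal sketch, assuming x_data still has the shape (40101, 3) built above):

x_data = x_data.reshape((x_data.shape[0], 1, x_data.shape[1]))  # (40101, 3) -> (40101, 1, 3)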
So here is the corrected code (with bogus data):
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf
from tensorflow.keras import initializers
from tensorflow.keras.layers import *
x_data = np.random.rand(40101, 1, 3)
y_data = np.random.rand(40101, 3)
timeSeriesCrossValidation = TimeSeriesSplit(n_splits=5)
for train, validation in timeSeriesCrossValidation.split(x_data, y_data):
    # create model
    model = tf.keras.models.Sequential()

    # input layer
    model.add(LSTM(units=5,
                   input_shape=(1, 3),
                   dropout=0.01,
                   recurrent_dropout=0.2,
                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                   bias_initializer=initializers.Zeros(),
                   return_sequences=True))

    # 1st hidden layer
    model.add(LSTM(units=5,
                   dropout=0.01,
                   recurrent_dropout=0.2,
                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                   bias_initializer=initializers.Zeros(),
                   return_sequences=True))

    # 2nd hidden layer
    model.add(LSTM(units=50,
                   dropout=0.01,
                   recurrent_dropout=0.2,
                   kernel_initializer=initializers.RandomNormal(mean=0, stddev=.5),
                   bias_initializer=initializers.Zeros(),
                   return_sequences=False))

    # output layer
    model.add(tf.keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='nadam', metrics=['accuracy'])

    model.fit(x_data[train], y_data[train],
              verbose=2,
              batch_size=None,
              epochs=1,
              validation_data=(x_data[validation], y_data[validation])
              # callbacks=early_stop
              )

    prediction = model.predict(x_data[validation])
    y_validation = y_data[validation]
If you want a function to create time steps, use this:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    data = []
    labels = []
    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size
    for i in range(start_index, end_index):
        indices = range(i - history_size, i, step)
        data.append(dataset[indices])
        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])
    return np.array(data), np.array(labels)
It will give you the right shape, e.g.:
multivariate_data(dataset=np.random.rand(40101, 3),
                  target=np.random.rand(40101, 3),
                  start_index=0, end_index=len(x_data),
                  history_size=5, target_size=0,
                  step=1, single_step=True)[0].shape
(40096, 5, 3)
You lose 5 data points because, at the beginning of the series, you can't look 5 steps back into the past.
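To wire this into the model above, the window length becomes the time-step dimension, so input_shape changes from (1, 3) to (5, 3). A minimal sketch (assuming the multivariate_data function above is in scope, and again with bogus data):

import numpy as np
import tensorflow as tf

x_win, y_win = multivariate_data(dataset=np.random.rand(40101, 3),
                                 target=np.random.rand(40101, 3),
                                 start_index=0, end_index=None,
                                 history_size=5, target_size=0,
                                 step=1, single_step=True)
print(x_win.shape, y_win.shape)  # (40096, 5, 3) (40096, 3)

model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, input_shape=(5, 3)),  # last LSTM: return_sequences=False
    tf.keras.layers.Dense(3),
])
model.compile(loss='mse', optimizer='nadam')
model.fit(x_win, y_win, epochs=1, verbose=2)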
Related
I tried to set up an LSTM model with an input matrix of 7 columns and ca. 1650 rows.
The output matrix has 1 column and 1650 rows.
My model code is shown below.
The problem: the prediction contains the same value, 26.19..., in all 1650 rows.
Can anyone help?
Thank you!
Edit: I updated the code.
svt.csv contains 8 columns with ca. 8000 rows. Y is separated from that, so that Y has one column and X has 7 columns.
Edit 2: I realized that predict has shape (1657, 20, 1). Why not (1657, 1)?
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from matplotlib import pyplot
df_svt = pd.read_csv("svt.csv", sep=";")
df_svt = df_svt.iloc[6980:]
df_svt = df_svt.iloc[:8158]
df_y = pd.DataFrame(df_svt.y)
df_y_train = df_y.iloc[:6501]
df_y_test = df_y.iloc[6501:8158]
y_train = df_y_train.to_numpy()
y_test = df_y_test.to_numpy()
df_svt = df_svt.drop(columns=["y"])
df_svt = df_svt.drop(columns=["Date"])
df_x_train = df_svt.iloc[:6501]
df_x_test = df_svt.iloc[6501:8158]
x_train = df_x_train.to_numpy()
x_test = df_x_test.to_numpy()
train_X = x_train.reshape(6501,1,7)
test_X = x_test.reshape(1657,1,7)
train_Y = y_train.reshape(6501,1,1)
test_Y = y_test.reshape(1657,1,1)
trainX = []
trainY = []
testX = []
testY = []
ts = 20
timestep = ts
def create_dataset(datasetX, datasetY, timestep=10):
    dataX, dataY = [], []
    for i in range(len(datasetX) - timestep - 1):
        a = datasetX[i:(i + timestep), 0]
        dataX.append(a)
        dataY.append(datasetY[i + timestep, 0])
    return np.array(dataX), np.array(dataY)
timestep=ts
trainX, trainY = create_dataset(train_X, train_Y, timestep)
timestep=ts
testX, testY = create_dataset(test_X,test_Y,timestep)
model = tf.keras.Sequential()
model.add(LSTM(units=70,return_sequences=True, input_shape=(trainX.shape[1],trainX.shape[2]) ))
model.add(Dropout(0.2))
model.add(LSTM(units=70, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=70))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer="adam",loss="mean_squared_error")
model.fit(trainX, trainY, epochs=5, batch_size=32, verbose=1)
model.summary()
predict = model.predict(testX)
print(predict)
You are using return_sequences=True in your LSTM layers, which means that the output from every timestep is passed on to the next layer. That is most likely what you want between stacked LSTM layers, but you need to remove it from the last LSTM layer, where you only want the output from the last timestep.
model.add(LSTM(units=70,return_sequences=True, input_shape=(trainX.shape[1],trainX.shape[2]) ))
model.add(Dropout(0.2))
model.add(LSTM(units=70, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=70))
model.add(Dropout(0.2))
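To see the difference, here is a quick shape check (a sketch, using the same layer size as above):

import tensorflow as tf
from tensorflow.keras.layers import LSTM

x = tf.random.normal((1, 20, 7))  # (batch, timesteps, features)
print(LSTM(70, return_sequences=True)(x).shape)   # (1, 20, 70): one output per timestep
print(LSTM(70, return_sequences=False)(x).shape)  # (1, 70): only the last timestep

With return_sequences=False on the last LSTM layer, the final Dense(units=1) then produces the (1657, 1)-shaped predictions you expected.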
I'm trying to fit a LSTM-model to my data with a Masking Layer in front and I get this error:
ValueError: Can not squeeze dim[1], expected a dimension of 1, got 4 for '{{node binary_crossentropy/weighted_loss/Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast)' with input shapes: [128,4].
This is my code:
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Masking
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Nadam
import numpy as np
if __name__ == '__main__':
    # define stub data
    samples, timesteps, features = 128, 4, 99
    X = np.random.rand(samples, timesteps, features)
    Y = np.random.randint(0, 2, size=(samples))

    # create model
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(None, 99)))
    model.add(LSTM(100, return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    optimizer = Nadam(learning_rate=0.0001)
    loss = BinaryCrossentropy(from_logits=False)
    model.compile(loss=loss, optimizer=optimizer)

    # train model
    model.fit(X, Y, batch_size=128)
I see from this related post that I can't use one-hot encoded labels, but my labels are not one-hot encoded.
Also, when I remove the Masking layer, training works.
From my understanding, one sample consists of 4 timesteps with 99 features here; the shape of X is therefore (128, 4, 99).
I therefore only have to provide one label for each sample, so the shape of Y is (128,).
But it seems like the dimensions of X and/or Y are not correct, as TensorFlow wants to change their dimensions?
I have tried providing a label per timestep of each sample (Y = np.random.randint(0, 2, size=(samples, timesteps))), with the same result.
Why does adding the masking layer introduce this error? And how can I keep the masking layer without getting the error?
System Information:
Python version: 3.9.5
Tensorflow version: 2.5.0
OS: Windows
I don't think the problem is the Masking layer. Since you set the parameter return_sequences to True in the LSTM layer, you are getting a sequence with the same number of time steps as your input and an output space of 100 for each timestep, hence the shape (128, 4, 100), where 128 is the batch size. Afterwards, you apply a BatchNormalization layer and finally a Dense layer, resulting in the shape (128, 4, 1). The problem is that your labels have the 1D shape (128,), while your model has a 3D output due to the return_sequences parameter. So simply setting this parameter to False should solve your problem. See also this post.
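You can verify this by inspecting the model's output shape before the fix (a quick sketch, rebuilding the posted model up to the Dense layer):

from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Masking
from tensorflow.keras.models import Sequential

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(None, 99)))
model.add(LSTM(100, return_sequences=True))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))
print(model.output_shape)  # (None, None, 1): 3D because return_sequences=True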
Here is a working example:
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Masking
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Nadam
import numpy as np
if __name__ == '__main__':
    # define stub data
    samples, timesteps, features = 128, 4, 99
    X = np.random.rand(samples, timesteps, features)
    Y = np.random.randint(0, 2, size=(samples))

    # create model
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(None, 99)))
    model.add(LSTM(100, return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    optimizer = Nadam(learning_rate=0.0001)
    loss = BinaryCrossentropy(from_logits=False)
    model.compile(loss=loss, optimizer=optimizer)

    # train model
    model.fit(X, Y, batch_size=128)
I tried to make a confusion matrix from the model that I made. Everything seems fine up to building and training the model, until I run into an error that says:
ValueError: Found input variables with inconsistent numbers of samples: [4, 304]
Here is the code that I use:
# Convert List to numpy array, for Keras use
Train_label = np.eye(n_labels)[label] # One-hot encoding by np array function
Train_data = np.array(data)
print("Dataset shape is",Train_data.shape, "(size, timestep, column, row, channel)")
print("Label shape is",Train_label.shape,"(size, label onehot vector)")
# shuffle the dataset for the fit function;
# if we don't, the model can't be trained properly
x = np.arange(Train_label.shape[0])
np.random.shuffle(x)
# same order shuffle is needed
Train_label = Train_label[x]
Train_data = Train_data[x]
train_size = 0.9
X_train=Train_data[:int(Totalnb * 0.9),:]
Y_train=Train_label[:int(Totalnb * 0.9)]
X_test=Train_data[int(Totalnb * 0.1):,:]
Y_test=Train_label[int(Totalnb * 0.1):]
# 2. Building a Model
# declare input layer for CNN+LSTM architecture
video = Input(shape=(timesteps,img_col,img_row,img_channel))
STEPS_PER_EPOCH = 120
#AlexNet Layer
model = tf.keras.models.Sequential([
    # 1st conv
    tf.keras.layers.Conv2D(96, (11, 11), strides=(4, 4), activation='relu',
                           input_shape=(img_col, img_row, img_channel)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, strides=(2, 2)),
    # 2nd conv
    tf.keras.layers.Conv2D(256, (5, 5), strides=(1, 1), activation='relu', padding="same"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, strides=(2, 2)),
    # 3rd conv
    tf.keras.layers.Conv2D(384, (3, 3), strides=(1, 1), activation='relu', padding="same"),
    tf.keras.layers.BatchNormalization(),
    # 4th conv
    tf.keras.layers.Conv2D(384, (3, 3), strides=(1, 1), activation='relu', padding="same"),
    tf.keras.layers.BatchNormalization(),
    # 5th conv
    tf.keras.layers.Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding="same"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, strides=(2, 2)),
])
model.trainable = True
# FC Dense Layer
x = model.output
x = Flatten()(x)
cnn_out = Dense(128)(x)
# Construct CNN model
Lstm_inp = Model(model.input, cnn_out)
# Distribute CNN output by timesteps
encoded_frames = TimeDistributed(Lstm_inp)(video)
# Construct LSTM model
encoded_sequence = LSTM(256)(encoded_frames)
hidden_Drop = Dropout(0.2)(encoded_sequence)
hidden_layer = Dense(128)(hidden_Drop)
outputs = Dense(n_labels, activation="softmax")(hidden_layer)
# Construct CNN+LSTM model
model = Model([video], outputs)
# 3. Setting up the Model Learning Process
# Model Compile
opt = SGD(lr=0.01)
model.compile(loss = "categorical_crossentropy", optimizer = opt, metrics=['accuracy'])
model.summary()
# 4. Training the Model
hist = model.fit(X_train, Y_train, batch_size=batch_size, validation_split=validation_ratio, shuffle=True, epochs=num_epochs)
Y_pred2 = model.predict(X_test)
y_pred = np.argmax(Y_pred2, axis=1)  # prediction
y_test=np.argmax(Y_test, axis=0)
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)
import seaborn as sns
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(8,5))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt=".0f", ax=ax)
plt.xlabel("Y_head")
plt.ylabel("Y_true")
plt.show()
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
Everything seems fine and works, but the error comes up when I try to make the confusion matrix in the line confusion_matrix(y_test, y_pred).
I still can't figure out what the problem might be.
I hope someone can help me with this.
Thank you so much, guys!
Posting my comments as an answer for completeness:
One possible thing that looks a bit weird is that you take different axes when calculating the argmax for y_pred and y_test. That might be OK depending on your data layout.
y_test and y_pred seem to be of different lengths. Check the shapes of Y_pred2 and Y_test and verify that the axes over which you calculate the argmax are correct.
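Concretely (a sketch; judging from the [4, 304] in the error, Y_test is most likely one-hot encoded with shape (304, 4), so the argmax for the labels should also run over axis=1):

print(Y_pred2.shape, Y_test.shape)   # expected: (304, 4) (304, 4)
y_pred = np.argmax(Y_pred2, axis=1)  # class index per sample
y_test = np.argmax(Y_test, axis=1)   # axis=1 over the one-hot dimension, not axis=0
print(y_pred.shape, y_test.shape)    # both (304,), consistent for confusion_matrix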
I am running LSTM, GRU and BiLSTM models using the following code:
# Create BiLSTM model
def create_model_bilstm(units):
    model = Sequential()
    model.add(Bidirectional(LSTM(units=units, return_sequences=True),
                            input_shape=(X_train.shape[1], X_train.shape[2])))
    # model.add(Bidirectional(LSTM(units=units)))
    model.add(Dense(1))
    # Compile model
    model.compile(loss='mse', optimizer='adam')
    return model

# Create LSTM or GRU model
def create_model(units, m):
    model = Sequential()
    model.add(m(units=units, return_sequences=True,
                input_shape=[X_train.shape[1], X_train.shape[2]]))
    model.add(Dropout(0.1))
    # model.add(m(units=units))
    # model.add(Dropout(0.2))
    model.add(Dense(units=1))
    # Compile model
    model.compile(loss='mse', optimizer='adam')
    return model
# BiLSTM
model_bilstm = create_model_bilstm(20)
# GRU and LSTM
model_gru = create_model(50, GRU)
model_lstm = create_model(20, LSTM)
# Fit BiLSTM, LSTM and GRU
def fit_model(model):
    early_stop = EarlyStopping(monitor='val_loss', patience=100)
    history = model.fit(X_train, y_train, epochs=700,
                        validation_split=0.2, batch_size=32,
                        shuffle=False, callbacks=[early_stop])
    return history
history_bilstm = fit_model(model_bilstm)
history_lstm = fit_model(model_lstm)
history_gru = fit_model(model_gru)
This all runs smoothly and prints out my loss graphs, but when it comes to predictions I run the following code:
# Make prediction
def prediction(model):
    prediction = model.predict(X_test)
    prediction = scaler_y.inverse_transform(prediction)
    return prediction
prediction_bilstm = prediction(model_bilstm)
prediction_lstm = prediction(model_lstm)
prediction_gru = prediction(model_gru)
and I get the following error:
ValueError Traceback (most recent call last)
<ipython-input-387-9d45f01ae2a2> in <module>
5 return prediction
6
----> 7 prediction_bilstm = prediction(model_bilstm)
8 prediction_lstm = prediction(model_lstm)
9 prediction_gru = prediction(model_gru)
<ipython-input-387-9d45f01ae2a2> in prediction(model)
2 def prediction(model):
3 prediction = model.predict(X_test)
----> 4 prediction = scaler_y.inverse_transform(prediction)
5 return prediction
...
ValueError: Found array with dim 3. Estimator expected <= 2.
I am assuming this has something to do with my X_test shape, based on other posts I have read, so I tried to reshape it to 2d but got another error telling me "expected bidirectional_3_input to have 3 dimensions, but got array with shape (62, 36)" on line 7 again.
What am I doing wrong and how can I fix it?
Data Explanation:
So I am trying to predict discharge rates (target variable) using groundwater levels (34 features), precipitation and temperature as inputs, which gives me a total of 36 features. My data has monthly resolution. I am using 63 observations for my test set (a 5-year prediction) and the rest for training.
What are you doing wrong? Let's assume your input data has shape X_train.shape = [d0, d1, d2]. Then, after setting up your BiLSTM model like
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, Dense

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=10, return_sequences=True),
        input_shape=(d1, d2)
    )
)
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
we can check the input- and output-shapes your model expects by
>>model.input.shape
TensorShape([None, d1, d2])
>>model.output.shape
TensorShape([None, d1, 1])
So your model expects input of shape (n_batch,d1,d2), where n_batch is the batch size of the data, and returns a shape (n_batch,d1,1), thus a 3d-tensor.
Now if you provide a 3d tensor to your model, model.predict will successfully return a 3d tensor; however, sklearn.preprocessing.StandardScaler.inverse_transform only works on 2d data, which is why it says
ValueError: Found array with dim 3. Estimator expected <= 2.
On the other hand, if you first reshape your data to be 2d, then model.predict complains, because it is set up to expect a 3d tensor.
How can you fix it? For further help on how to fix your code, you will need to provide us with more detailed information on what you expect your model to do, especially what output shape you want your BiLSTM model to have. I assume you actually want your BiLSTM model to return a scalar for each sample, so an additional Flatten layer might do the trick:
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Flatten

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=10, return_sequences=True),
        input_shape=(d1, d2)
    )
)
model.add(Flatten())  # <-- additional Flatten layer
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
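With the Flatten layer in place, the model's output is 2d, which is exactly what inverse_transform accepts (checking the shape the same way as above):

>>model.output.shape
TensorShape([None, 1])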
I have a batching generator function that is not feeding the correct batch shape to an LSTM. When I test the function it appears to return the correct shape [n_samples, n_timesteps, n_features] but this throws an error when fitting the model.
I have checked the function by looping over the generator to check the batch shapes and they return the correct number of samples, time steps etc.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, TimeDistributed, RepeatVector

def batch_generator(x_train_scaled, y_train_scaled, batch_size, sequence_length):
    """
    Generator function to develop sequential batches of data.

    Args:
        batch_size:
        sequence_length:
    """
    # Infinite loop.
    while True:
        # Allocate a new array for the batch of input signals.
        x_shape = (batch_size, sequence_length, num_x_signals)
        x_batch = np.zeros(shape=x_shape, dtype=np.float16)

        # Allocate a new array for the batch of output signals.
        y_shape = (batch_size, sequence_length, 1)
        y_batch = np.zeros(shape=y_shape, dtype=np.float16)

        for i in range(batch_size):
            # Copy the sequences of data starting at this index.
            x_batch[i] = x_train_scaled[i:i + sequence_length]
            y_batch[i] = y_train_scaled[i:i + sequence_length]

        yield (x_batch, y_batch)
# test the function
batch_size = 10
sequence_length = 10
batch_gen = batch_generator(x_train_scaled, y_train_scaled,
                            batch_size=batch_size,
                            sequence_length=sequence_length)
x_batch, y_batch = next(batch_gen)

# test that it returns the correct shapes, (10, 10, 3) and (10, 10, 1)
print(x_batch.shape)
print(y_batch.shape)
def build_model(generator, n_outputs):
    # define encoder/decoder architecture, use a TimeDistributed layer
    model = Sequential()
    model.add(LSTM(10, activation='relu',
                   input_shape=(x_batch.shape[1], x_batch.shape[2])))
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(10, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(5, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    model.compile(loss='mse', optimizer='adam')

    # fit network
    model.fit_generator(generator=batch_gen,
                        epochs=20,
                        steps_per_epoch=10,
                        validation_data=validation_data,
                        verbose=1)
    return model