data shape mismatch in time series prediction - python

working with a LSTM model for predicting stock prices, i did every step exactly as the tutorial but unlike the tutorial, my code runs into an error.
here is the code i am working with:
df = pd.read_csv(f'D:\\algo\\all\\EURUSD_15M.csv')
df = df.loc[:, ~df.columns.str.contains('^Unnamed',)]
training_set = df.iloc[:-int(len(df)/10), 4:5].values
sc = MinMaxScaler(feature_range= (0, 1))
training_set_scaled = sc.fit_transform(training_set)
x_train , y_train = [], []
for i in range(60, len(training_set)):
x_train.append(training_set_scaled[i-60:i, 0])
y_train.append(training_set_scaled[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape = (x_train.shape[1],1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['Accuracy'])
model.fit(x_train, y_train, epochs=5, batch_size=64)
So whats supposed to happen is take 60 periods of a price and predict the 61th period.
but i ultimately face the following error:
ValueError: A target array with shape (379319, 1) was passed for an output of shape (None, 60, 1) while using as loss `mean_squared_error`. This loss expects targets to have the same shape as the output.
what am i doing wrong?

As a dataset try using
TimeseriesGenerator (tf.keras.preprocessing.sequence.TimeseriesGenerator) instead of your custom list -> https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/TimeseriesGenerator
ex.
train_gen = TimeseriesGenerator(Xtrain, Xtrain, n_steps, batch_size=24*7)
valid_gen = TimeseriesGenerator(Xvalid, Xvalid, n_steps, batch_size=24*7)
test_gen = TimeseriesGenerator(Xtest, Xtest, n_steps, batch_size=24*7)

Related

InvalidArgumentError: Graph execution error: word2vec

I am new to ML and I am creating a CNN model for Sentiment analysis using word2vec. My word2vec contains negative value also. While fitting the model I got an error -
InvalidArgumentError in model.fit(X_train, Y_train, epochs=3, batch_size=64)
InvalidArgumentError: Graph execution error: Detected at node 'sequential_30/embedding_29/embedding_lookup'
This is the code to create the model
def get_vec(x):
doc = nlp(x)
vec = doc.vector
return vec
df['vec'] = df['text'].apply(lambda x: get_vec(x))
XTrain = df['vec'].to_numpy()
XTrain = XTrain.reshape(-1, 1)
XTrain = np.concatenate(np.concatenate(XTrain, axis = 0), axis = 0).reshape(-1, 300)
YTrain = df['target']
X_train, X_test, Y_train, Y_test = train_test_split(XTrain, YTrain, test_size = .3, random_state = 45, stratify = YTrain)
# Pad the sequence to the same length
max_review_length = 1600
X_train = pad_sequences(X_train, maxlen=max_review_length)
top_words = (len(nlp.vocab)) + 1
# Using embedding from Keras
embedding_vecor_length = 300
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180,activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=3, batch_size=64)
When I replace all negative values in df['vec'], code is working without error but with 0 accuracy. What is wrong in this? Please help. Thanks in advance..

Convolutional Neural Network - 1D - Feature Classification Error

I am trying to modify the following example to simulate CNN for my set of data and running into some errors
https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
X = D.replace(['Resting', 'Swimming', 'Feeding', 'Non directed motion'], [0, 1, 2, 3])
X_Label = X['Label'].to_numpy()
X_Data = X[['X_static','Y_static','Z_static','X_dynamic','Y_dynamic','Z_dynamic']].to_numpy()
X_names = ['X_static','Y_static','Z_static','X_dynamic','Y_dynamic','Z_dynamic']
X_Label_Names = np.array(['Resting', 'Swimming', 'Feeding', 'Non directed motion'])
X_Data is a 5600 by 6 column numpy matrix. Each column represents a type of measurement data over time
X_Label is a 5600 by 1 column consisting of values of 0 through 3 that represents the features or attributes. 0 represents resting, 1 represents swimming and so on.
X = X_Data
y = X_Label
def load_dataset_f(X,y):
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, stratify=y, random_state=random_state
)
trainX = X_train
trainy = y_train
testX = X_test
testy = y_test
print(trainX)
print(trainX.shape)
print(trainy.shape)
return trainX, trainy, testX, testy
# fit and evaluate a model
def evaluate_model_f(trainX, trainy, testX, testy):
verbose, epochs, batch_size = 2, 10, 20
n_timesteps, n_features, n_outputs = 6, 1, 1
print('n timesteps --------------------------------------------------------------------')
print(n_timesteps)
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
print(to_categorical(trainy))
model.fit(trainX.reshape(len(trainX),6,1), to_categorical(trainy))
# evaluate model
_, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
return accuracy
def run_experiment_f(repeats=1):
# load data
trainX, trainy, testX, testy = load_dataset_f(X,y)
print(trainX)
# repeat experiment
scores = list()
for r in range(repeats):
score = evaluate_model_f(trainX, trainy, testX, testy)
score = score * 100.0
print('>#%d: %.3f' % (r+1, score))
scores.append(score)
# summarize results
summarize_results(scores)
load_dataset_f(X,y)
run_experiment_f()
I am unfamiliar with the tensorflow library and getting errors at model.fit(), I am not sure to how to approach this. The matrix presented in the example was 3D where as my data is 2D, not sure if that matters. How do I get this code to work ?
You need to make sure that your input to your Conv1D layer has the shape (timesteps, features) and that your last output layer's units equals the number of unique labels in your dataset. Here is a working example:
import tensorflow as tf
trainX = tf.random.normal((32, 6))
trainy = tf.random.uniform((32, 1), maxval=4)
verbose, epochs, batch_size = 2, 10, 20
n_timesteps, n_features, n_outputs = 6, 1, 4
print('n timesteps --------------------------------------------------------------------')
print(n_timesteps)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(100, activation='relu'))
model.add(tf.keras.layers.Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
print(tf.keras.utils.to_categorical(trainy))
trainX = tf.expand_dims(trainX, axis=2)
model.fit(trainX, tf.keras.utils.to_categorical(trainy))

Preparing Pandas DataFrame for LSTM

I'm trying to fit a LSTM classifier using Keras but don't understand how to prepare the data for training.
I currently have two dataframes for the training data. X_train contains 48 hand-crafted temporal features from IMU data, and y_train contains corresponding labels (4 kinds) representing terrain. The shape of these dataframes is given below:
X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
print(X_train.shape, y_train.shape)
**(268320, 48, 1) (268320,)**
Model using batch_size = (32,5,48):
def def_model():
model = Sequential()
model.add(LSTM(units=144,batch_size=(32, 5, 48),return_sequences=True))
model.add(Dropout(0.5))
model.add(Dense(144, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
return model
model_LSTM = def_model()
LSTM_history = model_LSTM.fit(X_train, y_train, epochs=15, validation_data=(X_valid, y_valid), verbose=1)
The error that I am getting:
ValueError: Shapes (32, 1) and (32, 48, 4) are incompatible
Any insight into how to fix this particular error and any intuition into what Keras is expecting?
What is the 5 in your batch size ? The batch_size argument in the LSTM layer indicates that your data should be in the form (batch_size, time_steps, feature_per_time_step). If I am understanding correctly, your data has time_steps = 1 and feature_per_time_step = 48.
Here is a sample of working code and the shape of each of them.
def def_model():
model = Sequential()
model.add(LSTM(units=144,batch_size=(32, 1, 48),return_sequences=True))
model.add(Dropout(0.5))
model.add(Dense(144, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
return model
model_LSTM = def_model()
X_train = np.random.random((10000,1,48))
y_train = np.random.random((10000,4))
y_train = y_train.reshape(-1,1,4)
data = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
model_LSTM.fit(data, epochs=15, verbose=1)
Passing data instead of x_train and y_train in your fit function will fit the model properly.
If you want to have 5 timesteps in your data, you will have to create your X_train in such a way to have it have a shape (n_samples,5,48).

ValueError: Shapes are incompatible in LSTM model

I am creating an LSTM model based on the following parameters
embed_dim = 128
lstm_out = 200
batch_size = 32
model = Sequential()
model.add(Embedding(2500, embed_dim,input_length = X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(lstm_out))
model.add(Dense(2,activation='sigmoid'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())
Xtrain, Xtest, ytrain, ytest = train_test_split(X, train['target'], test_size = 0.2, shuffle=True)
print(Xtrain.shape, ytrain.shape)
print(Xtest.shape, ytest.shape)
model.fit(Xtrain, ytrain, batch_size =batch_size, epochs = 1, verbose = 5)
but I am receiving the following error
ValueError: Shapes (32, 1) and (32, 2) are incompatible
Can you help me with this error?
Your y_train is coming from a single column of a Pandas dataframe, which is a single column. This is suitable if your classification problem is a binary classification 0/1 problem. Then you only need a single neuron in the output layer.
model = Sequential()
model.add(Embedding(2500, embed_dim,input_length = X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(lstm_out))
# Only one neuron in the output layer
model.add(Dense(1,activation='sigmoid'))

keras lstm error: expected to see 1 array

so i want to make a lstm network to run on my data but i get this message:
ValueError: Error when checking input: expected lstm_1_input to have shape (None, 1) but got array with shape (1, 557)
this is my code:
x_train=numpy.array(x_train)
x_test=numpy.array(x_test)
x_train = numpy.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = numpy.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(50, input_shape=(1,len(x_train[0]) )))
model.add(Dense(1))
model.add(Dropout(0.2))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, numpy.array(y_train), epochs=100, batch_size=1, verbose=2)
You need to change the input_shape value for LSTM layer. Also, x_train must have the following shape.
x_train = x_train.reshape(len(x_train), x_train.shape[1], 1)
So, change
x_train = numpy.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
model.add(LSTM(50, input_shape=(1,len(x_train[0]) )))
to
x_train = x_train.reshape(len(x_train), x_train.shape[1], 1)
model.add(LSTM(50, input_shape=(x_train.shape[1], 1) )))

Categories