Keras fitting LSTM getting slower in loop

Keras fitting LSTM getting slower in loop - python

I have a simple loop which in every iteration creates an LSTM (with the same parameters) and fit it to the same data.
The problem is that it takes more and more time during itterations.
batch_size = 10
optimizer = optimizers.adam(lr=0.001)
number_unites = 20
for counter_temp in range(1,100):
t0 = time.time()
model = None
model = Sequential()
model.add(LSTM(units=number_unites, batch_input_shape=(None, batch_size, 1), return_sequences=False))
model.add(Dense(1))
model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=50, validation_data=(x_train_val, y_train_val), verbose=0)
print(time.time()-t0)

Thanks to DanielMöller, it is solved.
I used keras.backend.clear_session() to remove "Keras's graph".
from keras.backend import clear_session
clear_session()

Related

How to calculate mean relative error on test datasets

def create_model():
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(40002, 12)))
model.add(LSTM(50, return_sequences= True))
model.add(LSTM(50, return_sequences= True))
model.add(tf.keras.layers.LSTM(30))
model.add(Dense(2, activation='linear'))
def rmse(Y_test, prediction):
return K.sqrt(K.mean(K.square(Y_test-prediction)))
# compile
model.compile(optimizer='adam', loss=rmse, metrics=['mean_squared_error', rmse])
return model
# fit the model
model = create_model()
model.fit(x_train, Y_train, shuffle=False, verbose=1, epochs=10)
# # predict model
prediction = model.predict(x_test, verbose=0)
print(prediction)
How to calculate mean relative error for tensor inputs i.e my Y_test and prediction are tensor.
Y_test and prediction as 2 values
Example:
Y_test = [[0.2,0.003],
[0.3, 0.008]]
prediction = [[0.4,0.005],
[0.5,0.007]]
mean_relative_error = mean(absolute(0.2-0.4)/0.2 + absolute(0.003-0.005)/0.003), mean(absolute(0.3-0.5)/0.3 + absolute(0.008-0.007)/0.008)
mean_relative_error = [0.533, 0.3925]
Please note that I don't want to use it for backpropagation to improve the network.

Would have added like this:
from tensorflow.math import reduce_mean, abs, reduce_sum
relative_error = reduce_mean(reduce_sum(abs(prediction-Y_test)/prediction, axis=1))
# [0.9, 0.54285717]
mean_relative_error = reduce_mean(relative_error)
# 0.7214286
I couldn't use tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE) because of a bug. The MeanAbsoluteError still does reduce to mean despite specifying it not to. The bug reported HERE

The application of self-attention layer raised index error

So I am doing a classification machine learning with the input of (batch, step, features).
In order to improve the accuracy of this model, I intended to apply a self-attention layer to it.
I am unfamiliar with how to use it for my case since most examples online are concerned with embedding NLP models.
def opt_select(optimizer):
if optimizer == 'Adam':
adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
return adamopt
elif optimizer == 'RMS':
RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
return RMSopt
else:
print('undefined optimizer')
def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss,batch_size=68, units=128, learning_rate=0.005,epochs=20, dropout=0.2, recurrent_dropout=0.2,optimizer='Adam'):
class myCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if (logs.get('acc') > 0.90):
print("\nReached 90% accuracy so cancelling training!")
self.model.stop_training = True
callbacks = myCallback()
model = tf.keras.models.Sequential()
model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout)))
model.add(SeqSelfAttention(attention_activation='sigmoid'))
model.add(Dense(num_classes, activation='softmax'))
opt = opt_select(optimizer)
model.compile(loss=loss,
optimizer=opt,
metrics=['accuracy'])
history = model.fit(X_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(X_test, y_test),
verbose=1,
callbacks=[callbacks])
score, acc = model.evaluate(X_test, y_test,
batch_size=batch_size)
yhat = model.predict(X_test)
return history, that
This led to IndexError: list index out of range
What is the correct way to apply this layer to my model?
As requested, one may use the following codes to simulate a set of the dataset.
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout,Bidirectional,Masking,LSTM
from keras_self_attention import SeqSelfAttention
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], 700)
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], 100)
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(X_train,y_train,X_test,y_test,loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer)

Here is how I would rewrite the code -
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM, Reshape
from keras_self_attention import SeqSelfAttention
import numpy as np
def opt_select(optimizer):
if optimizer == 'Adam':
adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
return adamopt
elif optimizer == 'RMS':
RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
return RMSopt
else:
print('undefined optimizer')
def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
class myCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if (logs.get('accuracy') > 0.90):
print("\nReached 90% accuracy so cancelling training!")
self.model.stop_training = True
callbacks = myCallback()
model = tf.keras.models.Sequential()
model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True)))
model.add(SeqSelfAttention(attention_activation='sigmoid'))
model.add(Reshape((-1, model.output.shape[1]*model.output.shape[2])))
model.add(Dense(num_classes, activation='softmax'))
opt = opt_select(optimizer)
model.compile(loss=loss,
optimizer=opt,
metrics=['accuracy'])
history = model.fit(X_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(X_test, y_test),
verbose=1,
callbacks=[callbacks])
score, acc = model.evaluate(X_test, y_test,
batch_size=batch_size)
yhat = model.predict(X_test)
return history, that
X_train = np.random.rand(700, 50,34)
y_train = np.random.choice([0, 1], (700, 1))
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], (100, 1))
batch_size= 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]
LSTM_attention_his,yhat = LSTM_attention_model(
X_train,y_train,X_test,y_test,
loss =loss,num_classes=num_classes,batch_size=batch_size,units=32,
learning_rate=learning_rate,epochs=epochs,dropout = 0.5,recurrent_dropout=Rdropout,optimizer=optimizer
)
These are the changes I had to make to get this to start training -
The original issue was caused by the LSTM layer outputting the wrong dimensions. The SeqSelfAttention layer needs a 3D input (one dimension corresponding to the sequence of the data) which was missing from the output of the LSTM layer. As mentioned by #today, in the comments, this can be solved by adding return_sequences=True to the LSTM layer.
But even with that modification,the code still gives an error at when trying to compute the cost function.The issue is that, the output of the self-attention layer is (None, 50, 64) when this is directly passed into the Dense layer, the final output of the network becomes (None, 50, 1). This doesn't make sense for what we are trying to do, because the final output should just contain a single label for each datapoint (it should have the shape (None, 1)). The issue is the output from the self-attention layer which is 3 dimensional (each data point has a (50, 64) feature vector). This needs to be reshaped into a single dimensional feature vector for the computation to make sense. So I added a reshape layer model.add(Reshape((-1, ))) between the attention layer and the Dense layer.
In addition, the myCallback class is testing if logs.get('acc') is > 0.9 but I think it should be (logs.get('accuracy').
To comment on OP's question in the comment on what kind of column should be added, in this case, it was just a matter of extracting the full sequential data from the LSTM layer. Without the return_sequence flag, the output from the LSTM layer is (None, 64) This is simply the final features of the LSTM without the intermediate sequential data.

How to obtain outputs that are from future in a keras sequential model

i'm a beginner in python and i'm interested in machine learning attached to finance. I have created a model to predict future prices reading from a csv file, I have created the neural network, I get it to have very little loss but the outputs cannot be extracted or I just haven't created the necessary layers.
I would greatly appreciate a help with this. Thanks in advance.
model = Sequential()
model.add(LSTM(256, input_shape=(1,1)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
model.fit(train_x, train_y, epochs=30, batch_size=1, verbose=1)
score = model.evaluate(train_x, train_y, verbose=0)
print('Keras model loss = ', score[0])
print('Keras model accuracy = ', score[1])
train_predictions = model.predict(train_x)
test_predictions = model.predict(test_x)
train_predictions = scaler.inverse_transform(train_predictions)
train_y = scaler.inverse_transform([train_y])
train_predict_plot = np.empty_like(scaled_data)
train_predict_plot[:,:] = np.nan
train_predict_plot[1:len(train_predictions)+1, :] = train_predictions
test_predict_plot = np.empty_like(scaled_data)
test_predict_plot[:,:] = np.nan
test_predict_plot[
plt.plot(scaler.inverse_transform(scaled_data))
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()

Overfitting in LSTM even after using regularizers

I am having a time series prediction problem and building an LSTM like below :
def create_model():
model = Sequential()
model.add(LSTM(50,kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01), input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(0.591))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
return model
When I train the model on 5 splits like below :
tss = TimeSeriesSplit(n_splits = 5)
X = data.drop(labels=['target_prediction'], axis=1)
y = data['target_prediction']
for train_index, test_index in tss.split(X):
train_X, test_X = X.iloc[train_index, :].values, X.iloc[test_index,:].values
train_y, test_y = y.iloc[train_index].values, y.iloc[test_index].values
model=create_model()
history = model.fit(train_X, train_y, epochs=10, batch_size=64,validation_data=(test_X, test_y), verbose=0, shuffle=False)
I get an overfitting problem. The graph of loss is attached
I am not sure why there is overfitting when I use regularizers in my Keras model. Any help is appreciated .
EDIT:
Tried the architectures
def create_model():
model = Sequential()
model.add(LSTM(20, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
return model
def create_model(x,y):
# define LSTM
model = Sequential()
model.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(x,y)))
model.add(TimeDistributed(Dense(1, activation='sigmoid')))
model.compile(loss='mean_squared_error', optimizer='adam')
return model
but still it is overfitting.

First of all remove all your regularizers and dropout. You are literally spamming with all the tricks out there and 0.5 dropout is too high.
Reduce the number of units in your LSTM. Start from there. Reach a point where your model stops overfitting.
Then, add dropout if required.
After that, the next step is to add the tf.keras.Bidirectional. If still, you are not satfisfied then, increase number of layers. Remember to keep return_sequences True for every LSTM layer except the last one.
It is seldom I come across networks using layer regularization despite the availability because dropout and layer regularization have a same effect and people usually go with dropout (at maximum, I have seen 0.3 being used).

Tensorflow always gives me the same result while the outputs are normal

I hope you are having a great day!
I recently tried to train a regression model by using TensorFlow and I completed my code by following the instruction in here.
data = pd.read_csv('regret.csv')
max_regret = data['regret'].max()
data['regret'] = data['regret'] / max_regret # Normalize Regrets
regret_labels = data.pop('regret')
def build_model():
model = keras.Sequential([
layers.Dense(64, activation='relu', input_shape=[len(data.keys())]),
layers.Dense(64, activation='relu'),
layers.Dense(1)
])
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
return model
model = build_model()
class PrintDot(keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs):
if epoch % 100 == 0: print('')
print('.', end='')
EPOCHS = 1000
# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
history = model.fit(
data, regret_labels,
epochs=EPOCHS, validation_split=0.2, verbose=0,
callbacks=[PrintDot()])
test = model.predict(data)
loss, mae, mse = model.evaluate(data, regret_labels, verbose=2)
However, I encountered a problem that all the predictions were the same, even though the model.evaluate() gave me different statistics by trials.
I also attached the file via this link.
Would you take a look at it and give me some ideas to solve it? Thanks in advance!

You can try this approach as below which split your data set into training and test set before fitting into the model.
You can try this approach as below which split your data set into training and test set before fitting into the model.
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import RMSprop
data = pd.read_csv('regret.csv')
max_regret = data['regret'].max()
data['regret'] = data['regret'] / max_regret
len(data.keys())
data
train_dataset = data.sample(frac=0.8,random_state=0)
test_dataset = data.drop(train_dataset.index)
train_labels = train_dataset.pop('regret')
test_labels = test_dataset.pop('regret')
def build_model():
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(27,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
optimizer = RMSprop(0.001)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
return model
model = build_model()
class PrintDot(keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs):
if epoch % 100 == 0: print('')
print('.', end='')
EPOCHS = 1000
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
history = model.fit(
train_dataset, train_labels,
epochs=EPOCHS, validation_split=0.2, verbose=0,
callbacks=[PrintDot()])
test = model.predict(test_dataset)
The result have slightly changes as below:
enter image description here
What you could do better is min max scaling all the attributes.
Hope it can help you.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Keras fitting LSTM getting slower in loop - python

Thanks to DanielMöller, it is solved. I used keras.backend.clear_session() to remove "Keras's graph". from keras.backend import clear_session clear_session()

Related

How to calculate mean relative error on test datasets

The application of self-attention layer raised index error

How to obtain outputs that are from future in a keras sequential model

Overfitting in LSTM even after using regularizers

Tensorflow always gives me the same result while the outputs are normal

Categories

Resources