Problem with integrating keras into a sklearn pipeline - python

I am using a wrapper from sklearn to find the best hyperparameters for my Keras model. Briefly, the model is a convolutional autoencoder that takes in data of shape (x, x, x), while the Keras wrapper seems to accept targets only of shape (x, x). Since it is an autoencoder, the targets have shape (x, x, x) as well, and I think that is why I am getting the following error: ValueError: Invalid shape for y: (3744, 288, 1). How can I resolve this?
Full code:
"""
# Load libraries
"""
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
# Set random seed
np.random.seed(0)
"""
## Load the data
"""
master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/"
df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv"
df_small_noise_url = master_url_root + df_small_noise_url_suffix
df_small_noise = pd.read_csv(
    df_small_noise_url, parse_dates=True, index_col="timestamp"
)
df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv"
df_daily_jumpsup_url = master_url_root + df_daily_jumpsup_url_suffix
df_daily_jumpsup = pd.read_csv(
    df_daily_jumpsup_url, parse_dates=True, index_col="timestamp"
)
"""
## Prepare training data
"""
# Normalize and save the mean and std we get,
# for normalizing test data.
training_mean = df_small_noise.mean()
training_std = df_small_noise.std()
df_training_value = (df_small_noise - training_mean) / training_std
print("Number of training samples:", len(df_training_value))
"""
### Create sequences
Create sequences combining `TIME_STEPS` contiguous data values from the
training data.
"""
TIME_STEPS = 288
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
x_train = create_sequences(df_training_value.values)
print("Training input shape: ", x_train.shape)
"""
## Build a model
We will build a convolutional reconstruction autoencoder model. The model will
take input of shape `(batch_size, sequence_length, num_features)` and return
output of the same shape. In this case, `sequence_length` is 288 and
`num_features` is 1.
"""
# Create function returning a compiled network
def create_network(optimizer='Adam'):
    model = keras.Sequential(
        [
            layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
            layers.Conv1D(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1D(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Conv1DTranspose(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1DTranspose(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
        ]
    )
    # Use the optimizer chosen by the grid search instead of a hard-coded one
    # (the original line called the nonexistent keras.optimizers.optimizer()).
    model.compile(optimizer=optimizer, loss="mse", metrics=['mae'])
    return model
# Hyper-parameter tuning
# Wrap Keras model so it can be used by scikit-learn
CAE = KerasClassifier(build_fn=create_network, verbose=0)
# Create hyperparameter space
epochs = [5, 10]
batches = [5, 10, 100]
optimizers = ['rmsprop', 'adam']
# Create hyperparameter options
hyperparameters = dict(optimizer=optimizers, epochs=epochs, batch_size=batches)
# Create grid search
grid = GridSearchCV(estimator=CAE, cv=3, param_grid=hyperparameters)
# Fit grid search (we use the training data as targets here since this is a reconstruction model)
grid_result = grid.fit(x_train, x_train, validation_split=0.1)
# View hyperparameters of best neural network
print(grid_result.best_params_)

This is a special problem with KerasClassifier.fit(). If you look at its source code, you'll see that it throws an error if y has more than 2 dimensions. Perhaps it was not designed with autoencoder optimization in mind :)
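For reference, the shape check in the wrapper's fit() looks roughly like this (paraphrased from keras.wrappers.scikit_learn; only 1-D and 2-D y get through):

if len(y.shape) == 2 and y.shape[1] > 1:
    self.classes_ = np.arange(y.shape[1])
elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1:
    self.classes_ = np.unique(y)
    y = np.searchsorted(self.classes_, y)
else:
    raise ValueError('Invalid shape for y: ' + str(y.shape))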
Your choices are:
1. subclass KerasClassifier and override fit() to remove this limitation;
2. use another optimization engine (my preference would be Optuna);
3. squeeze the extra dimension out at the end of the model and reduce the dimensions of y_train to match.
For option 3, use these lines:
layers.Reshape((288,))  # add at the end of the model constructor
y_train = x_train.reshape(x_train.shape[:-1])  # to match the above change
grid_result = grid.fit(x_train, y_train, validation_split=0.1)  # feed y_train

There is one more, most elegant solution: replace keras.wrappers.scikit_learn.KerasClassifier with keras.wrappers.scikit_learn.KerasRegressor. The latter does not check the dimensions of y.
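A minimal sketch of that swap, reusing create_network and the hyperparameter grid from the question (untested; it assumes everything else stays as above):

from keras.wrappers.scikit_learn import KerasRegressor

# KerasRegressor skips the y-shape check, so the 3-D reconstruction
# targets pass straight through to model.fit().
CAE = KerasRegressor(build_fn=create_network, verbose=0)
grid = GridSearchCV(estimator=CAE, cv=3, param_grid=hyperparameters)
grid_result = grid.fit(x_train, x_train, validation_split=0.1)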

Related

Simple TensorFlow example with keras_tuner

I am new to TensorFlow and keras_tuner. I am working with PyCharm, Anaconda3, and Python 3.9.12.
Below you can find a minimal working example. In the first part, the code trains a simple reference ANN with fixed hyperparameters, using input and output vectors computed by an artificial function. In the second part, the hyperparameters of the reference model should be found using keras-tuner.
My question is: why does keras_tuner (using the RandomSearch tuner) perform so badly on my example? You can find my code below:
import pandas as pd
import numpy as np
# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf
from tensorflow.keras import layers
######################################################
from matplotlib import pyplot
np.set_printoptions(suppress=True)
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
########################################################################################################################
#######################################################################################################################
#Load training data into a pandas dataframe
#training_data = pd.read_csv('Encoder_Error.csv',names=["Position_Data", "Error_Data"])
#x_data = training_data['Position_Data']
#y_data = training_data['Error_Data']
#print(x_data)
#print(y_data)
#######################################################################################################################
from numpy import asarray
x_data = asarray([i/500 for i in range(-500,500)])
y_data = asarray([np.power(i,3)/np.exp(i/(abs(i)+1)) for i in x_data])
x_data = x_data.reshape((len(x_data), 1))
y_data = y_data.reshape((len(y_data), 1))
########################################################################################################################
#make a regression model
Encoder_error_model = tf.keras.Sequential([
    layers.Dense(3, activation="tanh", name="layer2"),
    layers.Dense(2, activation="tanh", name="layer3"),
    layers.Dense(1, name="layer4"),
])
Encoder_error_model.compile(loss = tf.keras.losses.MeanSquaredError(),optimizer = tf.keras.optimizers.Adam())
######################################################
#To train that model
Encoder_error_model.fit(x_data, y_data, batch_size=10, epochs=100)#batch_size=10, epochs=100
yhat = Encoder_error_model.predict(x_data)
########################################################################################################################
# Save the entire model as a SavedModel.
#Encoder_error_model.save('Tensorflow_Encoder_error_model_1')
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
import tensorflow as tf
from tensorflow import keras
import keras_tuner
#-----------------------------------------------------------------
def build_model(hp):
    model = keras.Sequential()
    model.add(
        layers.Dense(
            # Tune number of units.
            units=hp.Int("units1", min_value=1, max_value=5, step=1),
            # Tune the activation function to use.
            #activation=hp.Choice("activation1", ["relu", "softmax", "tanh", "elu", "gelu", "selu", "softsign"]),
            activation=hp.Choice("activation1", values=["relu", "tanh"]),
            #activation="tanh",
        )
    )
    model.add(
        layers.Dense(
            # Tune number of units.
            units=hp.Int("units2", min_value=1, max_value=5, step=1),
            # Tune the activation function to use.
            #activation=hp.Choice("activation2", ["relu", "softmax", "tanh", "elu", "gelu", "selu", "softsign"]),
            activation=hp.Choice("activation2", values=["relu", "tanh"]),
            #activation="tanh",
        )
    )
    model.add(
        layers.Dense(
            1,
            #activation=hp.Choice("output_activation", ["relu","softmax","tanh","elu","gelu","hard_sigmoid","selu","softsign","sigmoid"]),
        )
    )
    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-8, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        #optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-8])),
        loss='mse',  # MeanSquaredError
        metrics=['mae']  # MeanAbsoluteError; does not work with MeanSquaredError
    )
    return model
build_model(keras_tuner.HyperParameters())
#-----------------------------------------------------------------
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_mae",  # "val_accuracy"
    max_trials=50,
    #executions_per_trial=2,
    directory="my_dir",
    project_name="test optimize hyperparameter",
)
tuner.search_space_summary()
#-----------------------------------------------------------------
#stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
#----------------------------------------------------------------
tuner.search(x_data, y_data, batch_size=10, epochs=100, validation_split=0.2)
#----------------------------------------------------------------
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hyperparameters.values)
#-----------------------------------------------------------------
yhat2 = best_model.predict(x_data)
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
#Plot Reference Data to Model
pyplot.scatter(x_data, y_data, label='Actual')
pyplot.scatter(x_data, yhat, label='Predicted-model-reference')
pyplot.scatter(x_data, yhat2, label='Predicted-model-tuning')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
print(y_data)
Since the search space is very limited, I would expect results similar to my reference model. If I only optimize the number of neurons, keeping the reference model's "tanh" activation fixed, the fitting works. But if I let the tuner select between "tanh" and "relu", it prefers "relu", which results in a bad approximation.
The solution was the following: I added overwrite=True to the keras_tuner.RandomSearch() arguments. Before, old optimization results (saved in "my_dir") were reused in new versions of the code, which obviously led to wrong, stale results. After adding this argument, everything works fine.
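For reference, a sketch of the corrected tuner construction (same arguments as above, with the flag added):

tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_mae",
    max_trials=50,
    overwrite=True,  # discard any stale results cached in "my_dir"
    directory="my_dir",
    project_name="test optimize hyperparameter",
)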

Decode prediction of custom keras model

A few days ago I started with ML because I wanted to build an hCaptcha solver. I have everything ready; I just need to train a model that will classify the captcha images so I can send a request with the correct answer and get the captcha token.
I've looked into some tutorials on how to train my own model with several classes. I have it set up the following way:
1 trainer folder, 1 validation folder, and 1 testing folder. In the trainer and validation folders there are subfolders named airplane, truck, boat, train, ... each containing approximately 20 images. The testing folder holds some random images related to the classes I have.
I have trained the model, and it seems like I'm getting an accuracy of 1. Then I take some of the random testing images and try to predict them using this saved model. It does its job and predicts them, returning an array of numbers. The thing is, I don't know how to decode those predictions, nor how to see the list of classes with their corresponding integers before predicting.
I'm super new to this, so I'm sure anything will help :)
My code below:
import os
from keras.preprocessing import image
from keras.models import Sequential
from keras import layers
from keras.models import load_model
import numpy as np
trainer_path = "./img/trainer"
validator_path = "./img/validator"
testing_path = "./img/tester"
WIDTH = 128
HEIGHT = 128
BATCH = 30
EPOCHS = 15
train_dataset = image.image_dataset_from_directory(
    trainer_path,
    label_mode="int",
    batch_size=BATCH,
    image_size=(WIDTH, HEIGHT)
)
validator_dataset = image.image_dataset_from_directory(
    validator_path,
    label_mode="int",
    batch_size=BATCH,
    image_size=(WIDTH, HEIGHT)
)
model = Sequential([
    layers.Input((WIDTH, HEIGHT, 3)),
    layers.Conv2D(16, 3, padding="same"),
    layers.Conv2D(32, 3, padding="same"),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(10)
])
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
model_fit = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=validator_dataset,
    verbose=2
)
#loading the saved model
model = load_model("./model")
for i in os.listdir(testing_path):
    img = image.load_img(testing_path + "/" + i, target_size=(WIDTH, HEIGHT, 3))
    img_array = image.img_to_array(img)
    img_batch = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_batch)
    print(prediction)
    print()
Output example:
[[ 875.5614 3123.8257 1521.7046 90.056526 335.5274
-785.3671 1075.9199 1105.3068 -14.917503 -3745.6494 ]]
You have to apply an activation function to the last Dense layer. If you want to classify the image, it should be softmax (you will get probabilities for all classes); here is the link:
https://keras.io/api/layers/activations/
As for the class names, they are sorted alphanumerically by default; you can also pass the class_names argument. Here is the link to the arguments of this function:
https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory
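Putting both pieces together, a minimal sketch (assuming the train_dataset and prediction variables from the question, with TensorFlow imported as tf):

import tensorflow as tf

# The integer labels follow the alphanumeric order of the subfolder names.
class_names = train_dataset.class_names
# The last Dense layer has no activation, so the outputs are raw logits;
# softmax turns them into probabilities.
probs = tf.nn.softmax(prediction[0]).numpy()
print(class_names[np.argmax(probs)], probs.max())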

Convolutional LSTM Model Dimension Incompatibility when making predictions & prediction dimension issues

I structured a convolutional LSTM model to predict forthcoming Bitcoin price data, using analyzed past data of the Bitcoin close price and other features.
Let me jump straight to the code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow.keras as keras
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from keras.models import Sequential
from keras.layers import InputLayer, ConvLSTM1D, LSTM, Flatten, RepeatVector, Dense, TimeDistributed
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.losses import Huber
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
DIR = '../input/btc-features-targets'
SEG_DIR = '../input/segmented'
segmentized_features = os.listdir(SEG_DIR)
btc_train_features = []
for seg in segmentized_features:
    train_features = pd.read_csv(f'{SEG_DIR}/{seg}')
    train_features.set_index('date', inplace=True)
    btc_train_features.append(scaler.fit_transform(train_features.values))
btc_train_targets = pd.read_csv(f'{DIR}/btc_train_targets.csv')
btc_train_targets.set_index('date', inplace=True)
btc_test_features = pd.read_csv(f'{DIR}/btc_test_features.csv')
btc_tef1 = btc_test_features.iloc[:111]
btc_tef2 = btc_test_features.iloc[25:]
btc_tef1.set_index('date', inplace=True)
btc_tef2.set_index('date', inplace=True)
btc_test_targets = pd.read_csv(f'{DIR}/btc_test_targets.csv')
btc_test_targets.set_index('date', inplace=True)
btc_trt_log = np.log(btc_train_targets)
btc_tefs1 = scaler.fit_transform(btc_tef1.values)
btc_tefs2 = scaler.fit_transform(btc_tef2.values)
btc_tet_log = np.log(btc_test_targets)
scaled_train_features = []
for features in btc_train_features:
    shape = features.shape
    scaled_train_features.append(np.expand_dims(features, [0,3]))
shape_2 = btc_tefs1.shape
btc_tefs1 = np.expand_dims(btc_tefs1, [0,3])
shape_3 = btc_tefs2.shape
btc_tefs2 = np.expand_dims(btc_tefs2, [0,3])
btc_trt_log = btc_trt_log.values[0]
btc_tet_log = btc_tet_log.values[0]
def build(hp):
    model = keras.Sequential()
    # Input Layer
    model.add(InputLayer(input_shape=(111, 32, 1)))
    # ConvLSTM1D
    convLSTM_hp_filters = hp.Int(name='convLSTM_filters', min_value=32, max_value=512, step=32)
    convLSTM_hp_kernel_size = hp.Choice(name='convLSTM_kernel_size', values=[3, 5, 7])
    convLSTM_activation = hp.Choice(name='convLSTM_activation', values=['selu', 'relu'])
    model.add(ConvLSTM1D(filters=convLSTM_hp_filters,
                         kernel_size=convLSTM_hp_kernel_size,
                         padding='same',
                         activation=convLSTM_activation,
                         use_bias=True,
                         bias_initializer='zeros'))
    # Flatten
    model.add(Flatten())
    # RepeatVector
    model.add(RepeatVector(5))
    # LSTM
    LSTM_hp_units = hp.Int(name='LSTM_units', min_value=32, max_value=512, step=32)
    LSTM_activation = hp.Choice(name='LSTM_activation', values=['selu', 'relu'])
    model.add(LSTM(units=LSTM_hp_units, activation=LSTM_activation, return_sequences=True))
    # TimeDistributed Dense
    dense_units = hp.Int(name='dense_units', min_value=32, max_value=512, step=32)
    dense_activation = hp.Choice(name='dense_activation', values=['selu', 'relu'])
    model.add(TimeDistributed(Dense(units=dense_units, activation=dense_activation)))
    # TimeDistributed Dense_Output
    model.add(Dense(1))
    # Set Learning Rate
    hp_learning_rate = hp.Choice(name='learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Compile Model
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss=Huber(),
                  metrics=[RootMeanSquaredError()])
    return model
tuner = kt.Hyperband(build,
                     objective=kt.Objective('root_mean_squared_error', direction='min'),
                     max_epochs=10,
                     factor=3)
early_stop = EarlyStopping(monitor='root_mean_squared_error', patience=5)
opt_hps = []
for train_features in scaled_train_features:
    tuner.search(train_features, btc_trt_log, epochs=50, callbacks=[early_stop])
    opt_hps.append(tuner.get_best_hyperparameters(num_trials=1)[0])
models, epochs = ([] for _ in range(2))
for hps in opt_hps:
    model = tuner.hypermodel.build(hps)
    models.append(model)
    history = model.fit(train_features, btc_trt_log, epochs=70, verbose=0)
    rmse = history.history['root_mean_squared_error']
    best_epoch = rmse.index(min(rmse)) + 1
    epochs.append(best_epoch)
hypermodel = tuner.hypermodel.build(opt_hps[0])
for train_features, epoch in zip(scaled_train_features, epochs):
    hypermodel.fit(train_features, btc_trt_log, epochs=epoch)
tp1 = hypermodel.predict(btc_tefs1).flatten()
tp2 = hypermodel.predict(btc_tefs2).flatten()
test_predictions = np.concatenate((tp1, tp2[86:]), axis=None)
The hyperparameters of the model are configured using keras_tuner. Since the notebook raised ResourceExhaustedError issues when training on the full features dataset, sequentially segmented datasets are used instead (and apparently, referring to a study that used a similar model architecture, training can be done efficiently through this approach).
The input dimension of each segmented dataset is (111, 32, 1).
No issues are reported until the last code block; the models train fine. Yet when .predict() is executed, the notebook prints an error stating that the dimension of the input features for making predictions is incompatible with the dimension of the input features used during training. I did not understand why this occurs, since as far as I know, the input dimensions of a test dataset do not have to be identical to those of the training dataset.
Even though all the price data from 2018 to early 2021 is used for training, predictions are only needed for the mid-2021 timeframe.
The dataset used for prediction has a dimension of (136, 32, 1).
I tried matching the dimension of this dataset to (111, 32, 1) through index slicing.
That then showed issues in the output dimension: while predictions should be made for 136 data points, the result returned only 10.
Are there any issues with the model configuration? I cannot interpret the current situation.

What this error means: `y` argument is not supported when using python generator as input

I am trying to develop a network, using a Python generator as the data provider. Everything looks OK until the model starts to fit; then I receive this error:
ValueError: `y` argument is not supported when using dataset as input.
I checked every line, and I think the problem is in the format of x_test and y_test fed to the network. After hours of googling and changing the format several times, the error is still there.
Can you help me fix it? You can find the whole code below:
import os
import numpy as np
import pandas as pd
import re # To match regular expression for extracting labels
import tensorflow as tf
print(tf.__version__)
def xfiles(filename):
    if re.match(r'^\w{12}_x\.csv$', filename) is None:
        return False
    else:
        return True
def data_generator():
    folder = "i:/Stockpred/csvdbase/datasets/DS0002"
    file_list = os.listdir(folder)
    x_files = list(filter(xfiles, file_list))
    x_files.sort()
    np.random.seed(1729)
    np.random.shuffle(x_files)
    for file in x_files:
        filespec = folder + '/' + file
        xs = pd.read_csv(filespec, header=None)
        yfile = file.replace('_x', '_y')
        yfilespec = folder + '/' + yfile
        ys = pd.read_csv(open(yfilespec, 'r'), header=None, usecols=[1])
        xs = np.asarray(xs, dtype=np.float32)
        ys = np.asarray(ys, dtype=np.float32)
        for i in range(xs.shape[0]):
            yield xs[i][1:169], ys[i][0]
dataset = tf.data.Dataset.from_generator(
    data_generator,
    (tf.float32, tf.float32),
    (tf.TensorShape([168, ]), tf.TensorShape([])))
dataset = dataset.shuffle(buffer_size=16000, seed=1729)
# dataset = dataset.batch(4000, drop_remainder=True)
dataset = dataset.cache('R:/Temp/model')
def is_test(i, d):
    return i % 4 == 0

def is_train(i, d):
    return not is_test(i, d)
recover = lambda i, d: d
test_dataset = dataset.enumerate().filter(is_test).map(recover)
train_dataset = dataset.enumerate().filter(is_train).map(recover)
x_test = test_dataset.map(lambda x, y: x)
y_test = test_dataset.map(lambda x, y: y)
x_train = train_dataset.map(lambda x, y: x)
y_train = train_dataset.map(lambda x, y: y)
print(x_train.element_spec)
print(y_train.element_spec)
print(x_test.element_spec)
print(y_test.element_spec)
# define an object (initializing RNN)
model = tf.keras.models.Sequential()
# first LSTM layer
model.add(tf.keras.layers.LSTM(units=168, activation='relu', return_sequences=True, input_shape=(168, 1)))
# dropout layer
model.add(tf.keras.layers.Dropout(0.2))
# second LSTM layer
model.add(tf.keras.layers.LSTM(units=168, activation='relu', return_sequences=True))
# dropout layer
model.add(tf.keras.layers.Dropout(0.2))
# third LSTM layer
model.add(tf.keras.layers.LSTM(units=80, activation='relu', return_sequences=True))
# dropout layer
model.add(tf.keras.layers.Dropout(0.2))
# fourth LSTM layer
model.add(tf.keras.layers.LSTM(units=120, activation='relu'))
# dropout layer
model.add(tf.keras.layers.Dropout(0.2))
# output layer
model.add(tf.keras.layers.Dense(units=1))
model.summary()
# compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train.as_numpy_iterator(), y_train.as_numpy_iterator(), batch_size=32, epochs=100)
predicted_stock_price = model.predict(x_test)
As the docs say:
y - Target data. Like the input data x, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with x (you cannot have Numpy inputs and tensor targets, or inversely). If x is a dataset, generator, or keras.utils.Sequence instance, y should not be specified (since targets will be obtained from x).
So, I suppose you should have one generator serving tuples of sample and label.
If you are providing a Dataset as input, then type(train_dataset) should be tensorflow.python.data.ops.dataset_ops.BatchDataset. If so, simply feed this Dataset (which includes your X and y bundle) into the model:
model.fit(train_dataset, batch_size=32, epochs=100)
(Yes, this is a slightly different convention than in sklearn, where X and y are passed separately.)
Meanwhile, if you want TensorFlow to explicitly use a separate dataset for validation, you must use the kwarg like:
model.fit(train_dataset, validation_data=val_dataset, batch_size=32, epochs=100)
where val_dataset is a separate dataset you have set aside for validation during model training (not the test set).
Alternatively, use model.fit_generator with a generator that yields tuples (x, y) of input data and labels. So altogether:
model.fit_generator(train_dataset.as_numpy_iterator(), epochs=100)
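Applied to the code above, a minimal sketch: instead of splitting the features and labels apart with map(), keep the (x, y) tuples together, batch the dataset, and pass it to fit() without a separate y (the train_dataset/test_dataset split from the question is assumed):

# train_dataset already yields (features, label) tuples, so Keras can
# extract the targets itself; it just needs to be batched.
train_ds = train_dataset.batch(32)
val_ds = test_dataset.batch(32)

model.fit(train_ds, validation_data=val_ds, epochs=100)
predicted_stock_price = model.predict(val_ds)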

Get Cell, Input Gate, Output Gate and Forget Gate activation values for LSTM network using Keras

I want to get the activation values for a given input of a trained LSTM network, specifically the values for the cell, the input gate, the output gate and the forget gate. According to this Keras issue and this Stackoverflow question I'm able to get some activation values with the following code:
(basically I'm trying to classify 1-dimensional timeseries using one label per timeseries, but that doesn't really matter for this general question)
import random
from pprint import pprint
import keras.backend as K
import numpy as np
from keras.layers import Dense
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.utils import to_categorical
def getOutputLayer(layerNumber, model, X):
    return K.function([model.layers[0].input],
                      [model.layers[layerNumber].output])([X])
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), stateful=True))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
# generate some test data
for i in range(10):
    # generate a random timeseries of 10 numbers
    X = np.random.rand(10)
    X = X.reshape(10, 1, 1)
    # generate a random label for the whole timeseries between 0 and 1
    y = to_categorical([random.randint(0, 1)] * 10, num_classes=2)
    # train the lstm for this one timeseries
    model.fit(X, y, epochs=1, batch_size=1, verbose=0)
    model.reset_states()
# to keep the output simple use only 5 steps for the input of the timeseries
X_test = np.random.rand(5)
X_test = X_test.reshape(5, 1, 1)
# get the activations for the output lstm layer
pprint(getOutputLayer(0, model, X_test))
Using that I get the following activation values for the LSTM layer:
[array([[-0.04106992, -0.00327154, -0.01524276, 0.0055838 , 0.00969929,
-0.01438944, 0.00211149, -0.04286387, -0.01102304, 0.0113989 ],
[-0.05771339, -0.00425535, -0.02032563, 0.00751972, 0.01377549,
-0.02027745, 0.00268653, -0.06011265, -0.01602218, 0.01571197],
[-0.03069103, -0.00267129, -0.01183739, 0.00434298, 0.00710012,
-0.01082268, 0.00175544, -0.0318702 , -0.00820942, 0.00871707],
[-0.02062054, -0.00209525, -0.00834482, 0.00310852, 0.0045242 ,
-0.00741894, 0.00141046, -0.02104726, -0.0056723 , 0.00611038],
[-0.05246543, -0.0039417 , -0.01877101, 0.00691551, 0.01250046,
-0.01839472, 0.00250443, -0.05472757, -0.01437504, 0.01434854]],
dtype=float32)]
So for each input value I get 10 values, because I specified an LSTM with 10 neurons in the Keras model. But which one is the cell, which is the input gate, which the output gate, and which the forget gate?
Well, these are the output values. To get and look into the value of each gate, look into this issue; I paste the essential part here:
for i in range(epochs):
    print('Epoch', i, '/', epochs)
    model.fit(cos,
              expected_output,
              batch_size=batch_size,
              verbose=1,
              nb_epoch=1,
              shuffle=False)
    for layer in model.layers:
        if 'LSTM' in str(layer):
            print('states[0] = {}'.format(K.get_value(layer.states[0])))
            print('states[1] = {}'.format(K.get_value(layer.states[1])))
            print('Input')
            print('b_i = {}'.format(K.get_value(layer.b_i)))
            print('W_i = {}'.format(K.get_value(layer.W_i)))
            print('U_i = {}'.format(K.get_value(layer.U_i)))
            print('Forget')
            print('b_f = {}'.format(K.get_value(layer.b_f)))
            print('W_f = {}'.format(K.get_value(layer.W_f)))
            print('U_f = {}'.format(K.get_value(layer.U_f)))
            print('Cell')
            print('b_c = {}'.format(K.get_value(layer.b_c)))
            print('W_c = {}'.format(K.get_value(layer.W_c)))
            print('U_c = {}'.format(K.get_value(layer.U_c)))
            print('Output')
            print('b_o = {}'.format(K.get_value(layer.b_o)))
            print('W_o = {}'.format(K.get_value(layer.W_o)))
            print('U_o = {}'.format(K.get_value(layer.U_o)))
    # output for the first element of the batch after the first fit()
    first_batch_element = np.expand_dims(cos[0], axis=1)  # (1, 1) to (1, 1, 1)
    print('output = {}'.format(get_LSTM_output([first_batch_element])[0].flatten()))
    model.reset_states()

print('Predicting')
predicted_output = model.predict(cos, batch_size=batch_size)

print('Plotting Results')
plt.subplot(2, 1, 1)
plt.plot(expected_output)
plt.title('Expected')
plt.subplot(2, 1, 2)
plt.plot(predicted_output)
plt.title('Predicted')
plt.show()
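Note that the snippet above uses the old Keras 1.x attribute names (W_i, U_i, b_i) and nb_epoch. In Keras 2.x an LSTM layer stores its parameters concatenated across the four gates; a sketch of slicing them apart, assuming layer is the trained LSTM layer (e.g. model.layers[0]):

import numpy as np

kernel, recurrent_kernel, bias = layer.get_weights()
# Keras 2 packs the four gates along the last axis in the order
# input, forget, cell, output.
W_i, W_f, W_c, W_o = np.split(kernel, 4, axis=1)
U_i, U_f, U_c, U_o = np.split(recurrent_kernel, 4, axis=1)
b_i, b_f, b_c, b_o = np.split(bias, 4)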
