"ValueError: activation is not a legal parameter" with Keras classifier - python

I've been playing around with TensorFlow and Keras, and I finally got the following error while attempting hyperparameter tuning:
"ValueError: activation is not a legal parameter"
The point is that I want to try different activation functions in my model to see which one works best.
I have the following code:
import pandas as pd
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
ds = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])
print(ds)
x_train = ds.copy()
y_train = x_train.pop('Age')
x_train = np.array(x_train)
def create_model(layers, activations):
    model = tf.keras.Sequential()
    for i, nodes in enumerate(layers):
        if i == 0:
            model.add(tf.keras.layers.Dense(nodes, input_dim=x_train.shape[1]))
            model.add(layers.Activation(activations))
            model.add(Dropout(0.3))
        else:
            model.add(tf.keras.layers.Dense(nodes))
            model.add(layers.Activation(activations))
            model.add(Dropout(0.3))
    model.add(tf.keras.layers.Dense(units=1, kernel_initializer='glorot_uniform'))
    model.add(layers.Activation('sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
model = KerasClassifier(build_fn=create_model, verbose=0)
layers = [[20], [40,20], [45, 30, 15]]
activations = ['sigmoid', 'relu']
param_grid = dict(layers=layers, activation=activations, batch_size = [128, 256], epochs=[30])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(x_train, y_train)
print(grid_result.best_score_, grid_result.best_params_)
pred_y = grid.predict(x_test)
y_pred = (pred_y > 0.5)
cm=confusion_matrix(y_pred, y_test)
score=accuracy_score(y_pred, y_test)
model.fit(x_train, y_train, epochs=30, callbacks=[cp_callback])
#steps_per_epoch
model.evaluate(x_test, y_test, verbose=2)
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])
probability_model(x_test[:100])

If you look at the Keras documentation, an Activation layer must be given a single activation function, e.g.:
from tensorflow.keras import activations
layers.Activation(activations.relu)
Right now, you have:
activations = ['sigmoid', 'relu']
You are passing that whole list to the layer, which is why you get the ValueError.
You should change your code to something like this:
model.add(tf.keras.layers.Dense(nodes, activation=activations[i], input_dim=x_train.shape[1]))
In other words, remove the separate Activation layer (model.add(layers.Activation(activations))) and pass the activation to each Dense layer instead.
Example:
def create_model(layers, activations):
    model = tf.keras.Sequential()
    for i in range(2):
        if i == 0:
            model.add(tf.keras.layers.Dense(2, activation=activations[i], input_dim=x_train.shape[1]))
            model.add(tf.keras.layers.Dropout(0.3))
        else:
            model.add(tf.keras.layers.Dense(2, activation=activations[i]))
            model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='glorot_uniform'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

layers.Activation() expects a function or a string, such as 'sigmoid', but you are currently passing the whole list activations to it. Use your index i (or a different index) to access a single activation function, e.g. activations[i].
You can also pass the activation as a string directly to the Dense layer, like so:
model.add(tf.keras.layers.Dense(nodes, activation=activations[i], input_dim=x_train.shape[1]))
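Note also that the old keras.wrappers.scikit_learn wrapper validates every key in param_grid against the signature of build_fn, so the key must match the function's argument name exactly (activations here, not activation); a mismatched key raises exactly this kind of "... is not a legal parameter" error. A minimal end-to-end sketch combining the fixes (it reuses x_train, KerasClassifier and GridSearchCV from the question):
def create_model(layers=[20], activations='relu'):
    model = tf.keras.Sequential()
    for i, nodes in enumerate(layers):
        if i == 0:
            # first layer carries the input dimension
            model.add(tf.keras.layers.Dense(nodes, activation=activations,
                                            input_dim=x_train.shape[1]))
        else:
            model.add(tf.keras.layers.Dense(nodes, activation=activations))
        model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid',
                                    kernel_initializer='glorot_uniform'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = KerasClassifier(build_fn=create_model, verbose=0)
param_grid = dict(layers=[[20], [40, 20], [45, 30, 15]],
                  activations=['sigmoid', 'relu'],  # key matches the build_fn argument name
                  batch_size=[128, 256], epochs=[30])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
Each grid point passes a single string to activations, so it can be handed straight to every Dense layer.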

Related

I need consistent results for MSE between runs when training a Neural Network

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
# load data from UCI Machine Learning Repository
df = pd.read_csv(r'C:\Test_set_Yacht.csv')
df1 = pd.read_csv(r'C:\Train_set_Yacht.csv')
df2 = pd.read_csv(r'C:\Yacht_hydro.csv')
X = df2.drop("residuary_resistance", axis=1)
Y = df2["residuary_resistance"]
# split data into features and target
X_train = df1.drop("residuary_resistance", axis=1)
y_train = df1["residuary_resistance"]
# split data into train and test sets
X_test = df.drop("residuary_resistance", axis=1)
y_test = df["residuary_resistance"]
# scale data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
def objective_model_1(hidden_units, learning_rate):
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=X.shape[1], activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(1, activation="linear"))
    model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
    model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
    y_pred = model.predict(X_test_scaled)
    return -mean_squared_error(y_test, y_pred)

pbounds_model_1 = {
    "hidden_units": (32, 128),
    "learning_rate": (1e-5, 1e-1),
}

bo_model_1 = BayesianOptimization(
    f=objective_model_1,
    pbounds=pbounds_model_1,
    random_state=42,
)
bo_model_1.maximize(init_points=10, n_iter=90)

def objective_model_2(hidden_units, learning_rate):
    model = Sequential()
    model.add(Dense(hidden_units, input_shape=X_train_scaled.shape[1:], activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(1, activation="linear"))
    model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
    model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
    y_pred = model.predict(X_test_scaled)
    return -mean_squared_error(y_test, y_pred)

pbounds_model_2 = {
    "hidden_units": (32, 128),
    "learning_rate": (1e-5, 1e-1),
}

bo_model_2 = BayesianOptimization(
    f=objective_model_2,
    pbounds=pbounds_model_2,
    random_state=42,
)
bo_model_2.maximize(init_points=10, n_iter=90)
# get the best hyperparameters for each model
best_params_model_1 = bo_model_1.max["params"]
best_params_model_2 = bo_model_2.max["params"]
# train and evaluate model 1 with best hyperparameters
model_1 = Sequential()
model_1.add(Dense(32, input_dim=X.shape[1], activation="relu"))
model_1.add(Dense(32, activation="relu"))
model_1.add(Dense(1, activation="linear"))
model_1.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_1["learning_rate"]))
model_1.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_1 = model_1.predict(X_test_scaled)
mse_1 = mean_squared_error(y_test, y_pred_1)
print("Model 1 MSE on test set:", mse_1)
# train and evaluate model 2 with best hyperparameters
model_2 = Sequential()
model_2.add(Dense(64, input_dim=X.shape[1], activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(1, activation="linear"))
model_2.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_2["learning_rate"]))
model_2.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_2 = model_2.predict(X_test_scaled)
mse_2 = mean_squared_error(y_test, y_pred_2)
print("Model 2 MSE on test set:", mse_2)
In the code above, I implement Bayesian optimization for hyperparameter tuning of two different NNs, using the dataset from https://archive.ics.uci.edu/ml/datasets/yacht+hydrodynamics. After running this, I recreate those two NNs in a Jupyter Notebook code block and train them with the best hyperparameters already determined by the Bayesian optimizer. I need to get the same MSE each time I run the code; this is why I split the data beforehand, to ensure the same results.
The inner workings of the TensorFlow library are non-deterministic, so you must set a random seed to get reproducible results. In practice, you just need to add this line at the start of your code:
tf.random.set_seed(0)
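If you need strict run-to-run reproducibility, it can help to seed every source of randomness the pipeline touches, not just TensorFlow's; a minimal sketch (the seed value 0 is arbitrary):
import random
import numpy as np
import tensorflow as tf

random.seed(0)         # Python's built-in RNG
np.random.seed(0)      # NumPy, also used by scikit-learn and bayes_opt
tf.random.set_seed(0)  # TensorFlow: weight init, dropout, shuffling
On GPU, some kernels remain non-deterministic even with a fixed seed; recent TensorFlow versions offer tf.config.experimental.enable_op_determinism() to force deterministic ops at some cost in speed.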

How to calculate mean relative error on test datasets

def create_model():
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(40002, 12)))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50, return_sequences=True))
    model.add(tf.keras.layers.LSTM(30))
    model.add(Dense(2, activation='linear'))

    def rmse(Y_test, prediction):
        return K.sqrt(K.mean(K.square(Y_test - prediction)))

    # compile
    model.compile(optimizer='adam', loss=rmse, metrics=['mean_squared_error', rmse])
    return model
# fit the model
model = create_model()
model.fit(x_train, Y_train, shuffle=False, verbose=1, epochs=10)
# predict model
prediction = model.predict(x_test, verbose=0)
print(prediction)
How do I calculate the mean relative error for tensor inputs, i.e. when my Y_test and prediction are tensors? Each row of Y_test and prediction holds 2 values.
Example:
Y_test = [[0.2,0.003],
[0.3, 0.008]]
prediction = [[0.4,0.005],
[0.5,0.007]]
mean_relative_error = mean(absolute(0.2-0.4)/0.2 + absolute(0.003-0.005)/0.003), mean(absolute(0.3-0.5)/0.3 + absolute(0.008-0.007)/0.008)
mean_relative_error = [0.533, 0.3925]
Please note that I don't want to use it for backpropagation to improve the network.
You could compute it like this:
from tensorflow.math import reduce_mean, reduce_sum, abs

relative_error = reduce_sum(abs(prediction - Y_test) / prediction, axis=1)
# [0.9, 0.54285717]
mean_relative_error = reduce_mean(relative_error)
# 0.7214286
I couldn't use tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE) because of a bug: MeanAbsoluteError still reduces to the mean despite being told not to. The bug is reported HERE.
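As a quick sanity check outside TensorFlow, the same numbers can be reproduced with plain NumPy (using the example values from the question and dividing by prediction, as above):
import numpy as np

Y_test = np.array([[0.2, 0.003], [0.3, 0.008]])
prediction = np.array([[0.4, 0.005], [0.5, 0.007]])

relative_error = np.sum(np.abs(prediction - Y_test) / prediction, axis=1)
print(relative_error)         # [0.9        0.54285714]
print(relative_error.mean())  # 0.7214285714...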

Loss is always nan when training a deep learning model from tabular data

I'm trying to train a model from a dataset of a few thousand entries with 51 numerical features and a labeled column. Example:
When training the model to predict the 3 labels (candidate, false positive, confirmed), the loss is always nan and the accuracy stabilizes very quickly at a specific value.
The code:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, RobustScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.python.keras.layers import Dense, Dropout, Normalization
from tensorflow.python.keras.models import Sequential, Model
def load_dataset(data_folder_csv):
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    print(y)
    # format all fields as floats
    X = X.astype(np.float)
    # reshape the output variable to be one column (e.g. a 2D shape)
    y = y.reshape((len(y), 1))
    return X, y

# prepare input data using min/max scaler.
def prepare_inputs(X_train, X_test):
    oe = RobustScaler().fit_transform(X_train)
    X_train_enc = oe.transform(X_train)
    X_test_enc = oe.transform(X_test)
    return X_train_enc, X_test_enc

# prepare target
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    le.fit(y_train)
    le.fit(y_test)
    y_train_enc = ohe.fit_transform(y_train).toarray()
    y_test_enc = ohe.fit_transform(y_test).toarray()
    return y_train_enc, y_test_enc
X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)
X_train_enc, X_test_enc = X_train, X_test
print('Finished preparing inputs.')
# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
norm_layer = Normalization()
norm_layer.adapt(X)
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="tanh", kernel_initializer='he_normal'))
model.add(Dropout(0.2))
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(3, activation='sigmoid'))
opt = optimizers.Adam(lr=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train_enc, epochs=20, batch_size=128, verbose=1, use_multiprocessing=True)
_, accuracy = model.evaluate(X_test, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
I tried increasing/decreasing the learning rate, changing the optimizer, lowering and increasing the number of neurons and layers, and playing with batch sizes, but nothing seems to bring the model to good results. I think I'm missing something here but can't put my finger on it.
Result example:
EDIT: More lines from the csv:
EDIT2: Tried L2 regularization too, and it didn't do anything.
One possible reason:
Check whether your dataset has NaN values. NaN values can cause problems for the model while learning (a quick check is sketched after this list).
Some of the major bugs in your code:
You are using a sigmoid activation function instead of softmax for an output layer with 3 neurons.
You are fitting the encoders on both the train and test sets, which is wrong. You should fit_transform on your train data and use only transform on the test set.
You are also passing input_dim to every layer, which is wrong; only the first layer should accept the input shape.
You forgot to use the prepare_inputs function for X_train and X_test.
Your model should be fit with X_train_enc, not X_train.
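A quick way to run the NaN check mentioned above (a small sketch, assuming the same csv_ready.csv loaded with pandas):
import pandas as pd

df = pd.read_csv("csv_ready.csv")
print(df.isna().sum())        # per-column NaN counts
print(df.isna().any().any())  # True if there is any NaN anywhere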
Use this instead
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder
from tensorflow import optimizers
from tensorflow.python.keras.layers import Dense, Dropout, Normalization
from tensorflow.python.keras.models import Sequential, Model

def load_dataset(data_folder_csv):
    # load the dataset as a pandas DataFrame
    data = pd.read_csv(data_folder_csv, header=0)
    # retrieve numpy array
    dataset = data.values
    # split into input (X) and output (y) variables
    X = dataset[:, :-1]
    y = dataset[:, -1]
    print(y)
    # format all fields as floats
    X = X.astype(float)
    # reshape the output variable to be one column (e.g. a 2D shape)
    y = y.reshape((len(y), 1))
    return X, y

# prepare input data using min/max scaler.
def prepare_inputs(X_train, X_test):
    oe = MinMaxScaler()
    X_train_enc = oe.fit_transform(X_train)
    X_test_enc = oe.transform(X_test)
    return X_train_enc, X_test_enc

# prepare target
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # LabelEncoder expects 1D input; OneHotEncoder expects 2D, hence the reshape
    y_train = le.fit_transform(y_train.ravel()).reshape(-1, 1)
    y_test = le.transform(y_test.ravel()).reshape(-1, 1)
    y_train_enc = ohe.fit_transform(y_train).toarray()
    y_test_enc = ohe.transform(y_test).toarray()
    return y_train_enc, y_test_enc

X, y = load_dataset("csv_ready.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)

# the prepare_inputs call that was missing from the original code
X_train_enc, X_test_enc = prepare_inputs(X_train, X_test)
print('Finished preparing inputs.')

# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)

model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))
#opt = optimizers.Adam(lr=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train_enc, y_train_enc, epochs=20, batch_size=32, verbose=1, use_multiprocessing=True)

_, accuracy = model.evaluate(X_test_enc, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy * 100))
You want to change your model definition to this:
model = Sequential()
model.add(Dense(128, input_shape=X_train.shape[1:], activation="tanh", kernel_initializer='he_normal'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))
You only need to define the input shape for the first layer; Keras automatically determines the proper shape for each subsequent layer. You leave out the batch size when defining input_shape, since it is the first dimension, hence input_shape=X_train.shape[1:].
A sigmoid activation will actually work (because each output varies between 0 and 1), but what you really want is a softmax activation, which makes sure all the outputs sum to 1, as probability dictates: the probability that something happened is 100%, not the 120% that sigmoid could end up giving you.
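To see the difference concretely, here is a tiny sketch (the logit values are made up for illustration):
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.5]])
# sigmoid squashes each output independently; the row sums to ~2.23
print(tf.keras.activations.sigmoid(logits).numpy())  # [[0.88 0.73 0.62]]
# softmax normalizes the row into a probability distribution; it sums to 1.0
print(tf.keras.activations.softmax(logits).numpy())  # [[0.63 0.23 0.14]]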
Also, you're not using your LabelEncoder anywhere. I think what you mean to do is this:
def prepare_targets(y_train, y_test):
    le = LabelEncoder()
    ohe = OneHotEncoder()
    # teach the label encoder our labels
    le.fit(y_train)
    # turn our strings into integers
    y_train_transformed = le.transform(y_train)
    y_test_transformed = le.transform(y_test)
    # turn our integers into one-hot-encoded arrays
    # (OneHotEncoder expects a 2D array, hence the reshape to one column)
    y_train_enc = ohe.fit_transform(y_train_transformed.reshape(-1, 1)).toarray()
    y_test_enc = ohe.transform(y_test_transformed.reshape(-1, 1)).toarray()
    return y_train_enc, y_test_enc

The application of self-attention layer raised index error

So I am doing a classification machine-learning task with input of shape (batch, step, features).
In order to improve the accuracy of this model, I intended to apply a self-attention layer to it.
I am unfamiliar with how to use it for my case, since most examples online are concerned with embedding NLP models.
def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')

def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('acc') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()

    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(num_classes, activation='softmax'))

    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])

    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)

    yhat = model.predict(X_test)

    return history, yhat
This led to IndexError: list index out of range.
What is the correct way to apply this layer to my model?
As requested, one may use the following code to simulate a dataset.
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM
from keras_self_attention import SeqSelfAttention
import numpy as np

X_train = np.random.rand(700, 50, 34)
y_train = np.random.choice([0, 1], 700)
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], 100)

batch_size = 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]

LSTM_attention_his, yhat = LSTM_attention_model(
    X_train, y_train, X_test, y_test,
    loss=loss, num_classes=num_classes, batch_size=batch_size, units=32,
    learning_rate=learning_rate, epochs=epochs, dropout=0.5, recurrent_dropout=Rdropout, optimizer=optimizer
)
Here is how I would rewrite the code -
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, Masking, LSTM, Reshape
from keras_self_attention import SeqSelfAttention
import numpy as np

def opt_select(optimizer):
    if optimizer == 'Adam':
        adamopt = tf.keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        return adamopt
    elif optimizer == 'RMS':
        RMSopt = tf.keras.optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6)
        return RMSopt
    else:
        print('undefined optimizer')

def LSTM_attention_model(X_train, y_train, X_test, y_test, num_classes, loss, batch_size=68, units=128,
                         learning_rate=0.005, epochs=20, dropout=0.2, recurrent_dropout=0.2, optimizer='Adam'):
    class myCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            if (logs.get('accuracy') > 0.90):
                print("\nReached 90% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()

    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Bidirectional(LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Reshape((-1, model.output.shape[1]*model.output.shape[2])))
    model.add(Dense(num_classes, activation='softmax'))

    opt = opt_select(optimizer)
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=[callbacks])

    score, acc = model.evaluate(X_test, y_test,
                                batch_size=batch_size)

    yhat = model.predict(X_test)

    return history, yhat

X_train = np.random.rand(700, 50, 34)
y_train = np.random.choice([0, 1], (700, 1))
X_test = np.random.rand(100, 50, 34)
y_test = np.random.choice([0, 1], (100, 1))

batch_size = 217
epochs = 600
dropout = 0.6
Rdropout = 0.7
learning_rate = 0.00001
optimizer = 'RMS'
loss = 'categorical_crossentropy'
num_classes = y_train.shape[1]

LSTM_attention_his, yhat = LSTM_attention_model(
    X_train, y_train, X_test, y_test,
    loss=loss, num_classes=num_classes, batch_size=batch_size, units=32,
    learning_rate=learning_rate, epochs=epochs, dropout=0.5, recurrent_dropout=Rdropout, optimizer=optimizer
)
These are the changes I had to make to get this to start training:
The original issue was caused by the LSTM layer outputting the wrong dimensions. The SeqSelfAttention layer needs a 3D input (one dimension corresponding to the sequence of the data), which was missing from the output of the LSTM layer. As mentioned by @today in the comments, this can be solved by adding return_sequences=True to the LSTM layer.
But even with that modification, the code still gives an error when trying to compute the cost function. The issue is that the output of the self-attention layer is (None, 50, 64); when this is passed directly into the Dense layer, the final output of the network becomes (None, 50, 1). This doesn't make sense for what we are trying to do, because the final output should contain just a single label for each data point (it should have the shape (None, 1)). The output from the self-attention layer is 3-dimensional (each data point has a (50, 64) feature matrix), and it needs to be reshaped into a one-dimensional feature vector for the computation to make sense. So I added a reshape layer between the attention layer and the Dense layer, as in the code above.
In addition, the myCallback class tests whether logs.get('acc') is > 0.9, but I think it should be logs.get('accuracy').
To answer the OP's follow-up question in the comments about what kind of layer should be added: in this case, it was just a matter of extracting the full sequential data from the LSTM layer. Without the return_sequences flag, the output of the LSTM layer is (None, 64), which is simply the final features of the LSTM without the intermediate sequential data.
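To make the shape bookkeeping concrete, here is a small runnable sketch using the simulated shapes above (50 timesteps, 34 features, units=32, so the bidirectional LSTM yields 64 features per step); Flatten is used here as an equivalent alternative to the Reshape in the code above, and SeqSelfAttention (which preserves the 3D shape) is omitted to keep the sketch dependency-free:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, Flatten, Dense

x = tf.constant(np.random.rand(4, 50, 34).astype("float32"))
seq = Bidirectional(LSTM(32, return_sequences=True))(x)
print(seq.shape)   # (4, 50, 64): the 3D sequence output SeqSelfAttention needs
flat = Flatten()(seq)
print(flat.shape)  # (4, 3200): one feature vector per sample
out = Dense(2, activation="softmax")(flat)
print(out.shape)   # (4, 2): one label distribution per sample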

Hyperparameter optimization with grid_search in Keras and flow_from_directory

I tried to optimize the hyperparameters of my Keras CNN made for image classification. I decided to use grid search from sklearn. I overcame the fundamental difficulty of making x and y out of Keras flow_from_directory, but it still doesn't work.
The last line raises the error:
ValueError: dropout is not a legal parameter
def grid_model(optimizer='adam',
               kernel_initializer='random_uniform',
               dropout=0.2,
               loss='categorical_crossentropy'):
    model = Sequential()
    model.add(Conv2D(6, (5, 5), activation="relu", padding="same",
                     input_shape=(img_width, img_height, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(dropout))
    model.add(Conv2D(16, (5, 5), activation="relu"))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(Dense(120, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(84, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

train_generator = ImageDataGenerator(rescale=1/255)
validation_generator = ImageDataGenerator(rescale=1/255)

# Retrieve images and their classes for train and validation sets
train_flow = train_generator.flow_from_directory(directory=train_data_dir,
                                                 batch_size=batch_size,
                                                 target_size=(img_height, img_width))
validation_flow = validation_generator.flow_from_directory(directory=validation_data_dir,
                                                           batch_size=batch_size,
                                                           target_size=(img_height, img_width),
                                                           shuffle=False)

clf = KerasClassifier(build_fn=grid_model(), epochs=epochs, verbose=0)

param_grid = {
    'clf__optimizer': ['adam', 'Nadam'],
    'clf__epochs': [100, 200],
    'clf__dropout': [0.1, 0.2, 0.5],
    'clf__kernel_initializer': ['normal', 'uniform'],
    'clf__loss': ['categorical_crossentropy',
                  'sparse_categorical_crossentropy',
                  'kullback_leibler_divergence']
}

pipeline = Pipeline([('clf', clf)])
(X_train, Y_train) = train_flow.next()
grid = GridSearchCV(pipeline, cv=2, param_grid=param_grid)
grid.fit(X_train, Y_train)
The problem is in this line:
clf = KerasClassifier(build_fn=grid_model(), epochs=epochs, verbose=0)
Change it to:
clf = KerasClassifier(build_fn=grid_model, epochs=epochs, verbose=0)
The grid_model function should not be invoked; a reference to it should be passed. KerasClassifier calls it later to build the model, and validates the grid parameters against its signature.
Also, in the list of losses, 'sparse_categorical_crossentropy' (which expects integer targets) cannot be used, because the output shape it requires of the model is incompatible with that of 'categorical_crossentropy' (which expects one-hot targets).
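A short illustration of why the two losses cannot share one target format (the prediction values are made up):
import tensorflow as tf

y_pred = tf.constant([[0.8, 0.1, 0.1], [0.2, 0.2, 0.6]])

# categorical_crossentropy expects one-hot targets of shape (N, num_classes)
y_onehot = tf.constant([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
print(tf.keras.losses.categorical_crossentropy(y_onehot, y_pred).numpy())         # [0.223 0.511]

# sparse_categorical_crossentropy expects integer targets of shape (N,)
y_sparse = tf.constant([0, 2])
print(tf.keras.losses.sparse_categorical_crossentropy(y_sparse, y_pred).numpy())  # [0.223 0.511]
Since flow_from_directory yields one-hot labels with its default class_mode='categorical', only the one-hot losses make sense in this grid.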
