I need consistent results for MSE between runs when training a Neural Network - python

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
# load the pre-split data sets (UCI Machine Learning Repository, Yacht Hydrodynamics)
df = pd.read_csv(r'C:\Test_set_Yacht.csv')    # test set
df1 = pd.read_csv(r'C:\Train_set_Yacht.csv')  # train set
df2 = pd.read_csv(r'C:\Yacht_hydro.csv')      # full data set
# full data, split into features and target
X = df2.drop("residuary_resistance", axis=1)
Y = df2["residuary_resistance"]
# training features and target
X_train = df1.drop("residuary_resistance", axis=1)
y_train = df1["residuary_resistance"]
# test features and target
X_test = df.drop("residuary_resistance", axis=1)
y_test = df["residuary_resistance"]
# scale data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
def objective_model_1(hidden_units, learning_rate):
    hidden_units = int(hidden_units)  # BayesianOptimization samples floats; Dense needs an int
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=X.shape[1], activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(1, activation="linear"))
    model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
    model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
    y_pred = model.predict(X_test_scaled)
    return -mean_squared_error(y_test, y_pred)

pbounds_model_1 = {
    "hidden_units": (32, 128),
    "learning_rate": (1e-5, 1e-1),
}
bo_model_1 = BayesianOptimization(
    f=objective_model_1,
    pbounds=pbounds_model_1,
    random_state=42,
)
bo_model_1.maximize(init_points=10, n_iter=90)
def objective_model_2(hidden_units, learning_rate):
    hidden_units = int(hidden_units)  # cast the sampled float to an int for Dense
    model = Sequential()
    model.add(Dense(hidden_units, input_shape=X_train_scaled.shape[1:], activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(hidden_units, activation="relu"))
    model.add(Dense(1, activation="linear"))
    model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
    model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
    y_pred = model.predict(X_test_scaled)
    return -mean_squared_error(y_test, y_pred)

pbounds_model_2 = {
    "hidden_units": (32, 128),
    "learning_rate": (1e-5, 1e-1),
}
bo_model_2 = BayesianOptimization(
    f=objective_model_2,
    pbounds=pbounds_model_2,
    random_state=42,
)
bo_model_2.maximize(init_points=10, n_iter=90)
# get the best hyperparameters for each model
best_params_model_1 = bo_model_1.max["params"]
best_params_model_2 = bo_model_2.max["params"]
# train and evaluate model 1 with the best hyperparameters
units_1 = int(best_params_model_1["hidden_units"])  # use the tuned width, cast to int
model_1 = Sequential()
model_1.add(Dense(units_1, input_dim=X.shape[1], activation="relu"))
model_1.add(Dense(units_1, activation="relu"))
model_1.add(Dense(1, activation="linear"))
model_1.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_1["learning_rate"]))
model_1.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_1 = model_1.predict(X_test_scaled)
mse_1 = mean_squared_error(y_test, y_pred_1)
print("Model 1 MSE on test set:", mse_1)
# train and evaluate model 2 with the best hyperparameters
units_2 = int(best_params_model_2["hidden_units"])  # use the tuned width, cast to int
model_2 = Sequential()
model_2.add(Dense(units_2, input_dim=X.shape[1], activation="relu"))
model_2.add(Dense(units_2, activation="relu"))
model_2.add(Dense(units_2, activation="relu"))
model_2.add(Dense(units_2, activation="relu"))
model_2.add(Dense(1, activation="linear"))
model_2.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_2["learning_rate"]))
model_2.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_2 = model_2.predict(X_test_scaled)
mse_2 = mean_squared_error(y_test, y_pred_2)
print("Model 2 MSE on test set:", mse_2)
In the code above, I run Bayesian optimization for hyperparameter tuning of two different neural networks, using the Yacht Hydrodynamics data set from https://archive.ics.uci.edu/ml/datasets/yacht+hydrodynamics. After the optimization finishes, I rebuild both networks in a Jupyter Notebook code block and train them with the best hyperparameters the Bayesian optimizer found. I need to get the same MSE every time I run the code; this is why I split the data in advance, to ensure the same results.

The inner workings of TensorFlow are non-deterministic, so you must set a random seed to get reproducible results. In practice, you just need to add this line at the start of your code:
tf.random.set_seed(0)
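If a single seed call is not enough (GPU kernels and Python-level randomness can still vary between runs), a fuller setup is sketched below. It assumes a recent TensorFlow (2.9 or later, where both utilities exist); the helper name make_deterministic is our own:

import tensorflow as tf

def make_deterministic(seed_value=0):
    # seeds Python's random module, NumPy, and TensorFlow in one call
    tf.keras.utils.set_random_seed(seed_value)
    # forces deterministic op implementations, at some speed cost
    tf.config.experimental.enable_op_determinism()

make_deterministic(0)  # call once, before building or training any model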

Related

Why can't I reproduce Keras results?

I know that there is a reproducibility problem with Keras. Based on my research, I created the following function:
def set_seed():
    seed_value = 42
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
    np.random.seed(seed_value)
    random.seed(seed_value)
    tf.random.set_seed(seed_value)
    K.set_session(sess)
which should give me reproducible results in Keras.
My problem
I'm trying to run a self-made grid search on a neural network using Keras:
# Early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50)
callbacks = [es]
# Possible learning rates
learning_rates = np.linspace(0.1, 10**(-5), 10)
# Run grid search on one layer
set_seed()
mse1 = np.array([])
rate1 = np.array([])
neuron_number1 = np.array([])
for rate in learning_rates[0:2]:
    for neuron in range(1, 3):
        model = Sequential()
        model.add(Dense(neuron, input_dim=2, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, activation='relu'))
        model.summary()
        model.compile(loss='mse', optimizer=SGD(lr=rate), metrics=['mse'])
        history = model.fit(X_train, y_train, epochs=1000, batch_size=50,
                            validation_split=0.5, callbacks=[es])
        mse1 = np.append(mse1, history.history['val_loss'][-1])
        rate1 = np.append(rate1, rate)
        neuron_number1 = np.append(neuron_number1, neuron)
neural_summary1 = pandas.DataFrame(data=[neuron_number1, rate1, mse1])
neural_summary1 = neural_summary1.transpose()
neural_summary1.columns = ["number_of_neurons", "learning_rate", "mse"]
print(neural_summary1.iloc[neural_summary1['mse'].idxmin()])
number_of_neurons 2.000000
learning_rate 0.088890
mse 0.159922
But when I run the best configuration by itself, outside the grid search:
set_seed()
model = Sequential()
model.add(Dense(2, input_dim=2, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, activation='relu'))
model.summary()
model.compile(loss='mse', optimizer=SGD(lr=0.088890), metrics=['mse'])
history = model.fit(X_train, y_train, epochs=1000, batch_size=50, validation_split=0.5, callbacks=[es])
print(history.history['val_loss'][-1])
8.767917346954345
which is different from the result obtained previously from the grid search.
Do you know where the mistake in my code is, or why it is not working?
Packages that I use are the following:
import pandas
import random
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD
import os
from keras import backend as K
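A plausible explanation, offered as an assumption rather than a confirmed diagnosis: set_seed() runs once before the loop, so every grid-search iteration after the first starts from a different RNG state (earlier iterations consume random numbers for weight initialization and data shuffling). A standalone run seeded immediately beforehand therefore gets different initial weights than the same configuration did inside the loop. Re-seeding before each configuration, as in the sketch below (reusing set_seed, learning_rates, X_train, y_train, and es from the question), should make the two runs comparable:

for rate in learning_rates[0:2]:
    for neuron in range(1, 3):
        set_seed()  # reset all RNG state so this config starts like a standalone run
        model = Sequential()
        model.add(Dense(neuron, input_dim=2, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, activation='relu'))
        model.compile(loss='mse', optimizer=SGD(lr=rate), metrics=['mse'])
        history = model.fit(X_train, y_train, epochs=1000, batch_size=50,
                            validation_split=0.5, callbacks=[es])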

"ValueError: activation is not a legal parameter" with Keras classifier

I've been playing around with TensorFlow and Keras and I finally got the following error while trying hyperparameter tuning:
"ValueError: activation is not a legal parameter"
The point is that I want to try different activation functions in my model to see which one works best.
I have the following code:
import pandas as pd
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
ds = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])
print(ds)
x_train = ds.copy()
y_train = x_train.pop('Age')
x_train = np.array(x_train)
def create_model(layers, activations):
    model = tf.keras.Sequential()
    for i, nodes in enumerate(layers):
        if i == 0:
            model.add(tf.keras.layers.Dense(nodes, input_dim=x_train.shape[1]))
            model.add(layers.Activation(activations))
            model.add(Dropout(0.3))
        else:
            model.add(tf.keras.layers.Dense(nodes))
            model.add(layers.Activation(activations))
            model.add(Dropout(0.3))
    model.add(tf.keras.layers.Dense(units=1, kernel_initializer='glorot_uniform'))
    model.add(layers.Activation('sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
model = KerasClassifier(build_fn=create_model, verbose=0)
layers = [[20], [40,20], [45, 30, 15]]
activations = ['sigmoid', 'relu']
param_grid = dict(layers=layers, activation=activations, batch_size = [128, 256], epochs=[30])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(x_train, y_train)
print(grid_result.best_score_, grid_result.best_params_)
pred_y = grid.predict(x_test)
y_pred = (pred_y > 0.5)
cm=confusion_matrix(y_pred, y_test)
score=accuracy_score(y_pred, y_test)
model.fit(x_train, y_train, epochs=30, callbacks=[cp_callback])
#steps_per_epoch
model.evaluate(x_test, y_test, verbose=2)
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])
probability_model(x_test[:100])
As the documentation shows, you must specify an activation as:
from tensorflow.keras import activations
layers.Activation(activations.relu)
Right now, you have:
activations = ['sigmoid', 'relu']
which is a whole list of strings; that is why you get the ValueError.
You should change your code to something like this:
model.add(tf.keras.layers.Dense(nodes, activation=activations[i], input_dim=x_train.shape[1]))
So, remove the Activation layer (model.add(layers.Activation(activations))) and instead set the activation inside each Dense layer.
Example:
def create_model(layers, activations):
    model = tf.keras.Sequential()
    for i in range(2):
        if i == 0:
            model.add(tf.keras.layers.Dense(2, activation=activations[i], input_dim=x_train.shape[1]))
            model.add(tf.keras.layers.Dropout(0.3))
        else:
            model.add(tf.keras.layers.Dense(2, activation=activations[i]))
            model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='glorot_uniform'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
layers.Activation() expects a function or a string, such as 'sigmoid', but you are currently passing the whole list activations to it. Use your index i (or a different index) to access a single activation function, like activations[i].
You can also pass the activation as a string directly to the Dense layer, like so:
model.add(tf.keras.layers.Dense(nodes, activation=activations[i], input_dim=x_train.shape[1]))
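A hedged side note, not part of the answers above: with keras.wrappers.scikit_learn.KerasClassifier, this exact ValueError can also be triggered by a mismatch between a param_grid key and a build_fn argument name; here create_model takes activations (plural) while param_grid uses activation. If that is the trigger, aligning the names would look like:

param_grid = dict(layers=layers,
                  activations=activations,  # key must match the create_model argument name
                  batch_size=[128, 256],
                  epochs=[30])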

How to calibrate the input shape of my classification neural network?

I'm getting this error:
ValueError: Error when checking input: expected dense_27_input to have shape (20,) but got array with shape (3495,)
Here is my code:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Input, Dense
from keras.layers import Flatten
from sklearn.preprocessing import StandardScaler
import numpy as np
df = pd.read_csv('../input/nasa-asteroids-classification/nasa.csv')
df = pd.get_dummies(df)
X = df.loc[:, df.columns != 'Hazardous']
y = df.loc[:, 'Hazardous']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
model = Sequential()
model.add(Dense(64, input_dim=(20), activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(8, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))
compilation = model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=32)
scores = model.evaluate(X_train, y_train, verbose=False)
print("Training Accuracy: %.2f%%\n" % (scores[1]*100))
scores = model.evaluate(X_test, y_test, verbose=False)
print("Testing Accuracy: %.2f%%\n" % (scores[1]*100))
How do I fix this?
The data set has 20 columns after get_dummies was applied to it, and it had 20 rows before it was applied.
Link to data set: https://www.kaggle.com/shrutimehta/nasa-asteroids-classification
Your input dimension is not correct. Print the shapes of X_train and y_train: they should be (n, 20) and (n, 1), where n is the number of samples.
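A minimal sketch of the usual fix, reusing the imports and variables from the question: derive input_dim from the training matrix instead of hard-coding 20, so the first layer always matches whatever get_dummies produced:

n_features = X_train.shape[1]  # feature count after get_dummies and scaling
model = Sequential()
model.add(Dense(64, input_dim=n_features, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))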

Very simple Keras binary classification doesn't work

Can someone please explain why the following code achieves only about 50% classification accuracy?
I am trying to classify lists of 20 items into 0 or 1. The lists are all 5s or all 6s.
import numpy as np
import keras
from sklearn.model_selection import train_test_split
positive_samples = [[5]*20]*100
negative_samples = [[6]*20]*100
x_list = np.array(positive_samples+negative_samples, dtype=np.float32)
y_list = np.array([1]*len(positive_samples)+[0]*len(negative_samples), dtype=np.float32)
x_train, x_test, y_train, y_test = train_test_split(x_list, y_list, test_size=0.20, random_state=42)
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=10, epochs=20, verbose=2, validation_data=(x_test, y_test))
print (model.evaluate(x_test, y_test, verbose=0))
Since the output layer produces 2 values per sample, you need to use a softmax activation instead of sigmoid.
That also means binary_crossentropy cannot be used; you have to use categorical_crossentropy.
I have also normalized the dataset x_list by dividing by its maximum (6):
x_list /= x_list.max()
Also, make sure the dataset is shuffled; train_test_split shuffles by default, and passing shuffle=True makes that explicit.
import numpy as np
import keras
from sklearn.model_selection import train_test_split
positive_samples = [[5]*20]*100
negative_samples = [[6]*20]*100
x_list = np.array(positive_samples+negative_samples, dtype=np.float32)
y_list = np.array([1]*len(positive_samples)+[0]*len(negative_samples), dtype=np.float32)
x_list /= x_list.max()
x_train, x_test, y_train, y_test = train_test_split(x_list, y_list, test_size=0.20, shuffle=True, random_state=42)
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=10, epochs=100, verbose=2, validation_data=(x_test, y_test))
print (model.evaluate(x_test, y_test, verbose=0))
A sigmoid activation in the output makes sense only when there is a single output, whose value in [0, 1] can be read as the probability of the instance being a 1.
With 2 (or more) output neurons, the probabilities must be normalized to sum to 1, so a softmax layer is used instead.
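For completeness, a sketch of the single-output alternative (not part of the original answer): keep one output neuron, in which case sigmoid and binary_crossentropy are the right pair and to_categorical is not needed:

# alternative: one output neuron, so sigmoid + binary_crossentropy is correct;
# y_train / y_test must stay as plain 0/1 labels (no to_categorical)
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(1, kernel_initializer='normal', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])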
Data should be normalized before feeding it to the network; this is normally done by scaling the values to the range [0, 1] or [-1, 1]. Setting the input to:
positive_samples = [[1]*20]*100
negative_samples = [[-1]*20]*100
works, or the model could be changed to add a normalization layer:
model = keras.models.Sequential()
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(10, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='sigmoid'))

Real bad accuracy of training test on neural network on keras

I'm building a neural network for the "Default of credit card clients" data set from http://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients.
But the accuracy of my models is pretty bad, worse than if I predicted all zeros. I have already done some research, oversampled to correct the class imbalance, and changed the optimizer, because adam was not improving the accuracy.
What else could I do?
import pandas
import numpy
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import keras
from imblearn.over_sampling import SMOTE
seed = 8
numpy.random.seed(seed)
base = pandas.read_csv('base_nao_trabalhada.csv')
train, test = train_test_split(base, test_size = 0.2)
train=train.values
test=test.values
X_train = train[:,1:23]
Y_train = train[:,24]
X_test = test[:,1:23]
Y_test = test[:,24]
sm = SMOTE(kind='regular')
X_resampled, Y_resampled = sm.fit_sample(X_train, Y_train)
# Model Creation
model = Sequential()
model.add(Dense(40, input_dim=22, init='uniform', activation='relu'))
model.add(Dense(4, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
#activation='relu'
opt = keras.optimizers.SGD(lr=0.000001)
# Compile model
model.compile(loss='binary_crossentropy', optimizer=opt , metrics=['accuracy'])
#loss=binary_crossentropy
#optimizer='adam'
# creating .fit
model.fit(X_resampled, Y_resampled, nb_epoch=10000, batch_size=30)
# evaluate the model
scores = model.evaluate(X_test, Y_test)
print ()
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
