Very simple Keras binary classification doesn't work - python

Can someone please explain why the following code achieves only about 50% classification accuracy?
I am trying to classify lists of 20 items into 0 or 1. The lists are all 5s or all 6s.
import numpy as np
import keras
from sklearn.model_selection import train_test_split
positive_samples = [[5]*20]*100
negative_samples = [[6]*20]*100
x_list = np.array(positive_samples+negative_samples, dtype=np.float32)
y_list = np.array([1]*len(positive_samples)+[0]*len(negative_samples), dtype=np.float32)
x_train, x_test, y_train, y_test = train_test_split(x_list, y_list, test_size=0.20, random_state=42)
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=10, epochs=20, verbose=2, validation_data=(x_test, y_test))
print (model.evaluate(x_test, y_test, verbose=0))

Since the last output layer has 2 values per sample, you need to use a softmax activation instead of sigmoid.
Also, that means binary_crossentropy cannot be used, and you have to use categorical_crossentropy.
I have also normalized the dataset x_list by dividing with the maximum (6).
x_list /= x_list.max()
Also, you need to shuffle the dataset, by passing shuffle=True in train_test_split.
import numpy as np
import keras
from sklearn.model_selection import train_test_split
positive_samples = [[5]*20]*100
negative_samples = [[6]*20]*100
x_list = np.array(positive_samples+negative_samples, dtype=np.float32)
y_list = np.array([1]*len(positive_samples)+[0]*len(negative_samples), dtype=np.float32)
x_list /= x_list.max()
x_train, x_test, y_train, y_test = train_test_split(x_list, y_list, test_size=0.20, shuffle=True, random_state=42)
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)
model = keras.models.Sequential()
model.add(keras.layers.Dense(10, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=10, epochs=100, verbose=2, validation_data=(x_test, y_test))
print (model.evaluate(x_test, y_test, verbose=0))
A sigmoid activation in the output makes sense only when there is 1 output, in which the value would be in range [0, 1] signifying probability of the instance being a 1.
In case of 2 (or more) output neurons, it is necessary we normalize the probabilities to sum upto 1 so we use a softmax layer instead.

Data should be normalized before feeding it to the network, this is normally done by changing the values to be between 0 and 1 or -1 and 1. Setting the input to;
positive_samples = [[1]*20]*100
negative_samples = [[-1]*20]*100
works or the model could be changed to:
model = keras.models.Sequential()
model.add(BatchNormalization())
model.add(keras.layers.Dense(10, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(5, kernel_initializer='normal', activation='relu'))
model.add(keras.layers.Dense(2, kernel_initializer='normal', activation='sigmoid'))

Related

I need consistent results for MSE between runs when training a Neural Network

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
# load data from UCI Machine Learning Repository
df = pd.read_csv(r'C:\Test_set_Yacht.csv')
df1 = pd.read_csv(r'C:\Train_set_Yacht.csv')
df2 = pd.read_csv(r'C:\Yacht_hydro.csv')
X = df2.drop("residuary_resistance", axis=1)
Y = df2["residuary_resistance"]
# split data into features and target
X_train = df1.drop("residuary_resistance", axis=1)
y_train = df1["residuary_resistance"]
# split data into train and test sets
X_test = df.drop("residuary_resistance", axis=1)
y_test = df["residuary_resistance"]
# scale data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
def objective_model_1(hidden_units, learning_rate):
model = Sequential()
model.add(Dense(hidden_units, input_dim=X.shape[1], activation="relu"))
model.add(Dense(hidden_units, activation="relu"))
model.add(Dense(1, activation="linear"))
model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred = model.predict(X_test_scaled)
return -mean_squared_error(y_test, y_pred)
pbounds_model_1 = {
"hidden_units": (32, 128),
"learning_rate": (1e-5, 1e-1),
}
bo_model_1 = BayesianOptimization(
f=objective_model_1,
pbounds=pbounds_model_1,
random_state=42,
)
bo_model_1.maximize(init_points=10, n_iter=90)
def objective_model_2(hidden_units, learning_rate):
model = Sequential()
model.add(Dense(hidden_units, input_shape=X_train_scaled.shape[1:], activation="relu"))
model.add(Dense(hidden_units, activation="relu"))
model.add(Dense(hidden_units, activation="relu"))
model.add(Dense(hidden_units, activation="relu"))
model.add(Dense(1, activation="linear"))
model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate))
model.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred = model.predict(X_test_scaled)
return -mean_squared_error(y_test, y_pred)
pbounds_model_2 = {
"hidden_units": (32, 128),
"learning_rate": (1e-5, 1e-1),
}
bo_model_2 = BayesianOptimization(
f=objective_model_2,
pbounds=pbounds_model_2,
random_state=42,
)
bo_model_2.maximize(init_points=10, n_iter=90)
# get the best hyperparameters
# get the best hyperparameters for each model
best_params_model_1 = bo_model_1.max["params"]
best_params_model_2 = bo_model_2.max["params"]
# train and evaluate model 1 with best hyperparameters
model_1 = Sequential()
model_1.add(Dense(32, input_dim=X.shape[1], activation="relu"))
model_1.add(Dense(32, activation="relu"))
model_1.add(Dense(1, activation="linear"))
model_1.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_1["learning_rate"]))
model_1.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_1 = model_1.predict(X_test_scaled)
mse_1 = mean_squared_error(y_test, y_pred_1)
print("Model 1 MSE on test set:", mse_1)
# train and evaluate model 2 with best hyperparameters
model_2 = Sequential()
model_2.add(Dense(64, input_dim=X.shape[1], activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(64, activation="relu"))
model_2.add(Dense(1, activation="linear"))
model_2.compile(loss="mse", optimizer=Adam(learning_rate=best_params_model_2["learning_rate"]))
model_2.fit(X_train_scaled, y_train, epochs=100, verbose=0)
y_pred_2 = model_2.predict(X_test_scaled)
mse_2 = mean_squared_error(y_test, y_pred_2)
print("Model 2 MSE on test set:", mse_2)
In the following code, I implement a bayesian optimization for hyperparameter tunning of 2 different NN using the data set from: https://archive.ics.uci.edu/ml/datasets/yacht+hydrodynamics,After running this, I create again those 2 NN in a JupytherNotebook code block and run with the best hyperparameters already determined by the bayesian optimizer. I need each time I run the code to get the same MSE. This is the reason why I am splitting the data already to ensure the same results.
The inner workings of the tensorflow library are non-deterministic. So you must set a random seed in order to get reproducible results, in practice you just need to add this line at the start of your code:
tf.random.set_seed(0)

How to calculate mean relative error on test datasets

def create_model():
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(40002, 12)))
model.add(LSTM(50, return_sequences= True))
model.add(LSTM(50, return_sequences= True))
model.add(tf.keras.layers.LSTM(30))
model.add(Dense(2, activation='linear'))
def rmse(Y_test, prediction):
return K.sqrt(K.mean(K.square(Y_test-prediction)))
# compile
model.compile(optimizer='adam', loss=rmse, metrics=['mean_squared_error', rmse])
return model
# fit the model
model = create_model()
model.fit(x_train, Y_train, shuffle=False, verbose=1, epochs=10)
# # predict model
prediction = model.predict(x_test, verbose=0)
print(prediction)
How to calculate mean relative error for tensor inputs i.e my Y_test and prediction are tensor.
Y_test and prediction as 2 values
Example:
Y_test = [[0.2,0.003],
[0.3, 0.008]]
prediction = [[0.4,0.005],
[0.5,0.007]]
mean_relative_error = mean(absolute(0.2-0.4)/0.2 + absolute(0.003-0.005)/0.003), mean(absolute(0.3-0.5)/0.3 + absolute(0.008-0.007)/0.008)
mean_relative_error = [0.533, 0.3925]
Please note that I don't want to use it for backpropagation to improve the network.
Would have added like this:
from tensorflow.math import reduce_mean, abs, reduce_sum
relative_error = reduce_mean(reduce_sum(abs(prediction-Y_test)/prediction, axis=1))
# [0.9, 0.54285717]
mean_relative_error = reduce_mean(relative_error)
# 0.7214286
I couldn't use tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE) because of a bug. The MeanAbsoluteError still does reduce to mean despite specifying it not to. The bug reported HERE

Tensorflow (keras) model gives me always the same value

model.predict(x) where x is the same np array i used to train the model(x is obviously without the validation values).
Running this I just get the same value for all 1733 lines of numpy array. If you need code or an example for the np arrays used ask me.
the model is:
dataset = pd.read_csv('BNB.csv')
x = dataset.drop(columns=["Valuable"])
x = np.asarray(x).astype('float32')
y = dataset["Valuable"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(256, input_shape=x_train.shape, activation='sigmoid'))
model.add(tf.keras.layers.Dense(256, activation='sigmoid'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1000)
The numpy array (csv file) I used to train and test looks like this:
Valuable,Open,High,Low,Close,EMA8,EMA14,EMA50,ht,sar,MorningStar,Engulfing
-1,355.48,355.82,355.21,355.76,355.21,355.51,357.96,356.63,351.08,0,0
0,355.77,356.2,355.52,355.79,355.34,355.54,357.87,356.51,351.08,0,0
0,355.82,356.61,355.5,356.23,355.54,355.63,357.81,356.44,351.08,0,0
0,356.14,356.17,354.63,354.92,355.4,355.54,357.69,356.46,351.08,0,0
0,354.88,355.54,354.81,354.96,355.3,355.46,357.59,356.55,351.08,0,0
0,354.91,354.91,353.71,354.11,355.04,355.28,357.45,356.59,351.08,0,0
0,354.12,354.93,353.89,354.72,354.97,355.21,357.34,356.44,351.08,0,0
0,354.72,355.2,354.01,354.7,354.91,355.14,357.24,356.21,351.08,0,0
0,354.69,355.46,354.43,355.23,354.98,355.15,357.16,355.9,351.08,0,100
0,355.27,355.47,354.54,355.39,355.07,355.18,357.09,355.57,351.08,0,0
0,355.37,356.0,355.22,355.81,355.24,355.27,357.04,355.31,351.08,0,0
0,355.79,356.23,355.11,355.54,355.3,355.3,356.98,355.15,351.08,0,0
0,355.56,355.67,354.78,355.21,355.28,355.29,356.91,355.08,351.08,0,0
0,355.2,355.63,354.88,355.2,355.26,355.28,356.84,355.06,351.08,0,0
0,355.2,355.99,355.2,355.76,355.37,355.34,356.8,355.08,351.08,0,0
0,355.74,355.97,355.17,355.37,355.37,355.35,356.75,355.14,351.08,0,0
0,355.37,355.38,354.51,354.69,355.22,355.26,356.67,355.19,351.08,0,0
0,354.78,355.4,354.64,355.02,355.18,355.23,356.6,355.23,351.08,0,0
I want to predict whether Valuable is 0, -1, -2, 1 or 2 (my csv file is about 1700 lines long).
There are few problems with your model.
First:
You should use sparse categorical cross entropy loss instead of binary loss for your model if you have more than two classes in output.
Second:
Use softmax activation for the last/output layer.
Third:
Use as many neurons in the last layer as there are classes.
I consider the distinct values in valuable column are: [-1,-2,0,1,2].
First encode your target column like this:
y = dataset["Valuable"] # after this
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
Then Change your model definition like this:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
model = tf.keras.models.Sequential()
# changes
model.add(tf.keras.layers.Dense(256, input_shape=x_train.shape, activation="relu"))
model.add(tf.keras.layers.Dense(256, activation="relu"))
model.add(tf.keras.layers.Dense(5, activation="softmax"))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1000)

how can i evaluate StratifiedKFold model

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
x_train = dataset[0:700,:-1]
y_train = dataset[0:700,-1]
x_test = dataset[700:,:-1]
y_test = dataset[700:,-1]
def create_model():
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=64)
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
scores = cross_val_score(model, x_train, y_train, cv=skf)
predictions = cross_val_predict(model, x_test, y_test, cv=skf)
I want to train [x_train], [y_train] by StratifiedKFold
and eveluate by [x_test], [y_test]
how can i do?
I tried cross_val_predict. but i think it is not appropriate.
To split between train and test in a stratified way you can use:
from sklearn.model_selection import train_test_split
dataset_train, dataset_test = train_test_split(dataset,
stratify=dataset[:,-1],
test_size=0.2)
#split both datasets into X,y
Check:
https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
Stratified Train/Test-split in scikit-learn
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
accuracy=[]
for train in skf.split(x_train, y_train):
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
how about this one? it is work but i don`t know is it correct.

CNN training multi label classification---not working

Try to predict texture images' labels, an image can contain two labels like['banded', 'striped'], though most of them only have one label.
The output accuracy is extremely high....the first epoch can have 0.96 acc...but the prediction array are all close to 0, which is wrong, there must be at least one number is relevant closed to 1.
Can someone help me?
Thank you!!
Here are the code
Input image = (read by opencv)/255
Multi-labels = First LabelEncoder convert to numbers, then keras.to_categorical
Then I built a CNN model as follow
X_train, X_test, y_train, y_test = train_test_split(img_array, test_value, test_size=0.1)
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='Same', data_format='channels_last', activation='relu',
input_shape=(300, 300, 3)))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(filters=32, kernel_size=(3, 3), padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(300, init ='uniform',activation='relu'))
model.add(Dense(285, init = 'uniform',activation='sigmoid'))
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, batch_size= 24, epochs=10, validation_split=0.15)
If your model has only 2 labels, the last layer should be
model.add(Dense(2, init = 'uniform',activation='sigmoid'))
However, your class imbalance can also affect the accuracy. If your class imbalance is too high, your model will show 95%+ training, validation, and testing accuracies but the individual accuracies will still be low and the model will not work for real world data.
The detailed and class-based accuracy can be understood using :
from sklearn.metrics import classification_report
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30)
X_test1, X_valid, y_test1, y_valid = train_test_split(X_test, y_test, test_size=0.30)
model.fit(X_train, y_train, batch_size=64, epochs=8, shuffle=True, validation_data=(X_test1,y_test1), callbacks=[metrics])
Y_TEST = np.argmax(y_valid, axis=1)
y_pred = model.predict_classes(X_valid)
print("#"*50,"\n",classification_report(Y_TEST, y_pred))
Please share your class distribution for further understanding.
Not sure why the number of neurons in Dense layer is 285. If there are 47 categories, then the output neurons of Dense layer should be 47. Also, use a kernel initializer like he_normal instead of uniform. https://github.com/keras-team/keras-applications/blob/master/keras_applications/resnet50.py
model.add(Dense(47, activation='sigmoid'))
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
This is a multi-label classification example with 5 classes.
https://github.com/suraj-deshmukh/Keras-Multi-Label-Image-Classification

Categories