Hyperparameter-Tuning for pretrained NN from TensorFlow - python

I am starting to learn Python and am trying to create a neural network that detects and localizes anomalous parts of images. I am using a pretrained CNN from TensorFlow. The code works insofar as it trains on my images and classifies them. But when I give it the same number of images (around 100) for each class, the accuracy is around 50%, which is no better than random guessing for two classes. So I am looking for a solution to this right now.
I am solving the localization problem by slicing my images into multiple parts. The anomaly is split from the non-anomalous regions, and every slice carries a reference to its location in its file name. So when a slice is classified as a certain class, its location can also be read off its name.
First option: I have to find a way to extend the number of images I have, to see whether that improves my accuracy. I haven't worked on that yet.
Second option: trying to tune the hyperparameters of the CNN, or maybe feeding my images into some earlier layers. I watched a couple of tutorials and tried to implement them, but they all failed, mostly due to the shape of the arrays or inconsistent numbers.
# Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the small binary classifier used by the grid search.

    This function is handed to ``KerasClassifier`` as ``build_fn``.

    Returns:
        A compiled ``Sequential`` model: a 12-unit ReLU layer declared with
        ``input_dim=8``, followed by a single sigmoid output unit, trained
        with binary cross-entropy and the Adam optimizer.
    """
    # create model
    model = Sequential()
    # input_dim=8 means the first layer expects 8 features per sample.
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
# NOTE(review): x_train and label_batch are not defined in this snippet;
# presumably they are created elsewhere in the notebook - confirm.
# create model
model = KerasClassifier(build_fn=create_model, verbose=0)
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(x_train, label_batch)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per parameter combination: mean score, its std-dev, and the parameters.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
# Initialise the TensorFlow global variables in the session.
# NOTE(review): `sess`, `image_batch` and this `model` are not defined in the
# snippet - presumably the pretrained network set up earlier; confirm.
init = tf.global_variables_initializer()
sess.run(init)
# Run one batch through the model and inspect the shape of its output.
result = model.predict(image_batch)
result.shape
# Configure training: Adam optimizer, categorical cross-entropy, accuracy metric.
model.compile(
optimizer=tf.train.AdamOptimizer(),
loss='categorical_crossentropy',
metrics=['accuracy'])
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Keras callback that records the loss and accuracy of every batch.

    After training, ``batch_losses`` and ``batch_acc`` hold one entry per
    processed batch, in order.
    """

    def __init__(self):
        # Let the base Callback initialise its own state before adding ours.
        super().__init__()
        self.batch_losses = []
        self.batch_acc = []

    def on_batch_end(self, batch, logs=None):
        """Append this batch's 'loss' and 'acc' log values to the histories."""
        self.batch_losses.append(logs['loss'])
        self.batch_acc.append(logs['acc'])
# Number of full batches per epoch (integer division drops a partial last batch).
steps_per_epoch = image_data.samples//image_data.batch_size
batch_stats = CollectBatchStats()
# Train for one epoch, pulling batches from image_data through a generator and
# recording per-batch loss/accuracy via the callback.
model.fit((item for item in image_data), epochs=1,
steps_per_epoch=steps_per_epoch,
callbacks = [batch_stats])

From what you mentioned in the comments you are facing the below error.
ValueError: Error when checking input: expected dense_1_input to have
2 dimensions, but got array with shape (21, 224, 224, 3)
If you are using a CNN, why is the first layer a Dense layer (as I understand from the name, dense_1_input) instead of a convolutional layer?
With a convolutional layer as the first layer, you should pass (224, 224, 3) as the input_shape argument.
Complete code for Fine Tuning Batch_Size and Number of Epochs for Fashion_MNIST Dataset using CNN is shown below:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
from io import open
# Common imports
import numpy as np
import os
import tensorflow as tf
from keras.layers import Input, Conv2D, MaxPool2D, Dense, Dropout, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.regularizers import l1_l2
from matplotlib.pyplot import axis as ax
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
# NOTE(review): these placeholders (X, X_reshaped, y) are not referenced by the
# rest of this example; the Keras model below is fitted on NumPy arrays.
X = tf.placeholder(tf.float32, shape=[None, 784], name="X")
X_reshaped = tf.reshape(X, shape=[-1, 28, 28, 1])
y = tf.placeholder(tf.int32, shape=[None], name="y")
def create_model():
    """Build and compile the CNN evaluated by the grid search.

    Architecture: Conv2D(64, 3x3, relu) -> MaxPool(2x2) -> Dropout(0.25)
    -> Flatten -> Dense(32, relu) -> Dense(10, softmax).  The convolutional
    and dense layers all share one ``l1_l2`` activity regularizer.

    Returns:
        A compiled ``Sequential`` model that takes (28, 28, 1) inputs and is
        trained with sparse categorical cross-entropy on integer labels.
    """
    # instantiate regularizer
    regularizer = l1_l2(0.001)
    cnn_model = Sequential()
    cnn_model.add(Conv2D(filters=64, kernel_size=3, strides=(1, 1), input_shape=(28, 28, 1),
                         activation='relu', data_format='channels_last',
                         activity_regularizer=regularizer))
    cnn_model.add(MaxPool2D(pool_size=(2, 2)))
    cnn_model.add(Dropout(0.25))
    cnn_model.add(Flatten())
    cnn_model.add(Dense(units=32, activation='relu', activity_regularizer=regularizer))
    # softmax rather than sigmoid: the 10 classes are mutually exclusive and
    # sparse_categorical_crossentropy expects one probability distribution
    # per sample, not 10 independent probabilities.
    cnn_model.add(Dense(units=10, activation='softmax', activity_regularizer=regularizer))
    cnn_model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=0.001),
                      metrics=['accuracy'])
    return cnn_model
# Wrap the model factory so scikit-learn can drive it.
model = KerasClassifier(build_fn=create_model, verbose=0)
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
# Scale pixel values to [0, 1], then restore the (28, 28, 1) image shape the CNN expects.
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_train_reshaped = np.reshape(X_train, newshape=[-1, 28, 28, 1])
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test_reshaped = np.reshape(X_test, newshape=[-1, 28, 28, 1])
y_train = y_train.astype(int)
y_test = y_test.astype(int)
# Hyperparameter grid: every batch_size/epochs combination is cross-validated.
batch_size = [20, 40]
epochs = [10, 50]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(X_train_reshaped, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per parameter combination: mean score, its std-dev, and the parameters.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Related

SHAP ValueError: Dimension 1 in both shapes must be equal, but are 2 and 1. Shapes are [?,2] and [?,1]

Based on a previously trained feed-forward network, I tried to use SHAP to get the feature importance. I followed all the steps described in the documentation but I am still receiving the following error
ValueError: Dimension 1 in both shapes must be equal, but are 2 and 1. Shapes are [?,2] and [?,1]
The following code is a reproducible example that produces the same error.
import pandas as pd
from numpy.random import randint
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout, Activation
from keras.optimizers import Adam
import shap
# Train_x data creation
# Four integer feature columns of 1489 random values each.
train_x = pd.DataFrame({
'v1': randint(2, 20, 1489),
'v2': randint(50, 200, 1489),
'v3': randint(30, 90, 1489),
'v4': randint(100, 150, 1489)
})
# Train_y data creation
# Random binary labels (randint's upper bound 2 is exclusive).
train_y = randint(0, 2, 1489)
# One-hot encoding as I use categorical cross-entropy
train_y = to_categorical(train_y, num_classes=2)
# Start construction of a DNN Sequential model.
model = Sequential()
# First input layer with a dropout and batch normalization layer following
model.add(Dense(256, input_dim=train_x.shape[1]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(rate=0.2))
# Output layer
model.add(Dense(2))
model.add(Activation('softmax'))
# Use the Adam optimizer
optimizer = Adam(lr=0.001)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()
# Fit model
hist = model.fit(train_x, train_y, epochs=100, batch_size=128, shuffle=False, verbose=2)
# SHAP calculation
# NOTE(review): the explainer is built from the DataFrame itself, while
# shap_values() below is given the underlying NumPy array (`.values`).
explainer = shap.DeepExplainer(model, train_x)
shap_values = explainer.shap_values(train_x[:500].values)
Here I have an input shape of (None, 4) and a softmax activation function at the end with 2 neurons, as I use it for binary classification. The train_x data in the code snippet above is a pandas DataFrame of shape (1489, 4).
I tried to change the train_x shape but I had a similar error. Any help would be much appreciated.
Please see below a working example for binary classification with TF:
# pandas is required for the DataFrame built below.
import pandas as pd
from numpy.random import randint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation
from tensorflow.keras.optimizers import Adam
import shap
import tensorflow
# Report the package versions this example was run with.
print(shap.__version__, "\n",tensorflow.__version__)
# Train_x data creation
train_x = pd.DataFrame({
    'v1': randint(2, 20, 1489),
    'v2': randint(50, 200, 1489),
    'v3': randint(30, 90, 1489),
    'v4': randint(100, 150, 1489)
})
# Train_y data creation
train_y = randint(0, 2, 1489)
# One-hot encoding as I use categorical cross-entropy
train_y = to_categorical(train_y, num_classes=2)
# Start construction of a DNN Sequential model.
model = Sequential()
# First input layer with a dropout and batch normalization layer following
model.add(Dense(256, input_dim=train_x.shape[1]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(rate=0.2))
# Output layer
model.add(Dense(2))
model.add(Activation('softmax'))
# Use the Adam optimizer
optimizer = Adam(lr=0.001)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# model.summary()
# Fit model
hist = model.fit(train_x, train_y, epochs=100, batch_size=128, shuffle=False, verbose=0)
# SHAP calculation
# Register the "AddV2" op as a pass-through for SHAP's deep TF explainer.
shap.explainers._deep.deep_tf.op_handlers["AddV2"] = shap.explainers._deep.deep_tf.passthrough
explainer = shap.DeepExplainer(model, train_x)
shap_values = explainer.shap_values(train_x[:500].values)
# Plot the SHAP values of the second output (index 1).
shap.summary_plot(shap_values[1])
0.38.2
2.2.0
Note a couple of things:
Package versions (tf should be below 2.4 I believe)
Addition of "AddV2" (see discussion here)

Keras CNN always predicts same class

EDIT: it seems like I did not even run the model for enough epochs, so I will try that out and return with my results
I am trying to create a CNN that classifies 3D brain images. However, the CNN always predicts the same class when I run it, and I am not sure what else I can do to prevent this. I have looked up this problem and found many plausible solutions, but none of them worked.
So far, I have tried:
Decreasing the learning rate
Normalize the data to [0, 1]
Change optimizers
Only use sigmoid and binary_crossentropy
Add/remove dropout layers
Changed into a simpler CNN model
Balance the dataset
Added augmented data using a custom 3D imagedatagenerator()
Link: https://github.com/dhuy228/augmented-volumetric-image-generator
For context, I am classifying between two groups. The amount of images I am using is a total of 200 3D brain images (about 100 for each category). To increase my training size, I used a custom data augmentation I found from github
Looking at the learning curve, the accuracy and loss rates are completely random. Some runs they would be decreasing, some increasing, and some fluctuating within a range
Any help would be appreciated!
import os
import csv
import tensorflow as tf # 2.0
import nibabel as nib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from keras.models import Model
from keras.layers import Conv3D, MaxPooling3D, Dense, Dropout, Activation, Flatten
from keras.layers import Input, concatenate
from keras import optimizers
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from augmentedvolumetricimagegenerator.generator import customImageDataGenerator
from keras.callbacks import EarlyStopping
# Administrative items
# Set TensorFlow's C++ log level through the TF_CPP_MIN_LOG_LEVEL environment variable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Where the file is located
path = r'C:\Users\jesse\OneDrive\Desktop\Research\PD\decline'
# Entries directly under `path`; read_image lists the files inside each one.
folder = os.listdir(path)
# Shape every volume is resized to before it is fed to the network.
target_size = (96, 96, 96)
# creating x - converting images to array
def read_image(path, folder):
    """Load every volume under ``path/<entry>/`` as a scaled 4-D array.

    Args:
        path: Root directory of the dataset.
        folder: Names of the sub-directories of ``path`` to read.

    Returns:
        A list with one array per file, each of shape ``target_size + (1,)``,
        in directory-listing order.
    """
    # Local import: scipy is only needed for the resampling below.
    from scipy.ndimage import zoom

    mri = []
    for sub_dir in folder:
        sub_path = os.path.join(path, sub_dir)
        for file_name in os.listdir(sub_path):
            image = np.array(nib.load(os.path.join(sub_path, file_name)).get_fdata())
            # Resample to target_size by interpolation.  np.resize must not be
            # used here: it only repeats/truncates the flattened data, which
            # scrambles the spatial structure of the volume.
            # NOTE(review): assumes each file holds a 3-D volume - confirm.
            factors = [t / s for t, s in zip(target_size, image.shape)]
            image = zoom(image, factors, order=1)
            # Add a trailing channel axis.
            image = np.expand_dims(image, axis=3)
            # NOTE(review): assumes intensities lie in 0..255 - confirm.
            image /= 255.
            mri.append(image)
    return mri
# creating y - one hot encoder
def create_y(excel_file=r'C:\Users\jesse\OneDrive\Desktop\Research\PD\decline_label.xlsx'):
    """Read the 'Label' column of an Excel sheet and one-hot encode it.

    Args:
        excel_file: Path of the spreadsheet holding the labels.  Defaults to
            the location used by the original script.

    Returns:
        A dense 2-D array with one one-hot row per spreadsheet row.
    """
    excel_read = pd.read_excel(excel_file)
    excel_array = np.array(excel_read['Label'])
    # Map the raw labels to integer codes, then to one-hot rows.
    label = LabelEncoder().fit_transform(excel_array)
    label = label.reshape(len(label), 1)
    # NOTE(review): rows are returned in spreadsheet order; presumably this
    # matches the order in which read_image lists the files - verify.
    onehot = OneHotEncoder(sparse=False).fit_transform(label)
    return onehot
# Splitting image train/test
x = np.asarray(read_image(path, folder))
y = np.asarray(create_y())
# Hold out 20% for testing, then 25% of the remainder for validation
# (60/20/20 overall), stratified on the labels both times.
x_split, x_test, y_split, y_test = train_test_split(x, y, test_size=.2, stratify=y)
x_train, x_val, y_train, y_val = train_test_split(x_split, y_split, test_size=.25, stratify=y_split)
print(x_train.shape, x_val.shape, x_test.shape, y_train.shape, y_val.shape, y_test.shape)
batch_size = 10
# One class per entry found under `path`.
num_classes = len(folder)
# Model: three blocks of (Conv3D, Conv3D, MaxPooling3D, Dropout) with 32, 64
# and 128 filters, then Flatten -> Dense(128) -> Dropout -> Dense(num_classes).
inputs = Input((96, 96, 96, 1))
conv1 = Conv3D(32, [3, 3, 3], padding='same', activation='relu')(inputs)
conv1 = Conv3D(32, [3, 3, 3], padding='same', activation='relu')(conv1)
pool1 = MaxPooling3D(pool_size=(2, 2, 2), padding='same')(conv1)
drop1 = Dropout(0.5)(pool1)
conv2 = Conv3D(64, [3, 3, 3], padding='same', activation='relu')(drop1)
conv2 = Conv3D(64, [3, 3, 3], padding='same', activation='relu')(conv2)
pool2 = MaxPooling3D(pool_size=(2, 2, 2), padding='same')(conv2)
drop2 = Dropout(0.5)(pool2)
conv3 = Conv3D(128, [3, 3, 3], padding='same', activation='relu')(drop2)
conv3 = Conv3D(128, [3, 3, 3], padding='same', activation='relu')(conv3)
pool3 = MaxPooling3D(pool_size=(2, 2, 2), padding='same')(conv3)
drop3 = Dropout(0.5)(pool3)
flat1 = Flatten()(drop3)
dense1 = Dense(128, activation='relu')(flat1)
drop5 = Dropout(0.5)(dense1)
dense2 = Dense(num_classes, activation='sigmoid')(drop5)
model = Model(inputs=[inputs], outputs=[dense2])
opt = optimizers.Adagrad(lr=1e-5)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
# Training batches are augmented with horizontal flips; validation batches are not.
train_datagen = customImageDataGenerator(
horizontal_flip=True
)
val_datagen = customImageDataGenerator()
training_set = train_datagen.flow(x_train, y_train, batch_size=batch_size)
validation_set = val_datagen.flow(x_val, y_val, batch_size=batch_size)
# Stop once val_loss has not improved for 3 consecutive epochs.
callbacks = EarlyStopping(monitor='val_loss', patience=3)
# 10 steps of batch_size 10 means 100 training samples are drawn per epoch.
history = model.fit_generator(training_set,
steps_per_epoch = 10,
epochs = 20,
validation_steps = 5,
callbacks = [callbacks],
validation_data = validation_set)
score = model.evaluate(x_test, y_test, batch_size=batch_size)
print(score)
y_pred = model.predict(x_test, batch_size=batch_size)
# Convert one-hot labels and predicted scores to class indices.
y_test = np.argmax(y_test, axis=1)
y_pred = np.argmax(y_pred, axis=1)
confusion = confusion_matrix(y_test, y_pred)
# NOTE(review): `map` shadows the Python builtin of the same name.
map = sns.heatmap(confusion, annot=True)
print(map)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# Figure 1: accuracy curves. The second curve, labelled 'test', is the validation accuracy.
plt.figure(1)
plt.plot(acc)
plt.plot(val_acc)
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.title('Accuracy')
# Figure 2: loss curves. The second curve, labelled 'test', is the validation loss.
plt.figure(2)
plt.plot(loss)
plt.plot(val_loss)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.title('Loss')
You can find the outputs here: https://i.stack.imgur.com/FF13P.jpg
It is kind of hard to help without the dataset itself. Though one or two things I would test:
I find the ReLU activation inappropriate for a Dense layer, and it could lead to the single-class prediction. Try replacing the relu in your Dense(128) layer with something else (sigmoid, tanh).
Dropout is not really appropriate for images in general; you might want to look at DropBlock.
The initial learning rate is pretty low; I would start with something between 1e-4 and 1e-3.
A stupid thing that has happened to me way too often: have you visualized the image/label combinations to make sure each image has the right label?
Again, not sure it will fix everything, but I hope it might help!
This could be any number of things, but it is possible that the misbehaviour is being caused by the data itself.
Just from looking at the code, it seems like you haven't normalized the testing data before calling model.predict or model.evaluate in the same way as you have done for the training and validation data.
I had a similar problem once and it turned out this was the cause. As a quick test you can just rescale the test data and see if that helps.

Error when checking target: expected dense_3 to have shape (4,) but got array with shape (10,)

I am a beginner in Python.
I am using Keras and TensorFlow to create an ML model. With 4 classes it works well, but when I add more classes I get this error. I have now added 10 classes.
I use Python 3.7, Keras 2.3.1, TensorFlow 1.14.
I'm following the tutorial at this link
here.
I have spent 3 days trying to solve this problem but could not.
Thanks
Classification.py File
import os as os
from keras.models import load_model
from keras.utils import np_utils
import model as md
import preparation as prep
import visualization as vis
from keras.utils import to_categorical
from keras import metrics
# ----------- dataset settings -----------
# number of instances per class used for train and test in total:
# should be smaller or equal than generated subset
INSTANCES_PER_CLASS = 5000
NUM_CLASS_LIMIT = 345  # limit of classes
# path of the dataset separated in train and test
DATA_PATH = os.path.join(os.getcwd(), "../Draw/dataset/train_test_20k/")
# path for all created files
MODEL_PATH = os.path.join(os.getcwd(), "models/" + str(NUM_CLASS_LIMIT) + "/" + str(INSTANCES_PER_CLASS) + "/")
# ----------- model settings -----------
MODEL_NAME = 'model.h5'  # name for the frozen model
# input size
IMG_WIDTH = 28
IMG_HEIGHT = 28
IMG_SIZE = IMG_WIDTH * IMG_HEIGHT
IMG_DIM = 1
# training settings
EPOCHS = 10
BATCH_SIZE = 256
if __name__ == "__main__":
    # create new directories if required
    os.makedirs(MODEL_PATH, exist_ok=True)
    # single location of the saved model, used for the check, load and save below
    model_file = os.path.join(MODEL_PATH, MODEL_NAME)
    # get the dataset
    num_classes, x_train, x_test, y_train, y_test, classes_dict = prep.collect_data(NUM_CLASS_LIMIT)
    print("trainingsset instances {}".format(x_train.shape))
    print("trainingsset labels {}".format(y_train.shape))
    # plot first test images
    #vis.plot_first_n_images(x_test, y_test, classes_dict, 100)
    # class representation as "binary" vector
    y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes=num_classes)
    # create or load keras model
    if not os.path.isfile(model_file):
        print("create model...")
        model = md.build_model(input_shape=x_train.shape[1:], num_classes=num_classes)
    else:
        print("load existing model...")
        # NOTE(review): a previously saved model is reused as-is; its output
        # layer is not checked against the current num_classes.
        model = load_model(model_file)
        # score trained model using validation set
        scores = model.evaluate(x_test, y_test, verbose=1)
        print('test loss:', scores[0])
        print('test accuracy:', scores[1])
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    # print model information if desired
    # (summary() prints the table itself; wrapping it in print() would add "None")
    model.summary()
    # model training from scratch or retrain by existing model
    hist = model.fit(x_train, y_train, batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     validation_data=[x_test, y_test],
                     shuffle=True)
    #from keras.utils import plot_model
    #plot_model(model, to_file=MODEL_PATH + 'model.png')
    # evaluation process
    print("evaluate model...")
    # summarize history during training phase
    # plot training and validation set accuracy
    vis.plot_training_history_accuracy(hist)
    # test set evaluation
    scores = model.evaluate(x_test, y_test, verbose=1)
    print(scores)
    print('test loss:', scores[0])
    print('test accuracy:', scores[1])
    # create and plot confusion matrix
    #y_pred = model.predict(x_test)
    #vis.plot_confusion_matrix(y_pred, y_test, classes=list(classes_dict.values()))
    # freeze the model (architecture and weights)
    model.save(model_file)
    print('saved trained model at {}'.format(model_file))
The Model Code model.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
def build_model(input_shape, num_classes):
    """
    Builds the model architecture based on MNIST CNN
    https://www.tensorflow.org/tutorials/estimators/cnn

    Layers: Conv2D(32, 5x5) -> MaxPooling2D -> Conv2D(128, 3x3) -> MaxPooling2D
    -> Dropout(0.2) -> Flatten -> Dense(512) -> Dense(256)
    -> Dense(num_classes, softmax).  The model is returned uncompiled.

    Args:
        input_shape: Input shape of the model
        num_classes: Number of classes
    Returns:
        keras.models.Model: The created model
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(32, (5, 5), activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.2)(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    # The width of the output layer is fixed here by num_classes.
    predictions = Dense(num_classes, activation='softmax')(x)
    print("SHOW DESEN", num_classes)
    return Model(inputs=inputs, outputs=predictions)
I believe the error is coming from this step:
# Training call; y_train is the "target" array that the error message refers to.
hist = model.fit(x_train, y_train, batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=[x_test, y_test],
shuffle=True)
The reference to "target" means the "y_train" array. You might want to confirm that the y_train you are trying to fit matches the actual model.summary(), by checking y_train.shape just before the fit. It's possible that the old model that was designed for 4 classes is being used, even though the new 10-class y_train is being passed to model.fit().
Diagnostic code example (model.fit is commented out):
# This outputs the model design. The final layer should be (None,num_classes)
# (summary() prints the table itself; wrapping it in print() would add "None")
model.summary()
# print shape of y_train/target data. The second dimension should match the number of classes from above (?, num_classes)
print(y_train.shape)
#model.fit( x_train, y_train, epochs=1)
I hope this helps.

Why state of the art network didn't perform well on Fashion MNIST

When I tried to use ResNet50 with all layers frozen to classify the Fashion MNIST dataset, I could only get around 78% training accuracy and 41% prediction accuracy. Below is the code snippet:
from keras import optimizers
from keras.applications.resnet50 import ResNet50
from keras.datasets import fashion_mnist
from keras.layers import Activation, Flatten, Dense
from keras.models import Model
# train_test_split is used below to carve out the validation set.
from sklearn.model_selection import train_test_split
(x, y), (x_test, y_test) = fashion_mnist.load_data()
# Hold out 10000 training samples for validation.
dat_train, dat_val, train_lbs, val_lbs = train_test_split(x, y, test_size=10000, random_state=42)
... # transform dat_train, dat_val, x_test from shapes (28, 28, ) to (32, 32, 3) and re-scale to value range [0, 1], also one hot encoding train_lbs, val_lbs, y_test to shape (, 10)
# ImageNet-pretrained ResNet50 without its classification head.
resnet50_base = ResNet50(include_top=False,
                         weights='imagenet',
                         input_shape=(32, 32, 3))
# Freeze every layer of the pretrained base; only the new head is trained.
for layer in resnet50_base.layers:
    layer.trainable = False
# New head: Flatten -> Dense(128, relu) -> Dense(10, softmax).
base_out = resnet50_base.output
base_out = Flatten()(base_out)
base_out = Dense(128)(base_out)
base_out = Activation("relu")(base_out)
preds = Dense(10, activation="softmax")(base_out)
model = Model(inputs=resnet50_base.input, outputs=preds)
model.compile(loss="categorical_crossentropy",
              optimizer=optimizers.Adam(lr=0.0005),
              metrics=["accuracy"])
It produced this result
Did I do something wrong, or is ResNet50 not suited to the Fashion MNIST dataset?
Hey you can see my repo https://github.com/rushu570/Fashion_Mnist to understand how it works

How to create a neural network for regression?

I am trying to use Keras to make a neural network. The data I am using is https://archive.ics.uci.edu/ml/datasets/Yacht+Hydrodynamics. My code is as follows:
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
from sklearn.model_selection import train_test_split
# Load the comma-separated data file.
data = np.genfromtxt(r"""file location""", delimiter=',')
# One hidden ReLU layer of 32 units on 6 input features, then a single output
# unit with no activation specified.
model = Sequential()
model.add(Dense(32, activation = 'relu', input_dim = 6))
model.add(Dense(1,))
# NOTE(review): 'accuracy' is a classification metric; presumably it is not
# meaningful next to a mean-squared-error regression loss - confirm.
model.compile(optimizer='adam', loss='mean_squared_error', metrics = ['accuracy'])
# Last column is the target, all other columns are the features.
Y = data[:,-1]
X = data[:, :-1]
From here I have tried using model.fit(X, Y), but the accuracy of the model appears to remain at 0. I am new to Keras so this is probably an easy solution, apologies in advance.
My question is what is the best way to add regression to the model so that the accuracy increases? Thanks in advance.
First of all, you have to split your dataset into a training set and a test set using the train_test_split function from the sklearn.model_selection module.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.08, random_state = 0)
Also, you have to scale your values using StandardScaler class.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training set only, then apply the same fitted scaler to the test set.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
Then, you should add more layers in order to get better results.
Note
A common rule of thumb for choosing the total number of hidden neurons is the following formula:
$N_h = \dfrac{N_s}{\alpha \, (N_i + N_o)}$
where
$N_h$ = number of hidden neurons,
$N_i$ = number of input neurons,
$N_o$ = number of output neurons,
$N_s$ = number of samples in the training data set,
$\alpha$ = an arbitrary scaling factor, usually 2-10.
So our model becomes:
# Initialising the ANN
model = Sequential()
# Adding the input layer and the first hidden layer (6 input features, 32 ReLU units)
model.add(Dense(32, activation = 'relu', input_dim = 6))
# Adding the second hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the third hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the output layer: one unit, no activation specified
model.add(Dense(units = 1))
The metric that you use- metrics=['accuracy'] corresponds to a classification problem. If you want to do regression, remove metrics=['accuracy']. That is, just use
model.compile(optimizer = 'adam',loss = 'mean_squared_error')
Here is a list of keras metrics for regression and classification
Also, you have to define the batch_size and epochs values for fit method.
model.fit(X_train, y_train, batch_size = 10, epochs = 100)
After you trained your network you can predict the results for X_test using model.predict method.
y_pred = model.predict(X_test)
Now, you can compare the y_pred that we obtained from neural network prediction and y_test which is real data. For this, you can create a plot using matplotlib library.
# Overlay the true targets (red) and the network's predictions (blue) for the test set.
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()
It seems that our neural network learns very well.
Here is how the plot looks.
Here is the full code
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Importing the dataset
dataset = np.genfromtxt("data.txt", delimiter='')
# All columns but the last are features; the last column is the target.
X = dataset[:, :-1]
y = dataset[:, -1]
# Splitting the dataset into the Training set and Test set (8% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.08, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training set only, then apply the same fitted scaler to the test set.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Initialising the ANN
model = Sequential()
# Adding the input layer and the first hidden layer (6 input features, 32 ReLU units)
model.add(Dense(32, activation = 'relu', input_dim = 6))
# Adding the second hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the third hidden layer
model.add(Dense(units = 32, activation = 'relu'))
# Adding the output layer: one unit, no activation specified
model.add(Dense(units = 1))
# Compiling the ANN with a mean-squared-error loss and no extra metrics
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the ANN to the Training set
model.fit(X_train, y_train, batch_size = 10, epochs = 100)
# Predict the held-out test set.
y_pred = model.predict(X_test)
# Overlay the true targets (red) and the predictions (blue).
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()

Categories