Why Can't I train the ANN for XNOR? - python

I have made a simple NN that decides the XNOR of the two binary values fed to the input layer.
I have a NumPy array of all the possible input combinations together with their labels.
Code:
from keras.models import Sequential
from keras.layers import Dense
import numpy
data = numpy.array([[0.,0.,1.],[0.,1.,0.],[1.,0.,0.],[1.,1.,1.]])
train = data[:,:-1] # Taking The same and All data for training
test = data[:,:-1]
train_l = data[:,-1]
test_l = data[:,-1]
train_label = []
test_label = []
for i in train_l:
    train_label.append([i])
for i in test_l:
    test_label.append([i]) # Just made labels single-element lists...
train_label = numpy.array(train_label)
test_label = numpy.array(test_label) # Numpy Conversion
model = Sequential()
model.add(Dense(2,input_dim = 2,activation = 'relu'))
model.add(Dense(2,activation = 'relu'))
model.add(Dense(1,activation = 'relu'))
model.compile(loss = "binary_crossentropy" , metrics = ['accuracy'], optimizer = 'adam')
model.fit(train,train_label, epochs = 10, verbose=2)
model.predict_classes(test)
Even when using the same dataset for training and testing, it doesn't predict properly.
Where did I go wrong?
I deliberately used the whole dataset for both, because it wasn't predicting correctly when trained on only two of the samples.

Your architecture is just too simple for this function. If you use the architecture below and train for 100 epochs, you'll get accuracy = 1.
model = Sequential()
model.add(Dense(20,input_dim = 2,activation = 'relu'))
model.add(Dense(20,activation = 'relu'))
model.add(Dense(1,activation = 'sigmoid'))
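For completeness, a minimal sketch of compiling and training this wider model (reusing the train and train_label arrays from the question, and the 100 epochs suggested above):
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
model.fit(train, train_label, epochs=100, verbose=0)
loss, acc = model.evaluate(train, train_label, verbose=0)
print(acc)  # should reach 1.0 on these four points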
UPD:
Why doesn't a simpler model work as well?
One reason is that with a ReLU activation, if a neuron's input is negative on every data point, its output and gradient become zero and its weights stop training (the "dying ReLU" problem). You have few neurons to start with, and if some of them "die" this way, the remaining ones may not be enough to approximate the function.
Another problem is that fewer neurons make it more likely for a model to get stuck in a local minimum.
However, you are right that theoretically, just a few neurons should be enough.
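As a side note (not from the original answer), here is a hand-constructed sketch showing that two ReLU hidden units can represent XNOR exactly; the difficulty lies only in finding such weights by gradient descent:
import numpy as np

def relu(z):
    return np.maximum(z, 0.0)

X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
W1 = np.array([[1., -1.],
               [1., -1.]])   # hidden unit 1 acts like AND, hidden unit 2 like NOR
b1 = np.array([-1., 1.])
w2 = np.array([1., 1.])      # the output simply sums the two hidden units

hidden = relu(X @ W1 + b1)
print(hidden @ w2)           # [1. 0. 0. 1.] == XNOR of the two inputs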
The model below works even with just one hidden layer. I've replaced ReLU with LeakyReLU to remedy the first problem. It works most of the time, but sometimes gets stuck in a local minimum.
from keras.layers import LeakyReLU
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(2, input_dim=2, activation=LeakyReLU(alpha=0.3)))
model.add(Dense(1, activation='sigmoid'))
optimizer = Adam(lr=0.01)
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer=optimizer)
model.fit(train, train_label, epochs=500, verbose=2)
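To verify the result (a small sketch, assuming the model above has finished training), threshold the sigmoid outputs at 0.5:
preds = (model.predict(train) > 0.5).astype(int)
print(preds.ravel())  # expected XNOR outputs: [1 0 0 1]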

Related

Simple Keras ML model for predicting multiplication isn't working

I have created a simple machine learning model to predict the product of two given numbers. I followed a YouTube tutorial to learn the basics and tried to apply them to this simple idea.
My model has three dense layers: input, hidden, and output. The input and hidden layers originally both used the 'relu' activation, which gave me a loss of NaN on model fit, so I changed one of them to sigmoid, which then started giving me something like 0.00000+e... as the loss.
I don't know what is wrong. Can anyone please point out what I am doing or assuming wrong?
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
print(df)
x = np.array(df['X'])
y = np.array(df['Y'])
s = np.array(df['S'])
def build_model():
    model = keras.Sequential()
    inputLayer = layers.Dense(64, activation='sigmoid', input_shape=[2])
    hiddenLayer = layers.Dense(64, activation='relu')
    outputLayer = layers.Dense(1)
    model.add(inputLayer)
    model.add(hiddenLayer)
    model.add(outputLayer)
    model.compile(optimizer='sgd', loss='mean_squared_error', metrics=['accuracy'])
    return model
model = build_model()
print(model.summary())
EPOCHS = 1000
# I didn't know how to provide mulitple input to my model for
# training so I checked stackoverflow here
# https://stackoverflow.com/questions/55233377/keras-sequential-model-with-multiple-inputs?noredirect=1&lq=1
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=EPOCHS, validation_split = 0.2, verbose=2)
print(history)
print(model.predict([[2,3],]))
Disclaimer: I am a beginner and I have just started using keras and python for the first time in my life.
It does work for smaller numbers with ReLU activation.
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
x = np.random.randint(0, 10, 1000)
y = np.random.randint(0, 10, 1000)
s = x*y
def build_model():
    model = keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=[2]))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=keras.optimizers.Adam(lr=0.01),
                  loss='mean_squared_error')
    return model

model = build_model()
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=250,
                    validation_split=0.2)

test_input = [2, 3]
print('\n{} x {} ='.format(*test_input),
      np.round(model.predict([test_input])[0][0]).astype(int))
2 x 3 = 6
SGD also works, but it requires standardization/normalization of the inputs, which somewhat defeats the purpose of your task, so I used Adam above. With the inputs scaled to [0, 1], though, SGD works as well:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
x = np.random.randint(0, 10, 1000)
y = np.random.randint(0, 10, 1000)
s = x*y
x = x/10
y = y/10
def build_model():
    model = keras.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=[2]))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=keras.optimizers.SGD(0.001), loss='mean_squared_error')
    return model

model = build_model()
merged_array = np.stack([x, y], axis=1)
history = model.fit(merged_array, s, epochs=250,
                    validation_split=0.2, batch_size=16)

test_input = [2/10, 3/10]
print('\n{} x {} ='.format(*map(lambda l: int(l*10), test_input)),
      np.round(model.predict([test_input])[0][0]).astype(int))
I noticed a couple of issues with your model:
Your input layer is not actually a separate input. You do not need a designated input layer in this case: the argument input_shape=[2] is sufficient to add a proper input in front of that layer.
You do not specify any batch size in the fit function. Batches are usually small subsets of your training and validation set (commonly powers of two such as 4, 8, 16, 32, ...). The weights are not backpropagated and adjusted (aka "learning") after every single sample but after each batch, which makes training faster. Since each of your input samples is just two floating-point numbers (I assume), you can choose a really high batch size like 1024 or higher. The batch size is one of the so-called hyperparameters, which affect your overall training success.
history = model.fit(merged_array, s, batch_size=1024, epochs=EPOCHS, validation_split=0.2, verbose=2)
During training you track the "accuracy" metric. As you are working on a regression problem, this does not help you estimate your model's performance (accuracy is used for classification problems); you can leave it out, as in the sketch below.
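A regression-appropriate compile call could look like the following sketch ('mae' is just one common choice of metric, not something taken from the question):
model.compile(optimizer='sgd',
              loss='mean_squared_error',
              metrics=['mae'])  # mean absolute error is interpretable for regression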
I cannot give you more specific advice without knowing more about the data you are using: how many data points you have and what kind of numbers you want to multiply (bounded between 0 and 10, floats or integers, ...).
Hope this helps so far (;

Why are accuracy and loss staying exactly the same while training?

So I have tried to modify the introductory tutorial from https://www.tensorflow.org/tutorials/keras/basic_classification to work with my own data. The goal is to classify images of dogs and cats. The code is very simple and given below. The problem is that the network does not seem to learn at all; training loss and accuracy stay the same after every epoch.
The images (X_training) and the labels (y_training) seem to have the right format:
X_training.shape returns: (18827, 80, 80, 3)
y_training is a one-dimensional list with entries in {0, 1}
I have checked several times that the "images" in X_training are correctly labeled:
if X_training[i,:,:,:] represents a dog, then y_training[i] returns 1; if X_training[i,:,:,:] represents a cat, then y_training[i] returns 0.
Shown below is the complete python file without the import statements.
#loading the data from 4 pickle files:
pickle_in = open("X_training.pickle","rb")
X_training = pickle.load(pickle_in)
pickle_in = open("X_testing.pickle","rb")
X_testing = pickle.load(pickle_in)
pickle_in = open("y_training.pickle","rb")
y_training = pickle.load(pickle_in)
pickle_in = open("y_testing.pickle","rb")
y_testing = pickle.load(pickle_in)
#normalizing the input data:
X_training = X_training/255.0
X_testing = X_testing/255.0
#building the model:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(80, 80, 3)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',loss='mean_squared_error',metrics=['accuracy'])
#running the model:
model.fit(X_training, y_training, epochs=10)
The code compiles and trains for 10 epochs, but neither loss nor accuracy improve, they stay exactly the same after every epoch.
The code works fine with the Fashion-MNIST dataset used in the tutorial, with slight changes accounting for the differences between multiclass and binary classification and the different input shape.
If you want to train a classification model, you must use binary_crossentropy as your loss function, not mean_squared_error, which is used for regression tasks.
replace
model.compile(optimizer='adam',loss='mean_squared_error',metrics=['accuracy'])
with
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
Furthermore, I would recommend not using relu activation on your dense layer, but linear.
replace
keras.layers.Dense(128, activation=tf.nn.relu),
with
keras.layers.Dense(128),
And of course, to better use the power of neural networks, add some convolutional layers before your Flatten layer, as sketched below.
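A minimal sketch of that suggestion applied to the question's model (the filter count and kernel size are illustrative assumptions; the follow-up answer below arrives at essentially the same architecture):
model = keras.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu, input_shape=(80, 80, 3)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(128),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])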
I have found a different implementation with a slightly more complex model that works.
Here is the complete code without the import statements:
#global variables:
batch_size = 32
nr_of_epochs = 64
input_shape = (80,80,3)
#loading the data from 4 pickle files:
pickle_in = open("X_training.pickle","rb")
X_training = pickle.load(pickle_in)
pickle_in = open("X_testing.pickle","rb")
X_testing = pickle.load(pickle_in)
pickle_in = open("y_training.pickle","rb")
y_training = pickle.load(pickle_in)
pickle_in = open("y_testing.pickle","rb")
y_testing = pickle.load(pickle_in)
#building the model
def define_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # compile model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
model = define_model()
#Possibility for image data augmentation
train_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_datagen = ImageDataGenerator(rescale=1./255.)
train_generator =train_datagen.flow(X_training,y_training,batch_size=batch_size)
val_generator = val_datagen.flow(X_testing,y_testing,batch_size= batch_size)
#running the model
history = model.fit_generator(train_generator, steps_per_epoch=len(X_training) // batch_size,
                              epochs=nr_of_epochs, validation_data=val_generator,
                              validation_steps=len(X_testing) // batch_size)

Different training and validation results with same input Keras

I am trying to re-train MobileNet for a different multi-class classification task, as follows:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input)

training_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    target_size=(parameters["img_width"], parameters["img_height"]),
    batch_size=parameters["batch_size"],
    class_mode="categorical",
    subset="training",
    color_mode="rgb",
    seed=42)

# Define the Model
base_model = MobileNet(weights='imagenet',
                       include_top=False, input_shape=(128, 128, 3))  # imports the MobileNet model and discards the last 1000-neuron layer
# Let only the last n layers be trainable
for layer in base_model.layers:
    layer.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(800,activation='relu')(x) #we add dense layers so that the model can learn more complex functions and classify for better results.
x = Dense(600,activation='relu')(x) #dense layer 2
x = Dropout(0.8)(x)
x = Dense(256,activation='relu')(x) #dense layer 3
x = Dropout(0.2)(x)
preds = Dense(N_classes, activation='softmax')(x) #final layer with softmax activation
model= Model(inputs = base_model.input, outputs = preds)
model.compile(optimizer = "Adam", loss='categorical_crossentropy', metrics=['accuracy'])
And I train it, passing the training generator itself as the validation data:
history = model.fit_generator(
    training_generator,
    steps_per_epoch=training_generator.n // parameters["batch_size"],
    epochs=parameters["epochs"],
    ##### VALIDATION SET = TRAINING
    validation_data=training_generator,
    validation_steps=training_generator.n // parameters["batch_size"],
    callbacks=[
        EarlyStopping(monitor="acc", patience=8, restore_best_weights=False),
        ReduceLROnPlateau(patience=3)]
)
However, while training I see significant differences between the training and validation accuracy, even though they are computed on the same dataset. What could this be due to?
Training a neural network involves randomly distributing the data from the training database, so the results are not exactly reproducible. If you're getting significant differences in accuracy, you may try to:
get a bigger training database;
retrain the network;
get a database with more consistent results.
LE: it doesn't matter much if you get significant differences in accuracy while training. Training is an iterative optimization process that minimizes the loss function, and it takes a while until this goal is achieved.
I do not know the EXACT reason, but I duplicated your problem. It happens because you are using the SAME generator instance for training and then again for validation. If you create a SEPARATE generator for validation that takes the same training data as input, then once you run enough epochs for the training accuracy to get into the 90% range, you will see the validation accuracy stabilize and converge toward the training accuracy, as sketched below the plot.
[Plot: training vs. validation accuracy over epochs]
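A minimal sketch of that fix, reusing the question's train_data_dir and parameters dictionary (the validation generator is a separate instance reading the same training data):
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)  # separate instance

training_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    target_size=(parameters["img_width"], parameters["img_height"]),
    batch_size=parameters["batch_size"],
    class_mode="categorical",
    seed=42)

validation_generator = val_datagen.flow_from_directory(
    directory=train_data_dir,          # same images, but a separate generator
    target_size=(parameters["img_width"], parameters["img_height"]),
    batch_size=parameters["batch_size"],
    class_mode="categorical",
    shuffle=False,
    seed=42)

history = model.fit_generator(
    training_generator,
    steps_per_epoch=training_generator.n // parameters["batch_size"],
    epochs=parameters["epochs"],
    validation_data=validation_generator,
    validation_steps=validation_generator.n // parameters["batch_size"])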

Increasing Accuracy of Image Classification Model

I'm trying to do image classification on two classes using the pre-trained Inception V3 model. I have a dataset of around 1400 images, which is roughly balanced. When I run my program I get results that are off for the first couple of epochs. Is this normal when training the model?
epochs = 175
batch_size = 64
#include_top = false to accomodate new classes
base_model = keras.applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(img_width, img_height, 3))
#Classifier Model ontop of Convolutional Model
model_top = keras.models.Sequential()
model_top.add(keras.layers.GlobalAveragePooling2D(input_shape=base_model.output_shape[1:], data_format=None))
model_top.add(keras.layers.Dense(350,activation='relu'))
model_top.add(keras.layers.Dropout(0.4))
model_top.add(keras.layers.Dense(1,activation = 'sigmoid'))
model = keras.models.Model(inputs = base_model.input, outputs = model_top(base_model.output))
#freeze the convolutional layers of InceptionV3
for layer in model.layers[:30]:
    layer.trainable = False

# Compiling model using Adam Optimizer
model.compile(optimizer=keras.optimizers.Adam(
                  lr=0.000001,
                  beta_1=0.9,
                  beta_2=0.999,
                  epsilon=1e-08),
              loss='binary_crossentropy',
              metrics=['accuracy'])
With my current parameters I only get an accuracy of 89% with a test loss of 0.3 when testing on a separated set of images. Do I need to add more layers to my model to increase this accuracy?
There are several issues with your code...
To start with, your way to build model_top is quite unconventional (and IMHO quite messy as well); in such cases, the documentation examples are your best friend. So, start with replacing your model_top part with:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(350, activation='relu')(x)
x = Dropout(0.4)(x)
predictions = Dense(1, activation='sigmoid')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
Notice that I have not changed your parameters of choice - you could certainly experiment with more units in the dense layer (the example in the docs uses 1024)...
Second, it is not clear why you choose to freeze only 30 layers of the InceptionV3, which has no less than 311 layers:
len(base_model.layers)
# 311
So, replace also this part with
for layer in base_model.layers:
    layer.trainable = False
Third, your learning rate seems way too small; the Adam optimizer is supposed to work well enough out of the box with its default parameters, so I also suggest compiling your model simply as:
model.compile(optimizer=keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])
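Putting the three suggestions together, a consolidated sketch (assuming the imports used elsewhere in this thread and the question's img_width / img_height variables) would look roughly like this:
base_model = keras.applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(img_width, img_height, 3))

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(350, activation='relu')(x)
x = Dropout(0.4)(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# freeze the whole convolutional base and train only the new head
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])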

Difference between fit and fit_generator when doing transfer learning in keras

I am trying to train a deep neural network using transfer learning in Keras with TensorFlow. There are different ways to do this. If your data is small, you can afford to compute the features with the pre-trained model for the entire dataset once and then use those features to train and test a small network; this is good because you don't need to re-compute those features for each batch at each epoch. However, if the data is large, it is impossible to compute the features for the entire dataset at once, so we use ImageDataGenerator, flow_from_directory and fit_generator. In this case the features are computed anew for each batch at each epoch, which makes things much slower. I was assuming that both approaches produce similar results in terms of accuracy and loss. The problem is that I took a small dataset, tried both approaches, and got completely different results. I would appreciate it if someone could tell me whether something is wrong in the provided code and/or why I am getting different results.
Approach when having large data-set:
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense
from keras.models import Model

datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = datagen.flow_from_directory('data/train',
                                              class_mode='categorical',
                                              batch_size=64, ...)
valid_generator = datagen.flow_from_directory('data/valid',
                                              class_mode='categorical',
                                              batch_size=64, ...)

base_model = InceptionV3(weights='imagenet', include_top=False)

x = base_model.output
x = Conv2D(filters=128, kernel_size=(2, 2))(x)
x = MaxPooling2D()(x)
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', ...)

model.fit_generator(generator=train_generator,
                    steps_per_epoch=len(train_generator),
                    validation_data=valid_generator,
                    validation_steps=len(valid_generator),
                    ...)
Approach when having small data-set:
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense
from keras.models import Sequential
from keras.utils import np_utils

base_model = InceptionV3(weights='imagenet', include_top=False)

train_features = base_model.predict(preprocess_input(train_data))
valid_features = base_model.predict(preprocess_input(valid_data))

model = Sequential()
model.add(Conv2D(filters=128, kernel_size=(2, 2),
                 input_shape=(train_features.shape[1],
                              train_features.shape[2],
                              train_features.shape[3])))
model.add(MaxPooling2D())
model.add(GlobalAveragePooling2D())
model.add(Dense(1024, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', ...)

model.fit(train_features, np_utils.to_categorical(y_train, 2),
          validation_data=(valid_features, np_utils.to_categorical(y_valid, 2)),
          batch_size=64, ...)
