I need a hint or two to get past my first pain point with transfer learning.
The following code is a stripped-down version of what I am actually trying to do, but it already shows the issues with the fake images I use here (A: empty / B: empty plus a small square). In the final version, the inputs will be much more complex images (which justifies the complexity of the chosen base model).
The problem looks simple. Input: two types of images; output: a binary classification ("square present: yes/no"). The modified ResNet50 model is fed with prepared training data via ImageDataGenerator. Since I can create any amount of fake data, there is no data augmentation step in the code.
Anyway, when I run the code, the displayed loss (for both the Adam and the SGD optimizer) doesn't seem to improve, and the accuracy quickly approaches the ratio of the number of examples in the two image classes (i.e. B/A). (Note: over the weekend I even trained for 500 epochs ... no change.)
I haven't been able to spot the reason for either of these (most likely connected) issues ... can you? Is it one of the hyper-parameters, is there an obvious glitch in the model setup or in some other part of the implementation? It's probably something silly, but after chasing it and playing around with more and more simplified versions, I am about to run out of ideas about what to try next.
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from random import randint
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.models import Model
from keras.applications import ResNet50
from keras.preprocessing.image import ImageDataGenerator
def modified_resnet_model():
    # load ResNet50 model excluding classification layers
    basemodel = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    # freeze model weights
    for layer in basemodel.layers:
        layer.trainable = False
    # add new classification head
    x = GlobalAveragePooling2D()(basemodel.output)
    x = Dense(128, activation='relu')(x)
    predictions = Dense(1, activation='softmax')(x)
    modresnet50model = Model(inputs=basemodel.input, outputs=predictions)
    # return the result
    return modresnet50model
def data_set_creator(numsamples, probpos, target_image_size=(224, 224)):
    dataset = {}
    image_stack = []
    immean = np.array([0.0, 0.0, 0.0])
    imstat = {}
    # first create target labels
    lbbuf = np.zeros((numsamples, 1))
    lbbuf[:int(probpos*numsamples)] = 1
    lbbuf = np.random.permutation(lbbuf)
    # second create matching "fake" images according to label stack
    for index in tqdm(range(numsamples)):
        # zero-labeled images are empty
        img = np.zeros((target_image_size[0], target_image_size[1], 3)).astype(np.float32)
        sh = 10
        if lbbuf[index]:
            # all others contain a square somewhere
            xp = randint(sh, target_image_size[0]-1-sh)
            yp = randint(sh, target_image_size[1]-1-sh)
            randval = 100  # randint(1, 255)
            # print('center: ({0:d},{1:d}); value: {2:d}'.format(xp, yp, randval))
            img[yp-sh:yp+sh, xp-sh:xp+sh, :] = randval
        # else:
        #     print(' --- ')
        # normalize image and add it to the image stack
        img /= 255.0  # normalize image
        image_stack.append(img)
        # update mean vector
        immean += cv2.mean(img)[:-1]
    # assemble data set
    imstat['mean'] = immean/numsamples
    image_stack = np.array(image_stack)
    dataset['images'] = image_stack
    dataset['imstat'] = imstat
    dataset['labels'] = lbbuf
    # return the result
    return dataset
if __name__ == '__main__':
    # define some parameters
    imagesize = (224, 224)
    nsamples = 10000
    pos_prob_train = 0.3
    probposval = pos_prob_train
    valfrac = 0.1  # use 10% of the data for validation
    batchsize = 24
    epochs = 30
    stepsperepoch = 100
    validationsteps = 25
    # ================================================================================
    # create training and validation data sets
    nst = int(nsamples*(1-valfrac))
    dataset_training = data_set_creator(nst, pos_prob_train, target_image_size=imagesize)
    dataset_validation = data_set_creator(nsamples-nst, probposval, target_image_size=imagesize)
    # subtract the mean (training data!) from all the images
    for ci in range(3):
        dataset_training['images'][:, :, :, ci] -= dataset_training['imstat']['mean'][ci]
        dataset_validation['images'][:, :, :, ci] -= dataset_training['imstat']['mean'][ci]
    # get the (modified) model
    model = modified_resnet_model()
    theoptimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    model.compile(optimizer=theoptimizer, loss='binary_crossentropy', metrics=['accuracy'])
    print(model.summary())
    # setup data input generators
    train_datagen = ImageDataGenerator()
    validation_datagen = ImageDataGenerator()
    train_generator = train_datagen.flow(dataset_training['images'],
                                         dataset_training['labels'],
                                         batch_size=batchsize)
    validation_generator = validation_datagen.flow(dataset_validation['images'],
                                                   dataset_validation['labels'],
                                                   batch_size=batchsize)
    # train the (modified) model
    history = model.fit_generator(train_generator, steps_per_epoch=stepsperepoch,
                                  epochs=epochs, validation_data=validation_generator,
                                  validation_steps=validationsteps)
    # visualize the training and validation performance
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    nepochs = range(1, len(acc)+1)
    plt.plot(nepochs, acc, 'bo', label='Training acc')
    plt.plot(nepochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()
    plt.savefig('trainval_acc.png')
    plt.figure()
    plt.plot(nepochs, loss, 'bo', label='Training loss')
    plt.plot(nepochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    plt.savefig('trainval_loss.png')
    plt.show()
I'm a student at the University of Bologna (Italy) and I'm using the Google Coral USB Accelerator for my thesis.
I built a Keras neural network that classifies my data into four classes, and the accuracy I get is roughly 97%.
I performed full-integer post-training quantization, since Keras networks don't support quantization-aware training. I followed the guide on the TensorFlow site, but I had problems running inference on the Edge TPU.
In particular, my network suffers an accuracy loss when the model is converted to a TensorFlow Lite one (the accuracy drops to roughly 25%). This is due to quantization, since the unquantized TensorFlow Lite model running on my PC is not affected by the conversion.
I tried to scale my input data to the range [0, 255] with MinMaxScaler, but in this case, even though the accuracy of the quantized TensorFlow Lite model matches that of the unconverted one, the results are not satisfactory, since the accuracy of the network itself is low.
I wonder if you could help me solve this problem. Perhaps the values in my dataset are too low, and the quantization fails to convert float32 to uint8 without information loss.
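As a rough illustration of that concern, here is a back-of-the-envelope check of what affine uint8 quantization does to values in roughly the range of my data; the calibration range below is just an assumption for illustration, the converter derives its own scale from the representative dataset:
import numpy as np

# A few values of about the magnitude found in my inputs (see the sample rows further below).
x = np.array([-0.283589, -0.0831421, 0.102882, 0.0300179], dtype=np.float32)

lo, hi = -0.3, 0.3                    # assumed calibration range (illustrative only)
scale = (hi - lo) / 255.0             # size of one uint8 step in float units (~0.0024)
zero_point = int(round(-lo / scale))  # uint8 value that represents 0.0

q = np.clip(np.round(x / scale + zero_point), 0, 255).astype(np.uint8)
x_back = (q.astype(np.float32) - zero_point) * scale

print(scale)       # the resolution available to represent the inputs
print(x - x_back)  # per-value rounding error introduced by the quantization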
Below you'll find my Python code.
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import RMSprop
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
import os
import time
import tflite_runtime.interpreter as tflite
import collections
import operator
"""Functions to work with classification models."""
Class = collections.namedtuple('Class', ['id', 'score'])
def input_tensor(interpreter):
    """Returns input tensor view as numpy array of shape (height, width, 3)."""
    tensor_index = interpreter.get_input_details()[0]['index']
    return interpreter.tensor(tensor_index)()[0]

def output_tensor(interpreter):
    """Returns dequantized output tensor."""
    output_details = interpreter.get_output_details()[0]
    output_data = np.squeeze(interpreter.tensor(output_details['index'])())  # remove single-dimensional entries from the shape of the array
    scale, zero_point = output_details['quantization']
    return scale * (output_data - zero_point)

def set_input(interpreter, data):
    """Copies data to input tensor."""
    input_tensor(interpreter)[:] = data
    return data

def get_output(interpreter, top_k=1, score_threshold=0.0):
    """Returns no more than top_k classes with score >= score_threshold."""
    scores = output_tensor(interpreter)
    classes = [
        Class(i, scores[i])
        for i in np.argpartition(scores, -top_k)[-top_k:]
        if scores[i] >= score_threshold
    ]
    return sorted(classes, key=operator.itemgetter(1), reverse=True)
#load the dataset
Modelli_Prova01_Nom01_Acc1L = loadtxt(r'/home/utente/Scrivania/csvtesi/Modelli_Prova01_Nom01_Acc1L.csv',delimiter=',')
Modelli_Prova02_Nom01_Acc1L = loadtxt(r'/home/utente/Scrivania/csvtesi/Modelli_Prova02_Nom01_Acc1L.csv',delimiter=',')
Modelli_Prova03_Nom01_Acc1L = loadtxt(r'/home/utente/Scrivania/csvtesi/Modelli_Prova03_Nom01_Acc1L.csv',delimiter=',')
Modelli_Prova04_Nom01_Acc1L = loadtxt(r'/home/utente/Scrivania/csvtesi/Modelli_Prova04_Nom01_Acc1L.csv',delimiter=',')
Modelli_Prova05_Nom01_Acc1L = loadtxt(r'/home/utente/Scrivania/csvtesi/Modelli_Prova05_Nom01_Acc1L.csv',delimiter=',')
time_start = time.perf_counter()
#split x and y data (train and test)
Acc1L01_train,Acc1L01_test = train_test_split(Modelli_Prova01_Nom01_Acc1L ,test_size=0.015,random_state=42)
Acc1L02_train,Acc1L02_test = train_test_split(Modelli_Prova02_Nom01_Acc1L,test_size=0.3,random_state=42)
Acc1L03_train,Acc1L03_test = train_test_split(Modelli_Prova03_Nom01_Acc1L,test_size=0.3,random_state=42)
Acc1L04_train,Acc1L04_test = train_test_split(Modelli_Prova04_Nom01_Acc1L,test_size=0.3,random_state=42)
Acc1L05_train,Acc1L05_test = train_test_split(Modelli_Prova05_Nom01_Acc1L,test_size=0.15,random_state=42)
Y1_train= np.zeros([len(Acc1L01_train)+len(Acc1L05_train),1])
Y2_train= np.ones([len(Acc1L02_train),1])
Y3_train= np.ones([len(Acc1L03_train),1]) +1
Y4_train= np.ones([len(Acc1L04_train),1]) +2
Y1_test= np.zeros([len(Acc1L01_test)+len(Acc1L05_test),1])
Y2_test= np.ones([len(Acc1L02_test),1])
Y3_test= np.ones([len(Acc1L03_test),1]) +1
Y4_test= np.ones([len(Acc1L04_test),1]) +2
xAcc1L_train = np.concatenate((Acc1L01_train,Acc1L05_train,Acc1L02_train,Acc1L03_train,Acc1L04_train),axis=0)
xAcc1L_train=MinMaxScaler([0,255]).fit_transform(xAcc1L_train)
#xAcc1L_train=StandardScaler().fit_transform(xAcc1L_train)
#xAcc1L_train=Normalizer().fit_transform(xAcc1L_train)
#xAcc1L_train=np.transpose(xAcc1L_train)
yAcc1L_train = np.concatenate((Y1_train,Y2_train,Y3_train,Y4_train),axis=0)
xAcc1L_test = np.concatenate((Acc1L01_test,Acc1L05_test,Acc1L02_test,Acc1L03_test,Acc1L04_test),axis=0)
xAcc1L_test=Normalizer().fit_transform(xAcc1L_test)
#xAcc1L_test=MinMaxScaler([0,255]).fit_transform(xAcc1L_test)
#xAcc1L_test=StandardScaler().fit_transform(xAcc1L_test)
#xAcc1L_test=np.transpose(xAcc1L_test)
yAcc1L_test = np.concatenate((Y1_test,Y2_test,Y3_test,Y4_test),axis=0)
#1 hot encode y
one_hot_labelsAcc1L =to_categorical(yAcc1L_train, num_classes=4)
one_hot_labelsAcc1L_test = to_categorical(yAcc1L_test, num_classes=4)
#fit the model
model = Sequential()
model.add(Dense(300, activation='relu', input_dim=30))
model.add(Dense(4, activation='softmax'))
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.summary()
es1 = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
es2 = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=100)
history=model.fit(xAcc1L_train, one_hot_labelsAcc1L,validation_data=(xAcc1L_test,one_hot_labelsAcc1L_test),epochs=500, batch_size=30, verbose=1, callbacks=[es1,es2])
#history=model.fit(tf.cast(xAcc1L_train, tf.float32), one_hot_labelsAcc1L,validation_data=(tf.cast(xAcc1L_test, tf.float32),one_hot_labelsAcc1L_test),epochs=500, batch_size=30, verbose=1, callbacks=[es1,es2])
time_elapsed = (time.perf_counter() - time_start)
print ("%5.1f secs " % (time_elapsed))
start=time.monotonic()
_, accuracy = model.evaluate(xAcc1L_test, one_hot_labelsAcc1L_test, batch_size=30, verbose=1)
#_, accuracy = model.evaluate(tf.cast(xAcc1L_test, tf.float32), one_hot_labelsAcc1L_test, batch_size=30, verbose=1)
print(accuracy)
inference_time = time.monotonic() - start
print('%.1fms ' % (inference_time * 1000))
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()
#predicted labels
predictions = model.predict(xAcc1L_test)
y_pred = (predictions > 0.5)
matrix = confusion_matrix(one_hot_labelsAcc1L_test.argmax(axis=1), y_pred.argmax(axis=1))
print('confusion matrix = \n',matrix)
print("Accuracy:",accuracy_score(one_hot_labelsAcc1L_test.argmax(axis=1), y_pred.argmax(axis=1)))
mod01=model.save('/home/utente/Scrivania/csvtesi/rete_Nom01.h5')
#convert the model
#representative dataset
train_ds = tf.data.Dataset.from_tensor_slices(
(tf.cast(xAcc1L_train, tf.float32))).batch(1)
print(train_ds)
def representative_dataset_gen():
    for input_value in train_ds:
        yield [input_value]
print(model.layers[0].input_shape)
#integer post-training quantization
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file('/home/utente/Scrivania/csvtesi/rete_Nom01.h5') #all operations mapped on edge tpu
#converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
print(converter.representative_dataset)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_quant_model = converter.convert()
open('/home/utente/Scrivania/csvtesi/rete_Nom01_quant.tflite', "wb").write(tflite_quant_model)
# compile the quantized tflite model for the Edge TPU
os.system("edgetpu_compiler \'/home/utente/Scrivania/csvtesi/rete_Nom01_quant.tflite'")
#interpret the model
interpreter = tf.lite.Interpreter('/home/utente/Scrivania/csvtesi/rete_Nom01_quant_edgetpu.tflite',experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])
interpreter.allocate_tensors()
idt=print(interpreter.get_input_details())
odt=print(interpreter.get_output_details())
for j in range(5):
    start = time.monotonic()
    o_test = np.arange(len(xAcc1L_test[:, 0]))
    o_test = o_test[:, np.newaxis]
    for i in range(len(xAcc1L_test[:, 0])):
        input = set_input(interpreter, xAcc1L_test[i, :])
        # print("inference input %s" % input)
        interpreter.invoke()
        classes = get_output(interpreter, 4)
        output = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])  # /255 with edgetpu
        # print("inference output %s" % output)
        # print("inference classes %s" % classes)
        a = np.array([one_hot_labelsAcc1L_test[i, :].argmax(axis=0)])
        b = np.array(output.argmax(axis=1))
        o_test[i] = b
        # if a == b:
        #     print('good classification')
        # else:
        #     print('bad classification')
    inference_time = time.monotonic() - start
    print('%.1fms ' % (inference_time * 1000))
    # print(o_test)
    print("Accuracy:", accuracy_score(yAcc1L_test, o_test))
My training input comes from CSV files and is a matrix of shape (1756, 30); my test input is a matrix of shape (183, 30).
This is what the data looks like (first two rows):
[[-0.283589 -0.0831421 -0.199936 -0.144523 -0.215593 -0.199029 0.0300179 -0.0299262 -0.0759612 -0.0349733 0.102882 -0.00470235 -0.14267 -0.116636 -0.0842867 -0.124638 -0.107917 -0.0995006 -0.222817 -0.256093 -0.121859 -0.130829 -0.186091 -0.174511 -0.0715493 -0.0595195 -0.054914 -0.0362971 -0.0286576 -0.0409128],
[-0.226151 -0.0386177 -0.16834 -0.0768908 -0.166611 -0.161028 0.0493133 -0.00515959 -0.0362308 -0.00723895 0.105943 -0.010825 -0.142335 -0.10863 -0.0634201 -0.112928 -0.0927994 -0.0556194 -0.180721 -0.218341 -0.0934449 -0.100047 -0.134569 -0.119806 -0.0265749 -0.044841 -0.0538225 -0.017408 -0.00528171 -0.0248457]]
I've just started experimenting a bit with TensorFlow, but I feel like I'm having a hard time grasping the concepts. I'm currently focusing on the MNIST dataset, but only on 8000 images for training and 2000 for testing. The little code snippet I currently have is:
from keras.layers import Input, Dense, initializers
from keras.models import Model
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras import optimizers, losses
import tensorflow as tf
import keras.backend as K
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
low_dim = 32
def autoencoder():
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    input = Input(shape=(num_features,))
    encoded = Dense(low_dim, activation='relu', kernel_initializer=w)(input)
    decoded = Dense(num_features, activation='sigmoid', kernel_initializer=w)(encoded)
    autoencoder = Model(input, decoded)
    adam = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    autoencoder.compile(optimizer=adam, loss='binary_crossentropy')
    autoencoder.fit(d.X_train, d.X_train,
                    epochs=50,
                    batch_size=64,
                    shuffle=True,
                    )
    encoded_imgs = autoencoder.predict(d.X_test)
    decoded_imgs = autoencoder.predict(encoded_imgs)
    # sess = tf.InteractiveSession()
    # error = losses.mean_absolute_error(decoded_imgs[0], d.X_train[0])
    # print(error.eval())
    # print(decoded_imgs.shape)
    # sess.close()
    n = 20  # how many digits we will display
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display original
        # sess = tf.InteractiveSession()
        error = losses.mean_absolute_error(decoded_imgs[n], d.X_test[n])
        # print(error.eval())
        # print(decoded_imgs.shape)
        # sess.close()
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(d.X_test[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # display reconstruction
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    # print(error)
    plt.show()
    return error
What I want to do is store the error as a list that I can later print or plot in a graph. How do you do this efficiently with TensorFlow/Keras? Thanks in advance.
You can store the errors in a CSV file by using the CSVLogger callback. Here is a code snippet for this task.
from keras.callbacks import CSVLogger
# define callbacks
callbacks = [CSVLogger(path_csv_logger, separator=';', append=True)]
# pass the callbacks to model.fit() or model.fit_generator()
model.fit_generator(
    train_batch, train_steps, epochs=10, callbacks=callbacks,
    validation_data=validation_batch, validation_steps=val_steps)
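To later print or plot what was logged, the CSV file can be read back, for example with pandas. This is a minimal sketch assuming the same path_csv_logger and separator as above; the loss/val_loss column names are the ones CSVLogger writes when validation data is used:
import pandas as pd
import matplotlib.pyplot as plt

# Read the training log written by CSVLogger and plot the per-epoch losses.
log = pd.read_csv(path_csv_logger, sep=';')
plt.plot(log['epoch'], log['loss'], label='training loss')
plt.plot(log['epoch'], log['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.legend()
plt.show()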
EDIT: For storing the errors in a list, you can use something like this:
# source https://keras.io/callbacks/
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
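A minimal usage sketch: model, x_train and y_train are placeholders for your own objects, and LossHistory is the class defined just above.
import matplotlib.pyplot as plt

# Collect per-batch training losses with the callback defined above.
loss_history = LossHistory()
model.fit(x_train, y_train, epochs=50, batch_size=64, callbacks=[loss_history])

# loss_history.losses is now a plain Python list you can print or plot.
print(loss_history.losses[:10])
plt.plot(loss_history.losses)
plt.xlabel('batch')
plt.ylabel('loss')
plt.show()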
I am using PyCharm Community Edition and Python 3.7. Via Anaconda, I have installed the Tensorflow machine learning package.
I am following the Google TensorFlow regression tutorial here, but I am getting limited output. Only the numerical results and the plot are being displayed, on the output console and in a separate window respectively.
The data table mentioned in the linked tutorial, coded as:
# Prints the stored data table in a formatted fashion (ERROR)
df = pd.DataFrame(train_data, columns = column_names)
df.head()
doesn't seem to generate its own window, as the tutorial suggests. How do I display this table in a new window, just like the plot is displayed?
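To illustrate, here is a minimal standalone sketch (with placeholder values, not the Boston housing data) of the difference between just calling df.head() and explicitly rendering it; matplotlib's table is one possible way to get a separate window, though I don't know whether that is the intended approach:
import pandas as pd
import matplotlib.pyplot as plt

column_names = ['CRIM', 'ZN', 'INDUS']              # placeholder subset of the real columns
train_data = [[0.02, 18.0, 2.3], [0.03, 0.0, 7.1]]  # placeholder values
df = pd.DataFrame(train_data, columns=column_names)

df.head()         # in a plain script, this on its own renders nothing
print(df.head())  # this at least prints the table to the console

fig, ax = plt.subplots()  # one possible way to put the table into its own window
ax.axis('off')
ax.table(cellText=df.head().values, colLabels=df.columns, loc='center')
plt.show()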
Here is my main.py:
# Importing the different libraries and packages
# Machine Learning is mainly executed by the TENSORFLOW LIBRARY
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
TFVersion = tf.__version__
newline = "\n"
print(newline)
print("Current version of TensorFlow: ", TFVersion)
# Downloading the Boston Housing Data Set - it is already present in the keras
# NOTE: This will be referred to as the "BHD"
boston_housing = keras.datasets.boston_housing
# Initializing the training + testing data and labels as per the information suggested in the BHD
(train_data, train_labels), (test_data, test_labels) = boston_housing.load_data()
# Shuffle the training set in order to assure randomness - this condition is required for any statistical analysis
order = np.argsort(np.random.random(train_labels.shape))
train_data = train_data[order]
train_labels = train_labels[order]
# Printing the training and testing data sets (the .shape member function gets the examples and feature frequency
# from the train_data vector instance)
print("Training set: {}".format(train_data.shape)) # 404 examples, 13 features
print("Testing set: {}".format(test_data.shape)) # 102 examples, 13 features
# Initializing the variables/attributes for the data-set
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
'TAX', 'PTRATIO', 'B', 'LSTAT']
# Prints the stored data table in a formatted fashion (ERROR)
df = pd.DataFrame(train_data, columns = column_names)
df.head()
# Display first 10 entries
print(train_labels[0:10])
# TEST DATA is *not* used when calculating the mean and std
# Normalize data - these are basically z-scores
mean = train_data.mean(axis = 0)
std = train_data.std(axis = 0)
train_data = (train_data - mean)/std
test_data = (test_data - mean)/std
print(train_data[0]) # First training sample, normalized
def build_model():
    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu,
                           input_shape=(train_data.shape[1],)),
        keras.layers.Dense(64, activation=tf.nn.relu),
        keras.layers.Dense(1)
    ])
    optimizer = tf.train.RMSPropOptimizer(0.001)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae'])
    return model
model = build_model()
model.summary()
# Display training progress by printing a single dot for each completed epoch
class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')
EPOCHS = 500
# Store training stats
history = model.fit(train_data, train_labels, epochs=EPOCHS,
validation_split=0.2, verbose=0,
callbacks=[PrintDot()])
import matplotlib.pyplot as plt
plt.interactive(False)
def plot_history(history):
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [1000$]')
    plt.plot(history.epoch, np.array(history.history['mean_absolute_error']),
             label='Train Loss')
    plt.plot(history.epoch, np.array(history.history['val_mean_absolute_error']),
             label='Val loss')
    plt.legend()
    plt.ylim([0, 5])
plot_history(history)
[loss, mae] = model.evaluate(test_data, test_labels, verbose=0)
print("Testing set Mean Abs Error: ${:7.2f}".format(mae * 1000))
test_predictions = model.predict(test_data).flatten()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [1000$]')
plt.ylabel('Predictions [1000$]')
plt.axis('equal')
plt.xlim(plt.xlim())
plt.ylim(plt.ylim())
_ = plt.plot([-100, 100], [-100, 100])
error = test_predictions - test_labels
plt.hist(error, bins=50)
plt.xlabel("Prediction Error [1000$]")
_ = plt.ylabel("Count")
plt.show()
# https://www.youtube.com/watch?v=voLSnXi4hAI
I have been trying to replicate the same simple network structure in MATLAB and Keras, but the results are very different. The MATLAB code reaches an accuracy near 0.84 and a loss near 17, while the Keras code reaches an accuracy near 0.63 and a loss near 130, even though Keras trains for twice as many epochs on the same data. The difference seems too big to be just a matter of implementation, so I think I'm missing something.
The original code is from a MATLAB example, in which I have made a small change to avoid normalization in the first layer.
Here is the MATLAB code:
% Load the digit training set as 4-D array data using
% |digitTrain4DArrayData|.
[trainImages,~,trainAngles] = digitTrain4DArrayData;
disp("Train Images:")
disp(trainImages(:,:,:,1))
% Display 20 random sample training digits using |imshow|.
numTrainImages = size(trainImages,4);
figure
idx = randperm(numTrainImages,20);
for i = 1:numel(idx)
    subplot(4,5,i)
    imshow(trainImages(:,:,:,idx(i)))
    drawnow
end
%%
% Combine all the layers together in a |Layer| array.
layers = [ ...
imageInputLayer([28 28 1], 'Normalization', 'none')
convolution2dLayer(12,25)
reluLayer
fullyConnectedLayer(1)
regressionLayer];
%% Train Network
options = trainingOptions('sgdm','InitialLearnRate',0.001, ...
'MaxEpochs',15)
net = trainNetwork(trainImages,trainAngles,layers,options)
net.Layers
%% Test Network
[testImages,~,testAngles] = digitTest4DArrayData;
predictedTestAngles = predict(net,testImages);
% *Evaluate Performance*
predictionError = testAngles - predictedTestAngles;
thr = 10;
numCorrect = sum(abs(predictionError) < thr);
numTestImages = size(testImages,4);
accuracy = numCorrect/numTestImages
%%
% Use the root-mean-square error (RMSE) to measure the differences between
% the predicted and actual angles of rotation.
squares = predictionError.^2;
rmse = sqrt(mean(squares))
%%
% *Display Box Plot of Residuals for Each Digit Class*
residuals = testAngles - predictedTestAngles;
residualMatrix = reshape(residuals,500,10);
figure
boxplot(residualMatrix, ...
'Labels',{'0','1','2','3','4','5','6','7','8','9'})
xlabel('Digit Class')
ylabel('Degrees Error')
title('Residuals')
idx = randperm(numTestImages,49);
for i = 1:numel(idx)
    image = testImages(:,:,:,idx(i));
    predictedAngle = predictedTestAngles(idx(i));
    imagesRotated(:,:,:,i) = imrotate(image,predictedAngle,'bicubic','crop');
end
figure
subplot(1,2,1)
montage(testImages(:,:,:,idx))
title('Original')
subplot(1,2,2)
montage(imagesRotated)
title('Corrected')
Here is the Keras code:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, Activation, Conv2D, BatchNormalization, AveragePooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras import regularizers
np.random.seed(1671) # for reproducibility
# network and training
NB_EPOCH = 30
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
OPTIMIZER = SGD() # SGD optimizer, explained later in this chapter
N_HIDDEN = 128
VALIDATION_SPLIT=0.2 # how much TRAIN is reserved for VALIDATION
data = scipy.io.loadmat('RegressionImageData.mat')
XTrain = np.rollaxis(data['XTrain'],3,0)
XTest = np.rollaxis(data['XTest'],3,0)
YTest = np.squeeze(data['YTest'])
YTrain = np.squeeze(data['YTrain'])
print("Train Images:")
print(XTrain.shape)
print(type(XTrain))
print(XTrain)
XTrain_test = np.reshape(XTrain, (5000,28,28))
with open("./test.txt", "a+") as file:
np.set_printoptions(threshold=np.nan)
file.write(np.array2string(XTrain_test[0], max_line_width=np.inf))
model = Sequential()
model.add(Conv2D(25,(12,12), input_shape=(28,28,1), strides=(1,1), activation = "relu"))
model.add(Flatten())
model.add(Dense(1))
model.summary()
sgd = SGD(lr=0.001, decay=0.1, momentum=0.9, nesterov=False)
model.compile(loss='mean_squared_error', optimizer=sgd)
history = model.fit(XTrain, YTrain,
batch_size=BATCH_SIZE, epochs=NB_EPOCH,
verbose=VERBOSE, validation_split=VALIDATION_SPLIT,
shuffle=False)
predictions= model.predict(XTrain)
[np.transpose(predictions[1:50]), np.transpose(YTrain[1:50]), np.abs(np.transpose(predictions[1:50])- np.transpose(YTrain[1:50]))]
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('rmse')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
Ypred_test= model.predict(XTest)
Ypred_test=np.reshape(Ypred_test, (5000,))
predictionError = Ypred_test - YTest
thr = 10;
numCorrect = np.sum((np.abs(predictionError) < thr)*1)
numValidationImages = len(YTest)
accuracy = numCorrect/numValidationImages
print(accuracy)
squares = np.power(predictionError,2)
rmse = np.sqrt(np.mean(squares))
print(rmse)
Does anyone know where the gap could be?
I have programmed a GAN model using Keras, but the training doesn't go well. The generator model always returns bare noise images (28x28) instead of something resembling the MNIST data. It doesn't raise any error, though. When it comes to training, the discriminator model becomes trainable=False, which is not what I want.
If this implementation is bad, please let me know. Can anyone help?
import os
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization
from keras.optimizers import SGD, Adam, RMSprop
from keras.datasets import mnist
from keras.regularizers import l1_l2
def plot_generated(noise, Generator):
    image_fake = Generator.predict(noise)
    plt.figure(figsize=(10,8))
    plt.show()
    plt.close()
def plot_metrics(metrics, epoch=None):
    plt.figure(figsize=(10,8))
    plt.plot(metrics['d'], label='discriminative loss', color='b')
    plt.legend()
    plt.show()
    plt.close()
    plt.figure(figsize=(10,8))
    plt.plot(metrics['g'], label='generative loss', color='r')
    plt.legend()
    plt.show()
    plt.close()
def Generator():
    model = Sequential()
    LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
    model.add(Dense(input_dim=100, units=128, activation=LeakyReLU, name='g_input'))
    model.add(Dense(input_dim=128, units=784, activation='tanh', name='g_output'))
    return model
def Discriminator():
    model = Sequential()
    LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
    model.add(Dense(input_dim=784, units=128, activation=LeakyReLU, name='d_input'))
    model.add(Dense(input_dim=128, units=1, activation='sigmoid', name='d_output'))
    model.compile(loss='binary_crossentropy', optimizer='Adam')
    return model
def Generative_Adversarial_Network(Generator, Discriminator):
    model = Sequential()
    model.add(Generator)
    model.add(Discriminator)
    # train only generator in the entire GAN architecture
    Discriminator.trainable = False
    model.compile(loss='binary_crossentropy', optimizer='Adam')
    return model
def Training(z_input_size, Generator, Discriminator, GAN, loss_dict, X_train, epoch, batch, smooth):
    for e in range(epoch):
        # z: noise, used as input of G to generate a fake image based on this noise; it's like a seed
        noise = np.random.uniform(-1, 1, size=[batch, z_input_size])
        image_fake = Generator.predict_on_batch(noise)
        # sample real images from the dataset
        rand_train_index = np.random.randint(0, X_train.shape[0], size=batch)
        image_real = X_train[rand_train_index, :]
        # concatenate real and fake images
        """
        X = [
            image_real => label : 1 (we can multiply a smoothing factor)
            image_fake => label : 0
        ]
        """
        X = np.vstack((image_real, image_fake))
        y = np.zeros(len(X))
        # putting label "1" to image_real
        y[len(image_real):] = 1*(1 - smooth)
        y = y.astype(int)
        # train only the discriminator
        d_loss = Discriminator.train_on_batch(x=X, y=y)
        # NOTE: remember?? we set the discriminator OFF during the training of the GAN!
        # So we can safely train only the generator; the discriminator weights stay fixed!
        g_loss = GAN.train_on_batch(x=noise, y=y[len(noise):])
        loss_dict['d'].append(d_loss)
        loss_dict['g'].append(g_loss)
        if e % 1000 == 0:
            plt.imshow(image_fake)
            plt.show()
            plot_generated(noise, Generator)
            plot_metrics(loss_dict)
    return "done!"
Gen = Generator()
Dis = Discriminator()
GAN = Generative_Adversarial_Network(Gen, Dis)
GAN.summary()
Gen.summary()
Dis.summary()
gan_losses = {"d":[], "g":[], "f":[]}
epoch = 30000
batch = 1000
smooth = 0.9
z_input_size = 100
row, col = 28, 28
examples = 9  # number of noise vectors in the grid (implied by the reshape below)
z_group_matrix = np.random.uniform(0, 1, examples*z_input_size)
z_group_matrix = z_group_matrix.reshape([9, z_input_size])
print(z_group_matrix.shape)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train.reshape(X_train.shape[0], row*col), X_test.reshape(X_test.shape[0], row*col)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train, X_test = X_train/255, X_test/255
print('X_train shape: ', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Training(z_input_size, Gen, Dis, GAN, loss_dict=gan_losses, X_train=X_train, epoch=epoch, batch=batch, smooth=smooth)
The model itself is correct.
I would suggest a few minor changes:
A smoothing factor of 0.9 is too much; make it closer to 0.1.
The leak factor you have is 0.2; usually it's a very small value close to 0, around 0.01 or 0.02.
A batch size of around 400.
Around 2000 epochs.
And finally, early stopping with a somewhat large threshold; a rough sketch follows below.
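For concreteness, a minimal sketch of how these suggestions could be plugged into the code above. The exact numbers (patience, min_delta) are illustrative assumptions, not tuned values, and should_stop is a hypothetical helper for the manual train_on_batch loop, since the built-in EarlyStopping callback only hooks into model.fit():
import numpy as np

# Rough values from the suggestions above (illustrative, not tuned):
smooth = 0.1        # label-smoothing factor (was 0.9)
leak_alpha = 0.02   # LeakyReLU slope to use in Generator()/Discriminator() (was 0.2)
batch = 400
epoch = 2000

def should_stop(losses, patience=100, min_delta=1e-2):
    """Manual early stopping for the train_on_batch loop: stop once the loss has
    not improved by at least min_delta for patience consecutive epochs."""
    if len(losses) <= patience:
        return False
    best_before = min(losses[:-patience])
    return min(losses[-patience:]) > best_before - min_delta

# Inside Training(), one could then check after each epoch, e.g.:
#     if should_stop(loss_dict['g']):
#         break
The check on loss_dict['g'] simply reuses the generator-loss list that Training() already collects.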