I thought that pre-built models were supposed to be high of accuracy simply by changing the last layer to accommodate your CNN needs. I am unsure of why I receive such a low validated accuracy when fitting the model. Any layer suggestions or what I should do to get my accuracy to 80%? I am attempting to decide whether a plane is an Airbus or a Boeing aircraft. Could it be the extremely low resolution image sizes?
40% Fitted Example:
Epoch 10/10
20/149 [===>..........................] - ETA: 24s - loss: 0.6908 - accuracy: 0.5500
40/149 [=======>......................] - ETA: 21s - loss: 0.6916 - accuracy: 0.5250
60/149 [===========>..................] - ETA: 17s - loss: 0.6918 - accuracy: 0.5167
80/149 [===============>..............] - ETA: 13s - loss: 0.6917 - accuracy: 0.5125
100/149 [===================>..........] - ETA: 9s - loss: 0.6918 - accuracy: 0.5200
120/149 [=======================>......] - ETA: 5s - loss: 0.6924 - accuracy: 0.5167
140/149 [===========================>..] - ETA: 1s - loss: 0.6924 - accuracy: 0.5071
149/149 [==============================] - 33s 225ms/step - loss: 0.6925 - accuracy: 0.5034 - val_loss: 0.6965 - val_accuracy: 0.4706
Here is the full script:
Any idea of what is going wrong?
from keras.applications.resnet50 import ResNet50
from keras.applications import VGG19
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from sklearn.utils import shuffle
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import models
from keras import layers
import matplotlib as plt
import cv2
import os
'''Resnet-50 classifier that attempts to predict whether a photo of an aircraft is the type of a Boeing or an airbus'''
boeing_dir = '#' # Paths of images/folders used to create training data
airbus_dir = '#'
conv_base = ResNet50(weights='imagenet', input_shape=(224, 224, 3))
model = models.Sequential()
model.add(conv_base)
for layer in model.layers:
layer.trainable = False
model.add(layers.Dropout(0.15))
model.add(layers.GaussianNoise(0.15))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))
model.layers[0].trainable = False
print('model constructed')
boeing_data = []
boeing_label = []
airbus_data = []
airbus_label = []
'''iterate through each file, resize, append to variable accordingly'''
for filename in os.listdir(boeing_dir):
if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
path_b = os.path.join(boeing_dir, filename)
im = cv2.imread(path_b)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224))
boeing_data.append(im)
boeing_label.append(0)
for filename in os.listdir(airbus_dir):
if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
path_b = os.path.join(airbus_dir, filename)
im = cv2.imread(path_b)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224))
airbus_data.append(im)
airbus_label.append(1)
training_data = boeing_data + airbus_data #Concadenate Boeing and Airbus data
training_label = boeing_label + airbus_label
training_data = np.array(training_data)
training_label = np.asarray(training_label) # Turn Data into numpy arrays
training_data, training_label = shuffle(training_data, training_label) # Shuffle
print(training_data.shape)
print(training_label)
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(training_data, training_label, epochs=10, batch_size=20, validation_split=0.1, verbose=1)
print('finished training')
Related
I'm trying to train a simple model for the Yelp binary classification task.
Load BERT encoder:
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12"
bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
bert_config = bert.configs.BertConfig.from_dict(config_dict)
_, bert_encoder = bert.bert_models.classifier_model(
bert_config, num_labels=2)
checkpoint = tf.train.Checkpoint(model=bert_encoder)
checkpoint.restore(
os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()
Load data:
data, info = tfds.load('yelp_polarity_reviews', with_info=True, batch_size=-1, as_supervised=True)
train_x_orig, train_y_orig = tfds.as_numpy(data['train'])
train_x = encode_examples(train_x_orig)
train_y = train_y_orig
Use BERT to embed the data:
encoder_output = bert_encoder.predict(train_x)
Setup the model:
inputs = keras.Input(shape=(768,))
x = keras.layers.Dense(64, activation='relu')(inputs)
x = keras.layers.Dense(8, activation='relu')(x)
outputs = keras.layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
sgd = SGD(lr=0.0001)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
Train:
model.fit(encoder_output[0], train_y, batch_size=64, epochs=3)
# encoder_output[0].shape === (10000, 1, 768)
# y_train.shape === (100000,)
Training results:
Epoch 1/5
157/157 [==============================] - 1s 5ms/step - loss: 0.6921 - accuracy: 0.5455
Epoch 2/5
157/157 [==============================] - 1s 5ms/step - loss: 0.6918 - accuracy: 0.5455
Epoch 3/5
157/157 [==============================] - 1s 5ms/step - loss: 0.6915 - accuracy: 0.5412
Epoch 4/5
157/157 [==============================] - 1s 5ms/step - loss: 0.6913 - accuracy: 0.5407
Epoch 5/5
157/157 [==============================] - 1s 5ms/step - loss: 0.6911 - accuracy: 0.5358
I tried different learning rates, but the main issue seems that training takes 1 second and the accuracy stays at ~0.5. Am I not setting the inputs/model correctly?
Your BERT model is not training. It has to be placed before dense layers and train as part of the model. the input layer has to take not BERT vectors, but the sequence of tokens cropped to max_length and padded. Here is the example code: https://keras.io/examples/nlp/text_extraction_with_bert/, see the beginning of create_model function.
Alternatively, you can use Trainer from transformers.
I wanted to learn more about machine learning / deep learning so I have been attempting to solve the Kaggle Diabetic Retinopathy competition as a learning experience. However, my Keras model's accuracy and loss function do not seem to improve.
I downloaded the Diabetic Retinopathy dataset. Balanced the classes and created equally distributed batches of 100 images per batch. I have tried many combinations of parameterisation like more Epochs, different learning rates, complexer models, whatsoever. They all seem to have no effects. So here is my code.
My imports:
from tqdm import tqdm
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras import backend as K
K.tensorflow_backend._get_available_gpus()
My parameters:
HEIGHT = 512
WIDTH = 512
DEPTH = 3
inputShape = (HEIGHT, WIDTH, DEPTH)
NUM_CLASSES = 5
EPOCHS = 15
INIT_LR = 0.001
BS = 1
I check the batches in a given directory:
''' read batches '''
train_dir = '/DATA/npy_data/train_dir/'
batch_path_list = []
for batch in tqdm(os.listdir(train_dir)):
batch_full_path = os.path.join(os.path.sep, train_dir, batch)
batch_path_list.append(str(batch_full_path))
AMOUNT_OF_BATCHES = len(batch_path_list)
if AMOUNT_OF_BATCHES == 0:
print('We found no batches. Either no data or wrong directory...')
if AMOUNT_OF_BATCHES != 0:
print('We found ' + str(AMOUNT_OF_BATCHES) + ' batches.')
I read the CSV file to obtain the labels
''' read csv labels '''
csv_dir = '/DATA/data/trainLabels_normalised.csv'
dataframe = pd.read_csv(csv_dir, sep=',')
patientIDList = []
for index, row in dataframe.iterrows():
patientID = row[0] + ''
patientID = patientID.replace('_right', '')
patientID = patientID.replace('_left', '')
dataframe.at[index, 'PatientID'] = patientID
patientIDList.append(patientID)
I create and compile my model
model = Sequential(name='test')
model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=inputShape))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(activation='softmax', units=5))
opt = optimizers.SGD(decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy", "mse"])
checkpointer = ModelCheckpoint(filepath="/home/user/Desktop/code/model/best_weights.hdf5",
verbose=1,
save_best_only=True)
I load a batch and join the labels with the 100 images from the batch.
''' load batches '''
for item in batch_path_list:
batch_data = np.load(item).tolist()
df1 = dataframe[dataframe['image'].isin(batch_data)]
imageNameArr = []
dataArr = []
for index, row in df1.iterrows():
key = str(row[0])
if key in batch_data:
imageNameArr.append(key)
dataArr.append(batch_data[key])
df2 = pd.DataFrame({'image': imageNameArr, 'data': dataArr})
for idx in range(0, len(df1)):
if (df1.loc[df1.index[idx], 'image'] != df2.loc[df2.index[idx], 'image']):
print("Error " + df1.loc[df1.index[idx], 'image'] + "==" + df2.loc[df2.index[idx], 'image'])
merged_files = pd.merge(df2, df1, left_on='image', right_on='image', how='outer')
I generate splits
train_ids, valid_ids = train_test_split(patientIDList, test_size=0.25, random_state=10)
traindf = merged_files[merged_files.PatientID.isin(train_ids)] #data (data) image (img name) level (fase)
valSet = merged_files[merged_files.PatientID.isin(valid_ids)]
trainX = traindf['data']
trainY = traindf['level']
valX = valSet['data']
valY = valSet['level']
trainY = to_categorical(trainY, num_classes=NUM_CLASSES)
valY = to_categorical(valY, num_classes=NUM_CLASSES)
Xtrain = np.zeros([trainX.shape[0], HEIGHT, WIDTH, DEPTH])
Xval = np.zeros([valX.shape[0], HEIGHT, WIDTH, DEPTH])
I use a generator and call the fit function.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode="nearest")
model.fit_generator(aug.flow(Xtrain, trainY,
batch_size=BS),
validation_data=(Xval, valY),
steps_per_epoch=(len(trainX) // BS),
epochs=EPOCHS,
verbose=1,
callbacks=[checkpointer])
However, this results in a very low accuracy and does not seem to improve over 29 batches.
Result:
54/87 [=================>............] - ETA: 0s - loss: 1.6092 - acc: 0.2037 - mean_squared_error: 0.1599
56/87 [==================>...........] - ETA: 0s - loss: 1.6089 - acc: 0.2143 - mean_squared_error: 0.1598
58/87 [===================>..........] - ETA: 0s - loss: 1.6169 - acc: 0.2069 - mean_squared_error: 0.1605
60/87 [===================>..........] - ETA: 0s - loss: 1.6146 - acc: 0.2167 - mean_squared_error: 0.1602
62/87 [====================>.........] - ETA: 0s - loss: 1.6172 - acc: 0.2097 - mean_squared_error: 0.1605
64/87 [=====================>........] - ETA: 0s - loss: 1.6196 - acc: 0.2031 - mean_squared_error: 0.1607
66/87 [=====================>........] - ETA: 0s - loss: 1.6180 - acc: 0.2121 - mean_squared_error: 0.1605
68/87 [======================>.......] - ETA: 0s - loss: 1.6164 - acc: 0.2206 - mean_squared_error: 0.1604
70/87 [=======================>......] - ETA: 0s - loss: 1.6144 - acc: 0.2286 - mean_squared_error: 0.1602
72/87 [=======================>......] - ETA: 0s - loss: 1.6163 - acc: 0.2222 - mean_squared_error: 0.1604
74/87 [========================>.....] - ETA: 0s - loss: 1.6134 - acc: 0.2297 - mean_squared_error: 0.1601
76/87 [=========================>....] - ETA: 0s - loss: 1.6102 - acc: 0.2368 - mean_squared_error: 0.1598
78/87 [=========================>....] - ETA: 0s - loss: 1.6119 - acc: 0.2308 - mean_squared_error: 0.1600
80/87 [==========================>...] - ETA: 0s - loss: 1.6159 - acc: 0.2250 - mean_squared_error: 0.1604
82/87 [===========================>..] - ETA: 0s - loss: 1.6150 - acc: 0.2195 - mean_squared_error: 0.1603
84/87 [===========================>..] - ETA: 0s - loss: 1.6206 - acc: 0.2143 - mean_squared_error: 0.1608
86/87 [============================>.] - ETA: 0s - loss: 1.6230 - acc: 0.2093 - mean_squared_error: 0.1610
87/87 [==============================] - 3s 31ms/step - loss: 1.6234 - acc: 0.2069 - mean_squared_error: 0.1610 - val_loss: 1.6435 - val_acc: 0.1282 - val_mean_squared_error: 0.1629
Epoch 00015: val_loss did not improve from 1.57533
Suggestions and feedback to improve my model are highly appreciated!
I am learning how to train a keras neural network on the MNIST dataset. However, when I run this code, I get only 10% accuracy after 10 epochs of training. This means that the neural network is predicting only one class, since there are 10 classes. I am sure it is a bug in data preparation rather than a problem with the network architecture, because I got the architecture off of a tutorial (medium tutorial). Any idea why the model is not training?
My code:
from skimage import io
import numpy as np
from numpy import array
from PIL import Image
import csv
import random
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
from keras.utils import multi_gpu_model
import tensorflow as tf
train_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
directory="./trainingSet",
class_mode="categorical",
target_size=(50, 50),
color_mode="rgb",
batch_size=1,
shuffle=True,
seed=42
)
print(str(train_generator.class_indices) + " class indices")
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras import backend as K
from keras.layers import Input
from keras.models import Model
import keras
from keras.layers.normalization import BatchNormalization
K.clear_session()
K.set_image_dim_ordering('tf')
reg = keras.regularizers.l1_l2(1e-5, 0.0)
def conv_layer(channels, kernel_size, input):
output = Conv2D(channels, kernel_size, padding='same',kernel_regularizer=reg)(input)
output = BatchNormalization()(output)
output = Activation('relu')(output)
output = Dropout(0)(output)
return output
model = Sequential()
model.add(Conv2D(28, kernel_size=(3,3), input_shape=(50, 50, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
model.add(Dense(128, activation=tf.nn.relu))
model.add(Dropout(0.2))
model.add(Dense(10, activation=tf.nn.softmax))
from keras.optimizers import Adam
import tensorflow as tf
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
from keras.callbacks import ModelCheckpoint
epochs = 10
checkpoint = ModelCheckpoint('mnist.h5', save_best_only=True)
STEP_SIZE_TRAIN=train_generator.n/train_generator.batch_size
model.fit_generator(generator=train_generator,
steps_per_epoch=STEP_SIZE_TRAIN,
epochs=epochs,
callbacks=[checkpoint]
)
The output I am getting is as follows:
Using TensorFlow backend.
Found 42000 images belonging to 10 classes.
{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9} class indices
Epoch 1/10
42000/42000 [==============================] - 174s 4ms/step - loss: 14.4503 - acc: 0.1035
/home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/keras/callbacks.py:434: RuntimeWarning: Can save best model only with val_loss available, skipping.
'skipping.' % (self.monitor), RuntimeWarning)
Epoch 2/10
42000/42000 [==============================] - 169s 4ms/step - loss: 14.4487 - acc: 0.1036
Epoch 3/10
42000/42000 [==============================] - 169s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 4/10
42000/42000 [==============================] - 168s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 5/10
42000/42000 [==============================] - 169s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 6/10
42000/42000 [==============================] - 168s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 7/10
42000/42000 [==============================] - 168s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 8/10
42000/42000 [==============================] - 168s 4ms/step - loss: 14.4483 - acc: 0.1036
Epoch 9/10
42000/42000 [==============================] - 168s 4ms/step - loss: 14.4480 - acc: 0.1036
Epoch 10/10
5444/42000 [==>...........................] - ETA: 2:26 - loss: 14.3979 - acc: 0.1067
The trainingSet directory contains a folder for each 1-9 digit with the images inside the folders. I am training on an AWS EC2 p3.2xlarge instance with the Amazon Deep Learning Linux AMI.
Here is the list of some weird points that I see :
Not rescaling your images -> ImageDataGenerator(rescale=1/255)
Batch Size of 1 (You may want to increase that)
MNIST is grayscale pictures , therefore color_mode should be "grayscale".
(Also you have several unused part in your code, that you may want to delete from the question)
Adding two more point in answer of #abcdaire,
mnist has image size of (28,28), you have assigned it wrong.
Binarization is another method, which can be used. It also make network to learn fast. It can be done like this.
`
imges_dataset = imges_dataset/255.0
imges_dataset = np.where(imges_dataset>0.5,1,0)
My model trains fine on a CPU machine but I am running into an issue when trying to rerun it on our cluster (using a single GPU and the same dataset). When training on a GPU machine validation loss and accuracy are not improving from epoch to epoch (see below).This was not the case on a CPU machine (I was able to achieve validation accuracy ~0.8 after 20 epochs)
Details:
Keras 2.1.3
TensforFlow backend
70/20/10 train/dev/test
~ 7000 images
model is based on ResNet50
Code
import sys
import math
import os
import glob
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
############ Training parameters ##################
img_width, img_height = 224, 224
batch_size = 32
epochs = 100
############ Define the data ##################
train_data_dir = '/mnt/data/train'
validation_data_dir = '/mnt/data/validate'
train_data_dir_class1 = os.path.join(train_data_dir,'class1', '*.jpg')
train_data_dir_class2 = os.path.join(train_data_dir, 'class2', '*.jpg')
validation_data_dir_class1 = os.path.join(validation_data_dir, 'class1', '*.jpg')
validation_data_dir_class2 = os.path.join(validation_data_dir, 'class2', '*.jpg')
# number of training and validation samples
nb_train_samples = len(glob.glob(train_data_dir_class1)) + len(glob.glob(train_data_dir_class2))
nb_validation_samples = len(glob.glob(validation_data_dir_class1)) + len(glob.glob(validation_data_dir_class2))
############ Define the model ##################
model = applications.resnet50.ResNet50(weights = "imagenet",
include_top = False,
input_shape = (img_width, img_height, 3))
for layer in model.layers:
layer.trainable = False
# Adding a FC layer
x = model.output
x = Flatten()(x)
predictions = Dense(1, activation = "sigmoid")(x)
# creating the final model
model_final = Model(inputs = model.input, outputs = predictions)
# compile the model
model_final.compile(loss = "binary_crossentropy",
optimizer = optimizers.Adam(lr = 0.001,
beta_1 = 0.9,
beta_2 = 0.999,
epsilon = 1e-10),
metrics = ["accuracy"])
# train and test generators
train_datagen = ImageDataGenerator(rescale = 1./255,
horizontal_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range = 0.3,
rotation_range = 30)
test_datagen = ImageDataGenerator(rescale = 1./255)
train_generator = train_datagen.flow_from_directory(train_data_dir,
target_size = (img_height, img_width),
batch_size = batch_size,
class_mode = "binary",
seed = 2018)
validation_generator = test_datagen.flow_from_directory(validation_data_dir,
target_size = (img_height, img_width),
class_mode = "binary",
seed = 2018)
early = EarlyStopping(monitor = 'val_loss', min_delta = 10e-5, patience = 10, verbose = 1, mode = 'auto')
performance_log = CSVLogger('/mnt/results/vanilla_model_log.csv', separator = ',', append = False)
# Train the model
model_final.fit_generator(generator = train_generator,
steps_per_epoch = math.ceil(train_generator.samples / batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = math.ceil(validation_generator.samples / batch_size),
callbacks = [early, performance_log])
# Save the model
model_final.save('/mnt/results/vanilla_model.h5')
Training Log
Epoch 1/100
151/151 [==============================] - 237s 2s/step - loss: 0.7234 - acc: 0.5240 - val_loss: 0.9899 - val_acc: 0.5425
Epoch 2/100
151/151 [==============================] - 65s 428ms/step - loss: 0.6491 - acc: 0.6228 - val_loss: 1.0248 - val_acc: 0.5425
Epoch 3/100
151/151 [==============================] - 65s 429ms/step - loss: 0.6091 - acc: 0.6648 - val_loss: 1.0377 - val_acc: 0.5425
Epoch 4/100
151/151 [==============================] - 64s 426ms/step - loss: 0.5829 - acc: 0.6968 - val_loss: 1.0459 - val_acc: 0.5425
Epoch 5/100
151/151 [==============================] - 64s 427ms/step - loss: 0.5722 - acc: 0.7070 - val_loss: 1.0472 - val_acc: 0.5425
Epoch 6/100
151/151 [==============================] - 64s 427ms/step - loss: 0.5582 - acc: 0.7166 - val_loss: 1.0501 - val_acc: 0.5425
Epoch 7/100
151/151 [==============================] - 64s 424ms/step - loss: 0.5535 - acc: 0.7188 - val_loss: 1.0492 - val_acc: 0.5425
Epoch 8/100
151/151 [==============================] - 64s 426ms/step - loss: 0.5377 - acc: 0.7287 - val_loss: 1.0209 - val_acc: 0.5425
Epoch 9/100
151/151 [==============================] - 64s 425ms/step - loss: 0.5328 - acc: 0.7368 - val_loss: 1.0062 - val_acc: 0.5425
Epoch 10/100
151/151 [==============================] - 65s 432ms/step - loss: 0.5296 - acc: 0.7381 - val_loss: 1.0016 - val_acc: 0.5425
Epoch 11/100
151/151 [==============================] - 65s 430ms/step - loss: 0.5231 - acc: 0.7419 - val_loss: 1.0021 - val_acc: 0.5425
Since I was able to get good results on a CPU machine, I hypothesized that validation loss/accuracy must be calculated incorrectly at the end of each epoch. To test this theory I used train set as validation set: if validation loss/accuracy is calculated correctly we should see roughly the same values for train and validation loss and accuracy. As you may see below, validation loss values are not the same as training loss values, which makes me believe validation loss is calculated incorrectly at the end of each epoch. Why does it happen? What are the possible solutions?
Modified Code
import sys
import math
import os
import glob
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
############ Training parameters ##################
img_width, img_height = 224, 224
batch_size = 32
epochs = 100
############ Define the data ##################
train_data_dir = '/mnt/data/train'
validation_data_dir = '/mnt/data/train' # redefined validation set to test accuracy of validation loss/accuracy calculations
train_data_dir_class1 = os.path.join(train_data_dir,'class1', '*.jpg')
train_data_dir_class2 = os.path.join(train_data_dir, 'class2', '*.jpg')
validation_data_dir_class1 = os.path.join(validation_data_dir, 'class1', '*.jpg')
validation_data_dir_class2 = os.path.join(validation_data_dir, 'class2', '*.jpg')
# number of training and validation samples
nb_train_samples = len(glob.glob(train_data_dir_class1)) + len(glob.glob(train_data_dir_class2))
nb_validation_samples = len(glob.glob(validation_data_dir_class1)) + len(glob.glob(validation_data_dir_class2))
############ Define the model ##################
model = applications.resnet50.ResNet50(weights = "imagenet",
include_top = False,
input_shape = (img_width, img_height, 3))
for layer in model.layers:
layer.trainable = False
# Adding a FC layer
x = model.output
x = Flatten()(x)
predictions = Dense(1, activation = "sigmoid")(x)
# creating the final model
model_final = Model(inputs = model.input, outputs = predictions)
# compile the model
model_final.compile(loss = "binary_crossentropy",
optimizer = optimizers.Adam(lr = 0.001,
beta_1 = 0.9,
beta_2 = 0.999,
epsilon = 1e-10),
metrics = ["accuracy"])
# train and test generators
train_datagen = ImageDataGenerator(rescale = 1./255,
horizontal_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range = 0.3,
rotation_range = 30)
test_datagen = ImageDataGenerator(rescale = 1./255)
train_generator = train_datagen.flow_from_directory(train_data_dir,
target_size = (img_height, img_width),
batch_size = batch_size,
class_mode = "binary",
seed = 2018)
validation_generator = test_datagen.flow_from_directory(validation_data_dir,
target_size = (img_height, img_width),
class_mode = "binary",
seed = 2018)
early = EarlyStopping(monitor = 'val_loss', min_delta = 10e-5, patience = 10, verbose = 1, mode = 'auto')
performance_log = CSVLogger('/mnt/results/vanilla_model_log.csv', separator = ',', append = False)
# Train the model
model_final.fit_generator(generator = train_generator,
steps_per_epoch = math.ceil(train_generator.samples / batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = math.ceil(validation_generator.samples / batch_size),
callbacks = [early, performance_log])
# Save the model
model_final.save('/mnt/results/vanilla_model.h5')
Training log for the modified code:
Epoch 1/100
151/151 [==============================] - 251s 2s/step - loss: 0.6804 - acc: 0.5910 - val_loss: 0.6923 - val_acc: 0.5469
Epoch 2/100
151/151 [==============================] - 87s 578ms/step - loss: 0.6258 - acc: 0.6523 - val_loss: 0.6938 - val_acc: 0.5469
Epoch 3/100
151/151 [==============================] - 88s 580ms/step - loss: 0.5946 - acc: 0.6874 - val_loss: 0.7001 - val_acc: 0.5469
Epoch 4/100
151/151 [==============================] - 88s 580ms/step - loss: 0.5718 - acc: 0.7086 - val_loss: 0.7036 - val_acc: 0.5469
Epoch 5/100
151/151 [==============================] - 87s 578ms/step - loss: 0.5634 - acc: 0.7157 - val_loss: 0.7067 - val_acc: 0.5469
Epoch 6/100
151/151 [==============================] - 87s 578ms/step - loss: 0.5467 - acc: 0.7243 - val_loss: 0.7099 - val_acc: 0.5469
Epoch 7/100
151/151 [==============================] - 87s 578ms/step - loss: 0.5392 - acc: 0.7317 - val_loss: 0.7096 - val_acc: 0.5469
Epoch 8/100
151/151 [==============================] - 87s 578ms/step - loss: 0.5287 - acc: 0.7387 - val_loss: 0.7083 - val_acc: 0.5469
Epoch 9/100
151/151 [==============================] - 87s 575ms/step - loss: 0.5306 - acc: 0.7385 - val_loss: 0.7088 - val_acc: 0.5469
Epoch 10/100
151/151 [==============================] - 87s 577ms/step - loss: 0.5303 - acc: 0.7318 - val_loss: 0.7111 - val_acc: 0.5469
Epoch 11/100
151/151 [==============================] - 87s 578ms/step - loss: 0.5157 - acc: 0.7474 - val_loss: 0.7143 - val_acc: 0.5469
A very quick idea that might help.
I think image labels are randomly assigned by two image data generator and trained.
And two image data generator gives different label distribution.
That's why training accuracy goes up while validation set remains around 50%.
I haven't entirely checked documentation of data image generator. Hope this might helps.
Argument classes for flow_from_directory() describes a way of setting up training labels.
classes: optional list of class subdirectories (e.g. ['dogs',
'cats']). Default: None. If not provided, the list of classes will be
automatically inferred from the subdirectory names/structure under
directory, where each subdirectory will be treated as a different
class (and the order of the classes, which will map to the label
indices, will be alphanumeric). The dictionary containing the mapping
from class names to class indices can be obtained via the attribute
class_indices.
As an experiment I am building a keras model to approximate the determinant of a matrix. However, when I run it the loss goes down at every epoch and the validation loss goes up! For example:
8s - loss: 7573.9168 - val_loss: 21831.5428
Epoch 21/50
8s - loss: 7345.0197 - val_loss: 23594.8540
Epoch 22/50
13s - loss: 7087.7454 - val_loss: 24718.3967
Epoch 23/50
7s - loss: 6851.8714 - val_loss: 25624.8609
Epoch 24/50
6s - loss: 6637.8168 - val_loss: 26616.7835
Epoch 25/50
7s - loss: 6446.8898 - val_loss: 28856.9654
Epoch 26/50
7s - loss: 6255.7414 - val_loss: 30122.7924
Epoch 27/50
7s - loss: 6054.5280 - val_loss: 32458.5306
Epoch 28/50
Here is the complete code:
import numpy as np
import sys
from scipy.stats import pearsonr
from scipy.linalg import det
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import math
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from keras import backend as K
def baseline_model():
# create model
model = Sequential()
model.add(Dense(200, input_dim=n**2, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, input_dim=n**2))
# model.add(Dense(1, kernel_initializer='normal'))
# Compile model
model.compile(loss='mean_squared_error', optimizer='adam')
return model
n = 15
print("Making the input data using seed 7", file=sys.stderr)
np.random.seed(7)
U = np.random.choice([0, 1], size=(n**2,n))
#U is a random orthogonal matrix
X =[]
Y =[]
# print(U)
for i in tqdm(range(100000)):
I = np.random.choice(n**2, size = n)
# Pick out the random rows and sort the rows of the matrix lexicographically.
A = U[I][np.lexsort(np.rot90(U[I]))]
X.append(A.ravel())
Y.append(det(A))
X = np.array(X)
Y = np.array(Y)
print("Data created")
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
X_train, X_test, y_train, y_test = train_test_split(X, Y,
train_size=0.75, test_size=0.25)
pipeline.fit(X_train, y_train, mlp__validation_split=0.3)
How can I stop it overfitting so badly?
Update 1
I tried adding more layers and L_2 regularization. However, it makes little or no difference.
def baseline_model():
# create model
model = Sequential()
model.add(Dense(n**2, input_dim=n**2, kernel_initializer='glorot_normal', activation='relu'))
model.add(Dense(int((n**2)/2.0), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(int((n**2)/2.0), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(int((n**2)/2.0), kernel_initializer='glorot_normal', activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1, kernel_initializer='glorot_normal'))
# Compile model
model.compile(loss='mean_squared_error', optimizer='adam')
return model
I increased the number of epochs to 100 and it finishes with:
19s - loss: 788.9504 - val_loss: 18423.2807
Epoch 97/100
24s - loss: 760.2046 - val_loss: 18305.9273
Epoch 98/100
20s - loss: 806.0941 - val_loss: 18174.8706
Epoch 99/100
24s - loss: 780.0487 - val_loss: 18356.7482
Epoch 100/100
27s - loss: 749.2595 - val_loss: 18331.5859
Is it possible to approximate the determinant of a matrix using keras?
I tested your code and got the same result. But let's go into basic understanding of matrix determinant (DET). DET consists of n! products, so you cannot really approximate it with n*n weights in few layers of neural network. This requires number of weights that would not scale to n=15, since 15! is 1307674368000 terms for multiplication in the DET.