This is my model:
def evaluate_model(X_train, y_train, X_test, y_test):
    """Build, train and score a small 1-D CNN with one sigmoid output.

    A new model is created on every call, fitted on the training data and
    then evaluated on the test data.

    Args:
        X_train: training inputs; the first layer declares
            ``input_shape=(25, 1)``.
        y_train: training targets for the single output unit
            (presumably 0/1 labels -- confirm against the caller).
        X_test: held-out inputs, same layout as ``X_train``.
        y_test: held-out targets, same layout as ``y_train``.

    Returns:
        The accuracy value reported by ``model.evaluate`` on the test data.
    """
    verbose = 1
    epochs = 50
    batch_size = 32
    n_outputs = 1

    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=6, activation='relu', input_shape=(25, 1)))
    model.add(Conv1D(filters=32, kernel_size=6, activation='relu'))
    model.add(Dropout(0.3))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    # One sigmoid unit is a binary classifier, so it is paired with
    # binary_crossentropy. categorical_crossentropy expects a distribution
    # over several output units and is degenerate with a single one.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit network
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

    # evaluate model; the first returned value is the loss, which is not needed
    _, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=verbose)
    return accuracy
# summarize scores
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation.

    ``mean`` and ``std`` are taken from module scope (presumably NumPy's
    functions -- confirm against the file's imports).

    Args:
        scores: sequence of numeric scores; ``run_experiment`` passes
            accuracies already scaled to percent.

    Returns:
        None. The results are written to standard output.
    """
    print(scores)
    m, s = mean(scores), std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))
# run an experiment
def run_experiment(repeats=5):
    """Train and score the model ``repeats`` times and print a summary.

    Every round calls ``evaluate_model`` on the module-level ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``, converts the returned accuracy
    to percent, prints it, and finally hands all collected percentages to
    ``summarize_results``.

    Args:
        repeats: number of independent train/evaluate rounds.
    """
    scores = []
    for run_number in range(1, repeats + 1):
        pct = 100.0 * evaluate_model(X_train, y_train, X_test, y_test)
        print('>#%d: %.3f' % (run_number, pct))
        scores.append(pct)
    # summarize results
    summarize_results(scores)
# run the experiment with the default number of repeats; run_experiment reads
# X_train, y_train, X_test and y_test from module scope, so they must already
# be defined when this line executes
run_experiment()
How do I get the train and test accuracy separately? Right now I am only getting test accuracy with model.evaluate.
What accuracy you get depends on the arguments you use with model.evaluate(); if you replace X_test and y_test with X_train and y_train, respectively, you will get the training accuracy. So, to get both, you should modify the last lines of your evaluate_model function as:
_, train_accuracy = model.evaluate(X_train, y_train, batch_size=batch_size, verbose=1)
_, test_accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=1)
return train_accuracy, test_accuracy
Related
I am having some trouble with my ANN: it is only predicting '0'. The dataset is imbalanced (10:1), although I undersampled the training dataset, so I am unsure of what is going on. I am getting 92-93% accuracy on the balanced training set, but on testing (on an unbalanced test set) it just predicts zeroes. I am unsure of where to go from here; anything helps. The data has been one-hot encoded and scaled.
# Create an 80/20 train-test split; only the training part is re-balanced.
train, test = train_test_split(selection, test_size=0.2)

# Divide the training rows by class label.
df_class_0 = train[train['AUDITED_FLAG'] == 0]
df_class_1 = train[train['AUDITED_FLAG'] == 1]

# Class counts are taken from the per-class frames. Unpacking value_counts()
# would order them by frequency rather than by label, silently swapping the
# two names whenever class 1 is the larger class.
count_class_0, count_class_1 = len(df_class_0), len(df_class_1)

# Randomly keep as many class-0 rows as there are class-1 rows.
df_class_0_under = df_class_0.sample(count_class_1)
train_under = pd.concat([df_class_0_under, df_class_1], axis=0)

print('Random under-sampling:')
print(train_under.AUDITED_FLAG.value_counts())
train_under.AUDITED_FLAG.value_counts().plot(kind='bar', title='Count (target)');
Random under-sampling:
1.0 112384
0.0 112384
# Separate the label column from the feature columns; labels become int arrays.
y_train = np.array(train_under['AUDITED_FLAG']).astype(int)
X_train = train_under.drop('AUDITED_FLAG', axis=1)
y_test = np.array(test['AUDITED_FLAG']).astype(int)
X_test = test.drop('AUDITED_FLAG', axis=1)

# Fully connected network: 179 inputs -> 6 -> 30 -> 1 sigmoid unit.
model = Sequential([
    Dense(6, input_dim=179, activation='relu'),
    Dense(30, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the under-sampled set; the returned history is plotted below.
history = model.fit(X_train, y_train, epochs=5, batch_size=16, verbose=1)

# First pass over the test set (default verbosity).
test_loss, test_acc = model.evaluate(X_test, y_test)

# Silent passes over both sets; test_acc is overwritten by the second one.
_, train_acc = model.evaluate(X_train, y_train, verbose=0)
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
print('test_acc:', test_acc)

# Plot the training accuracy per epoch. fit() was given no validation data,
# so only the training curve is drawn.
pyplot.plot(history.history['acc'], label='train')
Train: 0.931, Test: 0.921
# Predict on the test set and cross-tabulate actual vs. predicted labels.
#preds
y_pred = model.predict(X_test)
# NOTE(review): the model defined earlier in this snippet ends in
# Dense(1, activation='sigmoid'); presumably y_pred therefore has a single
# column, in which case argmax over axis=1 is 0 for every row -- confirm.
y_pred_bool = np.argmax(y_pred, axis=1)
# #plot confusion matrix
y_actu = pd.Series(y_test, name='Actual')
y_pred_bool = pd.Series(y_pred_bool, name='Predicted')
# Rows are the actual labels, columns the predicted labels.
print(pd.crosstab(y_actu, y_pred_bool))
'''
Predicted 0
Actual
0 300011
1 28030
This is not right:
y_pred_bool = np.argmax(y_pred, axis=1)
Argmax is only used with categorical cross-entropy loss and softmax outputs. For binary cross-entropy and sigmoid outputs, you should round the outputs, which is equivalent to thresholding predictions > 0.5:
y_pred_bool = np.round(y_pred)
This is what Keras does to compute binary accuracy.
I tried to optimize hyperparameters in my keras CNN made for image classification. I decided to use grid search from sklearn. I overcame the fundamental difficulty with making x and y out of keras flow_from_directory but it still doesn't work.
Error in the last line
ValueError: dropout is not a legal parameter
def grid_model(optimizer='adam',
               kernel_initializer='random_uniform',
               dropout=0.2,
               loss='categorical_crossentropy'):
    """Build and compile a small two-convolution CNN with a 10-way softmax.

    Reads ``img_height`` and ``img_width`` from module scope.

    Args:
        optimizer: optimizer passed to ``model.compile``.
        kernel_initializer: initializer used for the two hidden Dense layers.
        dropout: rate shared by all four Dropout layers.
        loss: loss passed to ``model.compile``.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # The image generators in this file use target_size=(img_height, img_width),
    # so batches arrive as (img_height, img_width, 3). The input shape has to
    # use the same order, otherwise non-square images do not fit the model.
    model.add(Conv2D(6, (5, 5), activation="relu", padding="same",
                     input_shape=(img_height, img_width, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(dropout))
    model.add(Conv2D(16, (5, 5), activation="relu"))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(Dense(120, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(84, activation='relu', kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
# Grid search over grid_model's arguments, fed from directory image generators.
# Both generators only rescale pixel values by 1/255.
train_generator = ImageDataGenerator(rescale=1/255)
validation_generator = ImageDataGenerator(rescale=1/255)
# Retrieve images and their classes for train and validation sets
train_flow = train_generator.flow_from_directory(directory=train_data_dir,
batch_size=batch_size,
target_size=(img_height,img_width))
validation_flow = validation_generator.flow_from_directory(directory=validation_data_dir,
batch_size=batch_size,
target_size=(img_height,img_width),
shuffle = False)
# NOTE(review): build_fn receives the result of calling grid_model() (an
# already built model), not the grid_model function itself.
clf = KerasClassifier(build_fn=grid_model(), epochs=epochs, verbose=0)
# Keys carry the 'clf__' prefix because the classifier is the pipeline step
# named 'clf'. The grid spans 2 * 2 * 3 * 2 * 3 = 72 parameter combinations.
param_grid = {
'clf__optimizer':['adam', 'Nadam'],
'clf__epochs':[100, 200],
'clf__dropout':[0.1, 0.2, 0.5],
'clf__kernel_initializer':['normal','uniform'],
'clf__loss':['categorical_crossentropy',
'sparse_categorical_crossentropy',
'kullback_leibler_divergence']
}
pipeline = Pipeline([('clf',clf)])
# NOTE(review): presumably next() yields a single batch of batch_size images,
# so the search below would only see that one batch -- confirm.
(X_train, Y_train) = train_flow.next()
grid = GridSearchCV(pipeline, cv=2, param_grid=param_grid)
grid.fit(X_train, Y_train)
The problem is in this line:
clf = KerasClassifier(build_fn=grid_model(), epochs=epochs, verbose=0)
change it to
clf = KerasClassifier(build_fn=grid_model, epochs=epochs, verbose=0)
The grid_model method should not be invoked but a reference to it should be passed.
Also, 'sparse_categorical_crossentropy' cannot be searched in the same list of losses as 'categorical_crossentropy': the sparse variant expects integer class labels, whereas 'categorical_crossentropy' expects one-hot encoded labels, so the same Y_train cannot be used with both.
What is the best way to use ROC / AUC as a metric in Keras instead of 'accuracy' while doing KFold validation? I tried replacing 'accuracy' with sklearn's 'roc_curve' and 'auc', but this did not work.
Also: How could I access the 'cross_val_score()' loops in order to plot the ROC-Curve?
Here is my code:
# Replace X and Y with their `.values` attribute (presumably pandas objects
# being converted to NumPy arrays -- confirm against where X and Y are loaded).
X = X.values
Y = Y.values
def create_baseline():
    """Return a compiled binary classifier: 82 inputs -> 82 -> 50 -> 1 sigmoid.

    Takes no arguments so that it can be handed to ``KerasClassifier`` as
    ``build_fn``.
    """
    layers = [
        Dense(82, input_dim=82, kernel_initializer='normal', activation='relu'),
        Dense(50, activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net
# Wrap the Keras model builder so scikit-learn can cross-validate it.
estimator = KerasClassifier(
    build_fn=create_baseline,
    epochs=100,
    batch_size=5,
    verbose=0,
)
# 50 shuffled folds; cross_val_score returns one score per fold.
kfold = KFold(n_splits=50, shuffle=True, random_state=True)
results = model_selection.cross_val_score(estimator, X, Y, cv=kfold)
# Report mean and standard deviation of the fold scores as percentages.
print("Accuracy: %.3f%% (%.3f%%)" % (
    results.mean()*100.0,
    results.std()*100.0,
))
I am using Scikit Learn in Python where I pipelined KerasClassifier with StandardScaler().
The code is:
def create_baseline():
    """Return a compiled binary classifier: 11 inputs -> 11 -> 7 -> 1 sigmoid.

    Takes no arguments so that it can be handed to ``KerasClassifier`` as
    ``build_fn``.
    """
    layers = [
        Dense(11, input_dim=11, kernel_initializer='normal', activation='relu'),
        Dense(7, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net
# Keras model wrapped as a scikit-learn estimator.
classifier = KerasClassifier(build_fn=create_baseline, nb_epoch=150, batch_size=5)
# Two stratified, shuffled folds seeded with the module-level `seed`.
kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
# Pipeline: standardize the features, then fit the network (step name 'mlp').
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', classifier),
]
pipeline = Pipeline(estimators)
# The 'mlp__' prefix routes the callbacks to the 'mlp' step's fit call.
results = cross_val_score(
    pipeline,
    X,
    Y,
    cv=kfold,
    verbose=1,
    fit_params={'mlp__callbacks': [tbCallBack]},
)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
How can I save the cross validation? Taking into consideration that I did not fit the classifier before, I need to save the result and then load it to make predictions.
I'm trying to train a convolutional neural network using Keras with TensorFlow. This is my code; I get an error from the compile function, but I don't know why.
# Text CNN: embedding -> 1-D convolution -> global max pooling -> dense -> sigmoid.
# n_symbols, vocab_dim, maxlen, nb_filter, filter_length, hidden_dims,
# batch_size, nb_epoch and the X_*/y_* arrays are defined outside this snippet.
# NOTE(review): nb_filter, filter_length, border_mode, subsample_length and
# nb_epoch look like Keras 1.x argument names -- confirm the installed version.
model = Sequential() # or Graph or whatever
# Embedding table with n_symbols + 1 rows of size vocab_dim, for inputs of length maxlen.
model.add(Embedding(input_dim = n_symbols + 1,
output_dim = vocab_dim,
input_length=maxlen,
dropout=0.2))
# we add a Convolution1D, which will learn nb_filter
# word group filters of size filter_length:
model.add(Convolution1D(nb_filter=nb_filter,
filter_length=filter_length,
border_mode='valid',
activation='relu',
subsample_length=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())
# We add a vanilla hidden layer:
# (dropout is applied to the Dense output before the ReLU activation)
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))
# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))
# NOTE(review): the output is a single sigmoid unit but the loss is
# mean_squared_error; presumably the targets are binary -- confirm whether
# binary_crossentropy was intended.
model.compile(loss='mean_squared_error',
optimizer='adam',
metrics=['accuracy'])
# Train, using the test arrays as validation data during fitting.
model.fit(X_train, y_train,
batch_size=batch_size,
nb_epoch=nb_epoch,
validation_data=(X_test, y_test))
print("Evaluate...")
# evaluate returns the loss first, then the accuracy metric.
score, acc = model.evaluate(X_test, y_test,
batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
error:
ValueError: Operation u'init_27' has been marked as not fetchable.