I get this error:
sum() got an unexpected keyword argument 'out'
when I run this code:
import pandas as pd, numpy as np
import keras
from keras.layers.core import Dense, Activation
from keras.models import Sequential
def AUC(y_true, y_pred):
    not_y_pred = np.logical_not(y_pred)
    y_int1 = y_true * y_pred
    y_int0 = np.logical_not(y_true) * not_y_pred
    TP = np.sum(y_pred * y_int1)
    FP = np.sum(y_pred) - TP
    TN = np.sum(not_y_pred * y_int0)
    FN = np.sum(not_y_pred) - TN
    TPR = np.float(TP) / (TP + FN)
    FPR = np.float(FP) / (FP + TN)
    return (1 + TPR - FPR) / 2
# Input datasets
train_df = pd.DataFrame(np.random.rand(91,1000))
train_df.iloc[:,-2]=(train_df.iloc[:,-2]>0.8)*1
model = Sequential()
model.add(Dense(output_dim=60, input_dim=91, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.add(Dense(output_dim=1, input_dim=60, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=[AUC])
train_df.iloc[:,-1]=np.ones(train_df.shape[0]) #bias
X=train_df.iloc[:,:-1].values
Y=train_df.iloc[:,-1].values
print X.shape,Y.shape
model.fit(X, Y, batch_size=50,show_accuracy = False, verbose = 1)
Is it possible to implement a custom metric aside from doing a loop on batches and editing the source code?
Here I'm answering the OP's topic question rather than their exact problem. I'm doing this because the question shows up near the top when I google the topic.
You can implement a custom metric in two ways.
As mentioned in the Keras docs:
import keras.backend as K

def mean_pred(y_true, y_pred):
    return K.mean(y_pred)

model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy', mean_pred])
But here you have to remember, as mentioned in Marcin Możejko's answer, that y_true and y_pred are tensors. So in order to calculate the metric correctly you need to use keras.backend functionality. Please look at this SO question for details: How to calculate F1 Macro in Keras?
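For example, here is a minimal sketch of a batch-wise F1 metric built purely from keras.backend ops (my own illustration rather than code from the linked question; it assumes y_pred holds probabilities that can be rounded to 0/1, and it is only an approximation because it is computed per batch):

import keras.backend as K

def f1_metric(y_true, y_pred):
    # turn probabilities into hard 0/1 predictions
    y_pred = K.round(y_pred)
    tp = K.sum(y_true * y_pred)
    fp = K.sum((1 - y_true) * y_pred)
    fn = K.sum(y_true * (1 - y_pred))
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())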
Or you can implement it in a hacky way, as mentioned in a Keras GitHub issue. For that you need to use the callbacks argument of model.fit.
import keras
import numpy as np
from keras.optimizers import SGD
from sklearn.metrics import roc_auc_score

model = keras.models.Sequential()
# ...
sgd = SGD(lr=0.001, momentum=0.9)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

class Metrics(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, epoch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))
        y_val = np.argmax(y_val, axis=1)
        y_predict = np.argmax(y_predict, axis=1)
        self._data.append({
            'val_rocauc': roc_auc_score(y_val, y_predict),
        })
        return

    def get_data(self):
        return self._data

metrics = Metrics()
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[metrics])
metrics.get_data()
The problem is that y_pred and y_true are not NumPy arrays but either Theano or TensorFlow tensors. That's why you got this error.
You can define your custom metrics, but you have to remember that their arguments are those tensors – not NumPy arrays.
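For illustration, here is a rough sketch of the OP's formula rewritten with keras.backend ops only (my own adaptation; it assumes y_pred contains probabilities that can be rounded to 0/1, and K.epsilon() is added to avoid division by zero):

import keras.backend as K

def AUC(y_true, y_pred):
    # hard 0/1 predictions from probabilities
    y_pred = K.round(y_pred)
    not_y_pred = 1 - y_pred
    not_y_true = 1 - y_true
    TP = K.sum(y_true * y_pred)
    FP = K.sum(not_y_true * y_pred)
    TN = K.sum(not_y_true * not_y_pred)
    FN = K.sum(y_true * not_y_pred)
    TPR = TP / (TP + FN + K.epsilon())
    FPR = FP / (FP + TN + K.epsilon())
    return (1 + TPR - FPR) / 2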
You can call model.predict() inside your AUC metric function. (This will iterate over batches, so you might be better off using model.predict_on_batch().) Assuming you have something like a softmax layer as output (something that outputs probabilities), you can use that together with sklearn.metrics to get the AUC.
from sklearn.metrics import roc_curve, auc

(the function below is adapted from here)

def sklearnAUC(test_labels, test_prediction):
    n_classes = 2
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        # ( actual labels, predicted probabilities )
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    return round(roc_auc[0], 3), round(roc_auc[1], 3)
Now compute your metric:
# gives a numpy array like so [ [0.3,0.7] , [0.2,0.8] .... ]
Y_pred = model.predict_on_batch(X_test)
# Y_test looks something like [ [0,1] , [1,0] .... ]
# auc1 and auc2 should be equal
auc1, auc2 = sklearnAUC(Y_test, Y_pred)
Related
I have a regression neural network with ten input features and three outputs. But the ten features do not all have the same importance in the loss calculation (mean squared error). So I want to define specific coefficients for each input feature to increase their role in the loss function.
Suppose we define the coefficients in an array: coeff = [5,20,2,1,4,5,6,2,9,15]. When the mean squared error measures the distance for each feature, that distance should be multiplied by the corresponding coefficient; for example, if the distance for the second feature is 60, it is multiplied by coefficient 20 from the coeff array.
I guess I need to define a custom loss function, but how do I pass the defined coeff array and multiply its elements with the features?
Updated
I guess my idea is similar to this code and this code, but I am not sure. However, I was unable to run the first one and got errors.
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense

# get the dataset
def get_dataset():
    X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3, random_state=2)
    return X, y

# get the model
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(loss='mse', optimizer='adam')
    return model

# evaluate a model using repeated k-fold cross-validation
def evaluate_model(X, y):
    results = list()
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    # enumerate folds
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]
        # define model
        model = get_model(n_inputs, n_outputs)
        # fit model
        model.fit(X_train, y_train, verbose=0, epochs=100)
        # evaluate model on test set
        mse = model.evaluate(X_test, y_test, verbose=0)
        # store result
        print('>%.3f' % mse)
        results.append(mse)
    return results

# load dataset
X, y = get_dataset()
# evaluate model
results = evaluate_model(X, y)
# summarize performance
print('MSE: %.3f (%.3f)' % (mean(results), std(results)))
If you use the functional API, you can add a custom loss function with the model.add_loss method, inside the model. Your loss function can then use the model inputs and outputs and anything else in your model.
The problem with this approach is that inside the model you don't have the 'true' y values. So you need to add an additional input to your model and pass the y values to it – but just for the loss calculation.
Something like this:
inputs = Input(shape=(n_inputs,))
x = Dense(20, ...)(inputs)
outputs = Dense(n_outputs)(x)

y_true = Input(shape=(n_outputs,))

modelx = Model(inputs=[inputs, y_true], outputs=outputs)
modelx.add_loss(your_loss_function(y_true=y_true, y_pred=outputs, inputs=inputs))
Since you already added the loss to the model, you compile it without any loss:
modelx.compile(loss=None, optimizer='adam')
When you fit the model, you need to pass the y values to the model inputs.
modelx.fit(x=[X_train, y_train], y=y_train, verbose=0, epochs=100)
When you want a model with just the X values as input, for example for prediction, you can create it like so:
model = Model(modelx.input[0], modelx.output)
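As a rough idea of what your_loss_function could look like, here is a minimal sketch that applies fixed weights to the squared errors of the outputs (my own illustration with a hypothetical coeff of length n_outputs; if your coefficients really belong to the ten input features, you would have to decide how to map them onto the three outputs, possibly using the inputs argument):

from keras import backend as K

coeff = K.constant([5.0, 20.0, 2.0])  # hypothetical: one weight per output

def your_loss_function(y_true, y_pred, inputs=None):
    # weighted mean squared error over the outputs
    squared_diff = K.square(y_true - y_pred)   # shape (batch, n_outputs)
    weighted = squared_diff * coeff            # scale each output's error
    return K.mean(weighted)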
Can someone explain why adding random numbers to the loss does not affect the predictions of this Keras model? Every time I run it I get a very similar AUC for both models but I would expect the AUC from the second model to be close to 0.5. I use Colab.
Any suggestions why this might be happening?
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras import layers
import random
from keras import backend as K
from sklearn import metrics
from sklearn.metrics import roc_auc_score

opt = tf.keras.optimizers.Adam(learning_rate=1e-04)

# resetting seeds to ensure reproducibility
def reset_random_seeds():
    tf.random.set_seed(1)
    np.random.seed(1)
    random.seed(1)

def get_auc(y_test, y_pred):
    fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    return auc

# standard loss function with binary cross-entropy
def binary_crossentropy1(y_true, y_pred):
    bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    bce1 = K.mean(bin_cross(y_true, y_pred))
    return bce1

# same loss function but with added random numbers
def binary_crossentropy2(y_true, y_pred):
    bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    bce2 = K.mean(bin_cross(y_true, y_pred))
    penalty = tf.random.normal([], mean=50.0, stddev=100.0)
    bce2 = tf.math.add(bce2, penalty)
    return bce2

# model without randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model1 = keras.Model(inputs=input1, outputs=output)
model1.compile(optimizer=opt, loss=binary_crossentropy1, metrics=['accuracy'])
model1.fit(x=X_train, y=y_train, epochs=10, batch_size=32)
model1_pred = model1.predict(X_test)

# model with randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model2 = keras.Model(inputs=input1, outputs=output)
model2.compile(optimizer=opt, loss=binary_crossentropy2, metrics=['accuracy'])
model2.fit(x=X_train, y=y_train, epochs=10, batch_size=32)
model2_pred = model2.predict(X_test)

print(get_auc(y_test, model1_pred))
print(get_auc(y_test, model2_pred))
Result
0.7228943446346893
0.7231896873302319
What the penalty looks like
penalty = 112.050842
penalty = 139.664017
penalty = 152.505341
penalty = -37.1483
penalty = -74.08284
penalty = 155.872528
penalty = 42.7903175
The training is guided by the gradient of the loss with respect to the model parameters.
The random value that you add to the loss in the second model is independent of the model's predictions (and therefore of its parameters), so it does not contribute to the gradient of the loss during training. When you run the prediction you take the model output (before the loss function), so that is not affected either.
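A quick way to see this (my own sketch, not code from the question) is to compare the gradient of both losses with respect to the predictions: the random penalty has no dependence on the predictions, so both gradients come out identical.

import tensorflow as tf

y_true = tf.constant([[1.0], [0.0]])
y_pred = tf.Variable([[0.8], [0.3]])

with tf.GradientTape(persistent=True) as tape:
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    loss1 = tf.reduce_mean(bce(y_true, y_pred))
    # same loss plus a random constant, as in binary_crossentropy2
    loss2 = loss1 + tf.random.normal([], mean=50.0, stddev=100.0)

grad1 = tape.gradient(loss1, y_pred)
grad2 = tape.gradient(loss2, y_pred)
print(grad1.numpy())
print(grad2.numpy())  # identical to grad1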
I'm trying to solve the spiral problem using Keras with 3 spirals instead of 2, using a strategy similar to the one I used for 2. The problem is that my loss now grows exponentially instead of decreasing, with the same parameters I used for 2 spirals (the network has 3 outputs instead of a binary output). I'm not quite sure what is happening here; can anyone help? I have tried various epochs, learning rates, and batch sizes.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import RMSprop
from Question1.utils import create_neural_network, create_test_data

EPOCHS = 250
BATCH_SIZE = 20

def main():
    df = three_spirals(1000)

    # Set-up data
    x_train = df[['x-coord', 'y-coord']].values
    y_train = df['class'].values

    # Don't need y_test, can inspect visually if it worked or not
    x_test = create_test_data()

    # Scale data
    scaler = MinMaxScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    relu_model = create_neural_network(layers=3,
                                       neurons=[40, 40, 40],
                                       activation='relu',
                                       optimizer=RMSprop(learning_rate=0.001),
                                       loss='categorical_crossentropy',
                                       outputs=3)

    # Train networks
    relu_model.fit(x=x_train, y=y_train, epochs=EPOCHS, verbose=1, batch_size=BATCH_SIZE)

    # Predictions on test data
    relu_predictions = relu_model.predict_classes(x_test)

    models = [relu_model]
    test_predictions = [relu_predictions]

    # Plot
    plot_data(models, test_predictions)
And here is the create_neural_network function:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def create_neural_network(layers, neurons, activation, optimizer, loss, outputs=1):
    if layers != len(neurons):
        raise ValueError("Number of layers doesn't match the number of neuron layers.")

    model = Sequential()
    for i in range(layers):
        model.add(Dense(neurons[i], activation=activation))

    # Output
    if outputs == 1:
        model.add(Dense(outputs))
    else:
        model.add(Dense(outputs, activation='softmax'))

    model.compile(optimizer=optimizer,
                  loss=loss)

    return model
I have worked it out: the output data isn't like binary classification, where you only need one column. For multi-class classification you need a column for each class you want to classify, so having y take the values 0, 1, 2 in a single column was incorrect. The correct way is to have columns y0, y1, y2 that are 1 if the sample belongs to that class and 0 otherwise (i.e. one-hot encoding).
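For instance, a small sketch of that conversion with keras.utils.to_categorical (my own illustration of the one-hot step described above):

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([0, 2, 1, 2, 0])        # class indices 0, 1, 2
y_onehot = to_categorical(y, num_classes=3)
print(y_onehot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [1. 0. 0.]]

Alternatively, you can keep the integer labels and switch the loss to 'sparse_categorical_crossentropy', which expects class indices instead of one-hot vectors.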
I'm trying to find hyper-parameters for my Keras model, which contains a custom loss function, with GridSearchCV.
When I run this manually (outside of a grid search), it works. However, if I run GridSearchCV on it in parallel by setting the parameter n_jobs=-1, there seems to be a problem with the serialization performed by the joblib backend.
This is how I set up my custom loss function:
from keras import backend as K

def my_loss(y_true, y_pred):
    # perform some calculations using K
    # ...
    return 1 - K.mean(score)
This is my model function, which takes some parameters from the grid search:
from keras.models import Model
from keras.layers import Dense, Dropout, Input

def create_model(hidden_layers, activation, last_activation, optimizer, drop_rate, initializer, units, input_dim):
    # some layer definitions of inputs and outputs
    model = Model(inputs, outputs)
    model.compile(loss=my_loss, optimizer=optimizer, metrics=['binary_accuracy'])
    return model
Finally, how I put it together:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from keras.wrappers.scikit_learn import KerasClassifier

skfs = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

parameters = {'hidden_layers': [...],
              'activation': [...],
              'last_activation': [...],
              'optimizer': [...],
              'drop_rate': [...],
              'initializer': [...],
              'units': [...],
              'input_dim': [...]
              }

model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=256, verbose=0)
classifier = GridSearchCV(estimator=model, param_grid=parameters, cv=skfs, scoring=score, verbose=1, n_jobs=-1, return_train_score=True)
results = classifier.fit(X, y, callbacks=early_stopping)
When I execute the above code, I get an exception saying that the my_loss function is unknown. However, if I change it to the following (note the n_jobs parameter):
classifier = GridSearchCV(estimator=model, param_grid=parameters, cv=skfs, scoring=score, verbose=1, n_jobs=1, return_train_score=True)
it works. I believe that I must somehow propagate my own function. How can I do this?
Trying to understand and implement the GridSearch method for Keras regression. Here is my simple reproducible regression application.
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)

x, y = to_xy(df, 'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)

# Create Model
model = Sequential()
model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", save_best_only=True)  # save best model
model.fit(x_train, y_train, callbacks=[monitor, checkpointer], verbose=0, epochs=1000)
#model.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer],verbose=0,epochs=1000)

pred = model.predict(x_test)
score = np.sqrt(metrics.mean_squared_error(pred, y_test))
print("(RMSE): {}".format(score))
If you run the code, you can see the loss values are not too big.
And here is my reproducible GridSearch implementation. First of all, I simply searched the web and found a GridSearch application for KerasClassifier, then tried to revise it for KerasRegressor. I am not sure if my revision is correct. Even assuming the general concept is correct, there must be a problem in this code, because the loss values do not make sense: the loss function is MSE but the output is negative. Unfortunately, I could not figure out where I am going wrong.
from keras.wrappers.scikit_learn import KerasRegressor
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import GridSearchCV

df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)

x, y = to_xy(df, 'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)

def create_model(optimizer='adam'):
    # create model
    model = Sequential()
    model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])
    return model

model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
I have tested your code, and I see that you are not using a scoring function in GridSearchCV, so according to the scikit-learn documentation:
If None, the estimator’s default scorer (if available) is used.
It seems it is using 'neg_mean_absolute_error' (or one of these negated scoring functions for regression) by default for scoring the models.
That is probably why it says that the best model is:
-75.820078 using {'optimizer': 'Adagrad'}
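If you want the grid search scores to line up with your MSE intuition, you can pass an explicit scoring argument. A small sketch, assuming the rest of your setup is unchanged (the scores are still negated, since scikit-learn always maximizes the score):

grid = GridSearchCV(estimator=model,
                    param_grid=param_grid,
                    scoring='neg_mean_squared_error',  # larger (less negative) is better
                    n_jobs=1)
grid_result = grid.fit(x_train, y_train)

# convert back to RMSE for readability
best_rmse = np.sqrt(-grid_result.best_score_)
print("Best RMSE: %.3f using %s" % (best_rmse, grid_result.best_params_))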