How to implement custom stateful metric in Keras - python

I am trying to implement a custom stateful metric in Keras. The Keras API website provides a short demo of how to do so, but the demo only covers the class definition, creating an instance, and computing the metric on some data. It does not demonstrate how to use the metric with the model.fit method, which is how I (and most people) would use it. When I do, I get results that (1) do not change epoch-to-epoch and (2) conflict with built-in metrics, so I know I am getting incorrect results. Am I using the provided code incorrectly, or is the sample code not functional?
I am including a minimal demonstration of the issue below:
# libraries
import numpy as np
import random
import tensorflow as tf
from sklearn import datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
# setting pseudorandom seeds for reproducibility
np.random.seed(0)
tf.random.set_seed(0)
random.seed(0)
# loads famous Iris dataset
iris = datasets.load_iris()
x = iris.data
y = iris.target
# converts to binary prediction problem
y = np.where(y == 2, 1, 0)
# train/validation split
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size = 0.2)
# reformatting
x_train = tf.constant(x_train)
x_val = tf.constant(x_val)
y_train = tf.constant(y_train)
y_val = tf.constant(y_val)
# stateful metric code from keras website
class BinaryTruePositives(tf.keras.metrics.Metric):
    def __init__(self, name='binary_true_positives', **kwargs):
        super(BinaryTruePositives, self).__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.cast(y_true, tf.bool)
        y_pred = tf.cast(y_pred, tf.bool)
        values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True))
        values = tf.cast(values, self.dtype)
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, self.dtype)
            values = tf.multiply(values, sample_weight)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.true_positives

    def reset_states(self):
        self.true_positives.assign(0)
# demonstration from website (correct value is returned)
m = BinaryTruePositives()
m.update_state([0, 1, 1, 1], [0, 1, 0, 0])
print(m.result())  # tf.Tensor(1.0, ...) -- one true positive
# instantiates metric
true_pos = BinaryTruePositives()
# defines a very simple model
model = Sequential()
model.add(Dense(1, activation = 'sigmoid'))
# compiles model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['Recall', true_pos])
# trains model and prints output
history = model.fit(x_train, y_train,
                    epochs=30,
                    verbose=1,
                    validation_data=(x_val, y_val),
                    batch_size=10)
Notice that recall improves while the true-positive count stays constant, which is impossible.

I have slightly modified your BinaryTruePositives class, because in your model y_pred is a continuous value in (0, 1), not a boolean. You need to convert it to a boolean, for example with tf.where(y_pred > 0.5, True, False).
class BinaryTruePositives(tf.keras.metrics.Metric):
    def __init__(self, name='binary_true_positives', **kwargs):
        super(BinaryTruePositives, self).__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.cast(y_true, tf.bool)
        y_pred = tf.where(y_pred > 0.5, True, False)
        values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True))
        values = tf.cast(values, self.dtype)
        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, self.dtype)
            values = tf.multiply(values, sample_weight)
        self.true_positives.assign_add(tf.reduce_sum(values))

    def result(self):
        return self.true_positives

    def reset_states(self):
        self.true_positives.assign(0)
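A minimal usage sketch (assuming the same Iris setup as in the question): with the thresholding in place, the true-positive count now updates epoch to epoch alongside the built-in recall.
true_pos = BinaryTruePositives()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['Recall', true_pos])
model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=30, batch_size=10)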


How to create custom eval metric for catboost?

Similar SO questions:
Python Catboost: Multiclass F1 score custom metric
Catboost tutorials
https://catboost.ai/docs/concepts/python-usages-examples.html#user-defined-loss-function
Question
I have a binary classification problem. After modelling, we get the model's predictions on the test set, y_pred, and we already have the true test labels, y_true.
I would like to compute a custom evaluation metric defined by the following equation:
profit = 400*truePositive - 200*falseNegative - 100*falsePositive
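For example, 10 true positives, 2 false negatives, and 5 false positives would give profit = 400*10 - 200*2 - 100*5 = 3100.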
Also, since higher profit is better, I would like to maximize the function instead of minimizing it.
How to get this eval_metric in catboost?
Using sklearn
import sklearn.metrics

def get_profit(y_true, y_pred):
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
    profit = 400*tp - 200*fn - 100*fp
    return profit

scoring = sklearn.metrics.make_scorer(get_profit, greater_is_better=True)
Using catboost
class ProfitMetric(object):
    def get_final_error(self, error, weight):
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])
        approx = approxes[0]
        error_sum = 0.0
        weight_sum = 0.0
        ** I don't know here**
        return error_sum, weight_sum
Question
How to complete the custom eval metric in catboost?
UPDATE
My update so far
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

def get_profit(y_true, y_pred):
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
    profit = 400*tp - 200*fn - 100*fp
    return profit

class ProfitMetric:
    def is_max_optimal(self):
        return True  # greater is better

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])
        approx = approxes[0]
        y_pred = np.rint(approx)
        y_true = np.array(target).astype(int)
        output_weight = 1  # weight is not used
        score = get_profit(y_true, y_pred)
        return score, output_weight

    def get_final_error(self, error, weight):
        return error

df = sns.load_dataset('titanic')
X = df[['survived', 'pclass', 'age', 'sibsp', 'fare']]
y = X.pop('survived')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
model = CatBoostClassifier(metric_period=50,
                           n_estimators=200,
                           eval_metric=ProfitMetric())
model.fit(X, y, eval_set=(X_test, y_test))  # this fails
The main difference from yours is:
@staticmethod
def get_profit(y_true, y_pred):
    y_pred = (expit(y_pred) > 0.5).astype(int)  # threshold probabilities to class labels
    y_true = y_true.astype(int)
    # print("ACCURACY:", (y_pred == y_true).mean())
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    profit = 400*tp - 200*fn - 100*fp
    return profit
It's not obvious from the example you linked what the predictions are, but after inspecting it, it turns out catboost treats the predictions internally as raw log-odds (hat tip @Ben). So, to properly use confusion_matrix, you need to make sure both y_true and y_pred are integer class labels. This is done via:
y_pred = (scipy.special.expit(y_pred) > 0.5).astype(int)
y_true = y_true.astype(int)
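As a quick sanity check with hypothetical raw scores:
import numpy as np
from scipy.special import expit

raw = np.array([-2.0, 0.1, 3.5])    # hypothetical raw log-odds from catboost
probs = expit(raw)                  # array([0.119..., 0.524..., 0.970...])
labels = (probs > 0.5).astype(int)  # array([0, 1, 1])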
So the full working code is:
import numpy as np
import seaborn as sns
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from scipy.special import expit

df = sns.load_dataset('titanic')
X = df[['survived', 'pclass', 'age', 'sibsp', 'fare']]
y = X.pop('survived')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

class ProfitMetric:
    @staticmethod
    def get_profit(y_true, y_pred):
        y_pred = (expit(y_pred) > 0.5).astype(int)  # log-odds -> class labels
        y_true = y_true.astype(int)
        # print("ACCURACY:", (y_pred == y_true).mean())
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        profit = 400*tp - 200*fn - 100*fp
        return profit

    def is_max_optimal(self):
        return True  # greater is better

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])
        y_true = np.array(target).astype(int)
        approx = approxes[0]
        score = self.get_profit(y_true, approx)
        return score, 1

    def get_final_error(self, error, weight):
        return error

model = CatBoostClassifier(metric_period=50,
                           n_estimators=200,
                           eval_metric=ProfitMetric())
model.fit(X, y, eval_set=(X_test, y_test))
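As for the protocol: catboost calls evaluate to obtain an (error, weight) pair and then get_final_error(error, weight) to produce the number it reports; since this metric returns a weight of 1 and passes the error through unchanged, the reported value is the raw profit on the evaluation set.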
By way of example, I implemented a very simple metric.
It counts the number of times y_pred != y_true in a multi-class classifier.
class CountErrors:
    '''Count of wrong predictions'''

    def is_max_optimal(self):
        return False  # lower is better

    def evaluate(self, approxes, target, weight):
        y_pred = np.array(approxes).argmax(0)
        y_true = np.array(target)
        return sum(y_pred != y_true), 1

    def get_final_error(self, error, weight):
        return error
You can see it used if you run this code:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

class CountErrors:
    '''Count number of wrong predictions'''

    def is_max_optimal(self):
        return False  # lower is better

    def evaluate(self, approxes, target, weight):
        y_pred = np.array(approxes).argmax(0)
        y_true = np.array(target)
        return sum(y_pred != y_true), 1

    def get_final_error(self, error, weight):
        return error

df = pd.read_csv('https://raw.githubusercontent.com/mkleinbort/resource-datasets/master/abalone/abalone.csv')
y = df['sex']
X = df.drop(columns=['sex'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=12)
model = CatBoostClassifier(metric_period=50, n_estimators=200, eval_metric=CountErrors())
model.fit(X, y, eval_set=(X_test, y_test))
Hope you can adapt this to your use-case.

LSTM/GRU TimeSeries multioutput strategy forecasts give dropped values

Currently, I'm working on a stock prediction task that I'm trying to solve with an LSTM/GRU.
Problem: after training the LSTM/GRU, the predicted values show a huge drop.
Model training process
Train and test data are generated simply with pd.shift in the series_to_supervised function below.
df['Mid'] = (df['Low'] + df['High']) / 2  # midpoint of Low and High
n_lag = 1 # Lag columns back
n_seq = 1*50 # TimeSteps to predict
seq_col = 'Mid'
seq_col_t = f'{seq_col}(t)'
split_date = '2018-01-01'
def series_to_supervised(data: pd.DataFrame,
                         seq_col: str,
                         n_in: int = 1,
                         n_out: int = 1,
                         drop_seq_col: bool = True,
                         dropna: bool = True):
    """Convert a time series into a supervised learning problem
    {input sequence, forecast sequence}.
    """
    # input sequence (t-n, ... t-1) -> positive shift
    for i in range(n_in, 0, -1):
        data[f'{seq_col}(t-{i})'] = data[seq_col].shift(i)
    # current value (t) -> no shift
    data[f'{seq_col}(t)'] = data[seq_col]
    # forecast sequence (t+1, ... t+n) -> negative shift
    for i in range(1, n_out+1):
        data[f'{seq_col}(t+{i})'] = data[seq_col].shift(-i)
    if drop_seq_col:
        data = data.drop(seq_col, axis=1)
    if dropna:
        data.dropna(inplace=True)
    return data
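# (Illustrative aside, not part of the original script: on a toy frame,
# series_to_supervised produces lagged input and shifted forecast columns.)
# toy = pd.DataFrame({'Mid': [1.0, 2.0, 3.0, 4.0, 5.0]})
# series_to_supervised(toy, seq_col='Mid', n_in=1, n_out=1)
# ->    Mid(t-1)  Mid(t)  Mid(t+1)
# 1        1.0     2.0      3.0
# 2        2.0     3.0      4.0
# 3        3.0     4.0      5.0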
df = series_to_supervised(df, seq_col=seq_col, n_in=n_lag, n_out=n_seq)
mask = df.index < split_date
train, test = df[mask], df[~mask]
X_cols = ['Mid(t-1)']
y_cols = train.filter(like='Mid(t+').columns
X_train, y_train, X_test, y_test = train[X_cols], train[y_cols], test[X_cols], test[y_cols]

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# both calls return np.ndarray
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)  # reuse the training scaler; don't refit on test data
y_train = y_train.values
y_test = y_test.values
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, GRU
from keras.optimizers import Adam, RMSprop, Adamax
from keras.callbacks import ModelCheckpoint
def get_model(X, y, n_batch):
    num_classes = y.shape[1]
    # design network
    model = Sequential()
    # stateful recurrent layer; batch_input_shape is required when stateful=True
    model.add(GRU(10, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    opt = Adam(learning_rate=0.01)
    # opt = RMSprop(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    return model
def reshape_batch(X_train, y_train, X_test, y_test, n_batch):
    # reshape training into [samples, timesteps, features]
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    # trim so the sample count divides evenly into n_batch (no remainder);
    # required for stateful=True
    train_cut = X_train.shape[0] % n_batch
    test_cut = X_test.shape[0] % n_batch
    if train_cut > 0:
        X_train = X_train[:-train_cut]
        y_train = y_train[:-train_cut]
    if test_cut > 0:
        X_test = X_test[:-test_cut]
        y_test = y_test[:-test_cut]
    return X_train, y_train, X_test, y_test
# fit an LSTM network to training data
def fit_lstm(X_train: np.ndarray,
             y_train: np.ndarray,
             n_lag: int,
             n_seq: int,
             n_batch: int,
             nb_epoch: int,
             X_test: np.ndarray = None,
             y_test: np.ndarray = None):
    model = get_model(X_train, y_train, n_batch)
    # fit network
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=None,
                        epochs=nb_epoch, batch_size=n_batch, verbose=1, shuffle=False)
    print('Predict:', model.predict(X_test, batch_size=n_batch))
    model.reset_states()
    return model, history
n_batch = 32
nb_epoch = 40
X_train, y_train, X_test, y_test = reshape_batch(X_train, y_train, X_test, y_test, n_batch)
model, history = fit_lstm(X_train, y_train, n_lag, n_seq, n_batch, nb_epoch, X_test=X_test, y_test=y_test)
What I have tried:
Different optimizers (pretty much all available in Keras)
Different recurrent network structures (GRU/LSTM)
Different learning rates
Different numbers of epochs, from 1 to 1500
Adding/removing Dropout layers with different rates (0.1-0.7)
Different numbers of LSTM/GRU units (1-100)
Different numbers of LSTM/GRU layers, via return_sequences, with more Dropout layers
Different numbers of forecast features (t+1, t+2 ... t+n), from 1 to 365
Different numbers of lag features (t-1, t-2 ... t-n), from 1 to 5
Different normalization ranges, (0, 1) and (-1, 1)
Different n_batch values: 1, 8, 16, 32
What could cause the LSTM/GRU to behave so strangely? And what else should I try to make it work properly?

sklearn GridSearchCV not using sample_weight in score function

I have data with differing weights for each sample. In my application, it is important that these weights are accounted for in estimating the model and comparing alternative models.
I'm using sklearn to estimate models and to compare alternative hyperparameter choices. But this unit test shows that GridSearchCV does not apply sample_weights to estimate scores.
Is there a way to have sklearn use sample_weight to score the models?
Unit test:
from __future__ import division

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import GridSearchCV, RepeatedKFold
def grid_cv(X_in, y_in, w_in, cv, max_features_grid, use_weighting):
    out_results = dict()
    for k in max_features_grid:
        clf = RandomForestClassifier(n_estimators=256,
                                     criterion="entropy",
                                     warm_start=False,
                                     n_jobs=-1,
                                     random_state=RANDOM_STATE,
                                     max_features=k)
        for train_ndx, test_ndx in cv.split(X=X_in, y=y_in):
            X_train = X_in[train_ndx, :]
            y_train = y_in[train_ndx]
            w_train = w_in[train_ndx]
            y_test = y_in[test_ndx]
            clf.fit(X=X_train, y=y_train, sample_weight=w_train)
            y_hat = clf.predict_proba(X=X_in[test_ndx, :])
            if use_weighting:
                w_test = w_in[test_ndx]
                w_i_sum = w_test.sum()
                score = w_i_sum / w_in.sum() * log_loss(y_true=y_test, y_pred=y_hat, sample_weight=w_test)
            else:
                score = log_loss(y_true=y_test, y_pred=y_hat)
            results = out_results.get(k, [])
            results.append(score)
            out_results.update({k: results})
    for k, v in out_results.items():
        if use_weighting:
            mean_score = sum(v)
        else:
            mean_score = np.mean(v)
        out_results.update({k: mean_score})
    best_score = min(out_results.values())
    best_param = min(out_results, key=out_results.get)
    return best_score, best_param
if __name__ == "__main__":
RANDOM_STATE = 1337
X, y = load_iris(return_X_y=True)
sample_weight = np.array([1 + 100 * (i % 25) for i in range(len(X))])
# sample_weight = np.array([1 for _ in range(len(X))])
inner_cv = RepeatedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_STATE)
outer_cv = RepeatedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_STATE)
rfc = RandomForestClassifier(n_estimators=256,
criterion="entropy",
warm_start=False,
n_jobs=-1,
random_state=RANDOM_STATE)
search_params = {"max_features": [1, 2, 3, 4]}
fit_params = {"sample_weight": sample_weight}
my_scorer = make_scorer(log_loss,
greater_is_better=False,
needs_proba=True,
needs_threshold=False)
grid_clf = GridSearchCV(estimator=rfc,
scoring=my_scorer,
cv=inner_cv,
param_grid=search_params,
refit=True,
return_train_score=False,
iid=False) # in this usage, the results are the same for `iid=True` and `iid=False`
grid_clf.fit(X, y, **fit_params)
print("This is the best out-of-sample score using GridSearchCV: %.6f." % -grid_clf.best_score_)
msg = """This is the best out-of-sample score %s weighting using grid_cv: %.6f."""
score_with_weights, param_with_weights = grid_cv(X_in=X,
y_in=y,
w_in=sample_weight,
cv=inner_cv,
max_features_grid=search_params.get(
"max_features"),
use_weighting=True)
print(msg % ("WITH", score_with_weights))
score_without_weights, param_without_weights = grid_cv(X_in=X,
y_in=y,
w_in=sample_weight,
cv=inner_cv,
max_features_grid=search_params.get(
"max_features"),
use_weighting=False)
print(msg % ("WITHOUT", score_without_weights))
Which produces output:
This is the best out-of-sample score using GridSearchCV: 0.135692.
This is the best out-of-sample score WITH weighting using grid_cv: 0.099367.
This is the best out-of-sample score WITHOUT weighting using grid_cv: 0.135692.
Explanation: Since manually computing the loss without weighting produces the same scoring as GridSearchCV, we know that the sample weights are not being used.
GridSearchCV takes a scoring argument, which can be a callable; the sklearn docs describe how to change the scoring function and how to pass your own scoring function.
EDIT: fit_params is passed only to the fit functions, not to the score functions. If there are parameters that are supposed to reach the scorer, they should be passed to make_scorer. But that still doesn't solve the issue here, since it would mean the whole sample_weight vector gets passed to log_loss, whereas only the part corresponding to y_test at the time of calculating the loss should be passed.
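To make that concrete: extra keyword arguments to make_scorer are forwarded unchanged to the metric on every call, so something like
my_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True, sample_weight=sample_weight)
would hand the full weight vector to every fold, rather than the slice matching that fold's test indices.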
sklearn does NOT support such a thing out of the box, but you can hack your way through using a pandas.DataFrame. The good news is that sklearn understands a DataFrame and keeps it as one, which means you can exploit the index of a DataFrame, as in the code here:
# more code
X, y = load_iris(return_X_y=True)
index = ['r%d' % x for x in range(len(y))]
y_frame = pd.DataFrame(y, index=index)
sample_weight = np.array([1 + 100 * (i % 25) for i in range(len(X))])
sample_weight_frame = pd.DataFrame(sample_weight, index=index)
# more code

def score_f(y_true, y_pred, sample_weight):
    return log_loss(y_true.values, y_pred,
                    sample_weight=sample_weight.loc[y_true.index.values].values.reshape(-1),
                    normalize=True)

score_params = {"sample_weight": sample_weight_frame}
my_scorer = make_scorer(score_f,
                        greater_is_better=False,
                        needs_proba=True,
                        needs_threshold=False,
                        **score_params)
grid_clf = GridSearchCV(estimator=rfc,
                        scoring=my_scorer,
                        cv=inner_cv,
                        param_grid=search_params,
                        refit=True,
                        return_train_score=False,
                        iid=False)  # in this usage, the results are the same for `iid=True` and `iid=False`
grid_clf.fit(X, y_frame)
# more code
As you see, the score_f uses the index of y_true to find which parts of sample_weight to use. For the sake of completeness, here's the whole code:
from __future__ import division

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import GridSearchCV, RepeatedKFold

def grid_cv(X_in, y_in, w_in, cv, max_features_grid, use_weighting):
    out_results = dict()
    for k in max_features_grid:
        clf = RandomForestClassifier(n_estimators=256,
                                     criterion="entropy",
                                     warm_start=False,
                                     n_jobs=1,
                                     random_state=RANDOM_STATE,
                                     max_features=k)
        for train_ndx, test_ndx in cv.split(X=X_in, y=y_in):
            X_train = X_in[train_ndx, :]
            y_train = y_in[train_ndx]
            w_train = w_in[train_ndx]
            y_test = y_in[test_ndx]
            clf.fit(X=X_train, y=y_train, sample_weight=w_train)
            y_hat = clf.predict_proba(X=X_in[test_ndx, :])
            if use_weighting:
                w_test = w_in[test_ndx]
                w_i_sum = w_test.sum()
                score = w_i_sum / w_in.sum() * log_loss(y_true=y_test, y_pred=y_hat, sample_weight=w_test)
            else:
                score = log_loss(y_true=y_test, y_pred=y_hat)
            results = out_results.get(k, [])
            results.append(score)
            out_results.update({k: results})
    for k, v in out_results.items():
        if use_weighting:
            mean_score = sum(v)
        else:
            mean_score = np.mean(v)
        out_results.update({k: mean_score})
    best_score = min(out_results.values())
    best_param = min(out_results, key=out_results.get)
    return best_score, best_param

# if __name__ == "__main__":
if True:
    RANDOM_STATE = 1337
    X, y = load_iris(return_X_y=True)
    index = ['r%d' % x for x in range(len(y))]
    y_frame = pd.DataFrame(y, index=index)
    sample_weight = np.array([1 + 100 * (i % 25) for i in range(len(X))])
    sample_weight_frame = pd.DataFrame(sample_weight, index=index)
    # sample_weight = np.array([1 for _ in range(len(X))])
    inner_cv = RepeatedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_STATE)
    outer_cv = RepeatedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_STATE)
    rfc = RandomForestClassifier(n_estimators=256,
                                 criterion="entropy",
                                 warm_start=False,
                                 n_jobs=1,
                                 random_state=RANDOM_STATE)
    search_params = {"max_features": [1, 2, 3, 4]}

    def score_f(y_true, y_pred, sample_weight):
        # y_true is a DataFrame slice, so its index tells us which rows of
        # sample_weight_frame belong to this fold
        return log_loss(y_true.values, y_pred,
                        sample_weight=sample_weight.loc[y_true.index.values].values.reshape(-1),
                        normalize=True)

    score_params = {"sample_weight": sample_weight_frame}
    my_scorer = make_scorer(score_f,
                            greater_is_better=False,
                            needs_proba=True,
                            needs_threshold=False,
                            **score_params)
    grid_clf = GridSearchCV(estimator=rfc,
                            scoring=my_scorer,
                            cv=inner_cv,
                            param_grid=search_params,
                            refit=True,
                            return_train_score=False,
                            iid=False)  # in this usage, the results are the same for `iid=True` and `iid=False`
    grid_clf.fit(X, y_frame)
    print("This is the best out-of-sample score using GridSearchCV: %.6f." % -grid_clf.best_score_)
    msg = """This is the best out-of-sample score %s weighting using grid_cv: %.6f."""
    score_with_weights, param_with_weights = grid_cv(X_in=X,
                                                     y_in=y,
                                                     w_in=sample_weight,
                                                     cv=inner_cv,
                                                     max_features_grid=search_params.get("max_features"),
                                                     use_weighting=True)
    print(msg % ("WITH", score_with_weights))
    score_without_weights, param_without_weights = grid_cv(X_in=X,
                                                           y_in=y,
                                                           w_in=sample_weight,
                                                           cv=inner_cv,
                                                           max_features_grid=search_params.get("max_features"),
                                                           use_weighting=False)
    print(msg % ("WITHOUT", score_without_weights))
The output of the code is then:
This is the best out-of-sample score using GridSearchCV: 0.095439.
This is the best out-of-sample score WITH weighting using grid_cv: 0.099367.
This is the best out-of-sample score WITHOUT weighting using grid_cv: 0.135692.
EDIT 2: as the comment below says:
the difference in my score and the sklearn score using this solution originates in the way that I was computing a weighted average of scores. If you omit the weighted-average portion of the code, the two outputs match to machine precision.
Currently in sklearn, GridSearchCV (and any class inheriting from BaseSearchCV) only allows sample_weight in **fit_params and does not use it in scoring, which is a problem, because CV picks the "best estimator" via an unweighted score. Note that when you call grid.fit(X, y, sample_weight=w), the sample weights are used only in fitting, not in scoring.
There are two ways to solve this problem:
Handy method: add the weights as the first column of X, then write a customized scoring function and a transformer that strips them off inside your model, as sketched below.
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# customized scorer: the weights ride along as column 0 of X
def weight_remover_scorer(estimator, X, y):
    y_pred = estimator.predict(X)
    w = X[:, 0]
    return your_scorer(y, y_pred, sample_weight=w)

# customized transformer: drop the weight column before the model sees X
class WeightRemover(TransformerMixin, BaseEstimator):
    def fit(self, X, y=None, **fit_params):
        return self

    def transform(self, X, y=None, **fit_params):
        return X[:, 1:]

# in your main function
# (model, params_grid, train_w, test_w and your_scorer come from your own code)
if __name__ == '__main__':
    pipe = Pipeline([('remove_weight', WeightRemover()), ('model', model)])
    params_grid = {'model__' + k: v for k, v in params_grid.items()}
    X = np.c_[train_w, X]
    X_test = np.c_[test_w, X_test]
    grid = GridSearchCV(pipe, params_grid, cv=5, scoring=weight_remover_scorer)
    grid.fit(X, y)
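Note the convention the two pieces share: the scorer reads the weights from column 0 of the raw X it receives, while the WeightRemover step strips that same column before the downstream estimator ever sees it, so the weights influence scoring but never act as a feature.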
Add the feature to the sklearn classes themselves (and wait for a new release): add a sample_weight parameter to BaseSearchCV (default None), indexing it per fold the same way fit_params is handled in fit_params = _check_fit_params(X, fit_params).
Just pointing out that there is an ongoing effort to support this important feature: https://github.com/scikit-learn/scikit-learn/pull/13432
But it seems that, because of backward-compatibility concerns and the desire to tackle the more general problem of passing arbitrary sample-related information, it is taking quite a while. The latest attempt seems to be: https://github.com/scikit-learn/scikit-learn/pull/16079
Here is a good review of the issue: http://deaktator.github.io/2019/03/10/the-error-in-the-comparator/

Prediction with LSTM using Keras

I am predicting Y based on past values of X. Our formatted CSV dataset has three columns (time_stamp, X and Y, where Y is the actual value), and a sample looks like this:
time,X,Y
0.000561,0,10
0.000584,0,10
0.040411,5,10
0.040437,10,10
0.041638,12,10
0.041668,14,10
0.041895,15,10
0.041906,19,10
... ... ...
Before training the prediction model, the plots of X and Y look as follows (plots not reproduced here).
Here is how I approached the problem with an LSTM recurrent neural network in Python with Keras.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

np.random.seed(7)

# Load data
df = pd.read_csv('test32_C_data.csv')
n_features = 100

def create_sequences(data, window=15, step=1, prediction_distance=15):
    x = []
    y = []
    for i in range(0, len(data) - window - prediction_distance, step):
        x.append(data[i:i + window])
        y.append(data[i + window + prediction_distance][1])
    x, y = np.asarray(x), np.asarray(y)
    return x, y

# Scaling prior to splitting
scaler = MinMaxScaler(feature_range=(0.01, 0.99))
scaled_data = scaler.fit_transform(df.loc[:, ["X", "Y"]].values)

# Build sequences
x_sequence, y_sequence = create_sequences(scaled_data)

# Create test/train split
test_len = int(len(x_sequence) * 0.90)
valid_len = int(len(x_sequence) * 0.90)
train_end = len(x_sequence) - (test_len + valid_len)
x_train, y_train = x_sequence[:train_end], y_sequence[:train_end]
x_valid, y_valid = x_sequence[train_end:train_end + valid_len], y_sequence[train_end:train_end + valid_len]
x_test, y_test = x_sequence[train_end + valid_len:], y_sequence[train_end + valid_len:]

# Initialising the RNN
model = Sequential()
# Adding the input layer and the LSTM layer
model.add(LSTM(15, input_shape=(15, 2)))
# Adding the output layer
model.add(Dense(1))
# Compiling the RNN
model.compile(loss='mse', optimizer='rmsprop')
# Fitting the RNN to the training set
model.fit(x_train, y_train, epochs=5)

# Getting the predicted values
y_pred = model.predict(x_test)
# y_pred = scaler.inverse_transform(y_pred)

plot_colors = ['#332288', '#3cb44b']
# Plot the results
pd.DataFrame({"Actual": y_test, "Predicted": np.squeeze(y_pred)}).plot(color=plot_colors)
plt.xlabel('Time [Index]')
plt.ylabel('Values')
Finally, when I run the code, the model seems to capture the pattern of the signal well (plot not reproduced here).
However, one problem with this output is the range of Y. As the first two plots show, the range should be 0-400. To fix that, I tried to invert the scaling with y_pred = scaler.inverse_transform(y_pred), but this throws an error: ValueError: non-broadcastable output operand with shape (7625,1) doesn't match the broadcast shape (7625,2). How can we solve this broadcast shape error?
Basically, the scaler has remembered that it was fed 2 features (columns), so it expects 2 features when inverting the transformation.
There are two options here.
1) Make two different scalers, scaler_x and scaler_y, like this:
# Scaling prior to splitting
scaler_x = MinMaxScaler(feature_range=(0.01, 0.99))
scaler_y = MinMaxScaler(feature_range=(0.01, 0.99))
scaled_x = scaler_x.fit_transform(df.loc[:, "X"].values.reshape(-1, 1))
scaled_y = scaler_y.fit_transform(df.loc[:, "Y"].values.reshape(-1, 1))
scaled_data = np.column_stack((scaled_x, scaled_y))
Then you will be able to do:
y_pred = scaler_y.inverse_transform(y_pred)
2) You fake the X column in your output like this:
y_pred_reshaped = np.zeros((len(y_pred), 2))
y_pred_reshaped[:, 1] = y_pred.ravel()  # flatten the (n, 1) predictions to fill the column
y_pred = scaler.inverse_transform(y_pred_reshaped)[:, 1]
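This works because MinMaxScaler inverts each column independently, so the zero-filled first column is just a placeholder to match the expected shape and has no effect on the recovered Y values.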
Does that help?
EDIT
Here is the full code, as requested:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

np.random.seed(7)

# Load data
# df = pd.read_csv('test32_C_data.csv')
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=['time', 'X', 'Y'])
n_features = 100

def create_sequences(data, window=15, step=1, prediction_distance=15):
    x = []
    y = []
    for i in range(0, len(data) - window - prediction_distance, step):
        x.append(data[i:i + window])
        y.append(data[i + window + prediction_distance][1])
    x, y = np.asarray(x), np.asarray(y)
    return x, y

# Scaling prior to splitting
scaler_x = MinMaxScaler(feature_range=(0.01, 0.99))
scaler_y = MinMaxScaler(feature_range=(0.01, 0.99))
scaled_x = scaler_x.fit_transform(df.loc[:, "X"].values.reshape(-1, 1))
scaled_y = scaler_y.fit_transform(df.loc[:, "Y"].values.reshape(-1, 1))
scaled_data = np.column_stack((scaled_x, scaled_y))

# Build sequences
x_sequence, y_sequence = create_sequences(scaled_data)
test_len = int(len(x_sequence) * 0.90)
valid_len = int(len(x_sequence) * 0.90)
train_end = len(x_sequence) - (test_len + valid_len)
x_train, y_train = x_sequence[:train_end], y_sequence[:train_end]
x_valid, y_valid = x_sequence[train_end:train_end + valid_len], y_sequence[train_end:train_end + valid_len]
x_test, y_test = x_sequence[train_end + valid_len:], y_sequence[train_end + valid_len:]

# Initialising the RNN
model = Sequential()
# Adding the input layer and the LSTM layer
model.add(LSTM(15, input_shape=(15, 2)))
# Adding the output layer
model.add(Dense(1))
# Compiling the RNN
model.compile(loss='mse', optimizer='rmsprop')
# Fitting the RNN to the training set
model.fit(x_train, y_train, epochs=5)

# Getting the predicted values and inverting the Y scaling
y_pred = model.predict(x_test)
y_pred = scaler_y.inverse_transform(y_pred)

how to implement custom metric in keras?

I get this error :
sum() got an unexpected keyword argument 'out'
when I run this code:
import pandas as pd, numpy as np
import keras
from keras.layers.core import Dense, Activation
from keras.models import Sequential

def AUC(y_true, y_pred):
    not_y_pred = np.logical_not(y_pred)
    y_int1 = y_true * y_pred
    y_int0 = np.logical_not(y_true) * not_y_pred
    TP = np.sum(y_pred * y_int1)
    FP = np.sum(y_pred) - TP
    TN = np.sum(not_y_pred * y_int0)
    FN = np.sum(not_y_pred) - TN
    TPR = np.float(TP) / (TP + FN)
    FPR = np.float(FP) / (FP + TN)
    return (1 + TPR - FPR) / 2

# Input datasets
train_df = pd.DataFrame(np.random.rand(91, 1000))
train_df.iloc[:, -2] = (train_df.iloc[:, -2] > 0.8) * 1

model = Sequential()
model.add(Dense(output_dim=60, input_dim=91, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.add(Dense(output_dim=1, input_dim=60, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[AUC])

train_df.iloc[:, -1] = np.ones(train_df.shape[0])  # bias
X = train_df.iloc[:, :-1].values
Y = train_df.iloc[:, -1].values
print(X.shape, Y.shape)
model.fit(X, Y, batch_size=50, show_accuracy=False, verbose=1)
Is it possible to implement a custom metric aside from doing a loop on batches and editing the source code?
Here I'm answering the OP's topic question rather than their exact problem. I'm doing this because the question shows up at the top when I google the topic.
You can implement a custom metric in two ways.
As mentioned in the Keras docs:
import keras.backend as K

def mean_pred(y_true, y_pred):
    return K.mean(y_pred)

model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy', mean_pred])
But here you have to remember, as mentioned in Marcin Możejko's answer, that y_true and y_pred are tensors. So in order to calculate the metric correctly you need to use keras.backend functionality. Please look at this SO question for details: How to calculate F1 Macro in Keras?
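For instance, a minimal backend-only sketch (my example, not from the docs) of a thresholded true-positive count, assuming sigmoid outputs in [0, 1]:
import keras.backend as K

def true_positives(y_true, y_pred):
    # round sigmoid outputs to {0, 1}, then count positions where both are 1
    y_pred_bin = K.round(y_pred)
    return K.sum(y_true * y_pred_bin)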
Or you can implement it in a hacky way, as mentioned in a Keras GH issue. For that you need to use the callbacks argument of model.fit.
import keras
import numpy as np
from keras.optimizers import SGD
from sklearn.metrics import roc_auc_score

model = keras.models.Sequential()
# ...
sgd = SGD(lr=0.001, momentum=0.9)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

class Metrics(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, batch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))
        y_val = np.argmax(y_val, axis=1)
        y_predict = np.argmax(y_predict, axis=1)
        self._data.append({
            'val_rocauc': roc_auc_score(y_val, y_predict),
        })
        return

    def get_data(self):
        return self._data

metrics = Metrics()
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[metrics])
metrics.get_data()
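Note that this callback approach computes the metric once per epoch on the full validation set, rather than accumulating per-batch values the way compiled metrics do, which is exactly why it is a convenient escape hatch for metrics like ROC AUC that don't decompose over batches.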
The problem is that y_pred and y_true are not NumPy arrays but either Theano or TensorFlow tensors. That's why you got this error.
You can define your custom metrics, but you have to remember that their arguments are those tensors, not NumPy arrays.
You can pass model.predict() output to your AUC metric function. (This will iterate over batches, so you may be better off using model.predict_on_batch().) Assuming you have something like a softmax layer as output (something that outputs probabilities), you can use that together with sklearn.metrics to get the AUC.
from sklearn.metrics import roc_curve, auc

# from here
def sklearnAUC(test_labels, test_prediction):
    n_classes = 2
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        # (actual labels, predicted probabilities)
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    return round(roc_auc[0], 3), round(roc_auc[1], 3)
Now compute your metric:
# gives a numpy array like [[0.3, 0.7], [0.2, 0.8], ...]
Y_pred = model.predict_on_batch(X_test)
# Y_test looks something like [[0, 1], [1, 0], ...]
# auc1 and auc2 should be equal
auc1, auc2 = sklearnAUC(Y_test, Y_pred)
