How to get "is_active_trail" in pgmpy working? - python

I'm unable to get the code on this webpage to run. I don't see the function "is_active_trail" in pgmpy. Has it been changed or removed? How best to get the code to work?
from pgmpy.models import BayesianModel
model = BayesianModel()
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')
model.add_edge('accident', 'traffic_jam')
model.nodes()
from pgmpy.factors.discrete import TabularCPD
cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])
cpd_traffic_jam = TabularCPD(
    'traffic_jam', 2,
    [[0.9, 0.6, 0.7, 0.1],
     [0.1, 0.4, 0.3, 0.9]],
    evidence=['rain', 'accident'],
    evidence_card=[2, 2])
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()
model.add_node('long_queues')
model.add_edge('traffic_jam', 'long_queues')
cpd_long_queues = TabularCPD('long_queues', 2,
                             [[0.9, 0.2],
                              [0.1, 0.8]],
                             evidence=['traffic_jam'],
                             evidence_card=[2])
model.add_cpds(cpd_long_queues)
model.add_nodes_from(['getting_up_late',
                      'late_for_school'])
model.add_edges_from(
    [('getting_up_late', 'late_for_school'),
     ('traffic_jam', 'late_for_school')])
cpd_getting_up_late = TabularCPD('getting_up_late', 2,
                                 [[0.6], [0.4]])
cpd_late_for_school = TabularCPD(
    'late_for_school', 2,
    [[0.9, 0.45, 0.8, 0.1],
     [0.1, 0.55, 0.2, 0.9]],
    evidence=['getting_up_late', 'traffic_jam'],
    evidence_card=[2, 2])
model.add_cpds(cpd_getting_up_late, cpd_late_for_school)
model.get_cpds()
model.check_model()
model.is_active_trail('accident', 'rain')
error:
/usr/local/lib/python3.7/dist-packages/pgmpy/models/BayesianModel.py:10: FutureWarning: BayesianModel has been renamed to BayesianNetwork. Please use BayesianNetwork class, BayesianModel will be removed in future.
FutureWarning,
AttributeError Traceback (most recent call last)
in ()
46 model.check_model()
47
---> 48 model.is_active_trail('accident', 'rain')
AttributeError: 'BayesianModel' object has no attribute 'is_active_trail'
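In recent pgmpy releases the method was removed along with the BayesianModel-to-BayesianNetwork rename. A minimal sketch of the replacement, assuming a current pgmpy where DAG.is_dconnected(start, end, observed=None) provides the old is_active_trail check:

from pgmpy.models import BayesianNetwork

model = BayesianNetwork([('rain', 'traffic_jam'), ('accident', 'traffic_jam')])
# 'accident' and 'rain' meet at the collider 'traffic_jam', so with nothing
# observed there is no active trail and this prints False
print(model.is_dconnected('accident', 'rain'))
# observing the collider opens the path, so this prints True
print(model.is_dconnected('accident', 'rain', observed=['traffic_jam']))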

Related

TabNetRegressor does not recognize parameters in Python

I can't understand why, but when using TabNetRegressor it does not recognize the parameters created with Optuna in any way. Using TabNetClassifier gives me no problem, but if I use TabNetRegressor it tells me there is an unexpected argument:
clf = TabNetRegressor(**final_params)  # TabNetRegressor()
clf.fit(
    X_train=X_train.values, y_train=y_train.values,
    eval_set=[(X_test.values, y_test.values)],
    patience=TabNet_params['patience'], max_epochs=epochs,
    eval_metric=['rmse']
)
res.append(roc_auc_score(y_test.values, clf.predict(X_test.values)))
File line 504, in main_pipeline2
clf = TabNetRegressor(**final_params) # TabNetRegressor()
TypeError: __init__() got an unexpected keyword argument 'n_d'
This is how I create the hyperparameters, using TabNetClassifier because the regressor gives me problems:
def Objective(trial):
    mask_type = trial.suggest_categorical("mask_type", ["entmax", "sparsemax"])
    n_da = trial.suggest_int("n_da", 56, 64, step=4)
    n_steps = trial.suggest_int("n_steps", 1, 3, step=1)
    gamma = trial.suggest_float("gamma", 1., 1.4, step=0.2)
    n_shared = trial.suggest_int("n_shared", 1, 3)
    lambda_sparse = trial.suggest_float("lambda_sparse", 1e-6, 1e-3, log=True)
    tabnet_params = dict(n_d=n_da, n_a=n_da, n_steps=n_steps, gamma=gamma,
                         lambda_sparse=lambda_sparse, optimizer_fn=torch.optim.Adam,
                         optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                         mask_type=mask_type, n_shared=n_shared,
                         scheduler_params=dict(mode="min",
                                               patience=trial.suggest_int("patienceScheduler", low=3, high=10),
                                               # scheduler patience should be lower than early stopping patience
                                               min_lr=1e-5,
                                               factor=0.5),
                         scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                         verbose=0)  # early stopping
    regressor = TabNetClassifier(**tabnet_params)
    regressor.fit(X_train=X_train.values, y_train=y_train.values,
                  eval_set=[(X_test.values, y_test.values)],
                  patience=trial.suggest_int("patience", low=15, high=30),
                  max_epochs=trial.suggest_int('epochs', 1, 100),
                  eval_metric=['rmse'])
    avg = roc_auc_score(y_test.values, regressor.predict(X_test.values))
    return avg
study = optuna.create_study(direction="maximize", study_name='TabNet optimization')
# TabNet_params = {'mask_type': 'entmax', 'n_da': 56, 'n_steps': 1, 'gamma': 1.2, 'n_shared': 1, 'lambda_sparse': 0.00018593172980376437, 'patienceScheduler': 8, 'patience': 17, 'epochs': 13}
TabNet_params = {'mask_type': 'entmax', 'n_da': 56, 'n_steps': 3, 'gamma': 1.4, 'n_shared': 2,
                 'lambda_sparse': 7.628773104483722e-05, 'patienceScheduler': 10, 'patience': 29, 'epochs': 45}
final_params = dict(n_d=TabNet_params['n_da'], n_a=TabNet_params['n_da'], n_steps=TabNet_params['n_steps'],
                    gamma=TabNet_params['gamma'],
                    lambda_sparse=TabNet_params['lambda_sparse'], optimizer_fn=torch.optim.Adam,
                    optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                    mask_type=TabNet_params['mask_type'], n_shared=TabNet_params['n_shared'],
                    scheduler_params=dict(mode="min",
                                          patience=TabNet_params['patienceScheduler'],
                                          min_lr=1e-5,
                                          factor=0.5),
                    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                    verbose=0)
epochs = TabNet_params['epochs']
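A first debugging step for the unexpected-kwarg error is to confirm which TabNetRegressor is actually imported and what its constructor accepts; a minimal check, assuming pytorch-tabnet's usual module layout (pytorch_tabnet.tab_model):

import inspect
from pytorch_tabnet.tab_model import TabNetRegressor

# If 'n_d' does not appear here, the installed pytorch-tabnet is outdated,
# or a different TabNetRegressor is shadowing it (check __module__ below)
print(inspect.signature(TabNetRegressor))
print(TabNetRegressor.__module__)

If n_d is missing from that signature, upgrading pytorch-tabnet (or fixing the import) is the likely fix.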

SHAP waterfall plot errors with lightgbm.LGBMClassifier

I used the following code to draw a waterfall plot.
explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer.shap_values(P)
# visualize the first prediction's explanation
shap.waterfall_plot(explainer.expected_value[0], shap_values[0])
It shows this error
AttributeError: 'numpy.float64' object has no attribute 'base_values'
Then I tried this method, following a similar Q&A on GitHub: https://github.com/slundberg/shap/issues/2255
explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer(P)
# NOW CHANGED: SET UP THE WORKAROUND
class helper_object():
    """
    This wraps the shap object.
    It takes as input i, which indicates the index of the observation to be explained.
    """
    def __init__(self, i):
        self.base_values = shap_values.base_values[i][0]
        self.data = P.loc[i]
        self.feature_names = P.columns.to_list()
        self.values = shap_values.values[i]

# visualize the sixth prediction's explanation using the workaround
shap.waterfall_plot(helper_object(5), len(shap_values[0]))
It shows this error
AttributeError: 'list' object has no attribute 'base_values'
I changed to explainer(P) instead of explainer.shap_values(P)
It shows this error
AttributeError: 'helper_object' object has no attribute 'display_data'
I printed the values, and it showed me an array of values instead of a single constant.
print(shap_values[0].base_values)
print(type(shap_values.base_values[0]))
print(shap_values[0])
[ 4.03719405 -4.03719405]
<class 'numpy.ndarray'>
.values =
array([[-5.19398412e-02, 5.19398412e-02],
[-1.52522416e+00, 1.52522416e+00],
[-7.06765115e-01, 7.06765115e-01],
[-2.52875346e-01, 2.52875346e-01],
[-1.54701093e-01, 1.54701093e-01],
[ 6.35169405e-03, -6.35169405e-03],
[ 6.57487803e-03, -6.57487803e-03],
[ 3.70178479e-02, -3.70178479e-02],
[ 0.00000000e+00, 0.00000000e+00],
[-6.99857166e-02, 6.99857166e-02],
[-1.38675779e-01, 1.38675779e-01],
[-6.09464170e-02, 6.09464170e-02],
[-1.94668294e-03, 1.94668294e-03],
[-1.15005190e-03, 1.15005190e-03],
[-1.11472815e+00, 1.11472815e+00],
[-1.05449992e-01, 1.05449992e-01],
[-1.82219843e-01, 1.82219843e-01],
[ 1.58137725e-02, -1.58137725e-02],
[-3.98713235e-01, 3.98713235e-01],
[-9.45830700e-01, 9.45830700e-01],
[-6.23312829e-02, 6.23312829e-02],
[ 6.32792510e-02, -6.32792510e-02],
[-6.01518308e-02, 6.01518308e-02],
[ 1.87929746e-04, -1.87929746e-04],
[-1.58341844e-03, 1.58341844e-03],
[-5.08391166e-03, 5.08391166e-03],
[ 0.00000000e+00, 0.00000000e+00],
[ 1.05478554e-02, -1.05478554e-02],
[ 2.37974651e-02, -2.37974651e-02],
[ 5.65731935e-03, -5.65731935e-03],
[ 2.05245700e-03, -2.05245700e-03]])
.base_values =
array([ 4.03719405, -4.03719405])
.data =
array([ 1.07000001e-01, 7.87079980e+03, 4.26199989e+01, 9.39999998e-01,
1.48000002e+01, 2.19000006e+00, 3.10000002e-01, 7.98212012e+03,
2.61899994e+02, 8.40000000e+01, 2.00000000e+01, 3.74000001e+00,
-1.00000000e+00, -1.00000000e+00, 4.00000000e+00, 3.00000000e+00,
1.00000000e+00, -1.87000008e+01, 8.41299988e+02, 1.36000004e+01,
-8.52000046e+00, 6.99999809e-01, -7.65000000e+02, 5.40000010e+00,
0.00000000e+00, -1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
What should I do to generate a waterfall plot for a single observation?
UPDATE
A = [1,2,3,4,5,6,7,8,9,10]
B = [21,5,7,8,15,36,20,18,15,13]
W = [1,0,0,0,0,0,1,1,0,0]
C = ["Adult", "Child", "Child", "Child", "Child", "Adult", "Adult", "Adult", "Child", "Child"]
Number = pd.Series(A, name='Number')
Age = pd.Series(B, name='Age')
Car = pd.Series(W, name='Car')
User_ages = pd.concat([Number, Age], axis=1)
User_cars = pd.concat([User_ages, Car], axis=1)
group = pd.Series(C, name='group')
data = pd.concat([User_cars, group], axis=1)
features = ['Age', 'Car']
X = data[features]
y = data['group']
D = [11,12,13,14,15,16,17,18,19,20]
E = [22,11,17,21,25,31,30,8,5,3]
F = [1,0,0,0,1,0,1,0,0,0]
Number = pd.Series(D, name='Number')
Age = pd.Series(E, name='Age')
Car = pd.Series(F, name='Car')
data1 = pd.concat([Number, Age], axis=1)
data2 = pd.concat([data1, Car], axis=1)
P = data2[features]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=22, test_size=0.1)
params = {
    'num_leaves': [20, 30, 40, 50, 60, 70],
    'learning_rate': [0.05, 0.01],
    'n_estimators': [100, 300, 500],
    'subsample': [0.95],
    'colsample_bytree': [0.95],
    'n_jobs': [7],
    'random_state': [22]
}
gcv = GridSearchCV(LGBMClassifier(), params, cv=2, verbose=1, error_score='raise').fit(X_train, y_train)
gbm = gcv.best_estimator_
gbm_pred = gbm.predict(X_test)
explainer = shap.TreeExplainer(gbm, data=None)
shap_values = explainer.shap_values(P)
# visualize the first prediction's explanation
shap.waterfall_plot(explainer.expected_value[0], shap_values[0])
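For reference, newer shap versions expect a shap.Explanation object rather than raw arrays, which avoids the helper-object workaround entirely. A minimal sketch, assuming shap >= 0.40 and that, as the printed .values above suggests, the explanation carries one column per class for this binary LGBMClassifier:

explainer = shap.TreeExplainer(gbm)
exp = explainer(P)  # returns a shap.Explanation
# select observation 0 and class 1; waterfall reads base_values and data itself
shap.plots.waterfall(exp[0, :, 1])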

'module' object is not callable

I am trying to run this part of the code and I am getting a TypeError:
data_dir = '../Cat_Dog_data/train'
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = datasets.ImageFolder(data_dir, transform=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
# Run this to test your data loader
images, labels = next(iter(dataloader))
helper.imshow(images[0], normalize=False)
Error:
TypeError Traceback (most recent call last)
<ipython-input-21-d9e81831faed> in <module>()
1 # Run this to test your data loader
----> 2 images, labels = next(iter(dataloader))
3 helper.imshow(images[0], normalize=False)
.
.
.
TypeError: 'module' object is not callable
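Although the traceback surfaces at next(iter(dataloader)), the likely culprit is earlier: ImageFolder is given the torchvision transforms module itself (transform=transforms) instead of the composed pipeline, so PyTorch later tries to call the module. A minimal fix, keeping the variable names above:

# pass the Compose object, not the torchvision.transforms module
dataset = datasets.ImageFolder(data_dir, transform=transform)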

LightGBMError "Check failed: num_data > 0" with Sklearn RandomizedSearchCV

I'm tuning LGBMRegressor parameters with Sklearn RandomizedSearchCV. I got an error with the message below.
error:
LightGBMError: b'Check failed: num_data > 0 at /src/LightGBM/src/io/dataset.cpp, line 27 .\n'
I cannot tell why, or which specific parameters caused this error. Was any entry of param_dist below unsuitable for train_x.shape (1630, 1565)?
Any hints or solutions would be appreciated. Thank you.
LightGBM version: '2.0.12'
The function that caused this error:
def get_lgbm(train_x, train_y, val_x, val_y):
    lgbm = lgb.LGBMRegressor(
        objective='regression',
        device='gpu',
        n_jobs=1,
    )
    param_dist = {'boosting_type': ['gbdt', 'dart', 'rf'],
                  'num_leaves': sp.stats.randint(2, 1001),
                  'subsample_for_bin': sp.stats.randint(10, 1001),
                  'min_split_gain': sp.stats.uniform(0, 5.0),
                  'min_child_weight': sp.stats.uniform(1e-6, 1e-2),
                  'reg_alpha': sp.stats.uniform(0, 1e-2),
                  'reg_lambda': sp.stats.uniform(0, 1e-2),
                  'tree_learner': ['data', 'feature', 'serial', 'voting'],
                  'application': ['regression_l1', 'regression_l2', 'regression'],
                  'bagging_freq': sp.stats.randint(1, 11),
                  'bagging_fraction': sp.stats.uniform(1e-3, 0.99),
                  'feature_fraction': sp.stats.uniform(1e-3, 0.99),
                  'learning_rate': sp.stats.uniform(1e-6, 0.99),
                  'max_depth': sp.stats.randint(1, 501),
                  'n_estimators': sp.stats.randint(100, 20001),
                  'gpu_use_dp': [True, False],
                  }
    rscv = RandomizedSearchCV(
        estimator=lgbm,
        param_distributions=param_dist,
        cv=3,
        n_iter=3000,
        n_jobs=4,
        verbose=1,
        refit=True,
        fit_params={'eval_set': (val_x, val_y.ravel()),
                    'early_stopping_rounds': 1,
                    'eval_metric': ['l2', 'l1'],
                    'verbose': False,
                    },
    )
    # This line throws the error
    rscv = rscv.fit(train_x,
                    train_y.ravel(),
                    )
    return rscv.best_estimator_
The full stack trace is too long to include; here is the part in the LightGBM source.
...........................................................................
/opt/conda/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self=LGBMRegressor(application='regression_l1',
..., subsample_freq=1,
tree_learner='voting'), X=memmap([[-0.80256822, 1.63302752, -0.55377441, ...12.251635 ,
12.27866017, 1. ]]), y=array([-1.81712472, 0. , -1.7366136 , 0... , 0.36258158, -0.13661202, 0.2919708 ]), sample_weight=None, init_score=None, eval_set=(memmap([[-1.16531701, -0.97454256, -1.36807818, ...11.55465037,
11.55160629, 2. ]]), array([ 0.58517555, -1.01419878, -0.05787037, -0...64139942, 1.04166667, 0. , -0.11668611])), eval_names=None, eval_sample_weight=None, eval_init_score=None, eval_metric=['l2', 'l1'], early_stopping_rounds=1, verbose=False, feature_name='auto', categorical_feature='auto', callbacks=None)
613 eval_init_score=eval_init_score,
614 eval_metric=eval_metric,
615 early_stopping_rounds=early_stopping_rounds,
616 verbose=verbose, feature_name=feature_name,
617 categorical_feature=categorical_feature,
--> 618 callbacks=callbacks)
callbacks = None
619 return self
620
621 base_doc = LGBMModel.fit.__doc__
622 fit.__doc__ = (base_doc[:base_doc.find('eval_class_weight :')] +
...........................................................................
/opt/conda/lib/python3.6/site-packages/lightgbm/sklearn.py in fit(self=LGBMRegressor(application='regression_l1',
..., subsample_freq=1,
tree_learner='voting'), X=array([[-0.80256822, 1.63302752, -0.55377441, .... 12.251635 ,
12.27866017, 1. ]]), y=array([-1.81712472, 0. , -1.7366136 , 0... , 0.36258158, -0.13661202, 0.2919708 ]), sample_weight=None, init_score=None, group=None, eval_set=[(memmap([[-1.16531701, -0.97454256, -1.36807818, ...11.55465037,
11.55160629, 2. ]]), array([ 0.58517555, -1.01419878, -0.05787037, -0...64139942, 1.04166667, 0. , -0.11668611]))], eval_names=None, eval_sample_weight=None, eval_class_weight=None, eval_init_score=None, eval_group=None, eval_metric=['l2', 'l1'], early_stopping_rounds=1, verbose=False, feature_name='auto', categorical_feature='auto', callbacks=None)
468 self.n_estimators, valid_sets=valid_sets, valid_names=eval_names,
469 early_stopping_rounds=early_stopping_rounds,
470 evals_result=evals_result, fobj=self._fobj, feval=feval,
471 verbose_eval=verbose, feature_name=feature_name,
472 categorical_feature=categorical_feature,
--> 473 callbacks=callbacks)
callbacks = None
474
475 if evals_result:
476 self._evals_result = evals_result
477
...........................................................................
/opt/conda/lib/python3.6/site-packages/lightgbm/engine.py in train(params={'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}, train_set=<lightgbm.basic.Dataset object>, num_boost_round=11610, valid_sets=[<lightgbm.basic.Dataset object>], valid_names=None, fobj=None, feval=None, init_model=None, feature_name='auto', categorical_feature='auto', early_stopping_rounds=1, evals_result={}, verbose_eval=False, learning_rates=None, keep_training_booster=False, callbacks={<function print_evaluation.<locals>.callback>, <function early_stopping.<locals>.callback>, <function record_evaluation.<locals>.callback>})
175 callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
176 callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))
177
178 # construct booster
179 try:
--> 180 booster = Booster(params=params, train_set=train_set)
booster = undefined
params = {'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}
train_set = <lightgbm.basic.Dataset object>
181 if is_valid_contain_train:
182 booster.set_train_data_name(train_data_name)
183 for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets):
184 booster.add_valid(valid_set, name_valid_set)
...........................................................................
/opt/conda/lib/python3.6/site-packages/lightgbm/basic.py in __init__(self=<lightgbm.basic.Booster object>, params={'application': 'regression_l1', 'bagging_fraction': 0.0013516565394267757, 'bagging_freq': 8, 'boosting_type': 'dart', 'colsample_bytree': 1.0, 'device': 'gpu', 'feature_fraction': 0.18574060093496944, 'gpu_use_dp': True, 'learning_rate': 0.06354739024799887, 'max_depth': 267, ...}, train_set=<lightgbm.basic.Dataset object>, model_file=None, silent=False)
1290 # construct booster object
1291 self.handle = ctypes.c_void_p()
1292 _safe_call(_LIB.LGBM_BoosterCreate(
1293 train_set.construct().handle,
1294 c_str(params_str),
-> 1295 ctypes.byref(self.handle)))
self.handle = c_void_p(None)
1296 # save reference to data
1297 self.train_set = train_set
1298 self.valid_sets = []
1299 self.name_valid_sets = []
...........................................................................
/opt/conda/lib/python3.6/site-packages/lightgbm/basic.py in _safe_call(ret=-1)
43 ----------
44 ret : int
45 return value from API calls
46 """
47 if ret != 0:
---> 48 raise LightGBMError(_LIB.LGBM_GetLastError())
49
50
51 def is_numeric(obj):
52 """Check is a number or not, include numpy number etc."""
LightGBMError: b'Check failed: num_data > 0 at /usr/local/src/lightgbm/LightGBM/src/io/dataset.cpp, line 27 .\n'
The minimum values of bagging_fraction and feature_fraction could be too small. I changed the distribution to sp.stats.uniform(loc=0.1, scale=0.9) and it works.
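To illustrate that fix, the two fraction distributions can be floored at 0.1 so a sampled fraction can never leave the constructed Dataset empty; a sketch, assuming scipy is imported as sp as in the question:

# uniform over [0.1, 1.0): loc is the lower bound, scale the width
param_dist['bagging_fraction'] = sp.stats.uniform(loc=0.1, scale=0.9)
param_dist['feature_fraction'] = sp.stats.uniform(loc=0.1, scale=0.9)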
I got the same error in LightGBM Python. In my case, the size of the test dataset was 0 rows. So make sure the test/train dataset does not have 0 rows.
In my case, this error always happened when min_sum_hessian_in_leaf = 0, since I did a grid search over min_sum_hessian_in_leaf in [0, 2, 4, 5, 6, 7, 8, 9, 10].
After removing the 0 from the list, the error never happened again.
Maybe train_x or train_y is empty. You can check by printing the data.

Extending xgboost.XGBClassifier

I am trying to define a class called XGBExtended that extends the class xgboost.XGBClassifier, the scikit-learn API for xgboost. I am running into some issues with the get_params method. Below is an IPython session illustrating the issue. Basically, get_params seems to only be returning the attributes I define within XGBExtended.__init__, and attributes defined during the parent init method (xgboost.XGBClassifier.__init__) are ignored. I am using IPython and running Python 2.7. Full system specs are at the bottom.
In [182]: import xgboost as xgb
...:
...: class XGBExtended(xgb.XGBClassifier):
...: def __init__(self, foo):
...: super(XGBExtended, self).__init__()
...: self.foo = foo
...:
...: clf = XGBExtended(foo = 1)
...:
...: clf.get_params()
...:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-182-431c4c3f334b> in <module>()
8 clf = XGBExtended(foo = 1)
9
---> 10 clf.get_params()
/Users/andrewhannigan/lib/xgboost/python-package/xgboost/sklearn.pyc in get_params(self, deep)
188 if isinstance(self.kwargs, dict): # if kwargs is a dict, update params accordingly
189 params.update(self.kwargs)
--> 190 if params['missing'] is np.nan:
191 params['missing'] = None # sklearn doesn't handle nan. see #4725
192 if not params.get('eval_metric', True):
KeyError: 'missing'
So I've hit an error because 'missing' is not a key in the params dict within the XGBClassifier.get_params method. I enter the debugger to poke around:
In [183]: %debug
> /Users/andrewhannigan/lib/xgboost/python-package/xgboost/sklearn.py(190)get_params()
188 if isinstance(self.kwargs, dict): # if kwargs is a dict, update params accordingly
189 params.update(self.kwargs)
--> 190 if params['missing'] is np.nan:
191 params['missing'] = None # sklearn doesn't handle nan. see #4725
192 if not params.get('eval_metric', True):
ipdb> params
{'foo': 1}
ipdb> self.__dict__
{'n_jobs': 1, 'seed': None, 'silent': True, 'missing': nan, 'nthread': None, 'min_child_weight': 1, 'random_state': 0, 'kwargs': {}, 'objective': 'binary:logistic', 'foo': 1, 'max_depth': 3, 'reg_alpha': 0, 'colsample_bylevel': 1, 'scale_pos_weight': 1, '_Booster': None, 'learning_rate': 0.1, 'max_delta_step': 0, 'base_score': 0.5, 'n_estimators': 100, 'booster': 'gbtree', 'colsample_bytree': 1, 'subsample': 1, 'reg_lambda': 1, 'gamma': 0}
ipdb>
As you can see, the params contains only the foo variable. However, the object itself contains all of the params defined by xgboost.XGBClassifier.__init__. But for some reason the BaseEstimator.get_params method which is called from xgboost.XGBClassifier.get_params is only getting the parameters defined explicitly in the XGBExtended.__init__ method. Unfortunately, even if I explicitly call get_params with deep = True, it still does not work correctly:
ipdb> super(XGBModel, self).get_params(deep=True)
{'foo': 1}
ipdb>
Can anyone tell why this is happening?
System specs:
In [186]: print IPython.sys_info()
{'commit_hash': u'1149d1700',
'commit_source': 'installation',
'default_encoding': 'UTF-8',
'ipython_path': '/Users/andrewhannigan/virtualenvironment/nimble_ai/lib/python2.7/site-packages/IPython',
'ipython_version': '5.4.1',
'os_name': 'posix',
'platform': 'Darwin-14.5.0-x86_64-i386-64bit',
'sys_executable': '/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python',
'sys_platform': 'darwin',
'sys_version': '2.7.10 (default, Jul 3 2015, 12:05:53) \n[GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)]'}
The problem here is an incorrect declaration of the child class.
When you declare the __init__ method with only foo, you override the original one. The parent's parameters will not be initialized automatically, even though the base class constructor defines default values for them: scikit-learn's get_params discovers parameter names by inspecting the signature of the class's own __init__, so anything missing from that signature is invisible to it.
You should use the following:
class XGBExtended(xgb.XGBClassifier):
    def __init__(self, foo, max_depth=3, learning_rate=0.1,
                 n_estimators=100, silent=True,
                 objective="binary:logistic",
                 nthread=-1, gamma=0, min_child_weight=1,
                 max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1,
                 reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
                 base_score=0.5, seed=0, missing=None, **kwargs):
        # Pass the required parameters to the superclass
        super(XGBExtended, self).__init__(max_depth, learning_rate,
                                          n_estimators, silent, objective,
                                          nthread, gamma, min_child_weight,
                                          max_delta_step, subsample,
                                          colsample_bytree, colsample_bylevel,
                                          reg_alpha, reg_lambda,
                                          scale_pos_weight, base_score, seed, missing, **kwargs)
        # Use other custom parameters
        self.foo = foo
After that you will not get any error.
clf = XGBExtended(foo = 1)
print(clf.get_params(deep=True))
>>> {'reg_alpha': 0, 'colsample_bytree': 1, 'silent': True,
'colsample_bylevel': 1, 'scale_pos_weight': 1, 'learning_rate': 0.1,
'missing': None, 'max_delta_step': 0, 'nthread': -1, 'base_score': 0.5,
'n_estimators': 100, 'subsample': 1, 'reg_lambda': 1, 'seed': 0,
'min_child_weight': 1, 'objective': 'binary:logistic',
'foo': 1, 'max_depth': 3, 'gamma': 0}
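The mechanism described above can be demonstrated without xgboost at all, since it lives in scikit-learn's BaseEstimator. A minimal sketch (Parent, Child, alpha, and foo are hypothetical names for illustration):

from sklearn.base import BaseEstimator

class Parent(BaseEstimator):
    def __init__(self, alpha=1.0):
        self.alpha = alpha

class Child(Parent):
    # 'alpha' is absent from this signature, so get_params cannot see it
    def __init__(self, foo=0):
        super(Child, self).__init__()
        self.foo = foo

print(Child(foo=1).get_params())  # {'foo': 1} -- alpha has disappeared

This is why the fix re-declares every parent parameter in the subclass signature.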
