I am trying to use the XGBoost package in Python.
I get the error below when running this code:
import xgboost as xgb
data=np.array(traindata.drop('Category',axis=1))
labels=np.array(traindata['Category'].cat.codes)
dtrain = xgb.DMatrix( data, label=labels)
param = {'bst:max_depth':6, 'bst:eta':0.5, 'silent':1, 'objective':'multi:softprob' }
param['nthread'] = 4
param['eval_metric'] = 'mlogloss'
param['lambda'] = 1
param['num_class']=39
evallist = [(dtrain,'train')]
plst = param.items()
plst += [('eval_metric', 'ams#0')]
num_round = 10
bst = xgb.train( plst, dtrain, num_round, evallist )
bst.save_model('0001.model')
--------------------------------------------------------------------------- XGBoostError Traceback (most recent call
last) in ()
17
18 num_round = 10
---> 19 bst = xgb.train( plst, dtrain, num_round, evallist )
20
21 bst.save_model('0001.model')
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/xgboost/training.pyc
in train(params, dtrain, num_boost_round, evals, obj, feval, maximize,
early_stopping_rounds, evals_result, verbose_eval, learning_rates,
xgb_model)
122 nboost += 1
123 if len(evals) != 0:
--> 124 bst_eval_set = bst.eval_set(evals, i, feval)
125 if isinstance(bst_eval_set, STRING_TYPES):
126 msg = bst_eval_set
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/xgboost/core.pyc
in eval_set(self, evals, iteration, feval)
753 _check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration,
754 dmats, evnames, len(evals),
--> 755 ctypes.byref(msg)))
756 return msg.value
757 else:
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/xgboost/core.pyc
in _check_call(ret)
95 """
96 if ret != 0:
---> 97 raise XGBoostError(_LIB.XGBGetLastError())
98
99
XGBoostError: we need weight to evaluate ams
I don't see anything about it in the docs:
https://xgboost.readthedocs.io/en/latest/python/python_intro.html
http://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
When computing the ams metric, you need to have a weight for each labeled training point. You set the weights by using the keyword argument weight when creating your DMatrix. Here is a simple example:
weights = np.ones(len(labels))
dtrain = xgb.DMatrix(data, label = labels, weight = weights)
And an in-depth example from a recent Kaggle competition: https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py.
Related
I'm trying to train an RL agent to play the Car Racing environment with OpenAI Gym and have been using the following code:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os
environment_name = "CarRacing-v2"
env = gym.make(environment_name)
I used this for testing the environment:
episodes = 5
for episode in range(1, episodes+1):
state = env.reset()
done = False
truncated = False
score = 0
while not done and not truncated:
env.render()
action = env.action_space.sample()
observation, reward, done, truncated, info = env.step(action)
print(done)
score+=reward
print('Episode:{} Score:{}'.format(episode, score))
env.close()
Then I try to train a model like so:
env = gym.make(environment_name)
env = DummyVecEnv([lambda: env])
log_path = os.path.join('Training', 'Logs')
model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=3000000)
I'm getting this error after the last line of code, where I'm trying to get the model to learn:
model.learn(total_timesteps=3000000)
model.learn(total_timesteps=3000000)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-59-262f6d03e40c> in <module>
----> 1 model.learn(total_timesteps=3000000)
~\anaconda3\lib\site-packages\stable_baselines3\ppo\ppo.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps, progress_bar)
315 ) -> PPOSelf:
316
--> 317 return super().learn(
318 total_timesteps=total_timesteps,
319 callback=callback,
~\anaconda3\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps, progress_bar)
244 iteration = 0
245
--> 246 total_timesteps, callback = self._setup_learn(
247 total_timesteps,
248 eval_env,
~\anaconda3\lib\site-packages\stable_baselines3\common\base_class.py in _setup_learn(self, total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, log_path, reset_num_timesteps, tb_log_name, progress_bar)
487 # Avoid resetting the environment when calling ``.learn()`` consecutive times
488 if reset_num_timesteps or self._last_obs is None:
--> 489 self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
490 self._last_episode_starts = np.ones((self.env.num_envs,), dtype=bool)
491 # Retrieve unnormalized observation for saving into the buffer
~\anaconda3\lib\site-packages\stable_baselines3\common\vec_env\vec_transpose.py in reset(self)
108 Reset all environments
109 """
--> 110 return self.transpose_observations(self.venv.reset())
111
112 def close(self) -> None:
~\anaconda3\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py in reset(self)
62 for env_idx in range(self.num_envs):
63 obs = self.envs[env_idx].reset()
---> 64 self._save_obs(env_idx, obs)
65 return self._obs_from_buf()
66
~\anaconda3\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py in _save_obs(self, env_idx, obs)
92 for key in self.keys:
93 if key is None:
---> 94 self.buf_obs[key][env_idx] = obs
95 else:
96 self.buf_obs[key][env_idx] = obs[key]
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.
Does anyone know how I can get around this?
OBS1:
I found that it has to do with the reset method on the env. When I run
env.reset()
I'm getting the same error.
I'm getting an error when trying to run a simple XGB model. The error is discussed in the question below, but the solution there is not helping.
XGBoost Error info.labels.size() != 0U (0 vs. 0)
The error is as follows:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-748-7f7917b57181> in <module>
11 param = {'max_depth':3, 'eta':1, 'objective':'binary:logistic'}
12
---> 13 bst = xgb.train(param, dtrain, num_round)
14 preds = bst.predict(dtest)
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks)
207 evals=evals,
208 obj=obj, feval=feval,
--> 209 xgb_model=xgb_model, callbacks=callbacks)
210
211
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\xgboost\training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
72 # Skip the first update if it is a recovery step.
73 if version % 2 == 0:
---> 74 bst.update(dtrain, i, obj)
75 bst.save_rabit_checkpoint()
76 version += 1
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\xgboost\core.py in update(self, dtrain, iteration, fobj)
1247 _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
1248 ctypes.c_int(iteration),
-> 1249 dtrain.handle))
1250 else:
1251 pred = self.predict(dtrain, training=True)
~\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\xgboost\core.py in _check_call(ret)
187 """
188 if ret != 0:
--> 189 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
190
191
XGBoostError: [15:11:21] C:/Users/Administrator/workspace/xgboost-win64_release_1.0.0/src/objective/regression_obj.cu:60: Check failed: preds.Size() == info.labels_.Size() (3843 vs. 0) : labels are not correctly providedpreds.size=3843, label.size=0
So the above points to a label size issue. When I run the following code, my training data (train_xgb) and my label data (test_xgb) match.
train_xgb = train_dummies.iloc[:, 2:]
test_xgb = train_dummies.iloc[:, 2]
print(train_xgb.shape, test_xgb.shape, type(train_xgb), type(test_xgb))
dtrain = xgb.DMatrix(train_xgb)
dtest = xgb.DMatrix(test_xgb)
Here is the result of the print statement above:
(3843, 3143) (3843,) <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>
I've tried changing the labels to their own dataframe and numpy array with no change in the error.
I tried to use LightGBM with Python on an AWS instance. I can train with standard LightGBM, but if I use it with GPU acceleration, it shows the following error:
LightGBMError Traceback (most recent call last)
<ipython-input-28-758038d46621> in <module>()
----> 1 train_blend_x_lgb, test_blend_x_lgb, blend_scores_lgb = lgb_rgr_stack(lgb_params, train_x, y, test_x, 5, early_stopping_rounds=0, missing=None)
2 train_blend_x_lgb.tofile('train_blend_x_lgb_v2.dat')
3 test_blend_x_lgb.tofile('test_blend_x_lgb_v2.dat')
<ipython-input-16-ba585885e839> in lgb_rgr_stack(rgr_params, train_x, train_y, test_x, kfolds, early_stopping_rounds, missing)
25 feature_name=full_vars,
26 categorical_feature = dense_cat_vars),
---> 27 num_boost_round=num_boost_round
28 )
29 val_y_predict_fold = model.predict(val_x_fold)
~/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py in train(params, train_set, num_boost_round, valid_sets, valid_names, fobj, feval, init_model, feature_name, categorical_feature, early_stopping_rounds, evals_result, verbose_eval, learning_rates, keep_training_booster, callbacks)
181 # construct booster
182 try:
--> 183 booster = Booster(params=params, train_set=train_set)
184 if is_valid_contain_train:
185 booster.set_train_data_name(train_data_name)
~/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py in __init__(self, params, train_set, model_file, silent)
1307 train_set.construct().handle,
1308 c_str(params_str),
-> 1309 ctypes.byref(self.handle)))
1310 # save reference to data
1311 self.train_set = train_set
~/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py in _safe_call(ret)
47 """
48 if ret != 0:
---> 49 raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
50
51
LightGBMError: No OpenCL device found
Did you add a GPU to your EC2 instance? They don't come with GPUs by default. GPUs are pretty expensive. Add a GPU and see if that helps.
https://aws.amazon.com/ec2/elastic-gpus/
I am performing a grid search with H2O through the Python API, using the following code:
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid import H2OGridSearch
hyper_parameters = {'ntrees':[10, 50, 100, 200], 'max_depth':[5, 10, 15, 20, 25], 'balance_classes':[True, False]}
search_criteria = {
"strategy": "RandomDiscrete",
"max_runtime_secs": 600,
"max_models": 30,
"stopping_metric": 'AUTO',
"stopping_tolerance": 0.0001,
'seed': 42
}
grid_search = H2OGridSearch(H2ORandomForestEstimator, hyper_parameters, search_criteria=search_criteria)
grid_search.train(x=events_names_x,
y="total_rsvps",
training_frame=train,
validation_frame=test)
Once it has run, I want to print the models and predict, in order of AUC:
grid_search.sort_by('auc', False)
I get the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-272-b250bf2b838e> in <module>()
----> 1 grid_search.sort_by('auc', False)
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/grid/grid_search.py in sort_by(self, metric, increasing)
663
664 if metric[-1] != ')': metric += '()'
--> 665 c_values = [list(x) for x in zip(*sorted(eval('self.' + metric + '.items()'), key=lambda k_v: k_v[1]))]
666 c_values.insert(1, [self.get_hyperparams(model_id, display=False) for model_id in c_values[0]])
667 if not increasing:
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/grid/grid_search.py in <module>()
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/grid/grid_search.py in auc(self, train, valid, xval)
606 :return: The AUC.
607 """
--> 608 return {model.model_id: model.auc(train, valid, xval) for model in self.models}
609
610 def aic(self, train=False, valid=False, xval=False):
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/grid/grid_search.py in <dictcomp>(.0)
606 :return: The AUC.
607 """
--> 608 return {model.model_id: model.auc(train, valid, xval) for model in self.models}
609
610 def aic(self, train=False, valid=False, xval=False):
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/model/model_base.py in auc(self, train, valid, xval)
669 tm = ModelBase._get_metrics(self, train, valid, xval)
670 m = {}
--> 671 for k, v in viewitems(tm): m[k] = None if v is None else v.auc()
672 return list(m.values())[0] if len(m) == 1 else m
673
/Users/stereo/.pyenv/versions/3.5.2/lib/python3.5/site-packages/h2o/model/metrics_base.py in auc(self)
158 :return: Retrieve the AUC for this set of metrics.
159 """
--> 160 return self._metric_json['AUC']
161
162 def aic(self):
KeyError: 'AUC'
Any advice on how I can:
print the models in order of performance
forecast with the model with the highest AUC
What you need is:
sorted_grid = grid_search.get_grid(sort_by='auc',decreasing=True)
print(sorted_grid)
You can change decreasing to False if you would prefer.
A snippet of code involving RandomForestClassifier using the python machine learning library scikit-learn.
I am trying to give weight to different classes using the class_weight option in scikit-learn's RandomForestClassifier. Below is my code snippet, followed by the error that I am getting:
print 'Training...'
forest = RandomForestClassifier(n_estimators=500,class_weight= {0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:4})
forest = forest.fit( train_data[0::,1::], train_data[0::,0] )
print 'Predicting...'
output = forest.predict(test_data).astype(int)
predictions_file = open("myfirstforest.csv", "wb")
open_file_object = csv.writer(predictions_file)
open_file_object.writerow(["PassengerId","Survived"])
open_file_object.writerows(zip(ids, output))
predictions_file.close()
print 'Done.'
And I am getting the following error:
Training...
IndexError Traceback (most recent call last)
<ipython-input-20-122f2e5a0d3b> in <module>()
84 print 'Training...'
85 forest = RandomForestClassifier(n_estimators=500,class_weight={0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:4})
---> 86 forest = forest.fit( train_data[0::,1::], train_data[0::,0] )
87
88 print 'Predicting...'
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc in fit(self, X, y, sample_weight)
216 self.n_outputs_ = y.shape[1]
217
--> 218 y, expanded_class_weight = self._validate_y_class_weight(y)
219
220 if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc in _validate_y_class_weight(self, y)
433 class_weight = self.class_weight
434 expanded_class_weight = compute_sample_weight(class_weight,
--> 435 y_original)
436
437 return y, expanded_class_weight
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/utils/class_weight.pyc in compute_sample_weight(class_weight, y, indices)
150 weight_k = compute_class_weight(class_weight_k,
151 classes_full,
--> 152 y_full)
153
154 weight_k = weight_k[np.searchsorted(classes_full, y_full)]
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/utils/class_weight.pyc in compute_class_weight(class_weight, classes, y)
58 for c in class_weight:
59 i = np.searchsorted(classes, c)
---> 60 if classes[i] != c:
61 raise ValueError("Class label %d not present." % c)
62 else:
IndexError: index 2 is out of bounds for axis 0 with size 2
Please help!