sklearn - AttributeError: 'CustomScaler' object has no attribute 'copy' - python

When I write a custom scaler to scale my data without scaling the dummies I've already created, I get an error saying the custom scaler has no attribute copy:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler

class CustomScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        self.scaler = StandardScaler(copy, with_mean, with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self

    def transform(self, X, y=None, copy=True):
        init_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
                                columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]
These are my unscaled columns:
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month',
'Day of the week', 'Transportation Expense', 'Distance to Work',
'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
'Children', 'Pets'], dtype=object)
Dummy variables I don't want to scale:
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']
Variables I want to scale:
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
Passing "columns_to_scale" to my CustomScaler:
absenteeism_scaler = CustomScaler(columns_to_scale)
I get this warning:
C:\Users\prati\Anaconda3\lib\site-packages\sklearn\utils\validation.py:70: FutureWarning: Pass copy=True, with_mean=True, with_std=True as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error
  warnings.warn(f"Pass {args_msg} as keyword args. From version "
When I fit my "absenteeism_scaler" to "unscaled_inputs", I get an error, but the error doesn't stop my code from executing further:
absenteeism_scaler.fit(unscaled_inputs)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include,
exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
It still gives me output, but I don't understand: if I get an error, shouldn't the code stop executing? Yet it keeps running afterwards.
P.S. This is just the left half of the data, as I can't fit all of it here, sorry about that.
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)
scaled_inputs
   Reason_1  Reason_2  Reason_3  Reason_4     Month  Day of the week  Transportation Expense
0 0 0 0 1 0.182726 -0.683704 1.005844
1 0 0 0 0 0.182726 -0.683704 -1.574681
2 0 0 0 1 0.182726 -0.007725 -0.654143
3 1 0 0 0 0.182726 0.668253 0.854936
4 0 0 0 1 0.182726 0.668253 1.005844
Update 1: when I removed copy=True, with_mean=True, with_std=True from __init__, my error got resolved, but I think the scaling will then be an "inplace" change (and I lose the mean and S.D. options), so if I don't want to make in-place changes to my data, what can I do?
Update 2: is it because copy, with_mean, and with_std are True by default? Just check StandardScaler in the sklearn documentation.
def __init__(self, columns):
    self.scaler = StandardScaler()
    self.columns = columns
    self.mean_ = None
    self.var_ = None

Positional/keyword warning/error
The first warning (an error from sklearn 1.0 onward) is raised because you're passing the StandardScaler's keyword arguments positionally here:
def __init__(...):
    self.scaler = StandardScaler(copy, with_mean, with_std)
    ...
You should change it to pass them as keywords:
def __init__(...):
    self.scaler = StandardScaler(copy=copy, with_mean=with_mean, with_std=with_std)
    ...
No attribute copy error
This error is only thrown when Jupyter tries to display the HTML representation of the transformer, after fitting has already succeeded, which is why you're able to continue and transform successfully.
But the issue is more serious than that if you want to make use of the transformer in pipelines, grid searches, etc. In order to clone properly, you need to follow the specific guidance of the sklearn API, or else provide your own get_params and set_params. The __init__ method should set an attribute for every parameter, and only those attributes. So here it should be
def __init__(self, columns, copy=True, with_mean=True, with_std=True):
    self.columns = columns
    self.copy = copy
    self.with_mean = with_mean
    self.with_std = with_std
And then make the contained StandardScaler at fit time:
def fit(self, X, y=None):
    self.scaler = StandardScaler(copy=self.copy, with_mean=self.with_mean, with_std=self.with_std)
    self.scaler.fit(X[self.columns], y)
    self.mean_ = np.mean(X[self.columns])
    self.var_ = np.var(X[self.columns])
    return self
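Putting the two fixes together, here is a minimal sketch of the whole class; the numpy/pandas imports are assumed from the question's setup, and transform is kept essentially as in the question:

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler

class CustomScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        # Store only the constructor parameters, as the sklearn API expects.
        self.columns = columns
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std

    def fit(self, X, y=None):
        # Build the inner StandardScaler at fit time from the stored parameters.
        self.scaler = StandardScaler(copy=self.copy, with_mean=self.with_mean,
                                     with_std=self.with_std)
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self

    def transform(self, X, y=None, copy=True):
        # Scale only the selected columns, then restore the original column order.
        init_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
                                columns=self.columns, index=X.index)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]

With this version get_params works, so the repr no longer raises, and the transformer can also be cloned properly inside pipelines and grid searches.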

Related

Custom scaler transformer fit error only when used in ColumnTransformer pipeline

I have a df where some columns are target time series which I need to scale, sometimes as a flattened 1D array and sometimes as-is, so I'm defining a custom wrapper to extend the scikit-learn scalers with optional preprocessing (flattening). transform and inverse_transform are not posted for simplicity.
import copy

class ScalerOptFlatten(TransformerMixin, BaseEstimator):
    def __init__(self, instance, flatten, **kwargs):
        # super().__init__(**kwargs)
        self.scaler = instance
        self.flatten = flatten

    def get_params(self, deep=True):
        cp = copy.copy(self.scaler)
        cp.__class__ = type(self.scaler)
        params = type(self.scaler).get_params(cp, deep)
        return params

    def fit(self, X, y=None):
        if self.flatten:
            scale_in = X.reshape(-1).reshape(-1, 1)
        else:
            scale_in = X
        if y is None:
            self.scaler.fit(scale_in)
        else:
            self.scaler.fit(scale_in, y)
        return self
It works when used on its own
scaler_transformer = ScalerOptFlatten(instance=StandardScaler(), flatten=True)
scaler_transformer.fit(df_unstack)
But it fails when stacked in:
preprocessor = ColumnTransformer(
    transformers=[
        ("ts", scaler_transformer, target_feature_names)
    ]
)
preprocessor.fit(df_unstack)
With
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
826 try:
--> 827 tasks = self._ready_batches.get(block=False)
828 except queue.Empty:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/queue.py in get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-494-0eb4b591d8a6> in <module>
----> 1 ts_scaler_step.fit(df_unstack)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit(self, X, y)
492 # we use fit_transform to make sure to set sparse_output_ (for which we
493 # need the transformed data) to have consistent output type in predict
--> 494 self.fit_transform(X, y=y)
495 return self
496
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
529 self._validate_remainder(X)
530
--> 531 result = self._fit_transform(X, y, _fit_transform_one)
532
533 if not result:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
456 self._iter(fitted=fitted, replace_strings=True))
457 try:
--> 458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
460 transformer=clone(trans) if not fitted else trans,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
836 big_batch_size = batch_size * n_jobs
837
--> 838 islice = list(itertools.islice(iterator, big_batch_size))
839 if len(islice) == 0:
840 return False
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
--> 460 transformer=clone(trans) if not fitted else trans,
461 X=_safe_indexing(X, column, axis=1),
462 y=y,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/base.py in clone(estimator, safe)
86 for name, param in new_object_params.items():
87 new_object_params[name] = clone(param, safe=False)
---> 88 new_object = klass(**new_object_params)
89 params_set = new_object.get_params(deep=False)
90
TypeError: __init__() missing 2 required positional arguments: 'instance' and 'flatten'
I suspect the error is in the get_params method.
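The traceback backs this up: clone() rebuilds the transformer as klass(**new_object_params), and because the overridden get_params returns the inner scaler's parameters (copy, with_mean, with_std) instead of instance and flatten, the constructor call fails. Below is a minimal sketch of a clone-friendly version, assuming the inherited BaseEstimator.get_params is sufficient once the constructor arguments are stored under matching attribute names (transform/inverse_transform omitted as in the question):

from sklearn.base import BaseEstimator, TransformerMixin

class ScalerOptFlatten(TransformerMixin, BaseEstimator):
    def __init__(self, instance, flatten):
        # Store each constructor argument under an attribute of the same name
        # so the default get_params()/clone() machinery can rebuild the object.
        self.instance = instance
        self.flatten = flatten

    def fit(self, X, y=None):
        # Optionally flatten to a single column before fitting the wrapped scaler.
        scale_in = X.reshape(-1, 1) if self.flatten else X
        self.instance.fit(scale_in, y)
        return self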

'KMeans' object has no attribute 'k'

I am using the Yellowbrick package to plot an elbow curve for a dataset, to find the optimal number of clusters using KMeans as the model.
I am using Scikit-learn's KMeans and Yellowbrick's kelbow_visualizer functions.
The elbow curve is generated and I am able to read the elbow value however the following error is thrown afterwards:
AttributeError: 'KMeans' object has no attribute 'k'
The way I am generating the curve is as follows:
from sklearn.cluster import KMeans
from yellowbrick.cluster.elbow import kelbow_visualizer

def elbow_method(X, max_range_for_elbow, rseed=RSEED):
    return kelbow_visualizer(KMeans(random_state=rseed), X, k=(1, max_range_for_elbow))

elbow_method(data_standardized, 10)
Where data_standardized is my dataset and RSEED is a constant with value 2.
I'm not sure what the issue is since the elbow curve is generated before the error is thrown.
This is the full error message:
AttributeError: 'KMeans' object has no attribute 'k'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\anaconda3\lib\site-packages\yellowbrick\base.py in get_params(self, deep)
340 the estimator params.
341 """
--> 342 params = super(ModelVisualizer, self).get_params(deep=deep)
343 for param in list(params.keys()):
344 if param.startswith("estimator__"):
~\anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
~\anaconda3\lib\site-packages\yellowbrick\utils\wrapper.py in __getattr__(self, attr)
40 def __getattr__(self, attr):
41 # proxy to the wrapped object
---> 42 return getattr(self._wrapped, attr)
I solved this issue by storing the result of the elbow_method function in a variable, like so:
elbow = elbow_method(data_standardized, 10)
Alternatively, just remove the return and put pass as the last line of the elbow_method() function.
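In other words, either bind the returned visualizer to a name so the notebook does not try to repr it, or don't return it at all. A short sketch of both variants, reusing kelbow_visualizer, KMeans, and RSEED from the question:

# Variant 1: keep the return, but assign the result so Jupyter does not
# auto-display the visualizer (the repr is what raises the AttributeError).
elbow = elbow_method(data_standardized, 10)

# Variant 2: drop the return value entirely.
def elbow_method(X, max_range_for_elbow, rseed=RSEED):
    kelbow_visualizer(KMeans(random_state=rseed), X, k=(1, max_range_for_elbow))
    pass  # as suggested above; the pass is optional once return is removed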

How can I resolve all these errors from folium?

I am trying to create a map following this tutorial. I am using almost exactly the same code as in the tutorial, but I get a series of errors that I am unsure how to begin resolving; does anyone have any thoughts on where I could start?
Here is my code below. I think my table merge worked okay, because the table appears correctly when I print it, but I think things start going downhill with the Choropleth function.
import geopandas
import pandas as pd
import folium
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_meat_consumption'
tables = pd.read_html(url)
table = tables[0]
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
table = world.merge(table, how='left', left_on=['name'], right_on=['Country'])
pd.options.display.width = 4000
pd.set_option('display.max_rows', 188)
table = table.dropna(subset=['kg/person (2002)[9][note 1]'])
meat_map = folium.Map()
folium.Choropleth(
    geo_data=table,
    name='choropleth',
    data=table,
    columns=['Country', 'kg/person (2002)[9][note 1]'],
    key_on='feature.properties.name',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Meat consumption in kg/person"
).add_to(meat_map)
Here is my error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-b9be599a18fa> in <module>
10 meat_map = folium.Map()
11
---> 12 folium.Choropleth(
13 geo_data=table,
14 name='choropleth',
~\anaconda3\envs\geo_env\lib\site-packages\folium\features.py in __init__(self, geo_data, data, columns, key_on, bins, fill_color, nan_fill_color, fill_opacity, nan_fill_opacity, line_color, line_weight, line_opacity, name, legend_name, overlay, control, show, topojson, smooth_factor, highlight, **kwargs)
1287 smooth_factor=smooth_factor)
1288 else:
-> 1289 self.geojson = GeoJson(
1290 geo_data,
1291 style_function=style_function,
~\anaconda3\envs\geo_env\lib\site-packages\folium\features.py in __init__(self, data, style_function, highlight_function, name, overlay, control, show, smooth_factor, tooltip, embed, popup, zoom_on_click, marker)
497 self.marker = marker
498
--> 499 self.data = self.process_data(data)
500
501 if self.style or self.highlight:
~\anaconda3\envs\geo_env\lib\site-packages\folium\features.py in process_data(self, data)
540 if hasattr(data, 'to_crs'):
541 data = data.to_crs('EPSG:4326')
--> 542 return json.loads(json.dumps(data.__geo_interface__))
543 else:
544 raise ValueError('Cannot render objects with any missing geometries'
~\anaconda3\envs\geo_env\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
229 cls is None and indent is None and separators is None and
230 default is None and not sort_keys and not kw):
--> 231 return _default_encoder.encode(obj)
232 if cls is None:
233 cls = JSONEncoder
~\anaconda3\envs\geo_env\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\anaconda3\envs\geo_env\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
~\anaconda3\envs\geo_env\lib\json\encoder.py in default(self, o)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
181
TypeError: Object of type MultiPolygon is not JSON serializable

KeyError: 'base_score' while fitting XGBClassifier

Using GridSearchCV, I find the optimal hyperparameters after fitting my training data:
model_xgb = XGBClassifier()
n_estimators = [50, 100, 150, 200]
max_depth = [2, 4, 6, 8]
param_grid = dict(max_depth=max_depth, n_estimators=n_estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model_xgb, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold, verbose=1)
grid_result = grid_search.fit(train_X, y_train)
The best result is obtained with {'max_depth': 4, 'n_estimators': 50}, so I create a new model with these hyperparameters:
model_xgb_tn = XGBClassifier(n_estimators=50,max_depth=4,objective='multi:softprob')
When I try to fit the model to my data with model_xgb_tn.fit(train_X, y_train), I receive a KeyError: 'base_score'. I can't understand why I get a KeyError when I didn't even set that hyperparameter.
Below is the error code:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
461 def _repr_mimebundle_(self, **kwargs):
462 """Mime bundle used by jupyter kernels to display estimator"""
--> 463 output = {"text/plain": repr(self)}
464 if get_config()["display"] == 'diagram':
465 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
277 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
278
--> 279 repr_ = pp.pformat(self)
280
281 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
142 def pformat(self, object):
143 sio = _StringIO()
--> 144 self._format(object, sio, 0, 0, {}, 0)
145 return sio.getvalue()
146
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
159 self._readable = False
160 return
--> 161 rep = self._repr(object, context, level)
162 max_width = self._width - indent - allowance
163 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
391 def _repr(self, object, context, level):
392 repr, readable, recursive = self.format(object, context.copy(),
--> 393 self._depth, level)
394 if not readable:
395 self._readable = False
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
168 def format(self, object, context, maxlevels, level):
169 return _safe_repr(object, context, maxlevels, level,
--> 170 changed_only=self._changed_only)
171
172 def _pprint_estimator(self, object, stream, indent, allowance, context,
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
412 recursive = False
413 if changed_only:
--> 414 params = _changed_params(object)
415 else:
416 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
96 init_params = {name: param.default for name, param in init_params.items()}
97 for k, v in params.items():
---> 98 if (repr(v) != repr(init_params[k]) and
99 not (is_scalar_nan(init_params[k]) and is_scalar_nan(v))):
100 filtered_params[k] = v
KeyError: 'base_score'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
277 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
278
--> 279 repr_ = pp.pformat(self)
280
281 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
142 def pformat(self, object):
143 sio = _StringIO()
--> 144 self._format(object, sio, 0, 0, {}, 0)
145 return sio.getvalue()
146
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
159 self._readable = False
160 return
--> 161 rep = self._repr(object, context, level)
162 max_width = self._width - indent - allowance
163 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
391 def _repr(self, object, context, level):
392 repr, readable, recursive = self.format(object, context.copy(),
--> 393 self._depth, level)
394 if not readable:
395 self._readable = False
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
168 def format(self, object, context, maxlevels, level):
169 return _safe_repr(object, context, maxlevels, level,
--> 170 changed_only=self._changed_only)
171
172 def _pprint_estimator(self, object, stream, indent, allowance, context,
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
412 recursive = False
413 if changed_only:
--> 414 params = _changed_params(object)
415 else:
416 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
96 init_params = {name: param.default for name, param in init_params.items()}
97 for k, v in params.items():
---> 98 if (repr(v) != repr(init_params[k]) and
99 not (is_scalar_nan(init_params[k]) and is_scalar_nan(v))):
100 filtered_params[k] = v
KeyError: 'base_score'
You need to give a base score parameter, for the first iteration of gradient boosting, you can think of it as an initial weight to start with. For regression, it's the average of your target column and for classification problems, it's 1/(number of classes). You can refer to documentation of xgboost for more information about this hyperparameter.

Bokeh Geoviews use Lat/Long or UTM?

I am trying to plot the Zillow dataset with Bokeh using Geoviews and Datashader, but I am having the damnedest time getting it to work. I am able to plot the data on a Cartesian plane fine, but when I attempt to overlay the data on a map I run into errors.
I have used code adapted from the census-hv example on the datashader GitHub. I believe my problem is that it is looking for the coordinates in UTM rather than lat/long, because the code works when I multiply my coordinates by a few thousand (the points are then placed above the map in white space). If I attempt to plot the proper lat/long coordinates I get the following errors.
Can someone please point me in the direction of a map that uses lat/long?
>>>props.head()
longitude latitude
0 -118.654084 34.144442
1 -118.625364 34.140430
2 -118.394633 33.989359
3 -118.437206 34.148863
4 -118.385816 34.194168
import pandas as pd
import holoviews as hv
import geoviews as gv
import datashader as ds
from bokeh.models import WMTSTileSource
from holoviews.operation.datashader import datashade, dynspread

hv.notebook_extension('bokeh')

%%opts Overlay [width=900 height=525 xaxis=None yaxis=None]
geomap = gv.WMTS(WMTSTileSource(url=
    'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))
points = hv.Points(gv.Dataset(props, kdims=['longitude', 'latitude']))
# color_key = {'w':'aqua', 'b':'lime', 'a':'red', 'h':'fuchsia', 'o':'yellow' }
race = datashade(points, x_sampling=50, y_sampling=50, element_type=gv.Image)
geomap * race
RETURNS ERROR:
WARNING:root:dynamic_operation: Exception raised in callable
'dynamic_operation' of type 'function'.
Invoked as dynamic_operation(height=400, scale=1.0, width=400, x_range=None, y_range=None)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
305 pass
306 else:
--> 307 return printer(obj)
308 # Finally look for special method names
309 method = get_real_method(obj, self.print_method)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in pprint_display(obj)
255 if not ip.display_formatter.formatters['text/plain'].pprint:
256 return None
--> 257 return display(obj, raw=True)
258
259
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in display(obj, raw, **kwargs)
241 elif isinstance(obj, (HoloMap, DynamicMap)):
242 with option_state(obj):
--> 243 html = map_display(obj)
244 else:
245 return repr(obj) if raw else IPython.display.display(obj, **kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in wrapped(element)
127 try:
128 html = fn(element,
--> 129 max_frames=OutputMagic.options['max_frames'])
130
131 # Only want to add to the archive for one display hook...
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in map_display(vmap, max_frames)
196 return None
197
--> 198 return render(vmap)
199
200
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in render(obj, **kwargs)
57 if renderer.fig == 'pdf':
58 renderer = renderer.instance(fig='png')
---> 59 return renderer.html(obj, **kwargs)
60
61
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in html(self, obj, fmt, css, comm, **kwargs)
253 code to initialize a Comm, if the plot supplies one.
254 """
--> 255 plot, fmt = self._validate(obj, fmt)
256 figdata, _ = self(plot, fmt, **kwargs)
257 if css is None: css = self.css
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in _validate(self, obj, fmt)
189 if isinstance(obj, tuple(self.widgets.values())):
190 return obj, 'html'
--> 191 plot = self.get_plot(obj, renderer=self)
192
193 fig_formats = self.mode_formats['fig'][self.mode]
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in get_plot(self_or_cls, obj, renderer)
164 """
165 # Initialize DynamicMaps with first data item
--> 166 initialize_dynamic(obj)
167
168 if not isinstance(obj, Plot) and not displayable(obj):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/util.py in initialize_dynamic(obj)
173 continue
174 if not len(dmap):
--> 175 dmap[dmap._initial_key()]
176
177
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
489 # Nothing to do for callbacks that accept no arguments
490 (inargs, inkwargs) = (args, kwargs)
--> 491 if not args and not kwargs: return self.callable()
492 inputs = [i for i in self.inputs if isinstance(i, DynamicMap)]
493 streams = []
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/overlay.py in dynamic_mul(*args, **kwargs)
27 from .spaces import Callable
28 def dynamic_mul(*args, **kwargs):
---> 29 element = other[args]
30 return self * element
31 callback = Callable(dynamic_mul, inputs=[self, other])
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
519
520 try:
--> 521 ret = self.callable(*args, **kwargs)
522 except:
523 posstr = ', '.join(['%r' % el for el in inargs]) if inargs else ''
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in dynamic_operation(*key, **kwargs)
101 self.p.kwargs.update(kwargs)
102 obj = map_obj[key] if isinstance(map_obj, HoloMap) else map_obj
--> 103 return self._process(obj, key)
104 else:
105 def dynamic_operation(*key, **kwargs):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in _process(self, element, key)
87 kwargs = {k: v for k, v in self.p.kwargs.items()
88 if k in self.p.operation.params()}
---> 89 return self.p.operation.process_element(element, key, **kwargs)
90 else:
91 return self.p.operation(element, **self.p.kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/operation.py in process_element(self, element, key, **params)
133 """
134 self.p = param.ParamOverrides(self, params)
--> 135 return self._process(element, key)
136
137
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
357
358 def _process(self, element, key=None):
--> 359 agg = aggregate._process(self, element, key)
360 shaded = shade._process(self, agg, key)
361 return shaded
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
226 agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
227 if agg.ndim == 2:
--> 228 return self.p.element_type(agg, **params)
229 else:
230 return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/geoviews/element/geo.py in __init__(self, data, **kwargs)
81 elif crs:
82 kwargs['crs'] = crs
---> 83 super(_Element, self).__init__(data, **kwargs)
84
85
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/element/raster.py in __init__(self, data, bounds, extents, xdensity, ydensity, **params)
242 if bounds is None:
243 xvals = self.dimension_values(0, False)
--> 244 l, r, xdensity, _ = util.bound_range(xvals, xdensity)
245 yvals = self.dimension_values(1, False)
246 b, t, ydensity, _ = util.bound_range(yvals, ydensity)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/util.py in bound_range(vals, density)
1373 using significant digits reported by sys.float_info.dig.
1374 """
-> 1375 low, high = vals.min(), vals.max()
1376 invert = False
1377 if vals[0] > vals[1]:
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/_methods.py in _amin(a, axis, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29 return umr_minimum(a, axis, None, out, keepdims)
30
31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation minimum which has no identity
Out[54]:
b':DynamicMap []'
I think the problem here is two-fold: first of all, since the coordinates are latitudes and longitudes and you specify x_sampling/y_sampling values of 50, the datashaded image ends up with a tiny or zero shape, which causes this error. My suggestion would be to cast the coordinates to Google Mercator first. In future this PR will let you do so very simply by calling this:
import cartopy.crs as ccrs
projected = gv.operation.project(points, projection=ccrs.GOOGLE_MERCATOR)
...
To do this manually for now you can use the cartopy projection directly:
coords = ccrs.GOOGLE_MERCATOR.transform_points(ccrs.PlateCarree(), lons, lats)
projected = gv.Points(coords, crs=ccrs.GOOGLE_MERCATOR)
...
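Putting those pieces together, a rough end-to-end sketch under the same assumptions (props has the longitude/latitude columns shown above, and geomap is the WMTS layer from the question); in Web Mercator the units are metres, so a 50 m sampling interval is reasonable:

import cartopy.crs as ccrs
import geoviews as gv
from holoviews.operation.datashader import datashade

# Project lon/lat (PlateCarree) into Google/Web Mercator coordinates.
coords = ccrs.GOOGLE_MERCATOR.transform_points(
    ccrs.PlateCarree(), props['longitude'].values, props['latitude'].values)
projected = gv.Points(coords[:, :2], crs=ccrs.GOOGLE_MERCATOR)

# Datashade the projected points and overlay them on the tile source.
shaded = datashade(projected, x_sampling=50, y_sampling=50)
geomap * shaded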
