KeyError: 'base_score' while fitting XGBClassifier - python

Using Gridsearch I find the most optimal hyperparameters after fitting my training data:
model_xgb = XGBClassifier()
n_estimators = [50, 100, 150, 200]
max_depth = [2, 4, 6, 8]
param_grid = dict(max_depth=max_depth, n_estimators=n_estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(model_xgb, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold, verbose=1)
grid_result = grid_search.fit(train_X, y_train)
The best answer is obtained by using {'max_depth': 4, 'n_estimators': 50}. That's why I create a new model with these hyperparameters:
model_xgb_tn = XGBClassifier(n_estimators=50,max_depth=4,objective='multi:softprob')
When I try to fit the model to my data: model_xgb_tn.fit(train_X,y_train), I receive a KeyError: 'base_score'. I just wasn't able to understand why I got a KeyError when I even didn't use the hyperparameter.
Below is the error code:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
461 def _repr_mimebundle_(self, **kwargs):
462 """Mime bundle used by jupyter kernels to display estimator"""
--> 463 output = {"text/plain": repr(self)}
464 if get_config()["display"] == 'diagram':
465 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
277 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
278
--> 279 repr_ = pp.pformat(self)
280
281 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
142 def pformat(self, object):
143 sio = _StringIO()
--> 144 self._format(object, sio, 0, 0, {}, 0)
145 return sio.getvalue()
146
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
159 self._readable = False
160 return
--> 161 rep = self._repr(object, context, level)
162 max_width = self._width - indent - allowance
163 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
391 def _repr(self, object, context, level):
392 repr, readable, recursive = self.format(object, context.copy(),
--> 393 self._depth, level)
394 if not readable:
395 self._readable = False
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
168 def format(self, object, context, maxlevels, level):
169 return _safe_repr(object, context, maxlevels, level,
--> 170 changed_only=self._changed_only)
171
172 def _pprint_estimator(self, object, stream, indent, allowance, context,
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
412 recursive = False
413 if changed_only:
--> 414 params = _changed_params(object)
415 else:
416 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
96 init_params = {name: param.default for name, param in init_params.items()}
97 for k, v in params.items():
---> 98 if (repr(v) != repr(init_params[k]) and
99 not (is_scalar_nan(init_params[k]) and is_scalar_nan(v))):
100 filtered_params[k] = v
KeyError: 'base_score'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
277 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
278
--> 279 repr_ = pp.pformat(self)
280
281 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
142 def pformat(self, object):
143 sio = _StringIO()
--> 144 self._format(object, sio, 0, 0, {}, 0)
145 return sio.getvalue()
146
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
159 self._readable = False
160 return
--> 161 rep = self._repr(object, context, level)
162 max_width = self._width - indent - allowance
163 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
391 def _repr(self, object, context, level):
392 repr, readable, recursive = self.format(object, context.copy(),
--> 393 self._depth, level)
394 if not readable:
395 self._readable = False
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
168 def format(self, object, context, maxlevels, level):
169 return _safe_repr(object, context, maxlevels, level,
--> 170 changed_only=self._changed_only)
171
172 def _pprint_estimator(self, object, stream, indent, allowance, context,
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
412 recursive = False
413 if changed_only:
--> 414 params = _changed_params(object)
415 else:
416 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
96 init_params = {name: param.default for name, param in init_params.items()}
97 for k, v in params.items():
---> 98 if (repr(v) != repr(init_params[k]) and
99 not (is_scalar_nan(init_params[k]) and is_scalar_nan(v))):
100 filtered_params[k] = v
KeyError: 'base_score'

You need to give a base score parameter, for the first iteration of gradient boosting, you can think of it as an initial weight to start with. For regression, it's the average of your target column and for classification problems, it's 1/(number of classes). You can refer to documentation of xgboost for more information about this hyperparameter.

Related

FSSpec Error Handling in Python - Timeout Error

I am trying to get Terraclimate Data from Microsoft Planetary and facing time out error. Is there a possiblity of increasing the timeout time ? Please find the code below and the error I am facing. I am using fsspec and xarray for downloading spatial data from MS Planetary portal.
import fsspec
import xarray as xr
store = fsspec.get_mapper(asset.href)
data = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])
clipped_data = data.sel(time=slice('2015-01-01','2019-12-31'),lon=slice(min_lon,max_lon),lat=slice(max_lat,min_lat))
parsed_data = clipped_data[['tmax', 'tmin', 'ppt', 'soil']]
lat_list = parsed_data['lat'].values.tolist()
lon_list = parsed_data['lon'].values.tolist()
filename = "Soil_Moisture_sample.csv"
for(i,j) in zip(lat_list,lon_list):
parsed_data[["soil","tmax","tmin","ppt"]].sel(lon=i, lat=j, method="nearest").to_dataframe().to_csv(filename,mode='a',index=False, header=False)
I am getting the following error
TimeoutError Traceback (most recent call last)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:423, in AsyncFileSystem._cat(self, path, recursive, on_error, batch_size, **kwargs)
422 if ex:
--> 423 raise ex
424 if (
425 len(paths) > 1
426 or isinstance(path, list)
427 or paths[0] != self._strip_protocol(path)
428 ):
File ~\Anaconda3\envs\satellite\lib\asyncio\tasks.py:455, in wait_for(fut, timeout, loop)
454 if timeout is None:
--> 455 return await fut
457 if timeout <= 0:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\implementations\http.py:221, in HTTPFileSystem._cat_file(self, url, start, end, **kwargs)
220 async with session.get(url, **kw) as r:
--> 221 out = await r.read()
222 self._raise_not_found_for_status(r, url)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\client_reqrep.py:1036, in ClientResponse.read(self)
1035 try:
-> 1036 self._body = await self.content.read()
1037 for trace in self._traces:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:375, in StreamReader.read(self, n)
374 while True:
--> 375 block = await self.readany()
376 if not block:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:397, in StreamReader.readany(self)
396 while not self._buffer and not self._eof:
--> 397 await self._wait("readany")
399 return self._read_nowait(-1)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:304, in StreamReader._wait(self, func_name)
303 with self._timer:
--> 304 await waiter
305 else:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\helpers.py:721, in TimerContext.__exit__(self, exc_type, exc_val, exc_tb)
720 if exc_type is asyncio.CancelledError and self._cancelled:
--> 721 raise asyncio.TimeoutError from None
722 return None
TimeoutError:
The above exception was the direct cause of the following exception:
FSTimeoutError Traceback (most recent call last)
Input In [62], in <cell line: 3>()
1 # Flood Region Point - Thiruvanthpuram
2 filename = "Soil_Moisture_sample.csv"
----> 3 parsed_data[["soil","tmax","tmin","ppt"]].sel(lon=8.520833, lat=76.4375, method="nearest").to_dataframe().to_csv(filename,mode='a',index=False, header=False)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5898, in Dataset.to_dataframe(self, dim_order)
5870 """Convert this dataset into a pandas.DataFrame.
5871
5872 Non-index variables in this dataset form the columns of the
(...)
5893
5894 """
5896 ordered_dims = self._normalize_dim_order(dim_order=dim_order)
-> 5898 return self._to_dataframe(ordered_dims=ordered_dims)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5862, in Dataset._to_dataframe(self, ordered_dims)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
-> 5862 data = [
5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5863, in <listcomp>(.0)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
5862 data = [
-> 5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:527, in Variable.values(self)
524 #property
525 def values(self):
526 """The variable's data as a numpy.ndarray"""
--> 527 return _as_array_or_item(self._data)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:267, in _as_array_or_item(data)
253 def _as_array_or_item(data):
254 """Return the given values as a numpy array, or as an individual item if
255 it's a 0d datetime64 or timedelta64 array.
256
(...)
265 TODO: remove this (replace with np.asarray) once these issues are fixed
266 """
--> 267 data = np.asarray(data)
268 if data.ndim == 0:
269 if data.dtype.kind == "M":
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:1696, in Array.__array__(self, dtype, **kwargs)
1695 def __array__(self, dtype=None, **kwargs):
-> 1696 x = self.compute()
1697 if dtype and x.dtype != dtype:
1698 x = x.astype(dtype)
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:315, in DaskMethodsMixin.compute(self, **kwargs)
291 def compute(self, **kwargs):
292 """Compute this dask collection
293
294 This turns a lazy Dask collection into its in-memory equivalent.
(...)
313 dask.base.compute
314 """
--> 315 (result,) = compute(self, traverse=False, **kwargs)
316 return result
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:600, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
597 keys.append(x.__dask_keys__())
598 postcomputes.append(x.__dask_postcompute__())
--> 600 results = schedule(dsk, keys, **kwargs)
601 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~\AppData\Roaming\Python\Python38\site-packages\dask\threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:319, in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File ~\AppData\Roaming\Python\Python38\site-packages\dask\core.py:119, in _execute_task(arg, cache, dsk)
115 func, args = arg[0], arg[1:]
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:128, in getter(a, b, asarray, lock)
123 # Below we special-case `np.matrix` to force a conversion to
124 # `np.ndarray` and preserve original Dask behavior for `getter`,
125 # as for all purposes `np.matrix` is array-like and thus
126 # `is_arraylike` evaluates to `True` in that case.
127 if asarray and (not is_arraylike(c) or isinstance(c, np.matrix)):
--> 128 c = np.asarray(c)
129 finally:
130 if lock:
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:459, in ImplicitToExplicitIndexingAdapter.__array__(self, dtype)
458 def __array__(self, dtype=None):
--> 459 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:623, in CopyOnWriteArray.__array__(self, dtype)
622 def __array__(self, dtype=None):
--> 623 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:524, in LazilyIndexedArray.__array__(self, dtype)
522 def __array__(self, dtype=None):
523 array = as_indexable(self.array)
--> 524 return np.asarray(array[self.key], dtype=None)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\backends\zarr.py:76, in ZarrArrayWrapper.__getitem__(self, key)
74 array = self.get_array()
75 if isinstance(key, indexing.BasicIndexer):
---> 76 return array[key.tuple]
77 elif isinstance(key, indexing.VectorizedIndexer):
78 return array.vindex[
79 indexing._arrayize_vectorized_indexer(key, self.shape).tuple
80 ]
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:788, in Array.__getitem__(self, selection)
786 result = self.vindex[selection]
787 else:
--> 788 result = self.get_basic_selection(pure_selection, fields=fields)
789 return result
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:914, in Array.get_basic_selection(self, selection, out, fields)
911 return self._get_basic_selection_zd(selection=selection, out=out,
912 fields=fields)
913 else:
--> 914 return self._get_basic_selection_nd(selection=selection, out=out,
915 fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:957, in Array._get_basic_selection_nd(self, selection, out, fields)
951 def _get_basic_selection_nd(self, selection, out=None, fields=None):
952 # implementation of basic selection for array with at least one dimension
953
954 # setup indexer
955 indexer = BasicIndexer(selection, self)
--> 957 return self._get_selection(indexer=indexer, out=out, fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1247, in Array._get_selection(self, indexer, out, fields)
1241 if not hasattr(self.chunk_store, "getitems") or \
1242 any(map(lambda x: x == 0, self.shape)):
1243 # sequentially get one key at a time from storage
1244 for chunk_coords, chunk_selection, out_selection in indexer:
1245
1246 # load chunk selection into output array
-> 1247 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
1248 drop_axes=indexer.drop_axes, fields=fields)
1249 else:
1250 # allow storage to get multiple items at once
1251 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1939, in Array._chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1935 ckey = self._chunk_key(chunk_coords)
1937 try:
1938 # obtain compressed data for chunk
-> 1939 cdata = self.chunk_store[ckey]
1941 except KeyError:
1942 # chunk not initialized
1943 if self._fill_value is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\storage.py:717, in KVStore.__getitem__(self, key)
716 def __getitem__(self, key):
--> 717 return self._mutable_mapping[key]
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\mapping.py:137, in FSMap.__getitem__(self, key, default)
135 k = self._key_to_str(key)
136 try:
--> 137 result = self.fs.cat(k)
138 except self.missing_exceptions:
139 if default is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:111, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
108 #functools.wraps(func)
109 def wrapper(*args, **kwargs):
110 self = obj or args[0]
--> 111 return sync(self.loop, func, *args, **kwargs)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:94, in sync(loop, func, timeout, *args, **kwargs)
91 return_result = result[0]
92 if isinstance(return_result, asyncio.TimeoutError):
93 # suppress asyncio.TimeoutError, raise FSTimeoutError
---> 94 raise FSTimeoutError from return_result
95 elif isinstance(return_result, BaseException):
96 raise return_result
FSTimeoutError:
In the line:
store = fsspec.get_mapper(asset.href)
You can pass extra arguments to the fsspec backend, in this case HTTP, see fsspec.implementations.http.HTTPFileSystem. In this case, client_kwargs get passed to aiohttp.ClientSession, and include an optional timeout argument. Your call may look something like
from aiohttp import ClientTimeout
store = get_mapper(asset.href, client_kwargs={"timeout": ClientTimeout(total=5000, connect=1000)})

sklearn - AttributeError: 'CustomScaler' object has no attribute 'copy'

When i write my custom scaler to scale my data without scaling dummies i've already created, i get an error regarding custom scaler not having copy?
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
class CustomScaler(BaseEstimator,TransformerMixin):
def __init__(self,columns,copy=True,with_mean=True,with_std=True):
self.scaler = StandardScaler(copy,with_mean,with_std)
self.columns = columns
self.mean_ = None
self.var_ = None
def fit(self, X, y=None):
self.scaler.fit(X[self.columns], y)
self.mean_ = np.mean(X[self.columns])
self.var_ = np.var(X[self.columns])
return self
def transform(self, X, y=None, copy=True):
init_col_order = X.columns
X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
columns=self.columns)
X_not_scaled = X.loc[:,~X.columns.isin(self.columns)]
return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]
-> These are my unscaled columns
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month',
'Day of the week', 'Transportation Expense', 'Distance to Work',
'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
'Children', 'Pets'], dtype=object)
-> dummy variables i don't want to scale
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']
-> variables i want to scale
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
-> Giving input data "columns_to_scale" to my "CustomScaler"
absenteeism_scaler = CustomScaler(columns_to_scale)
-> i get this warning
C:\Users\prati\Anaconda3\lib\site-packages\sklearn\utils\validation.py:70:
FutureWarning: Pass copy=True, with_mean=True, with_std=True as keyword args. From
version 1.0 (renaming of 0.25) passing these as positional arguments will result in an
error
warnings.warn(f"Pass {args_msg} as keyword args. From version "
-> When i fit my "absenteeism_scaler" to my "unscaled_inputs", i get an error, but the error doesn't stop my code from executing further
absenteeism_scaler.fit(unscaled_inputs)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include,
exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
-> it still gives me output, but i don't understand, that if i get an error the code should stops executing?, but it runs afterwards,
P.S - this is just the left half of the data, as i can't fit all the data here, sry for that....
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)
scaled_inputs
Reason_1 Reason_2 Reason_3 Reason_4 Month Day of the week Transportation
Expense
0 0 0 0 1 0.182726 -0.683704 1.005844
1 0 0 0 0 0.182726 -0.683704 -1.574681
2 0 0 0 1 0.182726 -0.007725 -0.654143
3 1 0 0 0 0.182726 0.668253 0.854936
4 0 0 0 1 0.182726 0.668253 1.005844
Update 1: as i removed
copy=True,with_mean=True,with_std=True from init, my error got resolved, but i think the scaled data will be an "inplace" change & the mean and S.D thing, and so if i don't want to make inplace changes to my data what can i do???
Update 2: is it because copy, with_mean, and with_std are "True" by default?? just check the StandardScaler library on sklearn
def _init__(self,columns):
self.scaler = StandardScaler()
self.columns = columns
self.mean = None
self.var_ = None
Positional/keyword warning/error
The first warning (an error for sklearn versions >1.0) is because you're setting keyword arguments of the StandardScaler as positional arguments here:
def __init__(...):
self.scaler = StandardScaler(copy,with_mean,with_std)
...
You should change it to use the keywords,
def __init__(...):
self.scaler = StandardScaler(copy=copy, with_mean=with_mean, with_std=with_std)
...
No attribute copy error
This is only being thrown when trying to display the html representation of the transformer, after fitting has already succeeded, which is why you're able to continue and transform successfully.
But the issue is more serious than that if you want to make use of the transformer in pipelines, grid searches, etc. In order to clone properly, you need to follow the specific guidance of the sklearn API, or else provide your own get_params and set_params. The __init__ method should set an attribute for every parameter, and only those attributes. So here it should be
def __init__(self, columns, copy=True, with_mean=True, with_std=True):
self.columns = columns
self.copy = copy
self.with_mean = with_mean
self.with_std = with_std
And then make the contained StandardScaler at fit time:
def fit(self, X, y=None):
self.scaler = StandardScaler(copy=self.copy, with_mean=self.with_mean, with_std=self.with_std)
self.scaler.fit(X[self.columns], y)
self.mean_ = np.mean(X[self.columns])
self.var_ = np.var(X[self.columns])
return self

'KMeans' object has no attribute 'k'

I am using the Yellowbrick package to plot an elbow curve for a dataset to find the most optimal number of clusters for the dataset using KMeans as a model.
I am using Scikit-learn's KMeans and Yellowbrick's kelbow_visualizer functions.
The elbow curve is generated and I am able to read the elbow value however the following error is thrown afterwards:
AttributeError: 'KMeans' object has no attribute 'k'
The way I am generating the curve is as follows:
from sklearn.cluster import KMeans
from yellowbrick.cluster.elbow import kelbow_visualizer
def elbow_method(X, max_range_for_elbow, rseed = RSEED):
return kelbow_visualizer(KMeans(random_state=rseed), X, k=(1, max_range_for_elbow))
elbow_method(data_standardized,10)
Where data_standardized is my dataset and RSEED is a constant with value 2.
I'm not sure what the issue is since the elbow curve is generated before the error is thrown.
This is the full error message:
AttributeError: 'KMeans' object has no attribute 'k'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\anaconda3\lib\site-packages\yellowbrick\base.py in get_params(self, deep)
340 the estimator params.
341 """
--> 342 params = super(ModelVisualizer, self).get_params(deep=deep)
343 for param in list(params.keys()):
344 if param.startswith("estimator__"):
~\anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
~\anaconda3\lib\site-packages\yellowbrick\utils\wrapper.py in __getattr__(self, attr)
40 def __getattr__(self, attr):
41 # proxy to the wrapped object
---> 42 return getattr(self._wrapped, attr)
I solved this issue by storing the result of the elbow_method function into a variable, as so:
elbow = elbow_method(data_standardized,10)
just remove the return and put pass at last line of the elbow_method() method.

Python/Numba: Trouble creating custom type using Numba Extension API

I am trying to create a custom Numba Type. I am having issues boxing and unboxing Numba Numpy Arrays to a Native Numpy Arrays.
I have searched online for similar issues and followed the documentation example to the best of my ability. (https://numba.pydata.org/numba-doc/latest/extending/interval-example.html).
I have tried to interpret (https://github.com/numba/numba/blob/master/numba/targets/boxing.py) but it is quite difficult. Therefore, I think I might be doing something small wrong.
Below is my current attempt at including a Numpy array in my custom type.
import numpy as np
from numba import types, cgutils
from numba.extending import typeof_impl, type_callable, models
from numba.extending import register_model, make_attribute_wrapper, overload_attribute
from numba.extending import lower_builtin, unbox, NativeValue, box
class BMatrix(object):
"""
A empty wrapper for a Binary Matrix
"""
def __init__(self, m, n, row_index):#, col_index):
self.m = m
self.n = n
self.row_index = row_index
# self.col_i = col_index
def __repr__(self):
return 'BMatrix(%d, %d)' % (self.m, self.n)
#property
def shape(self):
return (self.m, self.n)
class BMatrixType(types.Type):
def __init__(self):
super(BMatrixType, self).__init__(name='BMatrix')
bmatrix_type = BMatrixType()
#typeof_impl.register(BMatrix)
def typeof_index(val, c):
return bmatrix_type
#type_callable(BMatrix)
def type_bmatrix(context):
def typer(m, n, row_index):
if (isinstance(m, types.Integer)
and isinstance(n, types.Integer)
and isinstance(row_index, nb.types.Array)):
# and isinstance(col_index, nb.types.Array)):
return bmatrix_type
return typer
#register_model(BMatrixType)
class BMatrixModel(models.StructModel):
def __init__(self, dmm, fe_type):
members = [
('m', types.int64),
('n', types.int64),
('row_index', types.Array(types.int64, 1, 'C'))
]
models.StructModel.__init__(self, dmm, fe_type, members)
make_attribute_wrapper(BMatrixType, 'm', 'm')
make_attribute_wrapper(BMatrixType, 'n', 'n')
make_attribute_wrapper(BMatrixType, 'row_index', 'row_index')
#overload_attribute(BMatrixType, "shape")
def get_shape(bmatrix):
def getter(bmatrix):
return (bmatrix.m, bmatrix.n)
return getter
#lower_builtin(BMatrix, types.Integer, types.Integer, types.Array) #nb.types.Array, #nb.types.Array)
def impl_bmatrix(context, builder, sig, args):
typ = sig.return_type
m, n, row_index = args
bmatrix = cgutils.create_struct_proxy(typ)(context, builder)
bmatrix.m = m
bmatrix.n = n
bmatrix.row_index = row_index
return bmatrix._getvalue()
#unbox(BMatrixType)
def unbox_bmatrix(typ, obj, c):
"""
Convert a BMatrixType object to a native interval structure.
"""
m_obj = c.pyapi.object_getattr_string(obj, "m")
n_obj = c.pyapi.object_getattr_string(obj, "n")
row_index_obj = c.pyapi.object_getattr_string(obj, "row_index")
BMatrix = cgutils.create_struct_proxy(typ)(c.context, c.builder)
BMatrix.m = c.pyapi.long_as_longlong(m_obj)
BMatrix.n = c.pyapi.long_as_longlong(n_obj)
BMatrix.row_index = nb.targets.boxing.unbox_array(types.Array(types.int64, 1, 'C'),
row_index_obj, c)
c.pyapi.decref(m_obj)
c.pyapi.decref(n_obj)
c.pyapi.decref(row_index_obj)
is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return NativeValue(BMatrix._getvalue(), is_error=is_error)
#box(BMatrixType)
def box_bmatrix(typ, val, c):
"""
Convert a native bmatrix structure to an BMatrix object.
"""
Bmatrix = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val)
m_obj = c.pyapi.long_from_longlong(Bmatrix.m)
n_obj = c.pyapi.long_from_longlong(Bmatrix.n)
row_index_obj = nb.targets.boxing.box_array(types.Array(types.int64, 1, 'C'),
Bmatrix.row_index, c)
class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(Bmatrix))
res = c.pyapi.call_function_objargs(class_obj, (m_obj, n_obj))
c.pyapi.decref(m_obj)
c.pyapi.decref(n_obj)
c.pyapi.decref(row_index_obj)
c.pyapi.decref(class_obj)
return res
Test Cases (The error Tracebacks are absolutely massive for test_2 and test_3).
#nb.jit(nopython=True)
def test_1(): #Runs
x = BMatrix(10, 10, np.array([10,10,10]))
def test_2(): #Errors
x = BMatrix(10, 10, np.array([10,10,10]))
#nb.jit(nopython=True)
def _test_2(y):
return y
return _test_2(x)
#nb.jit(nopython=True)
def test_3(): #Errors
return BMatrix(10, 10, np.array([10,10,10]))
#nb.jit(nopython=True)
def test_4():
return BMatrix(10, 10, np.array([10,10,10])).row_index
These are the error when I run the test cases
test_1() #Runs
test_2()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-52-0f6d1bdba40b> in <module>
----> 1 test_2()
<ipython-input-51-60141c9792c1> in test_2()
9 return y
10
---> 11 return _test_2(x)
12 #nb.jit(nopython=True)
13 def test_3():
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in _compile_for_args(self, *args, **kws)
368 e.patch_message(''.join(e.args) + help_msg)
369 # ignore the FULL_TRACEBACKS config, this needs reporting!
--> 370 raise e
371
372 def inspect_llvm(self, signature=None):
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in _compile_for_args(self, *args, **kws)
325 argtypes.append(self.typeof_pyval(a))
326 try:
--> 327 return self.compile(tuple(argtypes))
328 except errors.TypingError as e:
329 # Intercept typing error that may be due to an argument
//anaconda3/lib/python3.7/site-packages/numba/compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
33 return _acquire_compile_lock
34
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in compile(self, sig)
657
658 self._cache_misses[sig] += 1
--> 659 cres = self._compiler.compile(args, return_type)
660 self.add_overload(cres)
661 self._cache.save_overload(sig, cres)
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in compile(self, args, return_type)
81 args=args, return_type=return_type,
82 flags=flags, locals=self.locals,
---> 83 pipeline_class=self.pipeline_class)
84 # Check typing error if object mode is used
85 if cres.typing_error is not None and not flags.enable_pyobject:
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
953 pipeline = pipeline_class(typingctx, targetctx, library,
954 args, return_type, flags, locals)
--> 955 return pipeline.compile_extra(func)
956
957
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in compile_extra(self, func)
375 self.lifted = ()
376 self.lifted_from = None
--> 377 return self._compile_bytecode()
378
379 def compile_ir(self, func_ir, lifted=(), lifted_from=None):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _compile_bytecode(self)
884 """
885 assert self.func_ir is None
--> 886 return self._compile_core()
887
888 def _compile_ir(self):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _compile_core(self)
871 self.define_pipelines(pm)
872 pm.finalize()
--> 873 res = pm.run(self.status)
874 if res is not None:
875 # Early pipeline completion
//anaconda3/lib/python3.7/site-packages/numba/compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
33 return _acquire_compile_lock
34
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in run(self, status)
252 # No more fallback pipelines?
253 if is_final_pipeline:
--> 254 raise patched_exception
255 # Go to next fallback pipeline
256 else:
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in run(self, status)
243 try:
244 event("-- %s" % stage_name)
--> 245 stage()
246 except _EarlyPipelineCompletion as e:
247 return e.result
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in stage_nopython_backend(self)
745 """
746 lowerfn = self.backend_nopython_mode
--> 747 self._backend(lowerfn, objectmode=False)
748
749 def stage_compile_interp_mode(self):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _backend(self, lowerfn, objectmode)
685 self.library.enable_object_caching()
686
--> 687 lowered = lowerfn()
688 signature = typing.signature(self.return_type, *self.args)
689 self.cr = compile_result(
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in backend_nopython_mode(self)
672 self.calltypes,
673 self.flags,
--> 674 self.metadata)
675
676 def _backend(self, lowerfn, objectmode):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in native_lowering_stage(targetctx, library, interp, typemap, restype, calltypes, flags, metadata)
1124 lower.lower()
1125 if not flags.no_cpython_wrapper:
-> 1126 lower.create_cpython_wrapper(flags.release_gil)
1127 env = lower.env
1128 call_helper = lower.call_helper
//anaconda3/lib/python3.7/site-packages/numba/lowering.py in create_cpython_wrapper(self, release_gil)
269 self.context.create_cpython_wrapper(self.library, self.fndesc,
270 self.env, self.call_helper,
--> 271 release_gil=release_gil)
272
273 def setup_function(self, fndesc):
//anaconda3/lib/python3.7/site-packages/numba/targets/cpu.py in create_cpython_wrapper(self, library, fndesc, env, call_helper, release_gil)
155 fndesc, env, call_helper=call_helper,
156 release_gil=release_gil)
--> 157 builder.build()
158 library.add_ir_module(wrapper_module)
159
//anaconda3/lib/python3.7/site-packages/numba/callwrapper.py in build(self)
120
121 api = self.context.get_python_api(builder)
--> 122 self.build_wrapper(api, builder, closure, args, kws)
123
124 return wrapper, api
//anaconda3/lib/python3.7/site-packages/numba/callwrapper.py in build_wrapper(self, api, builder, closure, args, kws)
153 innerargs.append(None)
154 else:
--> 155 val = cleanup_manager.add_arg(builder.load(obj), ty)
156 innerargs.append(val)
157
//anaconda3/lib/python3.7/site-packages/numba/callwrapper.py in add_arg(self, obj, ty)
30 """
31 # Unbox argument
---> 32 native = self.api.to_native_value(ty, obj)
33
34 # If an error occurred, go to the cleanup block for the previous argument.
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in to_native_value(self, typ, obj)
1423 impl = _unboxers.lookup(typ.__class__, unbox_unsupported)
1424 c = _UnboxContext(self.context, self.builder, self)
-> 1425 return impl(typ, obj, c)
1426
1427 def from_native_return(self, typ, val, env_manager):
<ipython-input-45-d8ac5afde794> in unbox_bmatrix(typ, obj, c)
85 BMatrix.n = c.pyapi.long_as_longlong(n_obj)
86 BMatrix.row_index = nb.targets.boxing.unbox_array(types.Array(types.int64, 1, 'C'),
---> 87 row_index_obj, c)
88 c.pyapi.decref(m_obj)
89 c.pyapi.decref(n_obj)
//anaconda3/lib/python3.7/site-packages/numba/cgutils.py in __setattr__(self, field, value)
162 if field.startswith('_'):
163 return super(_StructProxy, self).__setattr__(field, value)
--> 164 self[self._datamodel.get_field_position(field)] = value
165
166 def __getitem__(self, index):
//anaconda3/lib/python3.7/site-packages/numba/cgutils.py in __setitem__(self, index, value)
177 ptr = self._get_ptr_by_index(index)
178 value = self._cast_member_from_value(index, value)
--> 179 if value.type != ptr.type.pointee:
180 if (is_pointer(value.type) and is_pointer(ptr.type.pointee)
181 and value.type.pointee == ptr.type.pointee.pointee):
AttributeError: Failed in nopython mode pipeline (step: nopython mode backend)
'NativeValue' object has no attribute 'type'
test_3()
KeyError Traceback (most recent call last)
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in serialize_object(self, obj)
1403 try:
-> 1404 gv = self.module.__serialized[obj]
1405 except KeyError:
KeyError: <numba.cgutils.ValueStructProxy_BMatrix object at 0x11e693f28>
During handling of the above exception, another exception occurred:
PicklingError Traceback (most recent call last)
<ipython-input-53-8d78c7c0acee> in <module>
----> 1 test_3()
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in _compile_for_args(self, *args, **kws)
368 e.patch_message(''.join(e.args) + help_msg)
369 # ignore the FULL_TRACEBACKS config, this needs reporting!
--> 370 raise e
371
372 def inspect_llvm(self, signature=None):
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in _compile_for_args(self, *args, **kws)
325 argtypes.append(self.typeof_pyval(a))
326 try:
--> 327 return self.compile(tuple(argtypes))
328 except errors.TypingError as e:
329 # Intercept typing error that may be due to an argument
//anaconda3/lib/python3.7/site-packages/numba/compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
33 return _acquire_compile_lock
34
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in compile(self, sig)
657
658 self._cache_misses[sig] += 1
--> 659 cres = self._compiler.compile(args, return_type)
660 self.add_overload(cres)
661 self._cache.save_overload(sig, cres)
//anaconda3/lib/python3.7/site-packages/numba/dispatcher.py in compile(self, args, return_type)
81 args=args, return_type=return_type,
82 flags=flags, locals=self.locals,
---> 83 pipeline_class=self.pipeline_class)
84 # Check typing error if object mode is used
85 if cres.typing_error is not None and not flags.enable_pyobject:
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
953 pipeline = pipeline_class(typingctx, targetctx, library,
954 args, return_type, flags, locals)
--> 955 return pipeline.compile_extra(func)
956
957
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in compile_extra(self, func)
375 self.lifted = ()
376 self.lifted_from = None
--> 377 return self._compile_bytecode()
378
379 def compile_ir(self, func_ir, lifted=(), lifted_from=None):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _compile_bytecode(self)
884 """
885 assert self.func_ir is None
--> 886 return self._compile_core()
887
888 def _compile_ir(self):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _compile_core(self)
871 self.define_pipelines(pm)
872 pm.finalize()
--> 873 res = pm.run(self.status)
874 if res is not None:
875 # Early pipeline completion
//anaconda3/lib/python3.7/site-packages/numba/compiler_lock.py in _acquire_compile_lock(*args, **kwargs)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
33 return _acquire_compile_lock
34
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in run(self, status)
252 # No more fallback pipelines?
253 if is_final_pipeline:
--> 254 raise patched_exception
255 # Go to next fallback pipeline
256 else:
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in run(self, status)
243 try:
244 event("-- %s" % stage_name)
--> 245 stage()
246 except _EarlyPipelineCompletion as e:
247 return e.result
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in stage_nopython_backend(self)
745 """
746 lowerfn = self.backend_nopython_mode
--> 747 self._backend(lowerfn, objectmode=False)
748
749 def stage_compile_interp_mode(self):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in _backend(self, lowerfn, objectmode)
685 self.library.enable_object_caching()
686
--> 687 lowered = lowerfn()
688 signature = typing.signature(self.return_type, *self.args)
689 self.cr = compile_result(
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in backend_nopython_mode(self)
672 self.calltypes,
673 self.flags,
--> 674 self.metadata)
675
676 def _backend(self, lowerfn, objectmode):
//anaconda3/lib/python3.7/site-packages/numba/compiler.py in native_lowering_stage(targetctx, library, interp, typemap, restype, calltypes, flags, metadata)
1124 lower.lower()
1125 if not flags.no_cpython_wrapper:
-> 1126 lower.create_cpython_wrapper(flags.release_gil)
1127 env = lower.env
1128 call_helper = lower.call_helper
//anaconda3/lib/python3.7/site-packages/numba/lowering.py in create_cpython_wrapper(self, release_gil)
269 self.context.create_cpython_wrapper(self.library, self.fndesc,
270 self.env, self.call_helper,
--> 271 release_gil=release_gil)
272
273 def setup_function(self, fndesc):
//anaconda3/lib/python3.7/site-packages/numba/targets/cpu.py in create_cpython_wrapper(self, library, fndesc, env, call_helper, release_gil)
155 fndesc, env, call_helper=call_helper,
156 release_gil=release_gil)
--> 157 builder.build()
158 library.add_ir_module(wrapper_module)
159
//anaconda3/lib/python3.7/site-packages/numba/callwrapper.py in build(self)
120
121 api = self.context.get_python_api(builder)
--> 122 self.build_wrapper(api, builder, closure, args, kws)
123
124 return wrapper, api
//anaconda3/lib/python3.7/site-packages/numba/callwrapper.py in build_wrapper(self, api, builder, closure, args, kws)
174
175 retty = self._simplified_return_type()
--> 176 obj = api.from_native_return(retty, retval, env_manager)
177 builder.ret(obj)
178
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in from_native_return(self, typ, val, env_manager)
1429 "prevented the return of " \
1430 "optional value"
-> 1431 out = self.from_native_value(typ, val, env_manager)
1432 return out
1433
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in from_native_value(self, typ, val, env_manager)
1443
1444 c = _BoxContext(self.context, self.builder, self, env_manager)
-> 1445 return impl(typ, val, c)
1446
1447 def reflect_native_value(self, typ, val, env_manager=None):
<ipython-input-45-d8ac5afde794> in box_bmatrix(typ, val, c)
104 Bmatrix.row_index, c)
105
--> 106 class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(Bmatrix))
107 res = c.pyapi.call_function_objargs(class_obj, (m_obj, n_obj))
108 c.pyapi.decref(m_obj)
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in serialize_object(self, obj)
1404 gv = self.module.__serialized[obj]
1405 except KeyError:
-> 1406 struct = self.serialize_uncached(obj)
1407 name = ".const.picklebuf.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR")
1408 gv = self.context.insert_unique_const(self.module, name, struct)
//anaconda3/lib/python3.7/site-packages/numba/pythonapi.py in serialize_uncached(self, obj)
1383 """
1384 # First make the array constant
-> 1385 data = pickle.dumps(obj, protocol=-1)
1386 assert len(data) < 2**31
1387 name = ".const.pickledata.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR")
PicklingError: Failed in nopython mode pipeline (step: nopython mode backend)
Can't pickle <class 'numba.cgutils.ValueStructProxy_BMatrix'>: attribute lookup ValueStructProxy_BMatrix on numba.cgutils failed
test_4() #Runs Wrong
array([-2387225703656530210, -2387225703656530210, -2387225703656530210])
unbox_array returns a NativeValue. Inside NativeValue is the actual value which is what you want to assign to row_index. So, just add ".value" to the end of the following line to extract the value from the NativeValue.
BMatrix.row_index = nb.targets.boxing.unbox_array(types.Array(types.int64, 1, 'C'), row_index_obj, c)

Bokeh Geoviews use Lat/Long or UTM?

I am trying to plot the Zillow dataset with Bokeh using Geoviews and Datashader but I am having the damnedest time getting it to work. I am able to plot the data on a Cartesian plane fine but when I attempt to overlay the data with a map I run into errors.
I have used code adapted from the census-hv example on the datashader github. I believe my problem is that it is looking for the coordinates to be in UTM not Lat/Long. Because the code works when I have my coordinates multiplied by a few thousand. The points are then put above the map in white space. If i attempt to plot the proper lat/long coordinates I get the following errors.
Can someone please point me in the direction of a map that uses Lat/Long
>>>props.head()
longitude latitude
0 -118.654084 34.144442
1 -118.625364 34.140430
2 -118.394633 33.989359
3 -118.437206 34.148863
4 -118.385816 34.194168
import pandas as pd
import holoviews as hv
import geoviews as gv
import datashader as ds
from bokeh.models import WMTSTileSource
from holoviews.operation.datashader import datashade, dynspread
hv.notebook_ex
tension('bokeh')
%%opts Overlay [width=900 height=525 xaxis=None yaxis=None]
geomap = gv.WMTS(WMTSTileSource(url=\
'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))
points = hv.Points(gv.Dataset(props, kdims=['longitude', 'latitude']))
# color_key = {'w':'aqua', 'b':'lime', 'a':'red', 'h':'fuchsia', 'o':'yellow' }
race = datashade(points, x_sampling=50, y_sampling=50,
element_type=gv.Image)
geomap * race
RETURNS ERROR:
WARNING:root:dynamic_operation: Exception raised in callable
'dynamic_operation' of type 'function'.
Invoked as dynamic_operation(height=400, scale=1.0, width=400, x_range=None, y_range=None)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
305 pass
306 else:
--> 307 return printer(obj)
308 # Finally look for special method names
309 method = get_real_method(obj, self.print_method)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in pprint_display(obj)
255 if not ip.display_formatter.formatters['text/plain'].pprint:
256 return None
--> 257 return display(obj, raw=True)
258
259
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in display(obj, raw, **kwargs)
241 elif isinstance(obj, (HoloMap, DynamicMap)):
242 with option_state(obj):
--> 243 html = map_display(obj)
244 else:
245 return repr(obj) if raw else IPython.display.display(obj, **kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in wrapped(element)
127 try:
128 html = fn(element,
--> 129 max_frames=OutputMagic.options['max_frames'])
130
131 # Only want to add to the archive for one display hook...
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in map_display(vmap, max_frames)
196 return None
197
--> 198 return render(vmap)
199
200
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in render(obj, **kwargs)
57 if renderer.fig == 'pdf':
58 renderer = renderer.instance(fig='png')
---> 59 return renderer.html(obj, **kwargs)
60
61
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in html(self, obj, fmt, css, comm, **kwargs)
253 code to initialize a Comm, if the plot supplies one.
254 """
--> 255 plot, fmt = self._validate(obj, fmt)
256 figdata, _ = self(plot, fmt, **kwargs)
257 if css is None: css = self.css
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in _validate(self, obj, fmt)
189 if isinstance(obj, tuple(self.widgets.values())):
190 return obj, 'html'
--> 191 plot = self.get_plot(obj, renderer=self)
192
193 fig_formats = self.mode_formats['fig'][self.mode]
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in get_plot(self_or_cls, obj, renderer)
164 """
165 # Initialize DynamicMaps with first data item
--> 166 initialize_dynamic(obj)
167
168 if not isinstance(obj, Plot) and not displayable(obj):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/util.py in initialize_dynamic(obj)
173 continue
174 if not len(dmap):
--> 175 dmap[dmap._initial_key()]
176
177
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
489 # Nothing to do for callbacks that accept no arguments
490 (inargs, inkwargs) = (args, kwargs)
--> 491 if not args and not kwargs: return self.callable()
492 inputs = [i for i in self.inputs if isinstance(i, DynamicMap)]
493 streams = []
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/overlay.py in dynamic_mul(*args, **kwargs)
27 from .spaces import Callable
28 def dynamic_mul(*args, **kwargs):
---> 29 element = other[args]
30 return self * element
31 callback = Callable(dynamic_mul, inputs=[self, other])
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
519
520 try:
--> 521 ret = self.callable(*args, **kwargs)
522 except:
523 posstr = ', '.join(['%r' % el for el in inargs]) if inargs else ''
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in dynamic_operation(*key, **kwargs)
101 self.p.kwargs.update(kwargs)
102 obj = map_obj[key] if isinstance(map_obj, HoloMap) else map_obj
--> 103 return self._process(obj, key)
104 else:
105 def dynamic_operation(*key, **kwargs):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in _process(self, element, key)
87 kwargs = {k: v for k, v in self.p.kwargs.items()
88 if k in self.p.operation.params()}
---> 89 return self.p.operation.process_element(element, key, **kwargs)
90 else:
91 return self.p.operation(element, **self.p.kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/operation.py in process_element(self, element, key, **params)
133 """
134 self.p = param.ParamOverrides(self, params)
--> 135 return self._process(element, key)
136
137
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
357
358 def _process(self, element, key=None):
--> 359 agg = aggregate._process(self, element, key)
360 shaded = shade._process(self, agg, key)
361 return shaded
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
226 agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
227 if agg.ndim == 2:
--> 228 return self.p.element_type(agg, **params)
229 else:
230 return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/geoviews/element/geo.py in __init__(self, data, **kwargs)
81 elif crs:
82 kwargs['crs'] = crs
---> 83 super(_Element, self).__init__(data, **kwargs)
84
85
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/element/raster.py in __init__(self, data, bounds, extents, xdensity, ydensity, **params)
242 if bounds is None:
243 xvals = self.dimension_values(0, False)
--> 244 l, r, xdensity, _ = util.bound_range(xvals, xdensity)
245 yvals = self.dimension_values(1, False)
246 b, t, ydensity, _ = util.bound_range(yvals, ydensity)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/util.py in bound_range(vals, density)
1373 using significant digits reported by sys.float_info.dig.
1374 """
-> 1375 low, high = vals.min(), vals.max()
1376 invert = False
1377 if vals[0] > vals[1]:
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/_methods.py in _amin(a, axis, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29 return umr_minimum(a, axis, None, out, keepdims)
30
31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation minimum which has no identity
Out[54]:
b':DynamicMap []'
I think the problem here is two-fold, first of all since the coordinates are latitudes and longitudes and you specify xsampling/ysampling values of 50 the datashaded image ends up with a tiny or zero shape, which causes this error. My suggestion would be to cast the coordinates to Google Mercator first. In future this PR will let you do so very simply by calling this:
import cartopy.crs as ccrs
projected = gv.operation.project(points, projection=ccrs.GOOGLE_MERCATOR)
...
To do this manually for now you can use the cartopy projection directly:
coords = ccrs.GOOGLE_MERCATOR.transform_points(ccrs.PlateCarree(), lons, lats)
projected = gv.Points(coords, crs=ccrs.GOOGLE_MERCATOR)
...

Categories