I'm passing kwargs through to a wrapped function. When I call the function with some kwargs it works fine, but when I don't pass any kwargs I receive the error
<__array_function__ internals> in concatenate(*args, **kwargs).
Should kwargs be optional?
My functions look as follows:
def group_data(df: pd.DataFrame, group: Union[list, str], cols: Union[list, str],
               func: Union[list, Callable]) -> Union[pd.DataFrame, pd.Series]:
    """Aggregate ``cols`` of ``df`` per ``group`` and label the result after ``func``.

    Parameters
    ----------
    df : input frame; it is not modified.
    group : column name(s) to group by.
    cols : column name(s) to aggregate.
    func : a single aggregation (callable, or an aggregation name given as a
        string) or a list of aggregations.

    Returns
    -------
    * single aggregation, ``cols`` is one column name: a Series named after the
      aggregation (e.g. ``"median"`` for ``np.median``);
    * single aggregation, ``cols`` is a list: a DataFrame that keeps the
      original column names;
    * list of aggregations: a DataFrame whose columns are renamed to the
      aggregation names.

    Raises
    ------
    ValueError
        From pandas, when ``func`` is a list and the number of result columns
        differs from ``len(func)`` (for example when ``cols`` is a list of
        several columns).
    """
    grouped = df.groupby(group)[cols].agg(func)

    if callable(func) or isinstance(func, str):
        # Only a Series can take the aggregation's name; DataFrame.rename()
        # does not accept a plain string, so a frame keeps its column names.
        if isinstance(grouped, pd.Series):
            label = func if isinstance(func, str) else getattr(func, "__name__", str(func))
            grouped = grouped.rename(label)
        return grouped

    # List of aggregations: one output column per aggregation, named after it.
    grouped.columns = [getattr(f, "__name__", str(f)) for f in func]
    return grouped
def print_subplots(df: pd.DataFrame, x: str, y: Union[list, str, Callable], nrows: int,
                   ncols: int, func: Callable,
                   gr_by: Union[list, str] = None, title: str = None, figsize: tuple = (15, 6),
                   **kwargs):
    """Draw one plot per entry of ``y`` on a ``nrows`` x ``ncols`` grid of subplots.

    Parameters
    ----------
    df : data to plot.
    x : column used on the x axis for plain (non-grouped) entries.
    y : a column name, or a list of entries. Each entry is either a column
        name, plotted directly from ``df``, or a two-item list
        ``[column, aggregation]``, in which case ``df`` is first aggregated
        with ``feature.group_data`` over ``gr_by``.
    nrows, ncols : shape of the subplot grid.
    func : plotting callable invoked with ``ax``, ``data``, ``x`` and ``y``
        keyword arguments (e.g. ``sns.boxplot``).
    gr_by : grouping column(s) for aggregated entries; the first one is used
        as the x axis of the aggregated plot.
    title : figure title.
    figsize : figure size passed to ``plt.subplots``.
    **kwargs : optional extra keyword arguments forwarded unchanged to every
        ``func`` call. Passing none simply forwards nothing.

    Raises
    ------
    TypeError
        If an entry of ``y`` requests an aggregation but ``gr_by`` is None.
    IndexError
        If ``y`` has more entries than the grid has axes.
    """
    # squeeze=False always returns a 2-D array of Axes, so flattening it gives
    # uniform positional access for 1x1, single-row and multi-row grids alike.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()
    fig.suptitle(title)

    # A bare string would otherwise be iterated character by character.
    entries = [y] if isinstance(y, str) else y

    for i, element in enumerate(entries):
        if isinstance(element, list):
            if gr_by is None:
                raise TypeError(
                    "gr_by must be provided when an entry of y is a [column, aggregation] pair"
                )
            column, aggregation = element[0], element[1]
            df_grouped = feature.group_data(df, gr_by, column, aggregation)
            # Indexing a string gr_by with [0] would yield its first character.
            x_grouped = gr_by if isinstance(gr_by, str) else gr_by[0]
            func(
                ax=axes[i],
                data=df_grouped.reset_index(),
                x=x_grouped,
                # group_data names the aggregated column after the aggregation
                y=aggregation.__name__,
                **kwargs
            )
        else:
            func(ax=axes[i], data=df, x=x, y=element, **kwargs)
When I call :
# `order` is not a named parameter of print_subplots, so it is collected into
# **kwargs and forwarded unchanged to func (sns.boxplot here).
print_subplots(
pricing,
'search_weekday',
['z_score', ['z_score', np.median]],
nrows=1,
ncols=2,
func=sns.boxplot,
gr_by=['search_weekday', 'checkin'],
figsize=(20,6),
order=order
)
...the function works fine, in this case order is a kwarg.
But when I call:
# Same call with sns.lineplot and no extra keyword arguments, so **kwargs is
# empty and nothing additional is forwarded to func.
print_subplots(
pricing,
'search_weekday',
['z_score', ['z_score', np.median]],
nrows=1,
ncols=2,
func=sns.lineplot,
gr_by=['search_weekday', 'checkin'],
figsize=(20,6)
)
...it seems to get stuck in a loop, and when I interrupt the process I get the following error:
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
/tmp/ipykernel_192586/2963703819.py in <module>
----> 1 plots.print_subplots(
2 pricing,
3 'search_weekday',
4 ['z_score', ['z_score', np.median]],
5 nrows=1,
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/src/visualization/plots.py in print_subplots(df, x, y, nrows, ncols, func, gr_by, title, figsize, **kwargs)
75 )
76 else:
---> 77 func(ax=axes[i], data=df, x=x, y=element, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
708 p._attach(ax)
709
--> 710 p.plot(ax, kwargs)
711 return ax
712
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/relational.py in plot(self, ax, kws)
469 # Loop over the semantic subsets and add to the plot
470 grouping_vars = "hue", "size", "style"
--> 471 for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True):
472
473 if self.sort:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in iter_data(self, grouping_vars, reverse, from_comp_data)
981
982 if from_comp_data:
--> 983 data = self.comp_data
984 else:
985 data = self.plot_data
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/seaborn/_core.py in comp_data(self)
1055 orig = self.plot_data[var].dropna()
1056 comp_col = pd.Series(index=orig.index, dtype=float, name=var)
-> 1057 comp_col.loc[orig.index] = pd.to_numeric(axis.convert_units(orig))
1058
1059 if axis.get_scale() == "log":
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
717 else:
718 key = com.apply_if_callable(key, self.obj)
--> 719 indexer = self._get_setitem_indexer(key)
720 self._has_valid_setitem_indexer(key)
721
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_setitem_indexer(self, key)
664
665 try:
--> 666 return self._convert_to_indexer(key, axis=0, is_setter=True)
667 except TypeError as e:
668
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, key, axis, is_setter)
1255 return inds
1256 else:
-> 1257 return self._get_listlike_indexer(key, axis)[1]
1258 else:
1259 try:
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
1310 keyarr = ax.reindex(keyarr)[0]
1311 else:
-> 1312 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1313
1314 self._validate_read_indexer(keyarr, indexer, axis)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in _reindex_non_unique(self, target)
3865 return self[:0], np.array([], dtype=np.intp), None
3866
-> 3867 indexer, missing = self.get_indexer_non_unique(target)
3868 check = indexer != -1
3869 new_labels = self.take(indexer[check])
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
5256 tgt_values = target._get_engine_target()
5257
-> 5258 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
5259 return ensure_platform_int(indexer), ensure_platform_int(missing)
5260
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique()
<__array_function__ internals> in resize(*args, **kwargs)
~/Documentos/Work/2021/ota-searches/dsc-notebooks/ota_searchs/ota/lib/python3.8/site-packages/numpy/core/fromnumeric.py in resize(a, new_shape)
1415 extra = Na - extra
1416
-> 1417 a = concatenate((a,) * n_copies)
1418 if extra > 0:
1419 a = a[:-extra]
<__array_function__ internals> in concatenate(*args, **kwargs)
Related
I have a DataFrame in which some columns are target time series. I need to scale them either as one flattened 1D array or as a 2D array, so I am defining a custom wrapper that extends scikit-learn scalers with optional flattening as a preprocessing step. transform and inverse_transform are not posted for simplicity.
import copy
class ScalerOptFlatten(TransformerMixin, BaseEstimator):
    """Wrap a scikit-learn scaler and optionally flatten the input before fitting.

    Parameters
    ----------
    instance : the scaler object to delegate to (e.g. ``StandardScaler()``).
    flatten : when true, the input is reshaped into a single column so that
        all values are scaled together.
    **kwargs : accepted for backward compatibility and ignored.

    Notes
    -----
    The constructor arguments are stored under attributes with the same names
    (``instance``, ``flatten``) and ``get_params`` is deliberately not
    overridden. ``BaseEstimator.get_params`` reads the parameters named in the
    ``__init__`` signature from same-named attributes, and
    ``sklearn.base.clone`` (used by ``ColumnTransformer``) rebuilds the object
    with ``type(self)(**params)``. Returning the wrapped scaler's parameters
    instead makes ``clone`` call ``__init__`` without ``instance`` and
    ``flatten``.
    """

    def __init__(self, instance, flatten, **kwargs):
        self.instance = instance
        self.flatten = flatten

    @property
    def scaler(self):
        """Alias of ``instance``, kept for code that uses the old attribute name."""
        return self.instance

    @scaler.setter
    def scaler(self, value):
        self.instance = value

    def fit(self, X, y=None):
        """Fit the wrapped scaler on ``X`` (flattened to one column if requested).

        Returns ``self``.
        """
        import numpy as np

        if self.flatten:
            # np.asarray lets DataFrames (which have no .reshape) be flattened too.
            scale_in = np.asarray(X).reshape(-1, 1)
        else:
            scale_in = X
        # Identity check: `y == None` is evaluated element-wise for array-like y.
        if y is None:
            self.scaler.fit(scale_in)
        else:
            self.scaler.fit(scale_in, y)
        return self
It works when used on its own
# Standalone use: wrap a StandardScaler with flattening enabled and fit it directly.
scaler_transformer = ScalerOptFlatten( instance = StandardScaler(),
flatten = True )
scaler_transformer.fit(df_unstack)
But fails when stacked in
# Same transformer inside a ColumnTransformer, applied to target_feature_names.
# Per the traceback below, fitting calls clone() on the transformer first.
preprocessor = ColumnTransformer(
transformers=[
("ts", scaler_transformer, target_feature_names)
]
)
preprocessor.fit(df_unstack)
With
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
826 try:
--> 827 tasks = self._ready_batches.get(block=False)
828 except queue.Empty:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/queue.py in get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-494-0eb4b591d8a6> in <module>
----> 1 ts_scaler_step.fit(df_unstack)
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit(self, X, y)
492 # we use fit_transform to make sure to set sparse_output_ (for which we
493 # need the transformed data) to have consistent output type in predict
--> 494 self.fit_transform(X, y=y)
495 return self
496
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
529 self._validate_remainder(X)
530
--> 531 result = self._fit_transform(X, y, _fit_transform_one)
532
533 if not result:
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted)
456 self._iter(fitted=fitted, replace_strings=True))
457 try:
--> 458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
460 transformer=clone(trans) if not fitted else trans,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
836 big_batch_size = batch_size * n_jobs
837
--> 838 islice = list(itertools.islice(iterator, big_batch_size))
839 if len(islice) == 0:
840 return False
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
458 return Parallel(n_jobs=self.n_jobs)(
459 delayed(func)(
--> 460 transformer=clone(trans) if not fitted else trans,
461 X=_safe_indexing(X, column, axis=1),
462 y=y,
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~/anaconda3/envs/tf_2_2_0/lib/python3.8/site-packages/sklearn/base.py in clone(estimator, safe)
86 for name, param in new_object_params.items():
87 new_object_params[name] = clone(param, safe=False)
---> 88 new_object = klass(**new_object_params)
89 params_set = new_object.get_params(deep=False)
90
TypeError: __init__() missing 2 required positional arguments: 'instance' and 'flatten'
I suspect that the error is in the get_params method.
I want to quantify some geolocations with osmnx using the nearest_edges function. I get a ValueError when running this code and don't know what I'm doing wrong:
# NOTE(review): G, gdf_loc and gdf_edges are defined outside this snippet.
# project graph and points
G_proj = ox.project_graph(G)
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points
# aggregate so have number of accidents on each edge
# named aggregation: output column = (input column, aggregation);
# this chained statement is the one the traceback below points at
gdf_bad_roads = (
gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
.groupby(["u", "v", "key"])
.agg(geometry = ("geometry", "first"), number=("osmid", "size"))
.set_crs(gdf_edges.crs)
)
When I run it, the traceback points to the .agg(geometry=...) line, passes through pandas code containing the comment "# we require a list, but not a 'str'", and after several more frames ends in the ValueError "'data' should be a 1-dimensional array of geometry objects." I have attached the whole traceback. Thanks for your help!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/jy/1f2tlvb965g30zhw9q3cvdw07r5rb_/T/ipykernel_82991/3621029527.py in <module>
2 # aggregate so have number of accidents on each edge
3 gdf_bad_roads = (
----> 4 gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
5 .groupby(["u", "v", "key"])
6 .agg(geometry = ("geometry", "first"), number=("osmid", "size"))
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
977
978 op = GroupByApply(self, func, args, kwargs)
--> 979 result = op.agg()
980 if not is_dict_like(func) and result is not None:
981 return result
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg(self)
159
160 if is_dict_like(arg):
--> 161 return self.agg_dict_like()
162 elif is_list_like(arg):
163 # we require a list, but not a 'str'
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg_dict_like(self)
457
458 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 459 result = concat(
460 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
461 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
305 )
306
--> 307 return op.get_result()
308
309
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in get_result(self)
537
538 cons = sample._constructor
--> 539 return cons(new_data).__finalize__(self, method="concat")
540
541 def _get_result_dim(self) -> int:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __getitem__(self, key)
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
3424 if self.columns.is_unique and key in self.columns:
3425 if isinstance(self.columns, MultiIndex):
-> 3426 return self._getitem_multilevel(key)
3427 return self._get_item_cache(key)
3428
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _getitem_multilevel(self, key)
3511 result_columns = maybe_droplevels(new_columns, key)
3512 if self._is_mixed_type:
-> 3513 result = self.reindex(columns=new_columns)
3514 result.columns = result_columns
3515 else:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 #deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4816
4817 # perform the reindex on the axes
-> 4818 return self._reindex_axes(
4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4589 columns = axes["columns"]
4590 if columns is not None:
-> 4591 frame = frame._reindex_columns(
4592 columns, method, copy, level, fill_value, limit, tolerance
4593 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4634 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4635 )
-> 4636 return self._reindex_with_indexers(
4637 {1: [new_columns, indexer]},
4638 copy=copy,
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4895 new_data = new_data.copy()
4896
-> 4897 return self._constructor(new_data).__finalize__(self)
4898
4899 def filter(
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
48
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in from_shapely(data, crs)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
152
153
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in __init__(self, data, crs)
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
ValueError: 'data' should be a 1-dimensional array of geometry objects.
Edit: thank you! Unfortunately it doesn't work. I downgraded Python to 3.9 and upgraded pandas to 1.4, but I have the same issue. I added the traceback of the other code as well.
----
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [4], in <cell line: 4>()
2 gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
3 # aggregate so have number of accidents on each edge
----> 4 gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
5 geometry=("geometry", "first"), number=("osmid", "size")
6 ).set_crs(gdf_edges.crs)
8 print(f"""
9 pandas: {pd.__version__}
10 geopandas: {gpd.__version__}
11 osmnx: {ox.__version__}""")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
866 func = maybe_mangle_lambdas(func)
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
165 return self.apply_str()
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'
171 return self.agg_list_like()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:498, in Apply.agg_dict_like(self)
495 keys_to_use = ktu
497 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 498 result = concat(
499 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
500 )
501 elif any(is_ndframe):
502 # There is a mix of NDFrames and scalars
503 raise ValueError(
504 "cannot perform both aggregation "
505 "and transformation operations "
506 "simultaneously"
507 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:359, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
155 """
156 Concatenate pandas objects along a particular axis with optional set logic
157 along the other axes.
(...)
344 ValueError: Indexes have overlapping values: ['a']
345 """
346 op = _Concatenator(
347 objs,
348 axis=axis,
(...)
356 sort=sort,
357 )
--> 359 return op.get_result()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:599, in _Concatenator.get_result(self)
596 new_data._consolidate_inplace()
598 cons = sample._constructor
--> 599 return cons(new_data).__finalize__(self, method="concat")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:157, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
154 index = self.index
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
160 and not self["geometry"].values.crs == crs
161 ):
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:1327, in GeoDataFrame.__getitem__(self, key)
1321 def __getitem__(self, key):
1322 """
1323 If the result is a column containing only 'geometry', return a
1324 GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3473, in DataFrame.__getitem__(self, key)
3471 if self.columns.is_unique and key in self.columns:
3472 if isinstance(self.columns, MultiIndex):
-> 3473 return self._getitem_multilevel(key)
3474 return self._get_item_cache(key)
3476 # Do we have a slicer (on rows)?
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3560, in DataFrame._getitem_multilevel(self, key)
3558 result_columns = maybe_droplevels(new_columns, key)
3559 if self._is_mixed_type:
-> 3560 result = self.reindex(columns=new_columns)
3561 result.columns = result_columns
3562 else:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:324, in rewrite_axis_style_signature.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4798, in DataFrame.reindex(self, *args, **kwargs)
4796 kwargs.pop("axis", None)
4797 kwargs.pop("labels", None)
-> 4798 return super().reindex(**kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:4974, in NDFrame.reindex(self, *args, **kwargs)
4971 return self._reindex_multi(axes, copy, fill_value)
4973 # perform the reindex on the axes
-> 4974 return self._reindex_axes(
4975 axes, level, limit, tolerance, method, fill_value, copy
4976 ).__finalize__(self, method="reindex")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4611, in DataFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4609 columns = axes["columns"]
4610 if columns is not None:
-> 4611 frame = frame._reindex_columns(
4612 columns, method, copy, level, fill_value, limit, tolerance
4613 )
4615 index = axes["index"]
4616 if index is not None:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4656, in DataFrame._reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4643 def _reindex_columns(
4644 self,
4645 new_columns,
(...)
4651 tolerance=None,
4652 ):
4653 new_columns, indexer = self.columns.reindex(
4654 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4655 )
-> 4656 return self._reindex_with_indexers(
4657 {1: [new_columns, indexer]},
4658 copy=copy,
4659 fill_value=fill_value,
4660 allow_dups=False,
4661 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:5054, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
5051 if copy and new_data is self._mgr:
5052 new_data = new_data.copy()
-> 5054 return self._constructor(new_data).__finalize__(self)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:164, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:46, in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:151, in from_shapely(data, crs)
135 def from_shapely(data, crs=None):
136 """
137 Convert a list or array of shapely objects to a GeometryArray.
138
(...)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:280, in GeometryArray.__init__(self, data, crs)
275 raise TypeError(
276 "'data' should be array of geometry objects. Use from_shapely, "
277 "from_wkb, from_wkt functions to construct a GeometryArray."
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
283 self.data = data
285 self._crs = None
ValueError: 'data' should be a 1-dimensional array of geometry objects.
pandas: 1.4.1
geopandas: 0.10.2
osmnx: 1.1.2
have changed this to a MWE
have separated out join() and groupby() / agg()
have included versions
one difference I can see - python 3.9 vs 3.10
import osmnx as ox
import geopandas as gpd
import pandas as pd
import io
# Minimal working example: a handful of accident records are turned into
# points, matched to their nearest street edge, and counted per edge.
# The inline CSV holds an accident id plus two coordinate columns.
df = pd.read_csv(
io.StringIO(
"""AccidentUID,AccidentLocation_CHLV95_E,AccidentLocation_CHLV95_N
99BA5D383B96D02AE0430A865E33D02A,2663985,1213215
9B25C4871C909022E0430A865E339022,2666153,1211303
9B71AB601D948092E0430A865E338092,2666168,1211785
9C985CF7710A60C0E0430A865E3360C0,2663991,1213203
9EA9548660AB3002E0430A865E333002,2666231,1210786
9B2E8B25D5C29094E0430A865E339094,2666728,1210404
9C87C10FB73A905EE0430A865E33905E,2666220,1211811
9E30F39D35CA1058E0430A865E331058,2664599,1212960
9BC2EA43E0BFC068E0430A865E33C068,2665533,1212617
9C0BB9332AB30044E0430A865E330044,2666852,1211964"""
)
)
# build point geometries (x from the *_E column, y from the *_N column),
# declare them as EPSG:2056 and reproject to EPSG:4326
gdf_loc = gpd.GeoDataFrame(
data=df,
geometry=gpd.points_from_xy(
df["AccidentLocation_CHLV95_E"], df["AccidentLocation_CHLV95_N"]
),
crs="EPSG:2056",
).to_crs("epsg:4326")
# get the OSM street network (network_type="drive") for the investigated
# location, project it, and split the projected graph into node and edge frames
G = ox.graph_from_place("Luzern, Switzerland", network_type="drive")
G_proj = ox.project_graph(G)
gdf_nodes, gdf_edges = ox.utils_graph.graph_to_gdfs(G_proj)
# bring the points into the projected graph's CRS, then look up the nearest
# edge for every point; return_dist=True also returns the distances as `d`
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges: each point is labelled
# with the (u, v, key) tuple of its nearest edge and gets its distance attached
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points; the inner join on the index keeps
# only edges with at least one matched point, and overlapping column names
# coming from the points get the "_loc" suffix
gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
# aggregate (named aggregation) to get the number of accidents on each edge:
# keep the first geometry per edge, count rows via "size", then set the CRS
# on the result again from gdf_edges
gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
geometry=("geometry", "first"), number=("osmid", "size")
).set_crs(gdf_edges.crs)
# report the library versions this example was run with
print(f"""
pandas: {pd.__version__}
geopandas: {gpd.__version__}
osmnx: {ox.__version__}""")
pandas: 1.4.0
geopandas: 0.10.2
osmnx: 1.1.2
Below are two alternative ways to write the aggregation. It has been confirmed that both work,
hence the conclusion is that named aggregations are what is failing. This should possibly be raised as an issue on pandas, but it does not fail in all environments.
The groupby()/agg() version takes the first geometry on shared edges; it is also necessary to set the CRS again afterwards.
The dissolve() version does a unary union of the geometries. Conceptually the result should be the same, but it gives slightly different geometry. (A unary union of identical geometries is, IMHO, just one instance of those geometries.)
# dict-style aggregation per (u, v, key): first geometry, row count taken from AccidentUID; CRS set again from gdf_edges
gdf_bad_roads.groupby(["u", "v", "key"]).agg({"geometry":"first", "AccidentUID":"size"}).set_crs(gdf_edges.crs).explore(color="blue")
# dissolve on the same keys, with the same "size" aggregation on AccidentUID
gdf_bad_roads.dissolve(["u", "v", "key"], aggfunc={"AccidentUID":"size"}).explore(color="blue")
I have the following code which correctly renders this:
plt.xlabel('Date')
plt.ylabel('Temp')
plt.title('Min and Max temperature 2005-2014')
# Plotting on the first y-axis
minimum=new_df['min']
maximum=new_df['max']
plt.plot(new_df['Date'], new_df['min'], color='orange', label='Min')
plt.plot(new_df['Date'], new_df['max'], color='olive', label='Max')
Now I need to colour the area between the 2 lines:
I tried this:
plt.fill_between(minimum, maximum, color='#539ecd')
but then I get this error:
ValueError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
305 pass
306 else:
--> 307 return printer(obj)
308 # Finally look for special method names
309 method = get_real_method(obj, self.print_method)
/opt/conda/lib/python3.6/site-packages/IPython/core/pylabtools.py in <lambda>(fig)
225
226 if 'png' in formats:
--> 227 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
228 if 'retina' in formats or 'png2x' in formats:
229 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))
/opt/conda/lib/python3.6/site-packages/IPython/core/pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
117
118 bytes_io = BytesIO()
--> 119 fig.canvas.print_figure(bytes_io, **kw)
120 data = bytes_io.getvalue()
121 if fmt == 'svg':
/opt/conda/lib/python3.6/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
2190 orientation=orientation,
2191 dryrun=True,
-> 2192 **kwargs)
2193 renderer = self.figure._cachedRenderer
2194 bbox_inches = self.figure.get_tightbbox(renderer)
/opt/conda/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, *args, **kwargs)
543
544 def print_png(self, filename_or_obj, *args, **kwargs):
--> 545 FigureCanvasAgg.draw(self)
546 renderer = self.get_renderer()
547 original_dpi = renderer.dpi
/opt/conda/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py in draw(self)
462
463 try:
--> 464 self.figure.draw(self.renderer)
465 finally:
466 RendererAgg.lock.release()
/opt/conda/lib/python3.6/site-packages/matplotlib/artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
61 def draw_wrapper(artist, renderer, *args, **kwargs):
62 before(artist, renderer)
---> 63 draw(artist, renderer, *args, **kwargs)
64 after(artist, renderer)
65
/opt/conda/lib/python3.6/site-packages/matplotlib/figure.py in draw(self, renderer)
1141
1142 mimage._draw_list_compositing_images(
-> 1143 renderer, self, dsu, self.suppressComposite)
1144
1145 renderer.close_group('figure')
/opt/conda/lib/python3.6/site-packages/matplotlib/image.py in _draw_list_compositing_images(renderer, parent, dsu, suppress_composite)
137 if not_composite or not has_images:
138 for zorder, a in dsu:
--> 139 a.draw(renderer)
140 else:
141 # Composite any adjacent images together
/opt/conda/lib/python3.6/site-packages/matplotlib/artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
61 def draw_wrapper(artist, renderer, *args, **kwargs):
62 before(artist, renderer)
---> 63 draw(artist, renderer, *args, **kwargs)
64 after(artist, renderer)
65
/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_base.py in draw(self, renderer, inframe)
2407 renderer.stop_rasterizing()
2408
-> 2409 mimage._draw_list_compositing_images(renderer, self, dsu)
2410
2411 renderer.close_group('axes')
/opt/conda/lib/python3.6/site-packages/matplotlib/image.py in _draw_list_compositing_images(renderer, parent, dsu, suppress_composite)
137 if not_composite or not has_images:
138 for zorder, a in dsu:
--> 139 a.draw(renderer)
140 else:
141 # Composite any adjacent images together
/opt/conda/lib/python3.6/site-packages/matplotlib/artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
61 def draw_wrapper(artist, renderer, *args, **kwargs):
62 before(artist, renderer)
---> 63 draw(artist, renderer, *args, **kwargs)
64 after(artist, renderer)
65
/opt/conda/lib/python3.6/site-packages/matplotlib/axis.py in draw(self, renderer, *args, **kwargs)
1134 renderer.open_group(__name__)
1135
-> 1136 ticks_to_draw = self._update_ticks(renderer)
1137 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw,
1138 renderer)
/opt/conda/lib/python3.6/site-packages/matplotlib/axis.py in _update_ticks(self, renderer)
967
968 interval = self.get_view_interval()
--> 969 tick_tups = [t for t in self.iter_ticks()]
970 if self._smart_bounds:
971 # handle inverted limits
/opt/conda/lib/python3.6/site-packages/matplotlib/axis.py in <listcomp>(.0)
967
968 interval = self.get_view_interval()
--> 969 tick_tups = [t for t in self.iter_ticks()]
970 if self._smart_bounds:
971 # handle inverted limits
/opt/conda/lib/python3.6/site-packages/matplotlib/axis.py in iter_ticks(self)
910 Iterate through all of the major and minor ticks.
911 """
--> 912 majorLocs = self.major.locator()
913 majorTicks = self.get_major_ticks(len(majorLocs))
914 self.major.formatter.set_locs(majorLocs)
/opt/conda/lib/python3.6/site-packages/matplotlib/dates.py in __call__(self)
981 def __call__(self):
982 'Return the locations of the ticks'
--> 983 self.refresh()
984 return self._locator()
985
/opt/conda/lib/python3.6/site-packages/matplotlib/dates.py in refresh(self)
1001 def refresh(self):
1002 'Refresh internal information based on current limits.'
-> 1003 dmin, dmax = self.viewlim_to_dt()
1004 self._locator = self.get_locator(dmin, dmax)
1005
/opt/conda/lib/python3.6/site-packages/matplotlib/dates.py in viewlim_to_dt(self)
758 vmin, vmax = vmax, vmin
759
--> 760 return num2date(vmin, self.tz), num2date(vmax, self.tz)
761
762 def _get_unit(self):
/opt/conda/lib/python3.6/site-packages/matplotlib/dates.py in num2date(x, tz)
399 tz = _get_rc_timezone()
400 if not cbook.iterable(x):
--> 401 return _from_ordinalf(x, tz)
402 else:
403 x = np.asarray(x)
/opt/conda/lib/python3.6/site-packages/matplotlib/dates.py in _from_ordinalf(x, tz)
252
253 ix = int(x)
--> 254 dt = datetime.datetime.fromordinal(ix).replace(tzinfo=UTC)
255
256 remainder = float(x) - ix
ValueError: ordinal must be >= 1
<matplotlib.figure.Figure at 0x7fb311ea1cf8>
Edit:
dataframe looks like this:
Date min max min2015 max2015
0 2014-01-01 -160 156 -133 11
1 2014-01-02 -267 139 -122 39
2 2014-01-03 -267 133 -67 39
3 2014-01-04 -261 106 -88 44
4 2014-01-05 -150 128 -155 28
and I convert the Date to datetime type like this:
new_df['Date'] = pd.to_datetime(new_df['Date'], infer_datetime_format=True)
Edit:
When I run:
plt.fill_between(new_df['Date'], minimum, maximum)
I get this error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-0f6fcbb48fdc> in <module>()
59
60
---> 61 leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')
<ipython-input-23-0f6fcbb48fdc> in leaflet_plot_stations(binsize, hashid)
44 minimum = new_df['min']
45 maximum = new_df['max']
---> 46 plt.fill_between(new_df['Date'], minimum, maximum)
47 #plt.scatter(new_df['Date'], new_df['min2015'], 'o')
48 #plt.scatter(new_df['Date'], new_df['max2015'], 'o')
/opt/conda/lib/python3.6/site-packages/matplotlib/pyplot.py in fill_between(x, y1, y2, where, interpolate, step, hold, data, **kwargs)
2999 ret = ax.fill_between(x, y1, y2=y2, where=where,
3000 interpolate=interpolate, step=step, data=data,
-> 3001 **kwargs)
3002 finally:
3003 ax._hold = washold
/opt/conda/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1890 warnings.warn(msg % (label_namer, func.__name__),
1891 RuntimeWarning, stacklevel=2)
-> 1892 return func(ax, *args, **kwargs)
1893 pre_doc = inner.__doc__
1894 if pre_doc is None:
/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_axes.py in fill_between(self, x, y1, y2, where, interpolate, step, **kwargs)
4770
4771 # Convert the arrays so we can work with them
-> 4772 x = ma.masked_invalid(self.convert_xunits(x))
4773 y1 = ma.masked_invalid(self.convert_yunits(y1))
4774 y2 = ma.masked_invalid(self.convert_yunits(y2))
/opt/conda/lib/python3.6/site-packages/numpy/ma/core.py in masked_invalid(a, copy)
2343 cls = type(a)
2344 else:
-> 2345 condition = ~(np.isfinite(a))
2346 cls = MaskedArray
2347 result = a.view(cls)
TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
It looks like the fill is being attempted along the x-axis, which here is the time axis: in plt.fill_between(minimum, maximum) the first argument, minimum, is taken as the x coordinates, and its values aren't dates. I've looked up the documentation:
matplotlib.pyplot.fill_between(x, y1, y2=0, where=None, interpolate=False, step=None, *, data=None, **kwargs)
That was the first error.
Then with numpy newer than 1.17.0, you could just do:
# Build synthetic "min"/"max" series on a daily DatetimeIndex and shade the
# band between them.
size = 500
minimum = np.random.normal(0, 100, size)
minimum.sort()
# sorted samples are most negative/positive at the two ends, so subtracting
# their absolute value from a 250-299 baseline yields a curve that is low at
# both ends and highest in the middle
minimum = np.random.randint(250, 300, size) - np.abs(minimum)
df = pd.DataFrame(
    minimum,
    # "D" is the calendar-day frequency alias; the lowercase spelling is avoided
    index=pd.date_range("2005-01-01", periods=size, freq="D"),
    columns=['min'],
)
df['max'] = df['min'] + np.random.randint(200, 250, size)
# NOTE: despite its name, `fig` is the Axes object returned by df.plot()
fig = df.plot()
# x coordinates come first, then the lower and upper curves
fig.fill_between(df.index, df['min'], df['max'], color='#539ecd')
But from that second traceback, we can see that fill_between evaluates ~(np.isfinite(a)) on each of its coordinate arrays (x, y1 and y2), which older numpy versions don't support for datetime64, the type of your x-axis.
So we will have to use a numeric x-axis and then change the labels.
# Same construction on the default integer index, so that fill_between only
# receives numeric x values. `minimum` and `size` are the variables defined in
# the previous snippet.
df = pd.DataFrame(
    minimum,
    columns=['min'],
)
df['max'] = df['min'] + np.random.randint(200, 250, size)
# NOTE: despite its name, `fig` is the Axes object returned by df.plot()
fig = df.plot()
fig.fill_between(df.index, df['min'], df['max'], color='#539ecd')
# We take the original datetime axis ("D" is the calendar-day frequency alias)
date_axis = pd.date_range("2005-01-01", periods=size, freq="D")
# and map a function from (axis, tick) -> wanted string
def label(axis, tick):
    """Return the "year-month" label for a numeric tick position.

    Parameters
    ----------
    axis : sequence of date-like objects
        Indexable by integer position; each element must expose ``year``
        and ``month`` attributes.
    tick : number
        Tick location; it is truncated with ``int`` before being used as
        a position in ``axis``.

    Returns
    -------
    str
        ``"<year>-<month>"`` (month not zero-padded) for the element at the
        tick position, or a single space when the position is outside
        ``axis``.
    """
    tick = int(tick)
    # a tick exactly one past the end is labelled with the last element
    if tick == len(axis):
        tick -= 1
    if 0 <= tick < len(axis):
        return f"{axis[tick].year}-{axis[tick].month}"
    # out-of-range ticks get a blank label
    return ' '
# Pin the current tick locations first; per the original note this silences
# a warning when the labels are replaced below.
tick_positions = fig.get_xticks()
fig.set_xticks(tick_positions)
# Replace every numeric tick label with the date string for that position.
fig.set_xticklabels([label(date_axis, position) for position in tick_positions])
fill_between takes the x coordinates as first argument. The following should work:
plt.fill_between(new_df["Date"], minimum, maximum, color="lemonchiffon")
Note that using matplotlib 3.4 I could not reproduce the error. Whether the values in Date were converted to dates or were left as strings, fill_between(minimum, maximum) does not throw an error but does produce unexpected plots.
Edit
Using numpy 1.11.3 and matplotlib 2.0.2, I've been able to reproduce the TypeError raised by np.isfinite (see this post for more information). Indeed, this function supports datetime64 only from version 1.17 onwards. I highly suggest that you update your versions of matplotlib and numpy. However, with the earlier versions described in this paragraph, the error can be bypassed by explicitly casting the dates to a numpy array:
plt.fill_between(np.array(new_df["Date"]), minimum, maximum, color='lemonchiffon')
I have a pandas dataframe with one column of model variables and their corresponding statistics in another column. I've done some string manipulation to get a derived summary table to join the summary table from the model.
lost_cost_final_table.loc[lost_cost_final_table['variable'].str.contains('class_cc', case = False), 'variable'] = lost_cost_final_table['variable'].str[:8]
Full traceback.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-229-1dbe5bd14d4b> in <module>
----> 1 lost_cost_final_table.loc[lost_cost_final_table['variable'].str.contains('class_cc', case = False), 'variable'] = lost_cost_final_table['variable'].str[:8]
2 #lost_cost_final_table.loc[lost_cost_final_table['variable'].str.contains('class_v_age', case = False), 'variable'] = lost_cost_final_table['variable'].str[:11]
3 #lost_cost_final_table.loc[lost_cost_final_table['variable'].str.contains('married_age', case = False), 'variable'] = lost_cost_final_table['variable'].str[:11]
4 #lost_cost_final_table.loc[lost_cost_final_table['variable'].str.contains('state_model', case = False), 'variable'] = lost_cost_final_table['variable'].str[:11]
5
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
187 key = com._apply_if_callable(key, self.obj)
188 indexer = self._get_setitem_indexer(key)
--> 189 self._setitem_with_indexer(indexer, value)
190
191 def _validate_key(self, key, axis):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value)
467
468 if isinstance(value, ABCSeries):
--> 469 value = self._align_series(indexer, value)
470
471 info_idx = indexer[info_axis]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _align_series(self, indexer, ser, multiindex_indexer)
732 return ser._values.copy()
733
--> 734 return ser.reindex(new_ix)._values
735
736 # 2 dims
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in reindex(self, index, **kwargs)
3323 #Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
3324 def reindex(self, index=None, **kwargs):
-> 3325 return super(Series, self).reindex(index=index, **kwargs)
3326
3327 def drop(self, labels=None, axis=0, index=None, columns=None,
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
3687 # perform the reindex on the axes
3688 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3689 fill_value, copy).__finalize__(self)
3690
3691 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3705 obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
3706 fill_value=fill_value,
-> 3707 copy=copy, allow_dups=False)
3708
3709 return obj
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
3808 fill_value=fill_value,
3809 allow_dups=allow_dups,
-> 3810 copy=copy)
3811
3812 if copy and new_data is self._data:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
4412 # some axes don't allow reindexing with dups
4413 if not allow_dups:
-> 4414 self.axes[axis]._can_reindex(indexer)
4415
4416 if axis >= self.ndim:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _can_reindex(self, indexer)
3574 # trying to reindex on an axis with duplicates
3575 if not self.is_unique and len(indexer):
-> 3576 raise ValueError("cannot reindex from a duplicate axis")
3577
3578 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis
However, when I run the same line on the small example below, it works, and the only difference is the data frame name. I don't see what the difference between the two lines of code is. Any ideas?
# Small stand-in for the summary table, given column by column:
# variable names, their unique-value counts and their p-values.
df = pd.DataFrame(
    {
        'variable': [
            'class_cc-Harley',
            'class_cc_Sport',
            'class_cc_Other',
            'unit_driver_experience',
        ],
        'unique_value': [1200, 1400, 700, 45],
        'p_value': [.0001, .0001, .0001, .049],
    }
)
df.loc[df['variable'].str.contains('class_cc', case = False), 'variable'] = df['variable'].str[:8]
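The index of lost_cost_final_table is not unique. The assignment has to align the Series on the right-hand side with the selected rows by index label (the _align_series and reindex calls in the traceback), and that reindex fails when labels are duplicated. This can be fixed by giving the frame a fresh, unique index with reset_index: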
lost_cost_final_table.reset_index(inplace=True)
This question already has answers here:
KeyError when plotting a sliced pandas dataframe with datetimes
(3 answers)
Closed 7 years ago.
I have this data frame:
date_obj col1 col2 col3 col4
40038 2012-11-19 1.000 0.831856 0.986209 0.843919
40039 2012-11-20 2.015 0.521764 1.177320 0.938245
40040 2012-11-21 1.160 1.645345 1.964620 4.536440
40041 2012-11-22 3.171 2.444018 2.931550 3.737840
40042 2012-11-23 4.563 3.208111 3.587250 2.434040
40043 2012-11-24 5.379 3.863732 3.824540 1.634780
40044 2012-11-26 1.125 20.756739 4.162820 23.552100
40045 2012-11-27 3.340 5.369354 4.535090 1.129290
40046 2012-11-28 5.463 12.185730 8.102790 1.224300
40047 2012-11-29 6.596 14.328685 9.271000 24.655600
40048 2012-11-30 31.544 13.513497 12.103400 21.273500
40049 2012-12-01 24.921 26.144050 16.256200 13.883100
40050 2012-12-03 5.488 2.581351 7.220790 3.349450
40051 2012-12-04 6.977 5.893819 5.548870 2.948770
40052 2012-12-05 7.115 6.533022 5.863820 2.517030
40053 2012-12-06 5.842 8.754232 7.518660 1.447940
40054 2012-12-07 6.346 12.018631 10.263100 11.837400
40055 2012-12-08 17.666 4.548846 10.610400 11.110800
40056 2012-12-10 4.300 2.823566 1.475000 1.989210
40057 2012-12-11 2.415 2.436319 2.677440 2.908270
40058 2012-12-12 2.319 2.121092 3.455550 3.890480
40059 2012-12-13 1.000 1.633918 3.858540 4.316940
40060 2012-12-14 2.238 1.688475 5.065990 5.267850
40061 2012-12-15 1.798 2.621267 7.175370 6.957340
I try to plot it in the following way:
plt.figure(figsize=(17, 10))
plt.setp(plt.xticks()[1], rotation=45)
plt.plot_date(df_cut['date_obj'],df_cut['col1'], color='black', linestyle='-', markersize=3, linewidth=2)
plt.plot_date(df_cut['date_obj'],df_cut['col2'], color='red', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col3'], color='green', linestyle='-', markersize=3)
plt.plot_date(df_cut['date_obj'],df_cut['col4'], color='blue', linestyle='-', markersize=3)
As a result I get an error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-544-1b8650d1e7e7> in <module>()
/ipython/local/lib/python2.7/site-packages/matplotlib/pyplot.pyc in plot_date(x, y, fmt, tz, xdate, ydate, hold, **kwargs)
2850 try:
2851 ret = ax.plot_date(x, y, fmt=fmt, tz=tz, xdate=xdate, ydate=ydate,
-> 2852 **kwargs)
2853 draw_if_interactive()
2854 finally:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot_date(self, x, y, fmt, tz, xdate, ydate, **kwargs)
4061 if not self._hold: self.cla()
4062
-> 4063 ret = self.plot(x, y, fmt, **kwargs)
4064
4065 if xdate:
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in plot(self, *args, **kwargs)
3994 lines = []
3995
-> 3996 for line in self._get_lines(*args, **kwargs):
3997 self.add_line(line)
3998 lines.append(line)
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _grab_next_args(self, *args, **kwargs)
328 return
329 if len(remaining) <= 3:
--> 330 for seg in self._plot_args(remaining, kwargs):
331 yield seg
332 return
ipython/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _plot_args(self, tup, kwargs)
306 x = np.arange(y.shape[0], dtype=float)
307
--> 308 x, y = self._xy_from_xy(x, y)
309
310 if self.command == 'plot':
python/local/lib/python2.7/site-packages/matplotlib/axes.pyc in _xy_from_xy(self, x, y)
222 def _xy_from_xy(self, x, y):
223 if self.axes.xaxis is not None and self.axes.yaxis is not None:
--> 224 bx = self.axes.xaxis.update_units(x)
225 by = self.axes.yaxis.update_units(y)
226
ipython/local/lib/python2.7/site-packages/matplotlib/axis.pyc in update_units(self, data)
1299 neednew = self.converter != converter
1300 self.converter = converter
-> 1301 default = self.converter.default_units(data, self)
1302 #print 'update units: default=%s, units=%s'%(default, self.units)
1303 if default is not None and self.units is None:
ipython/local/lib/python2.7/site-packages/matplotlib/dates.pyc in default_units(x, axis)
1156 'Return the tzinfo instance of *x* or of its first element, or None'
1157 try:
-> 1158 x = x[0]
1159 except (TypeError, IndexError):
1160 pass
ipython/local/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
611 def __getitem__(self, key):
612 try:
--> 613 return self.index.get_value(self, key)
614 except InvalidIndexError:
615 pass
ipython/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
761 """
762 try:
--> 763 return self._engine.get_value(series, key)
764 except KeyError, e1:
765 if len(self) > 0 and self.inferred_type == 'integer':
What is strange is that this code works for some data frames but not for others. The data frames do not differ in structure; the only difference between them is in the values they contain.
Could anybody please help me to resolve this problem?
DataFrames store dates as numpy.datetime64 values, not Python datetime objects.
Furthermore, matplotlib's plot_date uses its own numeric representation of dates.
You could convert the dates yourself and plot the plain values this way:
plt.plot_date(matplotlib.dates.date2num(pandas.to_datetime(df_cut['date_obj'].values)),df_cut['col1'].values, color='black', linestyle='-', markersize=3, linewidth=2)
Or you could define column 'date_obj' as the index of your data:
df0 = pd.DataFrame.from_records(YourDataSource, columns=['date_obj','col1','col2','col3','col4'],index='date_obj')
And then simply use pandas' plot() method:
df0['col1'].plot()