Related
I want to quantify some geolocations with osmnx using the nearest_edges-function. I get a value error message when running this code and don't know what I'm doing wrong:
# project graph and points
G_proj = ox.project_graph(G)
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points
# aggregate so have number of accidents on each edge
gdf_bad_roads = (
gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
.groupby(["u", "v", "key"])
.agg(geometry = ("geometry", "first"), number=("osmid", "size"))
.set_crs(gdf_edges.crs)
)
When running it tells me in the line .agg(geometry)# we require a list, but not a 'str' and from there on couple more issues leading to a value error data' should be a 1-dimensional array of geometry objects. I attached the whole Traceback. Thanks for your help!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/jy/1f2tlvb965g30zhw9q3cvdw07r5rb_/T/ipykernel_82991/3621029527.py in <module>
2 # aggregate so have number of accidents on each edge
3 gdf_bad_roads = (
----> 4 gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
5 .groupby(["u", "v", "key"])
6 .agg(geometry = ("geometry", "first"), number=("osmid", "size"))
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
977
978 op = GroupByApply(self, func, args, kwargs)
--> 979 result = op.agg()
980 if not is_dict_like(func) and result is not None:
981 return result
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg(self)
159
160 if is_dict_like(arg):
--> 161 return self.agg_dict_like()
162 elif is_list_like(arg):
163 # we require a list, but not a 'str'
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg_dict_like(self)
457
458 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 459 result = concat(
460 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
461 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
305 )
306
--> 307 return op.get_result()
308
309
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in get_result(self)
537
538 cons = sample._constructor
--> 539 return cons(new_data).__finalize__(self, method="concat")
540
541 def _get_result_dim(self) -> int:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __getitem__(self, key)
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
3424 if self.columns.is_unique and key in self.columns:
3425 if isinstance(self.columns, MultiIndex):
-> 3426 return self._getitem_multilevel(key)
3427 return self._get_item_cache(key)
3428
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _getitem_multilevel(self, key)
3511 result_columns = maybe_droplevels(new_columns, key)
3512 if self._is_mixed_type:
-> 3513 result = self.reindex(columns=new_columns)
3514 result.columns = result_columns
3515 else:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 #deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4816
4817 # perform the reindex on the axes
-> 4818 return self._reindex_axes(
4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4589 columns = axes["columns"]
4590 if columns is not None:
-> 4591 frame = frame._reindex_columns(
4592 columns, method, copy, level, fill_value, limit, tolerance
4593 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4634 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4635 )
-> 4636 return self._reindex_with_indexers(
4637 {1: [new_columns, indexer]},
4638 copy=copy,
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4895 new_data = new_data.copy()
4896
-> 4897 return self._constructor(new_data).__finalize__(self)
4898
4899 def filter(
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
48
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in from_shapely(data, crs)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
152
153
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in __init__(self, data, crs)
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
ValueError: 'data' should be a 1-dimensional array of geometry objects.
Edit: thank you! Unfortunately it doesnt work. I downgraded Python to 3.9 (and upgraded Panda to 1.4 but have same issue). I added the Traceback of the other code as well.
----
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [4], in <cell line: 4>()
2 gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
3 # aggregate so have number of accidents on each edge
----> 4 gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
5 geometry=("geometry", "first"), number=("osmid", "size")
6 ).set_crs(gdf_edges.crs)
8 print(f"""
9 pandas: {pd.__version__}
10 geopandas: {gpd.__version__}
11 osmnx: {ox.__version__}""")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
866 func = maybe_mangle_lambdas(func)
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
165 return self.apply_str()
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'
171 return self.agg_list_like()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:498, in Apply.agg_dict_like(self)
495 keys_to_use = ktu
497 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 498 result = concat(
499 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
500 )
501 elif any(is_ndframe):
502 # There is a mix of NDFrames and scalars
503 raise ValueError(
504 "cannot perform both aggregation "
505 "and transformation operations "
506 "simultaneously"
507 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:359, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
155 """
156 Concatenate pandas objects along a particular axis with optional set logic
157 along the other axes.
(...)
344 ValueError: Indexes have overlapping values: ['a']
345 """
346 op = _Concatenator(
347 objs,
348 axis=axis,
(...)
356 sort=sort,
357 )
--> 359 return op.get_result()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:599, in _Concatenator.get_result(self)
596 new_data._consolidate_inplace()
598 cons = sample._constructor
--> 599 return cons(new_data).__finalize__(self, method="concat")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:157, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
154 index = self.index
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
160 and not self["geometry"].values.crs == crs
161 ):
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:1327, in GeoDataFrame.__getitem__(self, key)
1321 def __getitem__(self, key):
1322 """
1323 If the result is a column containing only 'geometry', return a
1324 GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3473, in DataFrame.__getitem__(self, key)
3471 if self.columns.is_unique and key in self.columns:
3472 if isinstance(self.columns, MultiIndex):
-> 3473 return self._getitem_multilevel(key)
3474 return self._get_item_cache(key)
3476 # Do we have a slicer (on rows)?
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3560, in DataFrame._getitem_multilevel(self, key)
3558 result_columns = maybe_droplevels(new_columns, key)
3559 if self._is_mixed_type:
-> 3560 result = self.reindex(columns=new_columns)
3561 result.columns = result_columns
3562 else:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:324, in rewrite_axis_style_signature.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4798, in DataFrame.reindex(self, *args, **kwargs)
4796 kwargs.pop("axis", None)
4797 kwargs.pop("labels", None)
-> 4798 return super().reindex(**kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:4974, in NDFrame.reindex(self, *args, **kwargs)
4971 return self._reindex_multi(axes, copy, fill_value)
4973 # perform the reindex on the axes
-> 4974 return self._reindex_axes(
4975 axes, level, limit, tolerance, method, fill_value, copy
4976 ).__finalize__(self, method="reindex")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4611, in DataFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4609 columns = axes["columns"]
4610 if columns is not None:
-> 4611 frame = frame._reindex_columns(
4612 columns, method, copy, level, fill_value, limit, tolerance
4613 )
4615 index = axes["index"]
4616 if index is not None:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4656, in DataFrame._reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4643 def _reindex_columns(
4644 self,
4645 new_columns,
(...)
4651 tolerance=None,
4652 ):
4653 new_columns, indexer = self.columns.reindex(
4654 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4655 )
-> 4656 return self._reindex_with_indexers(
4657 {1: [new_columns, indexer]},
4658 copy=copy,
4659 fill_value=fill_value,
4660 allow_dups=False,
4661 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:5054, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
5051 if copy and new_data is self._mgr:
5052 new_data = new_data.copy()
-> 5054 return self._constructor(new_data).__finalize__(self)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:164, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:46, in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:151, in from_shapely(data, crs)
135 def from_shapely(data, crs=None):
136 """
137 Convert a list or array of shapely objects to a GeometryArray.
138
(...)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:280, in GeometryArray.__init__(self, data, crs)
275 raise TypeError(
276 "'data' should be array of geometry objects. Use from_shapely, "
277 "from_wkb, from_wkt functions to construct a GeometryArray."
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
283 self.data = data
285 self._crs = None
ValueError: 'data' should be a 1-dimensional array of geometry objects.
pandas: 1.4.1
geopandas: 0.10.2
osmnx: 1.1.2
have changed this to a MWE
have separated out join() and groupby() / agg()
have included versions
one difference I can see - python 3.9 vs 3.10
import osmnx as ox
import geopandas as gpd
import pandas as pd
import io
df = pd.read_csv(
io.StringIO(
"""AccidentUID,AccidentLocation_CHLV95_E,AccidentLocation_CHLV95_N
99BA5D383B96D02AE0430A865E33D02A,2663985,1213215
9B25C4871C909022E0430A865E339022,2666153,1211303
9B71AB601D948092E0430A865E338092,2666168,1211785
9C985CF7710A60C0E0430A865E3360C0,2663991,1213203
9EA9548660AB3002E0430A865E333002,2666231,1210786
9B2E8B25D5C29094E0430A865E339094,2666728,1210404
9C87C10FB73A905EE0430A865E33905E,2666220,1211811
9E30F39D35CA1058E0430A865E331058,2664599,1212960
9BC2EA43E0BFC068E0430A865E33C068,2665533,1212617
9C0BB9332AB30044E0430A865E330044,2666852,1211964"""
)
)
gdf_loc = gpd.GeoDataFrame(
data=df,
geometry=gpd.points_from_xy(
df["AccidentLocation_CHLV95_E"], df["AccidentLocation_CHLV95_N"]
),
crs="EPSG:2056",
).to_crs("epsg:4326")
# get OSM data for investigated location
G = ox.graph_from_place("Luzern, Switzerland", network_type="drive")
G_proj = ox.project_graph(G)
gdf_nodes, gdf_edges = ox.utils_graph.graph_to_gdfs(G_proj)
# project graph and points
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points
gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
# aggregate so have number of accidents on each edge
gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
geometry=("geometry", "first"), number=("osmid", "size")
).set_crs(gdf_edges.crs)
print(f"""
pandas: {pd.__version__}
geopandas: {gpd.__version__}
osmnx: {ox.__version__}""")
pandas: 1.4.0
geopandas: 0.10.2
osmnx: 1.1.2
Alternative aggregate syntax. Has been confirmed both work
hence conclusion is that named aggregations are failing. Possibly should be raised as an issue on pandas, but is not failing on all environments
groupby()/apply() is doing a first on shared edges and also necessary to set CRS again
dissolve() is doing a unary union on geometries. Conceptually should be the same, but is giving slightly different geometry. (A unary union of identical geometries IMHO is an instance of one of the geometries)
gdf_bad_roads.groupby(["u", "v", "key"]).agg({"geometry":"first", "AccidentUID":"size"}).set_crs(gdf_edges.crs).explore(color="blue")
gdf_bad_roads.dissolve(["u", "v", "key"], aggfunc={"AccidentUID":"size"}).explore(color="blue")
I do not grasp how the dask module uses lazy evaluations under the hood.
In the minimal example the numbers replaces a significant larger data set, hence the float64 (overflow).
What is the error in this "dask-like" syntax and why does it produces this (seemingly unrelated?) Error.
import pandas as pd
import numpy as np
from dask import dataframe as dd
df = pd.DataFrame({'foo': [[1,2,3], [4,5,6]]})
ddf = dd.from_pandas(df, npartitions=2)
In pandas I would do
In[1]: df['foo'].apply(np.float64).mean()
Out[1]: array([2.5, 3.5, 4.5])
What is the same as
In[2]: ddf['foo'].apply(np.float64, meta=('foo','f8')).compute().mean()
Out[2]: array([2.5, 3.5, 4.5])
as I evaluates first the pandas.Series and then calculating the mean.
If I want to make the whole calculation lazy I tried
In[3]: ddf['foo'].apply(np.float64, meta=('foo','f8')).mean().compute()
but I do not understand the TypeError.
The stack trace:
TypeError Traceback (most recent call last)
<ipython-input-6-8704408e68ef> in <module>
----> 1 ddf['foo'].apply(np.float64, meta=('foo','f8')).mean().compute()
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
277 dask.base.compute
278 """
--> 279 (result,) = compute(self, traverse=False, **kwargs)
280 return result
281
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
559 postcomputes.append(x.__dask_postcompute__())
560
--> 561 results = schedule(dsk, keys, **kwargs)
562 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
563
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
74 pools[thread][num_workers] = pool
75
---> 76 results = get_async(
77 pool.apply_async,
78 len(pool._pool),
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
485 _execute_task(task, data) # Re-execute locally
486 else:
--> 487 raise_exception(exc, tb)
488 res, worker_id = loads(res_info)
489 state["cache"][key] = res
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in reraise(exc, tb)
315 if exc.__traceback__ is not tb:
316 raise exc.with_traceback(tb)
--> 317 raise exc
318
319
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/utils.py in apply(func, args, kwargs)
29 def apply(func, args, kwargs=None):
30 if kwargs:
---> 31 return func(*args, **kwargs)
32 else:
33 return func(*args)
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/dataframe/core.py in _reduction_aggregate(x, aca_aggregate, **kwargs)
6289 if isinstance(x, list):
6290 x = pd.Series(x)
-> 6291 return aca_aggregate(x, **kwargs)
6292
6293
~/miniconda3/envs/flash-hdf/lib/python3.8/site-packages/dask/utils.py in __call__(self, obj, *args, **kwargs)
895
896 def __call__(self, obj, *args, **kwargs):
--> 897 return getattr(obj, self.method)(*args, **kwargs)
898
899 def __reduce__(self):
TypeError: _sum() got an unexpected keyword argument 'skipna'
If you could provide me with a hint to where find the answer, it would really helpful!
Python 3.8.6
Numpy 1.19.4
Dask 2021.01.0
Pandas 1.1.4
Problem description
I have a DataFrame in which last column is a format column. The purpose of this column is to contain the format of the DataFrame row.
Here is an example of such a dataframe:
df = pd.DataFrame({'ID': [1, 24, 31, 37],
'Status': ['to analyze', 'to analyze','to analyze','analyzed'],
'priority' : ['P1','P1','P2','P1'],
'format' : ['n;y;n','n;n;n','n;y;y','y;n;y']}
Each df['format'] row contains a string intended to be taken as a list (when split) to give the format of the row.
Symbols meaning:
n means "no highlight"
y means "to highlight in yellow"
df['format'].to_list()[0] = 'n;y;n' means for example:
n: first column ID item "1" not highlighted
y: second column Status item "to analyze" to be highlighted
n: third column Priority item "P1" not highlighted
So that expected outcome is:
What I've tried
I've tried to use df.format to get a list of lists containing the format needed. Here is my code:
import pandas as pd
import numpy as np
def highlight_case(df):
list_of_format_lists = []
for format_line in df['format']:
format_line_list = format_line.split(';')
format_list = []
for form in format_line_list:
if 'y' in form:
format_list.append('background-color: yellow')
else:
format_list.append('')
list_of_format_lists.append(format_list)
list_of_format_lists = list(map(list, zip(*list_of_format_lists)))#transpose
print(list_of_format_lists)
return list_of_format_lists
highlight_style = highlight_case(df)
df.style.apply(highlight_style)
It doesn't work, and I get this output:
TypeError Traceback (most recent call last)
c:\python38\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
c:\python38\lib\site-packages\pandas\io\formats\style.py in _repr_html_(self)
191 Hooks into Jupyter notebook rich display system.
192 """
--> 193 return self.render()
194
195 #doc(NDFrame.to_excel, klass="Styler")
c:\python38\lib\site-packages\pandas\io\formats\style.py in render(self, **kwargs)
538 * table_attributes
539 """
--> 540 self._compute()
541 # TODO: namespace all the pandas keys
542 d = self._translate()
c:\python38\lib\site-packages\pandas\io\formats\style.py in _compute(self)
623 r = self
624 for func, args, kwargs in self._todo:
--> 625 r = func(self)(*args, **kwargs)
626 return r
627
c:\python38\lib\site-packages\pandas\io\formats\style.py in _apply(self, func, axis, subset, **kwargs)
637 data = self.data.loc[subset]
638 if axis is not None:
--> 639 result = data.apply(func, axis=axis, result_type="expand", **kwargs)
640 result.columns = data.columns
641 else:
c:\python38\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7543 kwds=kwds,
7544 )
-> 7545 return op.get_result()
7546
7547 def applymap(self, func) -> "DataFrame":
c:\python38\lib\site-packages\pandas\core\apply.py in get_result(self)
142 # dispatch to agg
143 if is_list_like(self.f) or is_dict_like(self.f):
--> 144 return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds)
145
146 # all empty
c:\python38\lib\site-packages\pandas\core\frame.py in aggregate(self, func, axis, *args, **kwargs)
7353 axis = self._get_axis_number(axis)
7354
-> 7355 relabeling, func, columns, order = reconstruct_func(func, **kwargs)
7356
7357 result = None
c:\python38\lib\site-packages\pandas\core\aggregation.py in reconstruct_func(func, **kwargs)
74
75 if not relabeling:
---> 76 if isinstance(func, list) and len(func) > len(set(func)):
77
78 # GH 28426 will raise error if duplicated function names are used and
TypeError: unhashable type: 'list'
Since the formats are encoded for each row, it makes sense apply row-wise:
def format_row(r):
formats = r['format'].split(';')
return ['background-color: yellow' if y=='y' else '' for y in formats] + ['']
df.style.apply(format_row, axis=1)
Output:
I'm taking an online python course (EpiSkills, which uses the Jupyter notebook) that was written in Python 2.7, and I'm on Python 3.6.4 so I have run into a few compatibility issues along the way. Most of the time I've been able to stumble through, but can't figure out this one, so was hoping someone might be able to help.
I start with the following packages:
import pandas as pd
import epipy
import seaborn as sns
%pylab inline
import statsmodels.api as sm
from scipy import stats
import numpy as np
And use the following code to create a pandas series and model:
multivar_model = sm.formula.glm('age ~ onset_to_hospital + onset_to_death +
data=my_data).fit()
new_data = pd.Series([6, 8, 'male'], index=['onset_to_hospital', 'onset_to_death', 'sex'])
When I try to use this to the following code, I throw the error that I've attached:
multivar_model.predict(new_data)
NameError part1
NameError part2
The intended output is meant to be this:
array([ 60.6497459])
I know that a lot of NameErrors are because something has been specified in the local, not global, environment but I'm unsure how to correct it in this instance. Any help is much appreciated.
Thanks!
C
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
116 try:
--> 117 return f(*args, **kwargs)
118 except Exception as e:
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>()
NameError: name 'onset_to_death' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
<ipython-input-79-e0364e267da7> in <module>()
----> 1 multivar_model.predict(new_data)
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\statsmodels\base\model.py in predict(self, exog, transform, *args, **kwargs)
774 exog_index = exog.index
775 exog = dmatrix(self.model.data.design_info.builder,
--> 776 exog, return_type="dataframe")
777 if len(exog) < len(exog_index):
778 # missing values, rows have been dropped
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in dmatrix(formula_like, data, eval_env, NA_action, return_type)
289 eval_env = EvalEnvironment.capture(eval_env, reference=1)
290 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 291 NA_action, return_type)
292 if lhs.shape[1] != 0:
293 raise PatsyError("encountered outcome variables for a model "
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
167 return build_design_matrices(design_infos, data,
168 NA_action=NA_action,
--> 169 return_type=return_type)
170 else:
171 # No builders, but maybe we can still get matrices
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in build_design_matrices(design_infos, data, NA_action, return_type, dtype)
886 for factor_info in six.itervalues(design_info.factor_infos):
887 if factor_info not in factor_info_to_values:
--> 888 value, is_NA = _eval_factor(factor_info, data, NA_action)
889 factor_info_to_isNAs[factor_info] = is_NA
890 # value may now be a Series, DataFrame, or ndarray
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\build.py in _eval_factor(factor_info, data, NA_action)
61 def _eval_factor(factor_info, data, NA_action):
62 factor = factor_info.factor
---> 63 result = factor.eval(factor_info.state, data)
64 # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask
65 if factor_info.type == "numerical":
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
122 origin)
123 # Use 'exec' to hide this syntax from the Python 2 parser:
--> 124 exec("raise new_exc from e")
125 else:
126 # In python 2, we just let the original exception escape -- better
~\AppData\Local\Enthought\Canopy\edm\envs\User\lib\site-packages\patsy\compat.py in <module>()
PatsyError: Error evaluating factor: NameError: name 'onset_to_death' is not defined
age ~ onset_to_hospital + onset_to_death + sex
^^^^^^^^^^^^^^
I am trying to plot the Zillow dataset with Bokeh using Geoviews and Datashader but I am having the damnedest time getting it to work. I am able to plot the data on a Cartesian plane fine but when I attempt to overlay the data with a map I run into errors.
I have used code adapted from the census-hv example on the datashader github. I believe my problem is that it is looking for the coordinates to be in UTM not Lat/Long. Because the code works when I have my coordinates multiplied by a few thousand. The points are then put above the map in white space. If i attempt to plot the proper lat/long coordinates I get the following errors.
Can someone please point me in the direction of a map that uses Lat/Long
>>>props.head()
longitude latitude
0 -118.654084 34.144442
1 -118.625364 34.140430
2 -118.394633 33.989359
3 -118.437206 34.148863
4 -118.385816 34.194168
import pandas as pd
import holoviews as hv
import geoviews as gv
import datashader as ds
from bokeh.models import WMTSTileSource
from holoviews.operation.datashader import datashade, dynspread
hv.notebook_ex
tension('bokeh')
%%opts Overlay [width=900 height=525 xaxis=None yaxis=None]
geomap = gv.WMTS(WMTSTileSource(url=\
'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))
points = hv.Points(gv.Dataset(props, kdims=['longitude', 'latitude']))
# color_key = {'w':'aqua', 'b':'lime', 'a':'red', 'h':'fuchsia', 'o':'yellow' }
race = datashade(points, x_sampling=50, y_sampling=50,
element_type=gv.Image)
geomap * race
RETURNS ERROR:
WARNING:root:dynamic_operation: Exception raised in callable
'dynamic_operation' of type 'function'.
Invoked as dynamic_operation(height=400, scale=1.0, width=400, x_range=None, y_range=None)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
305 pass
306 else:
--> 307 return printer(obj)
308 # Finally look for special method names
309 method = get_real_method(obj, self.print_method)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in pprint_display(obj)
255 if not ip.display_formatter.formatters['text/plain'].pprint:
256 return None
--> 257 return display(obj, raw=True)
258
259
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in display(obj, raw, **kwargs)
241 elif isinstance(obj, (HoloMap, DynamicMap)):
242 with option_state(obj):
--> 243 html = map_display(obj)
244 else:
245 return repr(obj) if raw else IPython.display.display(obj, **kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in wrapped(element)
127 try:
128 html = fn(element,
--> 129 max_frames=OutputMagic.options['max_frames'])
130
131 # Only want to add to the archive for one display hook...
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in map_display(vmap, max_frames)
196 return None
197
--> 198 return render(vmap)
199
200
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in render(obj, **kwargs)
57 if renderer.fig == 'pdf':
58 renderer = renderer.instance(fig='png')
---> 59 return renderer.html(obj, **kwargs)
60
61
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in html(self, obj, fmt, css, comm, **kwargs)
253 code to initialize a Comm, if the plot supplies one.
254 """
--> 255 plot, fmt = self._validate(obj, fmt)
256 figdata, _ = self(plot, fmt, **kwargs)
257 if css is None: css = self.css
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in _validate(self, obj, fmt)
189 if isinstance(obj, tuple(self.widgets.values())):
190 return obj, 'html'
--> 191 plot = self.get_plot(obj, renderer=self)
192
193 fig_formats = self.mode_formats['fig'][self.mode]
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in get_plot(self_or_cls, obj, renderer)
164 """
165 # Initialize DynamicMaps with first data item
--> 166 initialize_dynamic(obj)
167
168 if not isinstance(obj, Plot) and not displayable(obj):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/util.py in initialize_dynamic(obj)
173 continue
174 if not len(dmap):
--> 175 dmap[dmap._initial_key()]
176
177
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
489 # Nothing to do for callbacks that accept no arguments
490 (inargs, inkwargs) = (args, kwargs)
--> 491 if not args and not kwargs: return self.callable()
492 inputs = [i for i in self.inputs if isinstance(i, DynamicMap)]
493 streams = []
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/overlay.py in dynamic_mul(*args, **kwargs)
27 from .spaces import Callable
28 def dynamic_mul(*args, **kwargs):
---> 29 element = other[args]
30 return self * element
31 callback = Callable(dynamic_mul, inputs=[self, other])
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
519
520 try:
--> 521 ret = self.callable(*args, **kwargs)
522 except:
523 posstr = ', '.join(['%r' % el for el in inargs]) if inargs else ''
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in dynamic_operation(*key, **kwargs)
101 self.p.kwargs.update(kwargs)
102 obj = map_obj[key] if isinstance(map_obj, HoloMap) else map_obj
--> 103 return self._process(obj, key)
104 else:
105 def dynamic_operation(*key, **kwargs):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in _process(self, element, key)
87 kwargs = {k: v for k, v in self.p.kwargs.items()
88 if k in self.p.operation.params()}
---> 89 return self.p.operation.process_element(element, key, **kwargs)
90 else:
91 return self.p.operation(element, **self.p.kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/operation.py in process_element(self, element, key, **params)
133 """
134 self.p = param.ParamOverrides(self, params)
--> 135 return self._process(element, key)
136
137
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
357
358 def _process(self, element, key=None):
--> 359 agg = aggregate._process(self, element, key)
360 shaded = shade._process(self, agg, key)
361 return shaded
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
226 agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
227 if agg.ndim == 2:
--> 228 return self.p.element_type(agg, **params)
229 else:
230 return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/geoviews/element/geo.py in __init__(self, data, **kwargs)
81 elif crs:
82 kwargs['crs'] = crs
---> 83 super(_Element, self).__init__(data, **kwargs)
84
85
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/element/raster.py in __init__(self, data, bounds, extents, xdensity, ydensity, **params)
242 if bounds is None:
243 xvals = self.dimension_values(0, False)
--> 244 l, r, xdensity, _ = util.bound_range(xvals, xdensity)
245 yvals = self.dimension_values(1, False)
246 b, t, ydensity, _ = util.bound_range(yvals, ydensity)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/util.py in bound_range(vals, density)
1373 using significant digits reported by sys.float_info.dig.
1374 """
-> 1375 low, high = vals.min(), vals.max()
1376 invert = False
1377 if vals[0] > vals[1]:
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/_methods.py in _amin(a, axis, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29 return umr_minimum(a, axis, None, out, keepdims)
30
31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation minimum which has no identity
Out[54]:
b':DynamicMap []'
I think the problem here is two-fold, first of all since the coordinates are latitudes and longitudes and you specify xsampling/ysampling values of 50 the datashaded image ends up with a tiny or zero shape, which causes this error. My suggestion would be to cast the coordinates to Google Mercator first. In future this PR will let you do so very simply by calling this:
import cartopy.crs as ccrs
projected = gv.operation.project(points, projection=ccrs.GOOGLE_MERCATOR)
...
To do this manually for now you can use the cartopy projection directly:
coords = ccrs.GOOGLE_MERCATOR.transform_points(ccrs.PlateCarree(), lons, lats)
projected = gv.Points(coords, crs=ccrs.GOOGLE_MERCATOR)
...