HDF error when trying to access variables of Xarray - python

I have downloaded temperature data from a model. Because I chose the whole temporal range, the data was split into several netCDF files. However, when I try to access these data, for some of the files I get an error which I do not understand. First of all, I have imported the following packages (all installed correctly):
import netCDF4 as nc
import h5py
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
#first read the first file of the same model
c1 = xr.open_dataset(path+'tas_day_CMCC-ESM2_historical_r1i1p1f1_gn_18500101-18741231_v20210114.nc')
#then read the second file
c2 = xr.open_dataset(path+'tas_day_CMCC-ESM2_historical_r1i1p1f1_gn_18750101-18991231_v20210114.nc')
#then take only the longitude and latitude values which I am interested
cc1 = c1.sel(lon = 35.86,lat = 14.375,method='nearest')
cc2 = c2.sel(lon = 35.86,lat = 14.375,method='nearest')
However, when I try to access the temperature variable for the second file I get an error. For example, let's print the first xarray dataset:
#for the first Xarray
time , (time) , object, ,1850-01-01 12:00:00 ... 1874-12-...
lat, () ,float64, ,14.61
lon, () ,float64. ,36.25
height, (). ,float64. ,2.0
Data variables:
time_bnds. , (time, bnds). , object. , ...
lat_bnds. , (bnds). , float64. , ...
lon_bnds. , (bnds). , float64. , ...
tas. ,(time). , float32. , 297.9 298.0 297.9 ... 297.8 298.2
but when I try to print the second one
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
File ~/miniforge3/lib/python3.9/site-packages/IPython/core/formatters.py:343, in BaseFormatter.__call__(self, obj)
341 method = get_real_method(obj, self.print_method)
342 if method is not None:
--> 343 return method()
344 return None
345 else:
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/dataset.py:2058, in Dataset._repr_html_(self)
2056 if OPTIONS["display_style"] == "text":
2057 return f"<pre>{escape(repr(self))}</pre>"
-> 2058 return formatting_html.dataset_repr(self)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:297, in dataset_repr(ds)
290 obj_type = f"xarray.{type(ds).__name__}"
292 header_components = [f"<div class='xr-obj-type'>{escape(obj_type)}</div>"]
294 sections = [
295 dim_section(ds),
296 coord_section(ds.coords),
--> 297 datavar_section(ds.data_vars),
298 attr_section(ds.attrs),
299 ]
301 return _obj_repr(ds, header_components, sections)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:179, in _mapping_section(mapping, name, details_func, max_items_collapse, expand_option_name, enabled)
172 expanded = _get_boolean_with_default(
173 expand_option_name, n_items < max_items_collapse
174 )
175 collapsed = not expanded
177 return collapsible_section(
178 name,
--> 179 details=details_func(mapping),
180 n_items=n_items,
181 enabled=enabled,
182 collapsed=collapsed,
183 )
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:138, in summarize_vars(variables)
137 def summarize_vars(variables):
--> 138 vars_li = "".join(
139 f"<li class='xr-var-item'>{summarize_variable(k, v)}</li>"
140 for k, v in variables.items()
141 )
143 return f"<ul class='xr-var-list'>{vars_li}</ul>"
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:139, in <genexpr>(.0)
137 def summarize_vars(variables):
138 vars_li = "".join(
--> 139 f"<li class='xr-var-item'>{summarize_variable(k, v)}</li>"
140 for k, v in variables.items()
141 )
143 return f"<ul class='xr-var-list'>{vars_li}</ul>"
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:115, in summarize_variable(name, var, is_index, dtype, preview)
113 preview = preview or escape(inline_variable_array_repr(variable, 35))
114 attrs_ul = summarize_attrs(var.attrs)
--> 115 data_repr = short_data_repr_html(variable)
117 attrs_icon = _icon("icon-file-text2")
118 data_icon = _icon("icon-database")
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting_html.py:30, in short_data_repr_html(array)
28 if hasattr(internal_data, "_repr_html_"):
29 return internal_data._repr_html_()
---> 30 text = escape(short_data_repr(array))
31 return f"<pre>{text}</pre>"
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting.py:572, in short_data_repr(array)
570 return limit_lines(repr(array.data), limit=40)
571 elif array._in_memory or array.size < 1e5:
--> 572 return short_numpy_repr(array)
573 else:
574 # internal xarray array type
575 return f"[{array.size} values with dtype={array.dtype}]"
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/formatting.py:548, in short_numpy_repr(array)
547 def short_numpy_repr(array):
--> 548 array = np.asarray(array)
550 # default to lower precision so a full (abbreviated) line can fit on
551 # one line with the default display_width
552 options = {"precision": 6, "linewidth": OPTIONS["display_width"], "threshold": 200}
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/common.py:143, in AbstractArray.__array__(self, dtype)
142 def __array__(self: Any, dtype: DTypeLike = None) -> np.ndarray:
--> 143 return np.asarray(self.values, dtype=dtype)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/variable.py:512, in Variable.values(self)
509 @property
510 def values(self):
511 """The variable's data as a numpy.ndarray"""
--> 512 return _as_array_or_item(self._data)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/variable.py:252, in _as_array_or_item(data)
238 def _as_array_or_item(data):
239 """Return the given values as a numpy array, or as an individual item if
240 it's a 0d datetime64 or timedelta64 array.
241
(...)
250 TODO: remove this (replace with np.asarray) once these issues are fixed
251 """
--> 252 data = np.asarray(data)
253 if data.ndim == 0:
254 if data.dtype.kind == "M":
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:552, in MemoryCachedArray.__array__(self, dtype)
551 def __array__(self, dtype=None):
--> 552 self._ensure_cached()
553 return np.asarray(self.array, dtype=dtype)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:549, in MemoryCachedArray._ensure_cached(self)
547 def _ensure_cached(self):
548 if not isinstance(self.array, NumpyIndexingAdapter):
--> 549 self.array = NumpyIndexingAdapter(np.asarray(self.array))
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:522, in CopyOnWriteArray.__array__(self, dtype)
521 def __array__(self, dtype=None):
--> 522 return np.asarray(self.array, dtype=dtype)
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:423, in LazilyIndexedArray.__array__(self, dtype)
421 def __array__(self, dtype=None):
422 array = as_indexable(self.array)
--> 423 return np.asarray(array[self.key], dtype=None)
File ~/miniforge3/lib/python3.9/site-packages/xarray/coding/variables.py:70, in _ElementwiseFunctionArray.__array__(self, dtype)
69 def __array__(self, dtype=None):
---> 70 return self.func(self.array)
File ~/miniforge3/lib/python3.9/site-packages/xarray/coding/variables.py:137, in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
133 def _apply_mask(
134 data: np.ndarray, encoded_fill_values: list, decoded_fill_value: Any, dtype: Any
135 ) -> np.ndarray:
136 """Mask all matching values in a NumPy arrays."""
--> 137 data = np.asarray(data, dtype=dtype)
138 condition = False
139 for fv in encoded_fill_values:
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:423, in LazilyIndexedArray.__array__(self, dtype)
421 def __array__(self, dtype=None):
422 array = as_indexable(self.array)
--> 423 return np.asarray(array[self.key], dtype=None)
File ~/miniforge3/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:93, in NetCDF4ArrayWrapper.__getitem__(self, key)
92 def __getitem__(self, key):
---> 93 return indexing.explicit_indexing_adapter(
94 key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
95 )
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:712, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
690 """Support explicit indexing by delegating to a raw indexing method.
691
692 Outer and/or vectorized indexers are supported by indexing a second time
(...)
709 Indexing result, in the form of a duck numpy-array.
710 """
711 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 712 result = raw_indexing_method(raw_key.tuple)
713 if numpy_indices.tuple:
714 # index the loaded np.ndarray
715 result = NumpyIndexingAdapter(np.asarray(result))[numpy_indices]
File ~/miniforge3/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:106, in NetCDF4ArrayWrapper._getitem(self, key)
104 with self.datastore.lock:
105 original_array = self.get_array(needs_lock=False)
--> 106 array = getitem(original_array, key)
107 except IndexError:
108 # Catch IndexError in netCDF4 and return a more informative
109 # error message. This is most often called when an unsorted
110 # indexer is used before the data is loaded from disk.
111 msg = (
112 "The indexing operation you are attempting to perform "
113 "is not valid on netCDF4.Variable object. Try loading "
114 "your data into memory first by calling .load()."
115 )
File src/netCDF4/_netCDF4.pyx:4406, in netCDF4._netCDF4.Variable.__getitem__()
File src/netCDF4/_netCDF4.pyx:5350, in netCDF4._netCDF4.Variable._get()
File src/netCDF4/_netCDF4.pyx:1927, in netCDF4._netCDF4._ensure_nc_success()
RuntimeError: NetCDF: HDF error
<xarray.Dataset>
Dimensions: (time: 9125, bnds: 2)
Coordinates:
* time (time) object 1875-01-01 12:00:00 ... 1899-12-31 12:00:00
lat float64 14.61
lon float64 36.25
height float64 ...
Dimensions without coordinates: bnds
Data variables:
time_bnds (time, bnds) object ...
lat_bnds (bnds) float64 ...
lon_bnds (bnds) float64 ...
tas (time) float32 ...
Attributes: (12/48)
Conventions: CF-1.7 CMIP-6.2
activity_id: CMIP
branch_method: standard
branch_time_in_child: 0.0
branch_time_in_parent: 0.0
comment: none
... ...
title: CMCC-ESM2 output prepared for CMIP6
variable_id: tas
variant_label: r1i1p1f1
license: CMIP6 model data produced by CMCC is licensed und...
cmor_version: 3.6.0
tracking_id: hdl:21.14100/88ee8b49-0196-4c37-b1cd-a3cc6e2e598e
It should be noted that when I try to print the second Xarray without selecting a smaller domain (basically the .sel) it can be printed. Why does it not work when I use the sel command? I have confirmed that the coordinates I want are indeed inside c2 (my second Xarray). Also all my xarrays, with or without the sel command, are of type xarray.core.dataset.Dataset.
In order to identify the problem, I changed the .sel command. First I wanted to see the coordinates that are available in my xarray dataset. For lon the nearest value was 35 and for lat the closest was 14.60733. First I tried to .sel only the lon. The following is the command:
d1 = cc2.sel(lon = 35.)
d1
and the result I get is
time, (time), object, 1875-01-01 12:00:00 ... 1899-12-...
lat, (lat), float64, -90.0 -89.06 -88.12 ... 89.06 90.0
lon, (), float64, 35.0
height, (), float64, 2.0
Data variables:
time_bnds, (time, bnds), object, 1875-01-01 00:00:00... 1900-01-...
lat_bnds, (lat, bnds), float64, -90.0 -89.53 -89.53 ... 89.53 90.0
lon_bnds, (bnds), float64, ...
tas, (time, lat), float32, ...
In this dataset I saw that the coordinate for the lat = 14.60732984
can be found in the index 111 which I found by trial and error by printing using the following command
cc2['lat'][111].
which gives the following outcome.
array(14.60732984)
lat, (), float64, 14.61
lon, (), float64, 35.0
height, (), float64, 2.0
Now I tried the same procedure for lat both for values 14.60732984 and 14.61 but I get the following error
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/coordinates.py:421, in remap_label_indexers(obj, indexers, method, tolerance, **indexers_kwargs)
414 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "remap_label_indexers")
416 v_indexers = {
417 k: v.variable.data if isinstance(v, DataArray) else v
418 for k, v in indexers.items()
419 }
--> 421 pos_indexers, new_indexes = indexing.remap_label_indexers(
422 obj, v_indexers, method=method, tolerance=tolerance
423 )
424 # attach indexer's coordinate to pos_indexers
425 for k, v in indexers.items():
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexing.py:121, in remap_label_indexers(data_obj, indexers, method, tolerance)
119 for dim, index in indexes.items():
120 labels = grouped_indexers[dim]
--> 121 idxr, new_idx = index.query(labels, method=method, tolerance=tolerance)
122 pos_indexers[dim] = idxr
123 if new_idx is not None:
File ~/miniforge3/lib/python3.9/site-packages/xarray/core/indexes.py:241, in PandasIndex.query(self, labels, method, tolerance)
237 raise KeyError(
238 f"not all values found in index {coord_name!r}"
239 )
240 else:
--> 241 indexer = self.index.get_loc(label_value)
242 elif label.dtype.kind == "b":
243 indexer = label
File ~/miniforge3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 14.60732984

Related

FSSpec Error Handling in Python - Timeout Error

I am trying to get TerraClimate data from Microsoft Planetary and am facing a timeout error. Is there a possibility of increasing the timeout? Please find the code below and the error I am facing. I am using fsspec and xarray for downloading spatial data from the MS Planetary portal.
import fsspec
import xarray as xr
store = fsspec.get_mapper(asset.href)
data = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])
clipped_data = data.sel(time=slice('2015-01-01','2019-12-31'),lon=slice(min_lon,max_lon),lat=slice(max_lat,min_lat))
parsed_data = clipped_data[['tmax', 'tmin', 'ppt', 'soil']]
lat_list = parsed_data['lat'].values.tolist()
lon_list = parsed_data['lon'].values.tolist()
filename = "Soil_Moisture_sample.csv"
for(i,j) in zip(lat_list,lon_list):
parsed_data[["soil","tmax","tmin","ppt"]].sel(lon=i, lat=j, method="nearest").to_dataframe().to_csv(filename,mode='a',index=False, header=False)
I am getting the following error
TimeoutError Traceback (most recent call last)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:423, in AsyncFileSystem._cat(self, path, recursive, on_error, batch_size, **kwargs)
422 if ex:
--> 423 raise ex
424 if (
425 len(paths) > 1
426 or isinstance(path, list)
427 or paths[0] != self._strip_protocol(path)
428 ):
File ~\Anaconda3\envs\satellite\lib\asyncio\tasks.py:455, in wait_for(fut, timeout, loop)
454 if timeout is None:
--> 455 return await fut
457 if timeout <= 0:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\implementations\http.py:221, in HTTPFileSystem._cat_file(self, url, start, end, **kwargs)
220 async with session.get(url, **kw) as r:
--> 221 out = await r.read()
222 self._raise_not_found_for_status(r, url)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\client_reqrep.py:1036, in ClientResponse.read(self)
1035 try:
-> 1036 self._body = await self.content.read()
1037 for trace in self._traces:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:375, in StreamReader.read(self, n)
374 while True:
--> 375 block = await self.readany()
376 if not block:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:397, in StreamReader.readany(self)
396 while not self._buffer and not self._eof:
--> 397 await self._wait("readany")
399 return self._read_nowait(-1)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:304, in StreamReader._wait(self, func_name)
303 with self._timer:
--> 304 await waiter
305 else:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\helpers.py:721, in TimerContext.__exit__(self, exc_type, exc_val, exc_tb)
720 if exc_type is asyncio.CancelledError and self._cancelled:
--> 721 raise asyncio.TimeoutError from None
722 return None
TimeoutError:
The above exception was the direct cause of the following exception:
FSTimeoutError Traceback (most recent call last)
Input In [62], in <cell line: 3>()
1 # Flood Region Point - Thiruvanthpuram
2 filename = "Soil_Moisture_sample.csv"
----> 3 parsed_data[["soil","tmax","tmin","ppt"]].sel(lon=8.520833, lat=76.4375, method="nearest").to_dataframe().to_csv(filename,mode='a',index=False, header=False)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5898, in Dataset.to_dataframe(self, dim_order)
5870 """Convert this dataset into a pandas.DataFrame.
5871
5872 Non-index variables in this dataset form the columns of the
(...)
5893
5894 """
5896 ordered_dims = self._normalize_dim_order(dim_order=dim_order)
-> 5898 return self._to_dataframe(ordered_dims=ordered_dims)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5862, in Dataset._to_dataframe(self, ordered_dims)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
-> 5862 data = [
5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5863, in <listcomp>(.0)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
5862 data = [
-> 5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:527, in Variable.values(self)
524 @property
525 def values(self):
526 """The variable's data as a numpy.ndarray"""
--> 527 return _as_array_or_item(self._data)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:267, in _as_array_or_item(data)
253 def _as_array_or_item(data):
254 """Return the given values as a numpy array, or as an individual item if
255 it's a 0d datetime64 or timedelta64 array.
256
(...)
265 TODO: remove this (replace with np.asarray) once these issues are fixed
266 """
--> 267 data = np.asarray(data)
268 if data.ndim == 0:
269 if data.dtype.kind == "M":
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:1696, in Array.__array__(self, dtype, **kwargs)
1695 def __array__(self, dtype=None, **kwargs):
-> 1696 x = self.compute()
1697 if dtype and x.dtype != dtype:
1698 x = x.astype(dtype)
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:315, in DaskMethodsMixin.compute(self, **kwargs)
291 def compute(self, **kwargs):
292 """Compute this dask collection
293
294 This turns a lazy Dask collection into its in-memory equivalent.
(...)
313 dask.base.compute
314 """
--> 315 (result,) = compute(self, traverse=False, **kwargs)
316 return result
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:600, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
597 keys.append(x.__dask_keys__())
598 postcomputes.append(x.__dask_postcompute__())
--> 600 results = schedule(dsk, keys, **kwargs)
601 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~\AppData\Roaming\Python\Python38\site-packages\dask\threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:319, in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File ~\AppData\Roaming\Python\Python38\site-packages\dask\core.py:119, in _execute_task(arg, cache, dsk)
115 func, args = arg[0], arg[1:]
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:128, in getter(a, b, asarray, lock)
123 # Below we special-case `np.matrix` to force a conversion to
124 # `np.ndarray` and preserve original Dask behavior for `getter`,
125 # as for all purposes `np.matrix` is array-like and thus
126 # `is_arraylike` evaluates to `True` in that case.
127 if asarray and (not is_arraylike(c) or isinstance(c, np.matrix)):
--> 128 c = np.asarray(c)
129 finally:
130 if lock:
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:459, in ImplicitToExplicitIndexingAdapter.__array__(self, dtype)
458 def __array__(self, dtype=None):
--> 459 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:623, in CopyOnWriteArray.__array__(self, dtype)
622 def __array__(self, dtype=None):
--> 623 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:524, in LazilyIndexedArray.__array__(self, dtype)
522 def __array__(self, dtype=None):
523 array = as_indexable(self.array)
--> 524 return np.asarray(array[self.key], dtype=None)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\backends\zarr.py:76, in ZarrArrayWrapper.__getitem__(self, key)
74 array = self.get_array()
75 if isinstance(key, indexing.BasicIndexer):
---> 76 return array[key.tuple]
77 elif isinstance(key, indexing.VectorizedIndexer):
78 return array.vindex[
79 indexing._arrayize_vectorized_indexer(key, self.shape).tuple
80 ]
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:788, in Array.__getitem__(self, selection)
786 result = self.vindex[selection]
787 else:
--> 788 result = self.get_basic_selection(pure_selection, fields=fields)
789 return result
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:914, in Array.get_basic_selection(self, selection, out, fields)
911 return self._get_basic_selection_zd(selection=selection, out=out,
912 fields=fields)
913 else:
--> 914 return self._get_basic_selection_nd(selection=selection, out=out,
915 fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:957, in Array._get_basic_selection_nd(self, selection, out, fields)
951 def _get_basic_selection_nd(self, selection, out=None, fields=None):
952 # implementation of basic selection for array with at least one dimension
953
954 # setup indexer
955 indexer = BasicIndexer(selection, self)
--> 957 return self._get_selection(indexer=indexer, out=out, fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1247, in Array._get_selection(self, indexer, out, fields)
1241 if not hasattr(self.chunk_store, "getitems") or \
1242 any(map(lambda x: x == 0, self.shape)):
1243 # sequentially get one key at a time from storage
1244 for chunk_coords, chunk_selection, out_selection in indexer:
1245
1246 # load chunk selection into output array
-> 1247 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
1248 drop_axes=indexer.drop_axes, fields=fields)
1249 else:
1250 # allow storage to get multiple items at once
1251 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1939, in Array._chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1935 ckey = self._chunk_key(chunk_coords)
1937 try:
1938 # obtain compressed data for chunk
-> 1939 cdata = self.chunk_store[ckey]
1941 except KeyError:
1942 # chunk not initialized
1943 if self._fill_value is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\storage.py:717, in KVStore.__getitem__(self, key)
716 def __getitem__(self, key):
--> 717 return self._mutable_mapping[key]
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\mapping.py:137, in FSMap.__getitem__(self, key, default)
135 k = self._key_to_str(key)
136 try:
--> 137 result = self.fs.cat(k)
138 except self.missing_exceptions:
139 if default is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:111, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
108 @functools.wraps(func)
109 def wrapper(*args, **kwargs):
110 self = obj or args[0]
--> 111 return sync(self.loop, func, *args, **kwargs)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:94, in sync(loop, func, timeout, *args, **kwargs)
91 return_result = result[0]
92 if isinstance(return_result, asyncio.TimeoutError):
93 # suppress asyncio.TimeoutError, raise FSTimeoutError
---> 94 raise FSTimeoutError from return_result
95 elif isinstance(return_result, BaseException):
96 raise return_result
FSTimeoutError:
In the line:
store = fsspec.get_mapper(asset.href)
You can pass extra arguments to the fsspec backend, in this case HTTP, see fsspec.implementations.http.HTTPFileSystem. In this case, client_kwargs get passed to aiohttp.ClientSession, and include an optional timeout argument. Your call may look something like
from aiohttp import ClientTimeout
store = fsspec.get_mapper(asset.href, client_kwargs={"timeout": ClientTimeout(total=5000, connect=1000)})

GeoDataFrame Value Error: 'data' should be a 1-dimensional array of geometry objects'

I want to quantify some geolocations with osmnx using the nearest_edges-function. I get a value error message when running this code and don't know what I'm doing wrong:
# project graph and points
G_proj = ox.project_graph(G)
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points
# aggregate so have number of accidents on each edge
gdf_bad_roads = (
gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
.groupby(["u", "v", "key"])
.agg(geometry = ("geometry", "first"), number=("osmid", "size"))
.set_crs(gdf_edges.crs)
)
When I run it, the traceback shows the comment `# we require a list, but not a 'str'` for the `.agg(geometry=...)` call, followed by several more frames that lead to the ValueError `'data' should be a 1-dimensional array of geometry objects`. I have attached the whole traceback. Thanks for your help!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/jy/1f2tlvb965g30zhw9q3cvdw07r5rb_/T/ipykernel_82991/3621029527.py in <module>
2 # aggregate so have number of accidents on each edge
3 gdf_bad_roads = (
----> 4 gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
5 .groupby(["u", "v", "key"])
6 .agg(geometry = ("geometry", "first"), number=("osmid", "size"))
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
977
978 op = GroupByApply(self, func, args, kwargs)
--> 979 result = op.agg()
980 if not is_dict_like(func) and result is not None:
981 return result
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg(self)
159
160 if is_dict_like(arg):
--> 161 return self.agg_dict_like()
162 elif is_list_like(arg):
163 # we require a list, but not a 'str'
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg_dict_like(self)
457
458 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 459 result = concat(
460 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
461 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
305 )
306
--> 307 return op.get_result()
308
309
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in get_result(self)
537
538 cons = sample._constructor
--> 539 return cons(new_data).__finalize__(self, method="concat")
540
541 def _get_result_dim(self) -> int:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __getitem__(self, key)
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
3424 if self.columns.is_unique and key in self.columns:
3425 if isinstance(self.columns, MultiIndex):
-> 3426 return self._getitem_multilevel(key)
3427 return self._get_item_cache(key)
3428
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _getitem_multilevel(self, key)
3511 result_columns = maybe_droplevels(new_columns, key)
3512 if self._is_mixed_type:
-> 3513 result = self.reindex(columns=new_columns)
3514 result.columns = result_columns
3515 else:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 #deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4816
4817 # perform the reindex on the axes
-> 4818 return self._reindex_axes(
4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4589 columns = axes["columns"]
4590 if columns is not None:
-> 4591 frame = frame._reindex_columns(
4592 columns, method, copy, level, fill_value, limit, tolerance
4593 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4634 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4635 )
-> 4636 return self._reindex_with_indexers(
4637 {1: [new_columns, indexer]},
4638 copy=copy,
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4895 new_data = new_data.copy()
4896
-> 4897 return self._constructor(new_data).__finalize__(self)
4898
4899 def filter(
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
48
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in from_shapely(data, crs)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
152
153
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in __init__(self, data, crs)
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
ValueError: 'data' should be a 1-dimensional array of geometry objects.
Edit: thank you! Unfortunately it doesn't work. I downgraded Python to 3.9 and upgraded pandas to 1.4, but I still have the same issue. I have added the traceback from the other code as well.
----
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [4], in <cell line: 4>()
2 gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
3 # aggregate so have number of accidents on each edge
----> 4 gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
5 geometry=("geometry", "first"), number=("osmid", "size")
6 ).set_crs(gdf_edges.crs)
8 print(f"""
9 pandas: {pd.__version__}
10 geopandas: {gpd.__version__}
11 osmnx: {ox.__version__}""")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
866 func = maybe_mangle_lambdas(func)
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
165 return self.apply_str()
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'
171 return self.agg_list_like()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:498, in Apply.agg_dict_like(self)
495 keys_to_use = ktu
497 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 498 result = concat(
499 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
500 )
501 elif any(is_ndframe):
502 # There is a mix of NDFrames and scalars
503 raise ValueError(
504 "cannot perform both aggregation "
505 "and transformation operations "
506 "simultaneously"
507 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:359, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
155 """
156 Concatenate pandas objects along a particular axis with optional set logic
157 along the other axes.
(...)
344 ValueError: Indexes have overlapping values: ['a']
345 """
346 op = _Concatenator(
347 objs,
348 axis=axis,
(...)
356 sort=sort,
357 )
--> 359 return op.get_result()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:599, in _Concatenator.get_result(self)
596 new_data._consolidate_inplace()
598 cons = sample._constructor
--> 599 return cons(new_data).__finalize__(self, method="concat")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:157, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
154 index = self.index
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
160 and not self["geometry"].values.crs == crs
161 ):
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:1327, in GeoDataFrame.__getitem__(self, key)
1321 def __getitem__(self, key):
1322 """
1323 If the result is a column containing only 'geometry', return a
1324 GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3473, in DataFrame.__getitem__(self, key)
3471 if self.columns.is_unique and key in self.columns:
3472 if isinstance(self.columns, MultiIndex):
-> 3473 return self._getitem_multilevel(key)
3474 return self._get_item_cache(key)
3476 # Do we have a slicer (on rows)?
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3560, in DataFrame._getitem_multilevel(self, key)
3558 result_columns = maybe_droplevels(new_columns, key)
3559 if self._is_mixed_type:
-> 3560 result = self.reindex(columns=new_columns)
3561 result.columns = result_columns
3562 else:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:324, in rewrite_axis_style_signature.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4798, in DataFrame.reindex(self, *args, **kwargs)
4796 kwargs.pop("axis", None)
4797 kwargs.pop("labels", None)
-> 4798 return super().reindex(**kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:4974, in NDFrame.reindex(self, *args, **kwargs)
4971 return self._reindex_multi(axes, copy, fill_value)
4973 # perform the reindex on the axes
-> 4974 return self._reindex_axes(
4975 axes, level, limit, tolerance, method, fill_value, copy
4976 ).__finalize__(self, method="reindex")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4611, in DataFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4609 columns = axes["columns"]
4610 if columns is not None:
-> 4611 frame = frame._reindex_columns(
4612 columns, method, copy, level, fill_value, limit, tolerance
4613 )
4615 index = axes["index"]
4616 if index is not None:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4656, in DataFrame._reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4643 def _reindex_columns(
4644 self,
4645 new_columns,
(...)
4651 tolerance=None,
4652 ):
4653 new_columns, indexer = self.columns.reindex(
4654 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4655 )
-> 4656 return self._reindex_with_indexers(
4657 {1: [new_columns, indexer]},
4658 copy=copy,
4659 fill_value=fill_value,
4660 allow_dups=False,
4661 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:5054, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
5051 if copy and new_data is self._mgr:
5052 new_data = new_data.copy()
-> 5054 return self._constructor(new_data).__finalize__(self)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:164, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:46, in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:151, in from_shapely(data, crs)
135 def from_shapely(data, crs=None):
136 """
137 Convert a list or array of shapely objects to a GeometryArray.
138
(...)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:280, in GeometryArray.__init__(self, data, crs)
275 raise TypeError(
276 "'data' should be array of geometry objects. Use from_shapely, "
277 "from_wkb, from_wkt functions to construct a GeometryArray."
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
283 self.data = data
285 self._crs = None
ValueError: 'data' should be a 1-dimensional array of geometry objects.
pandas: 1.4.1
geopandas: 0.10.2
osmnx: 1.1.2
I have changed this to an MWE (minimal working example).
I have separated out join() and groupby() / agg().
I have included the package versions.
One difference I can see: Python 3.9 vs 3.10.
import osmnx as ox
import geopandas as gpd
import pandas as pd
import io
df = pd.read_csv(
io.StringIO(
"""AccidentUID,AccidentLocation_CHLV95_E,AccidentLocation_CHLV95_N
99BA5D383B96D02AE0430A865E33D02A,2663985,1213215
9B25C4871C909022E0430A865E339022,2666153,1211303
9B71AB601D948092E0430A865E338092,2666168,1211785
9C985CF7710A60C0E0430A865E3360C0,2663991,1213203
9EA9548660AB3002E0430A865E333002,2666231,1210786
9B2E8B25D5C29094E0430A865E339094,2666728,1210404
9C87C10FB73A905EE0430A865E33905E,2666220,1211811
9E30F39D35CA1058E0430A865E331058,2664599,1212960
9BC2EA43E0BFC068E0430A865E33C068,2665533,1212617
9C0BB9332AB30044E0430A865E330044,2666852,1211964"""
)
)
gdf_loc = gpd.GeoDataFrame(
data=df,
geometry=gpd.points_from_xy(
df["AccidentLocation_CHLV95_E"], df["AccidentLocation_CHLV95_N"]
),
crs="EPSG:2056",
).to_crs("epsg:4326")
# get OSM data for investigated location
G = ox.graph_from_place("Luzern, Switzerland", network_type="drive")
G_proj = ox.project_graph(G)
gdf_nodes, gdf_edges = ox.utils_graph.graph_to_gdfs(G_proj)
# project graph and points
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# reindex points based on results from nearest_edges
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# join geometry from edges back to points
gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
# aggregate so have number of accidents on each edge
gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
geometry=("geometry", "first"), number=("osmid", "size")
).set_crs(gdf_edges.crs)
print(f"""
pandas: {pd.__version__}
geopandas: {gpd.__version__}
osmnx: {ox.__version__}""")
pandas: 1.4.0
geopandas: 0.10.2
osmnx: 1.1.2
Alternative aggregate syntax: it has been confirmed that both of the calls below work.
Hence the conclusion is that named aggregations are failing. This should possibly be raised as an issue on pandas, but it is not failing in all environments.
groupby()/agg() takes the first geometry on shared edges, and it is also necessary to set the CRS again.
dissolve() does a unary union of the geometries. Conceptually this should be the same, but it gives slightly different geometry. (A unary union of identical geometries is, IMHO, an instance of one of the geometries.)
gdf_bad_roads.groupby(["u", "v", "key"]).agg({"geometry":"first", "AccidentUID":"size"}).set_crs(gdf_edges.crs).explore(color="blue")
gdf_bad_roads.dissolve(["u", "v", "key"], aggfunc={"AccidentUID":"size"}).explore(color="blue")

What is causing a circular reference when plotting a map using ArcGIS API for Python?

Following the documentation for the ArcGIS API for Python, I'm attempting to plot coordinates from a spatially enabled dataframe. Perhaps my understanding of a circular reference is off, but I can't see how the object would be referencing itself.
import pandas as pd
from arcgis.gis import GIS
## importing geocoded data from pickle
prop_sdf = pd.read_pickle("./recent_geocoded.pkl")
prop_sdf = pd.DataFrame.spatial.from_xy(prop_sdf, 'lat','long')
prop_sdf.head()
This is what my resulting dataframe looks like:
unique_id lat long
0 43432884 41.701011 -70.019244
2 43400770 41.641784 -70.366659
3 43425636 41.701954 -70.146602
4 43427274 41.720506 -70.021849
5 43427818 41.649288 -70.490767
I'm able to render the interactive map
m1 = GIS().map("United States")
m1.zoom = 4
m1.center = [39,-98]
But hit an error when plotting:
prop_sdf.spatial.plot(map_widget=m1)
m1
Trace:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-30-d48a9f223ff4> in <module>
----> 1 prop_sdf.spatial.plot(map_widget=m1)
2 m1
~\anaconda3\lib\site-packages\arcgis\features\geo\_accessor.py in plot(self, map_widget, **kwargs)
2002 self._data.columns = [c.replace(" ", "_") for c in self._data.columns]
2003 # plot and be merry
-> 2004 _plot_map_widget(map_widget)
2005 self._data.columns = orig_col
2006 return True
~\anaconda3\lib\site-packages\arcgis\features\geo\_accessor.py in _plot_map_widget(mp_wdgt)
1984 colors=kwargs.pop('cmap', None) or kwargs.pop('colors', None) or kwargs.pop('pallette', 'jet'),
1985 alpha=kwargs.pop('alpha', 1),
-> 1986 **kwargs)
1987
1988 # small helper to address zoom level
~\anaconda3\lib\site-packages\arcgis\features\geo\_viz\mapping.py in plot(df, map_widget, name, renderer_type, symbol_type, symbol_style, col, colors, alpha, **kwargs)
191 fc.layer['layerDefinition']['drawingInfo']['renderer'] = renderer
192 if map_exists:
--> 193 map_widget.add_layer(fc, options={'title':name})
194 else:
195 map_widget.add_layer(fc, options={'title':name})
~\anaconda3\lib\site-packages\arcgis\widgets\_mapview\_mapview.py in add_layer(self, item, options)
1051 _is_geoenabled(item):
1052 item = item.spatial.to_feature_collection()
-> 1053 self._add_layer_to_widget(item, options)
1054
1055 def _add_layer_to_webmap(self, item, options):
~\anaconda3\lib\site-packages\arcgis\widgets\_mapview\_mapview.py in _add_layer_to_widget(self, item, options)
1091 # (i.e., do what was done for ImageryLayer for all major Layers)
1092 # 'No type' layer just means that we'll figure it out at JS time
-> 1093 _lyr = _make_jsonable_dict(item._lyr_json)
1094 if ('type' in _lyr and \
1095 _lyr['type'] == 'MapImageLayer') and \
~\anaconda3\lib\site-packages\arcgis\widgets\_mapview\_mapview.py in _make_jsonable_dict(obj)
60 will later be used to delete all values with this value"""
61 return flag
---> 62 dict_ = json.loads(json.dumps(obj, default=default_func))
63 return { k:v for k, v in dict_.items() if v != flag }
64
~\anaconda3\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
~\anaconda3\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\anaconda3\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
ValueError: Circular reference detected

I have a series of netcdf files of sst data. I want to compute sst gradient to locate the oceanic front

`sst_gradient = xr.Dataset({'sst_gradient':(['lat','lon','time'],sst_gradient)},/error in this line
coords={'lat':(selected_sst.lat.values),
'lon':(selected_sst.lon.values),
'time':(selected_sst.time.values)})
`
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last)
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/variable.py
in as_variable(obj, name)
106 try:
--> 107 obj = Variable(*obj)
108 except (TypeError, ValueError) as error:
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/variable.py
in init(self, dims, data, attrs, encoding, fastpath)
308 self._data = as_compatible_data(data, fastpath=fastpath)
--> 309 self._dims = self._parse_dimensions(dims)
310 self._attrs = None
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/variable.py
in _parse_dimensions(self, dims)
499 "dimensions %s must have the same length as the "
--> 500 "number of data dimensions, ndim=%s" % (dims, self.ndim)
501 )
ValueError: dimensions ('lat', 'lon', 'time') must have the same
length as the number of data dimensions, ndim=0
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call
last) in
3 coords={'lats':(selected_sst.lat.values),
4 'lons':(selected_sst.lon.values),
----> 5 'times':(selected_sst.time.values)})
6
7
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/dataset.py
in init(self, data_vars, coords, attrs, compat)
533
534 variables, coord_names, dims, indexes = merge_data_and_coords(
--> 535 data_vars, coords, compat=compat
536 )
537
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/merge.py
in merge_data_and_coords(data, coords, compat, join)
465 indexes = dict(_extract_indexes_from_coords(coords))
466 return merge_core(
--> 467 objects, compat, join, explicit_coords=explicit_coords, indexes=indexes
468 )
469
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/merge.py
in merge_core(objects, compat, join, priority_arg, explicit_coords,
indexes, fill_value)
550 coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
551 )
--> 552 collected = collect_variables_and_indexes(aligned)
553
554 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/merge.py
in collect_variables_and_indexes(list_of_mappings)
275 append_all(coords, indexes)
276
--> 277 variable = as_variable(variable, name=name)
278 if variable.dims == (name,):
279 variable = variable.to_index_variable()
~/anaconda3/envs/myenv/lib/python3.6/site-packages/xarray/core/variable.py
in as_variable(obj, name)
111 "Could not convert tuple of form "
112 "(dims, data[, attrs, encoding]): "
--> 113 "{} to Variable.".format(obj)
114 )
115 elif utils.is_scalar(obj):
ValueError: Could not convert tuple of form (dims, data[, attrs,
encoding]): (['lat', 'lon', 'time'], Dimensions:
(lat: 600, lon: 4320, sst.lat: 72, sst.lon: 600, sst.time: 4320, time:
72) Coordinates: * lat (lat) float32 -40.041668 -40.12501
... -89.87501 -89.958336 * lon (lon) float32 -179.95833
-179.875 ... 179.87502 179.95836 * time (time) datetime64[ns] 2005-01-15 2005-02-15 ... 2010-12-15 Dimensions without
coordinates: sst.lat, sst.lon, sst.time Data variables:
sst_gradient (sst.lat, sst.lon, sst.time) float32 2.7785575e-08 ... nan) to Variable.
The xarray equivalent of numpy.gradient is xarray.Dataset.differentiate.
You can find details here: xarray differentiate

Bokeh Geoviews use Lat/Long or UTM?

I am trying to plot the Zillow dataset with Bokeh using Geoviews and Datashader but I am having the damnedest time getting it to work. I am able to plot the data on a Cartesian plane fine but when I attempt to overlay the data with a map I run into errors.
I have used code adapted from the census-hv example on the datashader GitHub. I believe my problem is that it expects the coordinates to be in UTM rather than lat/long, because the code works when I multiply my coordinates by a few thousand; the points are then placed above the map in white space. If I attempt to plot the proper lat/long coordinates I get the following errors.
Can someone please point me in the direction of a map that uses lat/long?
>>>props.head()
longitude latitude
0 -118.654084 34.144442
1 -118.625364 34.140430
2 -118.394633 33.989359
3 -118.437206 34.148863
4 -118.385816 34.194168
import pandas as pd
import holoviews as hv
import geoviews as gv
import datashader as ds
from bokeh.models import WMTSTileSource
from holoviews.operation.datashader import datashade, dynspread
hv.notebook_extension('bokeh')
%%opts Overlay [width=900 height=525 xaxis=None yaxis=None]
geomap = gv.WMTS(WMTSTileSource(url=\
'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))
points = hv.Points(gv.Dataset(props, kdims=['longitude', 'latitude']))
# color_key = {'w':'aqua', 'b':'lime', 'a':'red', 'h':'fuchsia', 'o':'yellow' }
race = datashade(points, x_sampling=50, y_sampling=50,
element_type=gv.Image)
geomap * race
RETURNS ERROR:
WARNING:root:dynamic_operation: Exception raised in callable
'dynamic_operation' of type 'function'.
Invoked as dynamic_operation(height=400, scale=1.0, width=400, x_range=None, y_range=None)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
305 pass
306 else:
--> 307 return printer(obj)
308 # Finally look for special method names
309 method = get_real_method(obj, self.print_method)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in pprint_display(obj)
255 if not ip.display_formatter.formatters['text/plain'].pprint:
256 return None
--> 257 return display(obj, raw=True)
258
259
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in display(obj, raw, **kwargs)
241 elif isinstance(obj, (HoloMap, DynamicMap)):
242 with option_state(obj):
--> 243 html = map_display(obj)
244 else:
245 return repr(obj) if raw else IPython.display.display(obj, **kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in wrapped(element)
127 try:
128 html = fn(element,
--> 129 max_frames=OutputMagic.options['max_frames'])
130
131 # Only want to add to the archive for one display hook...
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in map_display(vmap, max_frames)
196 return None
197
--> 198 return render(vmap)
199
200
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/ipython/display_hooks.py in render(obj, **kwargs)
57 if renderer.fig == 'pdf':
58 renderer = renderer.instance(fig='png')
---> 59 return renderer.html(obj, **kwargs)
60
61
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in html(self, obj, fmt, css, comm, **kwargs)
253 code to initialize a Comm, if the plot supplies one.
254 """
--> 255 plot, fmt = self._validate(obj, fmt)
256 figdata, _ = self(plot, fmt, **kwargs)
257 if css is None: css = self.css
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in _validate(self, obj, fmt)
189 if isinstance(obj, tuple(self.widgets.values())):
190 return obj, 'html'
--> 191 plot = self.get_plot(obj, renderer=self)
192
193 fig_formats = self.mode_formats['fig'][self.mode]
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/renderer.py in get_plot(self_or_cls, obj, renderer)
164 """
165 # Initialize DynamicMaps with first data item
--> 166 initialize_dynamic(obj)
167
168 if not isinstance(obj, Plot) and not displayable(obj):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/plotting/util.py in initialize_dynamic(obj)
173 continue
174 if not len(dmap):
--> 175 dmap[dmap._initial_key()]
176
177
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
489 # Nothing to do for callbacks that accept no arguments
490 (inargs, inkwargs) = (args, kwargs)
--> 491 if not args and not kwargs: return self.callable()
492 inputs = [i for i in self.inputs if isinstance(i, DynamicMap)]
493 streams = []
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/overlay.py in dynamic_mul(*args, **kwargs)
27 from .spaces import Callable
28 def dynamic_mul(*args, **kwargs):
---> 29 element = other[args]
30 return self * element
31 callback = Callable(dynamic_mul, inputs=[self, other])
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __getitem__(self, key)
942 # Not a cross product and nothing cached so compute element.
943 if cache is not None: return cache
--> 944 val = self._execute_callback(*tuple_key)
945 if data_slice:
946 val = self._dataslice(val, data_slice)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in _execute_callback(self, *args)
791
792 with dynamicmap_memoization(self.callback, self.streams):
--> 793 retval = self.callback(*args, **kwargs)
794 return self._style(retval)
795
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/spaces.py in __call__(self, *args, **kwargs)
519
520 try:
--> 521 ret = self.callable(*args, **kwargs)
522 except:
523 posstr = ', '.join(['%r' % el for el in inargs]) if inargs else ''
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in dynamic_operation(*key, **kwargs)
101 self.p.kwargs.update(kwargs)
102 obj = map_obj[key] if isinstance(map_obj, HoloMap) else map_obj
--> 103 return self._process(obj, key)
104 else:
105 def dynamic_operation(*key, **kwargs):
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/util.py in _process(self, element, key)
87 kwargs = {k: v for k, v in self.p.kwargs.items()
88 if k in self.p.operation.params()}
---> 89 return self.p.operation.process_element(element, key, **kwargs)
90 else:
91 return self.p.operation(element, **self.p.kwargs)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/operation.py in process_element(self, element, key, **params)
133 """
134 self.p = param.ParamOverrides(self, params)
--> 135 return self._process(element, key)
136
137
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
357
358 def _process(self, element, key=None):
--> 359 agg = aggregate._process(self, element, key)
360 shaded = shade._process(self, agg, key)
361 return shaded
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/operation/datashader.py in _process(self, element, key)
226 agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
227 if agg.ndim == 2:
--> 228 return self.p.element_type(agg, **params)
229 else:
230 return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/geoviews/element/geo.py in __init__(self, data, **kwargs)
81 elif crs:
82 kwargs['crs'] = crs
---> 83 super(_Element, self).__init__(data, **kwargs)
84
85
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/element/raster.py in __init__(self, data, bounds, extents, xdensity, ydensity, **params)
242 if bounds is None:
243 xvals = self.dimension_values(0, False)
--> 244 l, r, xdensity, _ = util.bound_range(xvals, xdensity)
245 yvals = self.dimension_values(1, False)
246 b, t, ydensity, _ = util.bound_range(yvals, ydensity)
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/holoviews/core/util.py in bound_range(vals, density)
1373 using significant digits reported by sys.float_info.dig.
1374 """
-> 1375 low, high = vals.min(), vals.max()
1376 invert = False
1377 if vals[0] > vals[1]:
/home/mcamp/anaconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/_methods.py in _amin(a, axis, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29 return umr_minimum(a, axis, None, out, keepdims)
30
31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation minimum which has no identity
Out[54]:
b':DynamicMap []'
I think the problem here is two-fold. First of all, since the coordinates are latitudes and longitudes and you specify x_sampling/y_sampling values of 50, the datashaded image ends up with a tiny or zero shape, which causes this error. My suggestion would be to cast the coordinates to Google Mercator first. In future this PR will let you do so very simply by calling this:
import cartopy.crs as ccrs
projected = gv.operation.project(points, projection=ccrs.GOOGLE_MERCATOR)
...
To do this manually for now you can use the cartopy projection directly:
coords = ccrs.GOOGLE_MERCATOR.transform_points(ccrs.PlateCarree(), lons, lats)
projected = gv.Points(coords, crs=ccrs.GOOGLE_MERCATOR)
...

Categories