It looks like `.append` is deprecated now:
The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
I am trying to get tweets with tweepy (more than 100), so I use Paginator; however, I am not sure how to properly append/concat rows to the pandas DataFrame:
# Wrap the search endpoint in a Paginator so the results can be iterated
# across several requests (see `flatten` below).
paginator = tweepy.Paginator(
    client.search_recent_tweets,  # The method you want to use
    "#publictransport -is:retweet",  # Some argument for this method
    max_results=100,  # How many tweets asked per request
)
import pandas as pd

df = pd.DataFrame()
for tweet in paginator.flatten(limit=1000):  # Total number of tweets to retrieve
    # This is the deprecated call the question is about.
    # NOTE(review): `df` is never reassigned, so each iteration appends to the
    # same empty frame and `df2` only ever holds the most recent tweet.
    df2 = df.append({'Tweet':tweet}, ignore_index = True)
I get this error:
df2.head(5)
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/core/formatters.py:707, in PlainTextFormatter.__call__(self, obj)
700 stream = StringIO()
701 printer = pretty.RepresentationPrinter(stream, self.verbose,
702 self.max_width, self.newline,
703 max_seq_length=self.max_seq_length,
704 singleton_pprinters=self.singleton_printers,
705 type_pprinters=self.type_printers,
706 deferred_pprinters=self.deferred_printers)
--> 707 printer.pretty(obj)
708 printer.flush()
709 return stream.getvalue()
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/lib/pretty.py:410, in RepresentationPrinter.pretty(self, obj)
407 return meth(obj, self, cycle)
408 if cls is not object \
409 and callable(cls.__dict__.get('__repr__')):
--> 410 return _repr_pprint(obj, self, cycle)
412 return _default_pprint(obj, self, cycle)
413 finally:
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/IPython/lib/pretty.py:778, in _repr_pprint(obj, p, cycle)
776 """A pprint that just redirects to the normal repr function."""
777 # Find newlines and replace them with p.break_()
--> 778 output = repr(obj)
779 lines = output.splitlines()
780 with p.group():
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/core/frame.py:1011, in DataFrame.__repr__(self)
1008 return buf.getvalue()
1010 repr_params = fmt.get_dataframe_repr_params()
-> 1011 return self.to_string(**repr_params)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/core/frame.py:1192, in DataFrame.to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width, min_rows, max_colwidth, encoding)
1173 with option_context("display.max_colwidth", max_colwidth):
1174 formatter = fmt.DataFrameFormatter(
1175 self,
1176 columns=columns,
(...)
1190 decimal=decimal,
1191 )
-> 1192 return fmt.DataFrameRenderer(formatter).to_string(
1193 buf=buf,
1194 encoding=encoding,
1195 line_width=line_width,
1196 )
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1128, in DataFrameRenderer.to_string(self, buf, encoding, line_width)
1125 from pandas.io.formats.string import StringFormatter
1127 string_formatter = StringFormatter(self.fmt, line_width=line_width)
-> 1128 string = string_formatter.to_string()
1129 return save_to_buffer(string, buf=buf, encoding=encoding)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:25, in StringFormatter.to_string(self)
24 def to_string(self) -> str:
---> 25 text = self._get_string_representation()
26 if self.fmt.should_show_dimensions:
27 text = "".join([text, self.fmt.dimensions_info])
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:40, in StringFormatter._get_string_representation(self)
37 if self.fmt.frame.empty:
38 return self._empty_info_line
---> 40 strcols = self._get_strcols()
42 if self.line_width is None:
43 # no need to wrap around just print the whole frame
44 return self.adj.adjoin(1, *strcols)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/string.py:31, in StringFormatter._get_strcols(self)
30 def _get_strcols(self) -> list[list[str]]:
---> 31 strcols = self.fmt.get_strcols()
32 if self.fmt.is_truncated:
33 strcols = self._insert_dot_separators(strcols)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:611, in DataFrameFormatter.get_strcols(self)
607 def get_strcols(self) -> list[list[str]]:
608 """
609 Render a DataFrame to a list of columns (as lists of strings).
610 """
--> 611 strcols = self._get_strcols_without_index()
613 if self.index:
614 str_index = self._get_formatted_index(self.tr_frame)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:875, in DataFrameFormatter._get_strcols_without_index(self)
871 cheader = str_columns[i]
872 header_colwidth = max(
873 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
874 )
--> 875 fmt_values = self.format_col(i)
876 fmt_values = _make_fixed_width(
877 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
878 )
880 max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:889, in DataFrameFormatter.format_col(self, i)
887 frame = self.tr_frame
888 formatter = self._get_formatter(i)
--> 889 return format_array(
890 frame.iloc[:, i]._values,
891 formatter,
892 float_format=self.float_format,
893 na_rep=self.na_rep,
894 space=self.col_space.get(frame.columns[i]),
895 decimal=self.decimal,
896 leading_space=self.index,
897 )
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1316, in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
1301 digits = get_option("display.precision")
1303 fmt_obj = fmt_klass(
1304 values,
1305 digits=digits,
(...)
1313 quoting=quoting,
1314 )
-> 1316 return fmt_obj.get_result()
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1347, in GenericArrayFormatter.get_result(self)
1346 def get_result(self) -> list[str]:
-> 1347 fmt_values = self._format_strings()
1348 return _make_fixed_width(fmt_values, self.justify)
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1410, in GenericArrayFormatter._format_strings(self)
1408 for i, v in enumerate(vals):
1409 if not is_float_type[i] and leading_space:
-> 1410 fmt_values.append(f" {_format(v)}")
1411 elif is_float_type[i]:
1412 fmt_values.append(float_format(v))
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/format.py:1390, in GenericArrayFormatter._format_strings.<locals>._format(x)
1387 return str(x)
1388 else:
1389 # object dtype
-> 1390 return str(formatter(x))
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:222, in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
218 result = _pprint_dict(
219 thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
220 )
221 elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
--> 222 result = _pprint_seq(
223 thing,
224 _nest_lvl,
225 escape_chars=escape_chars,
226 quote_strings=quote_strings,
227 max_seq_items=max_seq_items,
228 )
229 elif isinstance(thing, str) and quote_strings:
230 result = f"'{as_escaped_string(thing)}'"
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:119, in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds)
117 s = iter(seq)
118 # handle sets, no slicing
--> 119 r = [
120 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
121 for i in range(min(nitems, len(seq)))
122 ]
123 body = ", ".join(r)
125 if nitems < len(seq):
File /anaconda/envs/GPSAnalysis/lib/python3.9/site-packages/pandas/io/formats/printing.py:120, in <listcomp>(.0)
117 s = iter(seq)
118 # handle sets, no slicing
119 r = [
--> 120 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
121 for i in range(min(nitems, len(seq)))
122 ]
123 body = ", ".join(r)
125 if nitems < len(seq):
StopIteration:
I can't reproduce your error, so I am working blind here, but here is one way to do what you asked:
# One single-row frame per tweet, glued together in a single concat call;
# ignore_index=True gives the result a fresh 0..n-1 index.
frames = [
    pd.DataFrame({"Tweet": [tweet]})
    for tweet in paginator.flatten(limit=1000)
]
df = pd.concat(frames, ignore_index=True)
However, you need neither pd.concat nor append to achieve the same result:
# Build the frame in one go: one row per tweet, stored in the same "Tweet"
# column that the pd.concat variant produces.
df = pd.DataFrame({"Tweet": list(paginator.flatten(limit=1000))})
I want to quantify some geolocations with osmnx using the nearest_edges function. I get a ValueError when running this code and don't know what I'm doing wrong:
# Project the graph, then reproject the point geometries into the CRS of the
# projected graph so both use the same coordinates.
G_proj = ox.project_graph(G)
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])
# For every point, look up the nearest edge (ne) and, because return_dist=True,
# the distance to it (d).
ne, d = ox.nearest_edges(
G_proj, X=gdf_loc_p.x.values, Y=gdf_loc_p.y.values, return_dist=True
)
# Re-index the points by the (u, v, key) tuples returned by nearest_edges,
# store the distance as a column, and sort by the new index.
gdf_loc = (
gdf_loc.set_index(pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"]))
.assign(distance=d)
.sort_index()
)
# Join the edges with the points on their index (inner join); overlapping
# column names coming from gdf_loc get the "_loc" suffix.
# Then aggregate per (u, v, key): keep the first geometry and count the rows,
# i.e. the number of accidents on each edge.
# NOTE(review): gdf_edges is defined outside this snippet - presumably the edge
# GeoDataFrame of the same graph; confirm.
gdf_bad_roads = (
gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
.groupby(["u", "v", "key"])
.agg(geometry = ("geometry", "first"), number=("osmid", "size"))
.set_crs(gdf_edges.crs)
)
When I run it, the traceback points at the `.agg(...)` line, passes through pandas' `agg` code (next to the comment `# we require a list, but not a 'str'`), and after a few more frames ends in `ValueError: 'data' should be a 1-dimensional array of geometry objects.` I have attached the whole traceback. Thanks for your help!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/jy/1f2tlvb965g30zhw9q3cvdw07r5rb_/T/ipykernel_82991/3621029527.py in <module>
2 # aggregate so have number of accidents on each edge
3 gdf_bad_roads = (
----> 4 gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
5 .groupby(["u", "v", "key"])
6 .agg(geometry = ("geometry", "first"), number=("osmid", "size"))
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
977
978 op = GroupByApply(self, func, args, kwargs)
--> 979 result = op.agg()
980 if not is_dict_like(func) and result is not None:
981 return result
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg(self)
159
160 if is_dict_like(arg):
--> 161 return self.agg_dict_like()
162 elif is_list_like(arg):
163 # we require a list, but not a 'str'
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/apply.py in agg_dict_like(self)
457
458 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 459 result = concat(
460 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
461 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
305 )
306
--> 307 return op.get_result()
308
309
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/reshape/concat.py in get_result(self)
537
538 cons = sample._constructor
--> 539 return cons(new_data).__finalize__(self, method="concat")
540
541 def _get_result_dim(self) -> int:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __getitem__(self, key)
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in __getitem__(self, key)
3424 if self.columns.is_unique and key in self.columns:
3425 if isinstance(self.columns, MultiIndex):
-> 3426 return self._getitem_multilevel(key)
3427 return self._get_item_cache(key)
3428
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _getitem_multilevel(self, key)
3511 result_columns = maybe_droplevels(new_columns, key)
3512 if self._is_mixed_type:
-> 3513 result = self.reindex(columns=new_columns)
3514 result.columns = result_columns
3515 else:
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 #deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4816
4817 # perform the reindex on the axes
-> 4818 return self._reindex_axes(
4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4589 columns = axes["columns"]
4590 if columns is not None:
-> 4591 frame = frame._reindex_columns(
4592 columns, method, copy, level, fill_value, limit, tolerance
4593 )
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/frame.py in _reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4634 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4635 )
-> 4636 return self._reindex_with_indexers(
4637 {1: [new_columns, indexer]},
4638 copy=copy,
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4895 new_data = new_data.copy()
4896
-> 4897 return self._constructor(new_data).__finalize__(self)
4898
4899 def filter(
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
48
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in from_shapely(data, crs)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
152
153
~/opt/anaconda3/envs/pyproj_env/lib/python3.10/site-packages/geopandas/array.py in __init__(self, data, crs)
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
ValueError: 'data' should be a 1-dimensional array of geometry objects.
Edit: thank you! Unfortunately it doesn't work. I downgraded Python to 3.9 and upgraded pandas to 1.4, but I have the same issue. I added the traceback of the other code as well.
----
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [4], in <cell line: 4>()
2 gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
3 # aggregate so have number of accidents on each edge
----> 4 gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
5 geometry=("geometry", "first"), number=("osmid", "size")
6 ).set_crs(gdf_edges.crs)
8 print(f"""
9 pandas: {pd.__version__}
10 geopandas: {gpd.__version__}
11 osmnx: {ox.__version__}""")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/groupby/generic.py:869, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
866 func = maybe_mangle_lambdas(func)
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:168, in Apply.agg(self)
165 return self.apply_str()
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'
171 return self.agg_list_like()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/apply.py:498, in Apply.agg_dict_like(self)
495 keys_to_use = ktu
497 axis = 0 if isinstance(obj, ABCSeries) else 1
--> 498 result = concat(
499 {k: results[k] for k in keys_to_use}, axis=axis, keys=keys_to_use
500 )
501 elif any(is_ndframe):
502 # There is a mix of NDFrames and scalars
503 raise ValueError(
504 "cannot perform both aggregation "
505 "and transformation operations "
506 "simultaneously"
507 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:359, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
155 """
156 Concatenate pandas objects along a particular axis with optional set logic
157 along the other axes.
(...)
344 ValueError: Indexes have overlapping values: ['a']
345 """
346 op = _Concatenator(
347 objs,
348 axis=axis,
(...)
356 sort=sort,
357 )
--> 359 return op.get_result()
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/reshape/concat.py:599, in _Concatenator.get_result(self)
596 new_data._consolidate_inplace()
598 cons = sample._constructor
--> 599 return cons(new_data).__finalize__(self, method="concat")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:157, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
154 index = self.index
155 try:
156 if (
--> 157 hasattr(self["geometry"].values, "crs")
158 and self["geometry"].values.crs
159 and crs
160 and not self["geometry"].values.crs == crs
161 ):
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:1327, in GeoDataFrame.__getitem__(self, key)
1321 def __getitem__(self, key):
1322 """
1323 If the result is a column containing only 'geometry', return a
1324 GeoSeries. If it's a DataFrame with a 'geometry' column, return a
1325 GeoDataFrame.
1326 """
-> 1327 result = super().__getitem__(key)
1328 geo_col = self._geometry_column_name
1329 if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3473, in DataFrame.__getitem__(self, key)
3471 if self.columns.is_unique and key in self.columns:
3472 if isinstance(self.columns, MultiIndex):
-> 3473 return self._getitem_multilevel(key)
3474 return self._get_item_cache(key)
3476 # Do we have a slicer (on rows)?
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:3560, in DataFrame._getitem_multilevel(self, key)
3558 result_columns = maybe_droplevels(new_columns, key)
3559 if self._is_mixed_type:
-> 3560 result = self.reindex(columns=new_columns)
3561 result.columns = result_columns
3562 else:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/util/_decorators.py:324, in rewrite_axis_style_signature.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
322 #wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4798, in DataFrame.reindex(self, *args, **kwargs)
4796 kwargs.pop("axis", None)
4797 kwargs.pop("labels", None)
-> 4798 return super().reindex(**kwargs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:4974, in NDFrame.reindex(self, *args, **kwargs)
4971 return self._reindex_multi(axes, copy, fill_value)
4973 # perform the reindex on the axes
-> 4974 return self._reindex_axes(
4975 axes, level, limit, tolerance, method, fill_value, copy
4976 ).__finalize__(self, method="reindex")
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4611, in DataFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4609 columns = axes["columns"]
4610 if columns is not None:
-> 4611 frame = frame._reindex_columns(
4612 columns, method, copy, level, fill_value, limit, tolerance
4613 )
4615 index = axes["index"]
4616 if index is not None:
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/frame.py:4656, in DataFrame._reindex_columns(self, new_columns, method, copy, level, fill_value, limit, tolerance)
4643 def _reindex_columns(
4644 self,
4645 new_columns,
(...)
4651 tolerance=None,
4652 ):
4653 new_columns, indexer = self.columns.reindex(
4654 new_columns, method=method, level=level, limit=limit, tolerance=tolerance
4655 )
-> 4656 return self._reindex_with_indexers(
4657 {1: [new_columns, indexer]},
4658 copy=copy,
4659 fill_value=fill_value,
4660 allow_dups=False,
4661 )
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/pandas/core/generic.py:5054, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
5051 if copy and new_data is self._mgr:
5052 new_data = new_data.copy()
-> 5054 return self._constructor(new_data).__finalize__(self)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:164, in GeoDataFrame.__init__(self, data, geometry, crs, *args, **kwargs)
162 _crs_mismatch_warning()
163 # TODO: raise error in 0.9 or 0.10.
--> 164 self["geometry"] = _ensure_geometry(self["geometry"].values, crs)
165 except TypeError:
166 pass
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/geodataframe.py:46, in _ensure_geometry(data, crs)
44 return GeoSeries(out, index=data.index, name=data.name)
45 else:
---> 46 out = from_shapely(data, crs=crs)
47 return out
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:151, in from_shapely(data, crs)
135 def from_shapely(data, crs=None):
136 """
137 Convert a list or array of shapely objects to a GeometryArray.
138
(...)
149
150 """
--> 151 return GeometryArray(vectorized.from_shapely(data), crs=crs)
File ~/opt/anaconda3/envs/pyproj_env/lib/python3.9/site-packages/geopandas/array.py:280, in GeometryArray.__init__(self, data, crs)
275 raise TypeError(
276 "'data' should be array of geometry objects. Use from_shapely, "
277 "from_wkb, from_wkt functions to construct a GeometryArray."
278 )
279 elif not data.ndim == 1:
--> 280 raise ValueError(
281 "'data' should be a 1-dimensional array of geometry objects."
282 )
283 self.data = data
285 self._crs = None
ValueError: 'data' should be a 1-dimensional array of geometry objects.
pandas: 1.4.1
geopandas: 0.10.2
osmnx: 1.1.2
I have changed this to an MWE (minimal working example).
I have separated out join() and groupby() / agg().
I have included the library versions.
One difference I can see: Python 3.9 vs 3.10.
import osmnx as ox
import geopandas as gpd
import pandas as pd
import io
# Inline sample of accident records: an ID plus easting/northing columns.
accident_csv = """AccidentUID,AccidentLocation_CHLV95_E,AccidentLocation_CHLV95_N
99BA5D383B96D02AE0430A865E33D02A,2663985,1213215
9B25C4871C909022E0430A865E339022,2666153,1211303
9B71AB601D948092E0430A865E338092,2666168,1211785
9C985CF7710A60C0E0430A865E3360C0,2663991,1213203
9EA9548660AB3002E0430A865E333002,2666231,1210786
9B2E8B25D5C29094E0430A865E339094,2666728,1210404
9C87C10FB73A905EE0430A865E33905E,2666220,1211811
9E30F39D35CA1058E0430A865E331058,2664599,1212960
9BC2EA43E0BFC068E0430A865E33C068,2665533,1212617
9C0BB9332AB30044E0430A865E330044,2666852,1211964"""
df = pd.read_csv(io.StringIO(accident_csv))

# Turn the easting/northing columns into point geometries, declare them as
# EPSG:2056, and reproject the resulting frame to epsg:4326.
accident_points = gpd.points_from_xy(
    df["AccidentLocation_CHLV95_E"],
    df["AccidentLocation_CHLV95_N"],
)
gdf_loc = gpd.GeoDataFrame(data=df, geometry=accident_points, crs="EPSG:2056")
gdf_loc = gdf_loc.to_crs("epsg:4326")
# Download the drivable street network for the investigated location,
# project it, and split the projected graph into node and edge GeoDataFrames.
G = ox.graph_from_place("Luzern, Switzerland", network_type="drive")
G_proj = ox.project_graph(G)
# Use the top-level `ox.graph_to_gdfs` rather than reaching into the
# `utils_graph` submodule, which is not part of the stable public namespace.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_proj)
# Bring the accident points into the CRS of the (already projected) graph.
gdf_loc_p = gdf_loc["geometry"].to_crs(G_proj.graph["crs"])

# Nearest edge for every point (ne) and the distance to that edge (d).
ne, d = ox.nearest_edges(
    G_proj,
    X=gdf_loc_p.x.values,
    Y=gdf_loc_p.y.values,
    return_dist=True,
)

# Key the points by the (u, v, key) of their nearest edge, keep the distance
# as a column, and sort by that new index.
edge_index = pd.MultiIndex.from_tuples(ne, names=["u", "v", "key"])
gdf_loc = gdf_loc.set_index(edge_index)
gdf_loc = gdf_loc.assign(distance=d)
gdf_loc = gdf_loc.sort_index()
# Join the edge geometries to the points on the shared (u, v, key) index.
# The inner join keeps only keys present in both frames; overlapping column
# names coming from gdf_loc get the "_loc" suffix.
gdf_bad_roads = gdf_edges.join(gdf_loc, rsuffix="_loc", how="inner")
# Aggregate per edge with named aggregation: keep the first geometry and count
# the rows, i.e. the number of accidents on each edge. The CRS is set again on
# the aggregated result.
# NOTE(review): this named-aggregation call is the statement that raises the
# ValueError in the reported environment.
gdf_bad_roads_agg = gdf_bad_roads.groupby(["u", "v", "key"]).agg(
geometry=("geometry", "first"), number=("osmid", "size")
).set_crs(gdf_edges.crs)
# Print the library versions so the environments can be compared.
print(f"""
pandas: {pd.__version__}
geopandas: {gpd.__version__}
osmnx: {ox.__version__}""")
pandas: 1.4.0
geopandas: 0.10.2
osmnx: 1.1.2
Alternative aggregate syntax. It has been confirmed that both alternatives below work,
hence the conclusion is that named aggregations are failing. This should possibly be raised as an issue on pandas, but it is not failing in all environments.
groupby()/agg() takes the first geometry on shared edges, and it is also necessary to set the CRS again.
dissolve() does a unary union on the geometries. Conceptually this should be the same, but it gives a slightly different geometry. (A unary union of identical geometries IMHO is an instance of one of the geometries.)
# Option 1: dict-style aggregation (first geometry, accident count per edge);
# the CRS has to be set again before plotting.
(
    gdf_bad_roads.groupby(["u", "v", "key"])
    .agg({"geometry": "first", "AccidentUID": "size"})
    .set_crs(gdf_edges.crs)
    .explore(color="blue")
)
# Option 2: let dissolve() merge the geometries per edge and count accidents.
(
    gdf_bad_roads.dissolve(["u", "v", "key"], aggfunc={"AccidentUID": "size"})
    .explore(color="blue")
)
I am working with the following type of data.
itemid category subcategory title
1 10000010 Транспорт Автомобили с пробегом Toyota Sera, 1991
2 10000025 Услуги Предложения услуг Монтаж кровли
3 10000094 Личные вещи Одежда, обувь, аксессуары Костюм Steilmann
4 10000101 Транспорт Автомобили с пробегом Ford Focus, 2011
5 10000132 Транспорт Запчасти и аксессуары Турбина 3.0 Bar
6 10000152 Транспорт Автомобили с пробегом ВАЗ 2115 Samara, 2005
Now I run the following commands
import pandas as pd
# Read only the first 10 data rows of the tab-separated training file; the
# first line is the header and the file is decoded as UTF-8.
trainingData = pd.read_table(
    "train.tsv",
    encoding='utf-8',
    header=0,
    nrows=10,
)
# Peek at a single column.
trainingData['itemid'].head()
0 10000010
1 10000025
2 10000094
3 10000101
4 10000132
Name: itemid
Everything is good up to this point, but when I do something like
# Select several columns by passing a list of labels. A bare
# 'itemid','category' inside the brackets is a tuple and is looked up as one
# single column key. This is the statement shown in the traceback below.
trainingData[['itemid','category']].head()
Error:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
/home/vikram/Documents/Avito/ in ()
----> 1 trainingData[['itemid','category']].head()
/usr/lib/python2.7/dist-packages/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/usr/lib/python2.7/dist-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/usr/lib/python2.7/dist-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/usr/lib/python2.7/dist-packages/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/usr/lib/python2.7/dist-packages/IPython/lib/pretty.pyc in pretty(self, obj)
352 if callable(obj_class._repr_pretty_):
353 return obj_class._repr_pretty_(obj, self, cycle)
--> 354 return _default_pprint(obj, self, cycle)
355 finally:
356 self.end_group()
/usr/lib/python2.7/dist-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
472 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
473 # A user-provided repr.
--> 474 p.text(repr(obj))
475 return
476 p.begin_group(1, ' 456 self.to_string(buf=buf)
457 value = buf.getvalue()
458 if max([len(l) for l in value.split('\n')]) > terminal_width:
/usr/lib/pymodules/python2.7/pandas/core/frame.pyc in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, nanRep, index_names, justify, force_unicode)
1024 index_names=index_names,
1025 header=header, index=index)
-> 1026 formatter.to_string(force_unicode=force_unicode)
1027
1028 if buf is None:
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in to_string(self, force_unicode)
176 for i, c in enumerate(self.columns):
177 if self.header:
--> 178 fmt_values = self._format_col(c)
179 cheader = str_columns[i]
180 max_len = max(max(len(x) for x in fmt_values),
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in _format_col(self, col)
217 float_format=self.float_format,
218 na_rep=self.na_rep,
--> 219 space=self.col_space)
220
221 def to_html(self):
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
424 justify=justify)
425
--> 426 return fmt_obj.get_result()
427
428
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in get_result(self)
471 fmt_values.append(float_format(v))
472 else:
--> 473 fmt_values.append(' %s' % _format(v))
474
475 return _make_fixed_width(fmt_values, self.justify)
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in _format(x)
457 else:
458 # object dtype
--> 459 return '%s' % formatter(x)
460
461 vals = self.values
/usr/lib/pymodules/python2.7/pandas/core/common.pyc in _stringify(col)
503 def _stringify(col):
504 # unicode workaround
--> 505 return unicode(col)
506
507 def _maybe_make_list(obj):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 0: ordinal not in range(128)
Please help me display the data properly.
I had the same issue caused by IPython, which could not display non-ASCII text returned by the Pandas head() function. It turned out that the default encoding for Python was set to 'ascii' on my machine. You can check this with
import sys
# Show the interpreter's current default string encoding
# ('ascii' on the machine described here).
sys.getdefaultencoding()
The solution was to re-set the default encoding to UTF-8:
import sys
# reload(sys) makes sys.setdefaultencoding available again; without it the
# call below fails with an AttributeError.
# NOTE(review): Python 2 only - the builtin `reload` and
# `sys.setdefaultencoding` are not available in Python 3. The setting applies
# to the whole interpreter, not just to pandas/IPython output.
reload(sys)
sys.setdefaultencoding('utf-8')
After this, IPython displayed Pandas data frames with non-ASCII characters correctly.
Note that the reload call is necessary to make the setdefaultencoding function available. Without it you'll get the error:
AttributeError: 'module' object has no attribute 'setdefaultencoding'