This question already has an answer here:
Pandas filtering based on OR AND
(1 answer)
Closed 2 years ago.
I am trying to write a condition (similar to a filter) that keeps only the trades whose entry date is the same as the trade date. My code is below.
Tradedate = ba[ba['TIMESTAMP'] <= '10.15'] & ba[ba['TRADE_ENTRY'] == 'TRADE_DATE']
I get the error shown below. Any advice on how to display only the trades where the trade date equals the trade entry date? Both dates are in datetime64[ns] format.
For example:
Trade Date Trade Entry Date
2019-06-17 2019-06-17
2019-06-18 2019-06-19
The only trade I want displayed is the first one, since the trade date and trade entry date for the second one are different.
Thanks
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-31-5cdaf27939a8> in <module>
1 df['TRADE_ENTRY'] = pd.to_datetime(df['TRADE_ENTRY']).dt.date
2 df['TRADE_DATE'] = pd.to_datetime(df['TRADE_DATE']).dt.date
----> 3 Tradedate = ba[ba['TIMESTAMP'] <= '10.15'] & ba[ba['TRADE_ENTRY'] == 'TRADE_DATE']
4
5 #Trade date = trade entry date as filter
~\Anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self, other, axis, level, fill_value)
701
702 left, right = self.align(other, join="outer", level=level, copy=False)
--> 703 new_data = left._combine_frame(right, pass_op, fill_value)
704 return left._construct_result(new_data)
705
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _combine_frame(self, other, func, fill_value, level)
5298 if ops.should_series_dispatch(self, other, func):
5299 # iterate over columns
-> 5300 new_data = ops.dispatch_to_series(self, other, _arith_op)
5301 else:
5302 with np.errstate(all="ignore"):
~\Anaconda3\lib\site-packages\pandas\core\ops\__init__.py in dispatch_to_series(left, right, func, str_rep, axis)
414 raise NotImplementedError(right)
415
--> 416 new_data = expressions.evaluate(column_op, str_rep, left, right)
417 return new_data
418
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
210
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
119
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
123 return result
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b)
68 _store_test_result(False)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
72
~\Anaconda3\lib\site-packages\pandas\core\ops\__init__.py in column_op(a, b)
383
384 def column_op(a, b):
--> 385 return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
386
387 elif isinstance(right, ABCSeries) and axis == "columns":
~\Anaconda3\lib\site-packages\pandas\core\ops\__init__.py in <dictcomp>(.0)
383
384 def column_op(a, b):
--> 385 return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
386
387 elif isinstance(right, ABCSeries) and axis == "columns":
~\Anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
~\Anaconda3\lib\site-packages\pandas\core\ops\__init__.py in wrapper(self, other)
547 rvalues = extract_array(other, extract_numpy=True)
548
--> 549 res_values = logical_op(lvalues, rvalues, op)
550 return _construct_result(self, res_values, index=self.index, name=res_name)
551
~\Anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in logical_op(left, right, op)
349
350 if should_extension_dispatch(lvalues, rvalues):
--> 351 res_values = dispatch_to_extension_op(op, lvalues, rvalues)
352
353 else:
~\Anaconda3\lib\site-packages\pandas\core\ops\dispatch.py in dispatch_to_extension_op(op, left, right)
123 # The op calls will raise TypeError if the op is not defined
124 # on the ExtensionArray
--> 125 res_values = op(left, right)
126 return res_values
TypeError: unsupported operand type(s) for &: 'DatetimeArray' and 'DatetimeArray'
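Use this (wrap each condition in parentheses and combine the boolean masks inside a single indexer, rather than applying & to two already-filtered DataFrames). Note that comparing against the string 'TRADE_DATE' does not compare the two columns; to do that, put ba['TRADE_DATE'] on the right-hand side: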
Tradedate = ba[(ba['TIMESTAMP'] <= '10.15') & (ba['TRADE_ENTRY'] == 'TRADE_DATE')]
Related
df:
avg
count
date
val
prop
unit
distance
d-atmp
d-clouds
d-dewpoint
0.0786107
12
2014-10-03 00:00:00
22
atmp
(Deg C)
24829.6
24829.6
nan
nan
0.0786107
12
2014-10-03 00:00:00
0
clouds
(oktas)
22000.6
nan
22000.6
nan
0.0786107
12
2014-10-03 00:00:00
32
dewpoint
(Deg C)
21344.1
nan
nan
21344.1
0.0684246
6
2014-10-04 00:00:00
21.5
atmp
(Deg C)
26345.1
26345.1
nan
nan
cols = ['avg', 'date', 'count', 'd-atmp', 'd-cloud', 'd-dewpoint']
d = pd.pivot_table(x, index=cols, columns=['prop', 'unit'], values='val', aggfunc=max)
Ideal result:
date
countObs
avg
d-atmp
atmp (Deg C)
d-clouds
clouds (oktas)
d-dewpoint
dewpoint (Deg C)
2014-10-03 00:00:00
12
0.0786107
24829.6
22
22000.6
0
21344.1
32
2014-10-04 00:00:00
6
0.0684246
26345.1
21.5
nan
nan
nan
nan
Error
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call last) ~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in array_func(values) 1067 try:
-> 1068 result = self.grouper._cython_operation( 1069 "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_cython_operation(self, kind, values, how, axis, min_count, **kwargs)
998 ngroups = self.ngroups
--> 999 return cy_op.cython_operation( 1000 values=values,
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in cython_operation(self, values, axis, min_count, comp_ids, ngroups,
**kwargs)
659
--> 660 return self._cython_op_ndim_compat(
661 values,
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, **kwargs)
515
--> 516 return self._call_cython_op(
517 values,
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_call_cython_op(self, values, min_count, ngroups, comp_ids, mask, **kwargs)
561 out_shape = self._get_output_shape(ngroups, values)
--> 562 func, values = self.get_cython_func_and_vals(values, is_numeric)
563 out_dtype = self.get_out_dtype(values.dtype)
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in get_cython_func_and_vals(self, values, is_numeric)
204
--> 205 func = self._get_cython_function(kind, how, values.dtype, is_numeric)
206
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_get_cython_function(cls, kind, how, dtype, is_numeric)
169 # raise NotImplementedError here rather than TypeError later
--> 170 raise NotImplementedError(
171 f"function is not implemented for this dtype: "
NotImplementedError: function is not implemented for this dtype: [how->mean,dtype->object]
During handling of the above exception, another exception occurred:
AssertionError Traceback (most recent call last) <ipython-input-119-b64b487d2810> in <module>
5 # o
6 # cols += []
----> 7 d = pd.pivot_table(x, index=cols, columns=['osmcObsProperty', 'unit'], values='val') #, aggfunc=max #np.mean or max appear similar , dropna=False
8
9 d.reset_index(inplace=True)
~/.local/lib/python3.9/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
93 return table.__finalize__(data, method="pivot_table")
94
---> 95 table = __internal_pivot_table(
96 data,
97 values,
~/.local/lib/python3.9/site-packages/pandas/core/reshape/pivot.py in
__internal_pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
163
164 grouped = data.groupby(keys, observed=observed, sort=sort)
--> 165 agged = grouped.agg(aggfunc)
166 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
167 agged = agged.dropna(how="all")
~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
977
978 op = GroupByApply(self, func, args, kwargs)
--> 979 result = op.agg()
980 if not is_dict_like(func) and result is not None:
981 return result
~/.local/lib/python3.9/site-packages/pandas/core/apply.py in agg(self)
156
157 if isinstance(arg, str):
--> 158 return self.apply_str()
159
160 if is_dict_like(arg):
~/.local/lib/python3.9/site-packages/pandas/core/apply.py in apply_str(self)
505 elif self.axis != 0:
506 raise ValueError(f"Operation {f} does not support axis=1")
--> 507 return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)
508
509 def apply_multiple(self) -> FrameOrSeriesUnion:
~/.local/lib/python3.9/site-packages/pandas/core/apply.py in
_try_aggregate_string_function(self, obj, arg, *args, **kwargs)
575 if f is not None:
576 if callable(f):
--> 577 return f(*args, **kwargs)
578
579 # people may try to aggregate on a non-callable attribute
~/.local/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in mean(self, numeric_only) 1685 numeric_only = self._resolve_numeric_only(numeric_only) 1686
-> 1687 result = self._cython_agg_general( 1688 "mean", 1689 alt=lambda x: Series(x).mean(numeric_only=numeric_only),
~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in
_cython_agg_general(self, how, alt, numeric_only, min_count) 1080 # TypeError -> we may have an exception in trying to aggregate 1081 # continue and exclude the block
-> 1082 new_mgr = data.grouped_reduce(array_func, ignore_failures=True) 1083 1084 if len(new_mgr) < len(data):
~/.local/lib/python3.9/site-packages/pandas/core/internals/managers.py in grouped_reduce(self, func, ignore_failures) 1233 for sb in blk._split(): 1234 try:
-> 1235 applied = sb.apply(func) 1236 except (TypeError, NotImplementedError): 1237 if not ignore_failures:
~/.local/lib/python3.9/site-packages/pandas/core/internals/blocks.py in apply(self, func, **kwargs)
379 """
380 with np.errstate(all="ignore"):
--> 381 result = func(self.values, **kwargs)
382
383 return self._split_op_result(result)
~/.local/lib/python3.9/site-packages/pandas/core/groupby/generic.py in array_func(values) 1074 # try to python agg 1075
# TODO: shouldn't min_count matter?
-> 1076 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) 1077 1078 return result
~/.local/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in
_agg_py_fallback(self, values, ndim, alt) 1396 # should always be preserved by the implemented aggregations 1397 # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
-> 1398 res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True) 1399 1400 if isinstance(values, Categorical):
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in agg_series(self, obj, func, preserve_dtype) 1047 1048 else:
-> 1049 result = self._aggregate_series_fast(obj, func) 1050 1051 npvalues = lib.maybe_convert_objects(result, try_float=False)
~/.local/lib/python3.9/site-packages/pandas/core/groupby/ops.py in
_aggregate_series_fast(self, obj, func) 1072 ids = ids.take(indexer) 1073 sgrouper = libreduction.SeriesGrouper(obj, func, ids, ngroups)
-> 1074 result, _ = sgrouper.get_result() 1075 return result 1076
~/.local/lib/python3.9/site-packages/pandas/_libs/reduction.pyx in pandas._libs.reduction.SeriesGrouper.get_result()
AssertionError: `result` has not been initialized.
IIUC (if I understand correctly), you can use groupby followed by agg:
out = df.groupby('date', as_index=False).agg(max)
Output:
date
avg
count
val
prop
unit
distance
d-atmp
d-clouds
d-dewpoint
2014-10-03 00:00:00
0.0786107
12
32
dewpoint
(oktas)
24829.6
24829.6
22000.6
21344.1
2014-10-04 00:00:00
0.0684246
6
21.5
atmp
(Deg C)
26345.1
26345.1
nan
nan
You could pivot; then use groupby + max:
cols = ['avg', 'date', 'count', 'd-atmp', 'd-clouds', 'd-dewpoint']
tmp = df.pivot(index=cols, columns=['prop', 'unit'], values='val')
tmp.columns = tmp.columns.map(' '.join)
out = tmp.reset_index().groupby('date', as_index=False).max()\
[['date', 'count', 'avg', 'd-atmp', 'atmp (Deg C)', 'd-clouds',
'clouds (oktas)', 'd-dewpoint', 'dewpoint (Deg C)']]
Output:
date count avg d-atmp atmp (Deg C) d-clouds clouds (oktas) d-dewpoint dewpoint (Deg C)
0 2014-10-03 00:00:00 12 0.078611 24829.6 22.0 22000.6 0.0 21344.1 32.0
1 2014-10-04 00:00:00 6 0.068425 26345.1 21.5 NaN NaN NaN NaN
The hue feature is not working when I am using pairplot.
Here is my data frame:
Here is the code that doesn't work:
sns.pairplot(activities, hue="Day")
If I remove the hue option it works. Also if I change the hue to a numerical column (such as Distance) it works, but it is not working with the Day column for some reason. Here's the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_121/1783531066.py in <module>
----> 1 sns.pairplot(activities, hue="Day")
/opt/conda/lib/python3.7/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
/opt/conda/lib/python3.7/site-packages/seaborn/axisgrid.py in pairplot(data, hue, hue_order, palette, vars, x_vars, y_vars, kind, diag_kind, markers, height, aspect, corner, dropna, plot_kws, diag_kws, grid_kws, size)
2020 elif diag_kind == "kde":
2021 diag_kws.setdefault("fill", True)
-> 2022 grid.map_diag(kdeplot, **diag_kws)
2023
2024 # Maybe plot on the off-diagonals
/opt/conda/lib/python3.7/site-packages/seaborn/axisgrid.py in map_diag(self, func, **kwargs)
1400 plot_kwargs.setdefault("hue_order", self._hue_order)
1401 plot_kwargs.setdefault("palette", self._orig_palette)
-> 1402 func(x=vector, **plot_kwargs)
1403 self._clean_axis(ax)
1404
/opt/conda/lib/python3.7/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py in kdeplot(x, y, shade, vertical, kernel, bw, gridsize, cut, clip, legend, cumulative, shade_lowest, cbar, cbar_ax, cbar_kws, ax, weights, hue, palette, hue_order, hue_norm, multiple, common_norm, common_grid, levels, thresh, bw_method, bw_adjust, log_scale, color, fill, data, data2, **kwargs)
1733 legend=legend,
1734 estimate_kws=estimate_kws,
-> 1735 **plot_kws,
1736 )
1737
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py in plot_univariate_density(self, multiple, common_norm, common_grid, fill, legend, estimate_kws, **plot_kws)
914 common_grid,
915 estimate_kws,
--> 916 log_scale,
917 )
918
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py in _compute_univariate_density(self, data_variable, common_norm, common_grid, estimate_kws, log_scale)
314
315 # Estimate the density of observations at this level
--> 316 density, support = estimator(observations, weights=weights)
317
318 if log_scale:
/opt/conda/lib/python3.7/site-packages/seaborn/_statistics.py in __call__(self, x1, x2, weights)
185 """Fit and evaluate on univariate or bivariate data."""
186 if x2 is None:
--> 187 return self._eval_univariate(x1, weights)
188 else:
189 return self._eval_bivariate(x1, x2, weights)
/opt/conda/lib/python3.7/site-packages/seaborn/_statistics.py in _eval_univariate(self, x, weights)
144 support = self.support
145 if support is None:
--> 146 support = self.define_support(x, cache=False)
147
148 kde = self._fit(x, weights)
/opt/conda/lib/python3.7/site-packages/seaborn/_statistics.py in define_support(self, x1, x2, weights, cache)
117 """Create the evaluation grid for a given data set."""
118 if x2 is None:
--> 119 support = self._define_support_univariate(x1, weights)
120 else:
121 support = self._define_support_bivariate(x1, x2, weights)
/opt/conda/lib/python3.7/site-packages/seaborn/_statistics.py in _define_support_univariate(self, x, weights)
89 def _define_support_univariate(self, x, weights):
90 """Create a 1D grid of evaluation points."""
---> 91 kde = self._fit(x, weights)
92 bw = np.sqrt(kde.covariance.squeeze())
93 grid = self._define_support_grid(
/opt/conda/lib/python3.7/site-packages/seaborn/_statistics.py in _fit(self, fit_data, weights)
135 fit_kws["weights"] = weights
136
--> 137 kde = stats.gaussian_kde(fit_data, **fit_kws)
138 kde.set_bandwidth(kde.factor * self.bw_adjust)
139
/opt/conda/lib/python3.7/site-packages/scipy/stats/kde.py in __init__(self, dataset, bw_method, weights)
204 self._neff = 1/sum(self._weights**2)
205
--> 206 self.set_bandwidth(bw_method=bw_method)
207
208 def evaluate(self, points):
/opt/conda/lib/python3.7/site-packages/scipy/stats/kde.py in set_bandwidth(self, bw_method)
552 raise ValueError(msg)
553
--> 554 self._compute_covariance()
555
556 def _compute_covariance(self):
/opt/conda/lib/python3.7/site-packages/scipy/stats/kde.py in _compute_covariance(self)
564 bias=False,
565 aweights=self.weights))
--> 566 self._data_inv_cov = linalg.inv(self._data_covariance)
567
568 self.covariance = self._data_covariance * self.factor**2
/opt/conda/lib/python3.7/site-packages/scipy/linalg/basic.py in inv(a, overwrite_a, check_finite)
937
938 """
--> 939 a1 = _asarray_validated(a, check_finite=check_finite)
940 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
941 raise ValueError('expected square matrix')
/opt/conda/lib/python3.7/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
294 if not objects_ok:
295 if a.dtype is np.dtype('O'):
--> 296 raise ValueError('object arrays are not supported')
297 if as_inexact:
298 if not np.issubdtype(a.dtype, np.inexact):
ValueError: object arrays are not supported
Any ideas why hue isn't working?
You can see the error:
ValueError: object arrays are not supported
This means the data being plotted needs to be numeric (not object dtype).
I am getting the following error:
--------------------------------------------------------------------------- TypeError Traceback (most recent call
last)
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py
in na_arithmetic_op(left, right, op, is_cmp)
142 try:
--> 143 result = expressions.evaluate(op, left, right)
144 except TypeError:
~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py
in evaluate(op, a, b, use_numexpr)
232 if use_numexpr:
--> 233 return _evaluate(op, op_str, a, b) # type: ignore
234 return _evaluate_standard(op, op_str, a, b)
~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py
in _evaluate_numexpr(op, op_str, a, b)
118 if result is None:
--> 119 result = _evaluate_standard(op, op_str, a, b)
120
~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py
in _evaluate_standard(op, op_str, a, b)
67 with np.errstate(all="ignore"):
---> 68 return op(a, b)
69
TypeError: unsupported operand type(s) for -: 'datetime.time' and
'builtin_function_or_method'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last) in
----> 1 NVAX['snap_ts'].dt.time - datetime.strptime('14:30:00', '%H:%M:%S').time
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/common.py in
new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/init.py in
wrapper(left, right)
341 lvalues = extract_array(left, extract_numpy=True)
342 rvalues = extract_array(right, extract_numpy=True)
--> 343 result = arithmetic_op(lvalues, rvalues, op)
344
345 return left._construct_result(result, name=res_name)
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py
in arithmetic_op(left, right, op)
188 else:
189 with np.errstate(all="ignore"):
--> 190 res_values = na_arithmetic_op(lvalues, rvalues, op)
191
192 return res_values
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py
in na_arithmetic_op(left, right, op, is_cmp)
148 # will handle complex numbers incorrectly, see GH#32047
149 raise
--> 150 result = masked_arith_op(left, right, op)
151
152 if is_cmp and (is_scalar(result) or result is NotImplemented):
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py
in masked_arith_op(x, y, op)
94 else:
95 if not is_scalar(y):
---> 96 raise TypeError(
97 f"Cannot broadcast np.ndarray with operand of type { type(y) }"
98 )
TypeError: Cannot broadcast np.ndarray with operand of type <class
'builtin_function_or_method'>
when performing this:
df['snap_ts'].dt.time - datetime.strptime('14:30:00', '%H:%M:%S')
df['snap_ts'].dt.time is equivalent to this:
0 14:30:10
1 14:30:20
2 14:30:30
3 14:30:40
4 14:30:50
...
157763 19:59:20
157764 19:59:30
157765 19:59:40
157766 19:59:50
157767 20:00:00
Name: snap_ts, Length: 157768, dtype: object
and it's a pandas.core.series.Series
What am I doing wrong?
Is this what you are looking for?
df['snap_ts'].sub(pd.Timedelta('14:30:00')).dt.time
I am trying to calculate the log returns on two time series. I have imported numpy as np and written the following code:
# Calculate the Log Returns
stocks_data_log = np.log(stocks_data / stocks_data.shift(1))
stocks_data_log.head()
Unfortunately I am getting the following (never ending) error message.
# Calculate the Log Returns
stocks_data_log = np.log(stocks_data / stocks_data.shift(1))
stocks_data_log.head()
1
# Calculate the Log Returns
2
3
stocks_data_log = np.log(stocks_data / stocks_data.shift(1))
4
stocks_data_log.head()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
148 try:
--> 149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
TypeError: unsupported operand type(s) for /: 'str' and 'float'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-62-5c8fcb5cdb27> in <module>
1 # Calculate the Log Returns
2
----> 3 stocks_data_log = np.log(stocks_data / stocks_data.shift(1))
4 stocks_data_log.head()
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self, other, axis, level, fill_value)
701
702 left, right = self.align(other, join="outer", level=level, copy=False)
--> 703 new_data = left._combine_frame(right, pass_op, fill_value)
704 return left._construct_result(new_data)
705
~\anaconda3\lib\site-packages\pandas\core\frame.py in _combine_frame(self, other, func, fill_value, level)
5298 if ops.should_series_dispatch(self, other, func):
5299 # iterate over columns
-> 5300 new_data = ops.dispatch_to_series(self, other, _arith_op)
5301 else:
5302 with np.errstate(all="ignore"):
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in dispatch_to_series(left, right, func, str_rep, axis)
414 raise NotImplementedError(right)
415
--> 416 new_data = expressions.evaluate(column_op, str_rep, left, right)
417 return new_data
418
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
210
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
119
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
123 return result
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b)
68 _store_test_result(False)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
72
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in column_op(a, b)
383
384 def column_op(a, b):
--> 385 return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
386
387 elif isinstance(right, ABCSeries) and axis == "columns":
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in <dictcomp>(.0)
383
384 def column_op(a, b):
--> 385 return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
386
387 elif isinstance(right, ABCSeries) and axis == "columns":
~\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in wrapper(left, right)
498 lvalues = extract_array(left, extract_numpy=True)
499 rvalues = extract_array(right, extract_numpy=True)
--> 500 result = arithmetic_op(lvalues, rvalues, op, str_rep)
501
502 return _construct_result(left, result, index=left.index, name=res_name)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op, str_rep)
195 else:
196 with np.errstate(all="ignore"):
--> 197 res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
198
199 return res_values
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
--> 151 result = masked_arith_op(left, right, op)
152
153 return missing.dispatch_fill_zeros(op, left, right, result)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in masked_arith_op(x, y, op)
92 if mask.any():
93 with np.errstate(all="ignore"):
---> 94 result[mask] = op(xrav[mask], yrav[mask])
95
96 else:
TypeError: unsupported operand type(s) for /: 'str' and 'str'
Does anyone know what I am doing wrong?
Here is the whole script:
import quandl
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib
# Load ABB Data
ABB = pd.read_csv('C:/Users/Startklar/AppData/Roaming/Microsoft/Windows/Start Menu/Programs/Anaconda3 (64-bit)/ABB.csv',sep=";",names=['Datum','ABBN_Preis'])
Roche = pd.read_csv('C:/Users/Startklar/AppData/Roaming/Microsoft/Windows/Start Menu/Programs/Anaconda3 (64-bit)/Roche.csv',sep=";", names=['ROG_Datum','ROG_Preis'])
Roche.head(10)
# Concatenate the different dataframes
stocks_data = pd.concat([ABB,Roche], axis=1)
stocks_data.head()
stocks_data['Datum'] = pd.to_datetime(stocks_data['Datum'])
stocks_data.head()
# Kick out the Dates
stocks_data = stocks_data.loc[:,['Datum','ABBN_Preis','ROG_Preis']]
stocks_data.head()
# Compute Log Returns
stocks_data_log = np.log(stocks_data / stocks_data.shift(1))
stocks_data_log.head()
According to the traceback, you are trying to perform the / operation on a column that contains strings (the final error is "unsupported operand type(s) for /: 'str' and 'str'"). It is not clear from the post what stocks_data contains.
I'm getting a floating point error on a simple time series in pandas. I'm trying to do shift operations... but this also happens with the window functions like rolling_mean.
EDIT: For some more info... I actually tried to build this from source yesterday, prior to the error. I'm not sure whether the error would have occurred before the build attempt, as I'd never messed around with these functions.
EDIT2: I thought I'd fixed this, but when I run this inside python it works, but when it's in ipython I get the error.
EDIT3: Numpy 1.7.0, iPython 0.13, pandas 0.7.3
In [35]: ts = Series(np.arange(12), index=DateRange('1/1/2000', periods=12, freq='T'))
In [36]: ts.shift(0)
Out[36]:
2000-01-03 0
2000-01-04 1
2000-01-05 2
2000-01-06 3
2000-01-07 4
2000-01-10 5
2000-01-11 6
2000-01-12 7
2000-01-13 8
2000-01-14 9
2000-01-17 10
2000-01-18 11
In [37]: ts.shift(1)
Out[37]: ---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
/Users/trenthauck/Repository/work/SQS/analysis/campaign/tv2/data/<ipython-input-37-2b7cec97d440> in <module>()
----> 1 ts.shift(1)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in pretty(self, obj)
353 if callable(obj_class._repr_pretty_):
354 return obj_class._repr_pretty_(obj, self, cycle)
--> 355 return _default_pprint(obj, self, cycle)
356 finally:
357 self.end_group()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
473 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
474 # A user-provided repr.
--> 475 p.text(repr(obj))
476 return
477 p.begin_group(1, '<')
/Library/Python/2.7/site-packages/pandas/core/series.pyc in __repr__(self)
696 result = self._get_repr(print_header=True,
697 length=len(self) > 50,
--> 698 name=True)
699 else:
700 result = '%s' % ndarray.__repr__(self)
/Library/Python/2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, print_header, length, na_rep, float_format)
756 length=length, na_rep=na_rep,
757 float_format=float_format)
--> 758 return formatter.to_string()
759
760 def __str__(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in to_string(self)
99
100 fmt_index, have_header = self._get_formatted_index()
--> 101 fmt_values = self._get_formatted_values()
102
103 maxlen = max(len(x) for x in fmt_index)
/Library/Python/2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self)
90 return format_array(self.series.values, None,
91 float_format=self.float_format,
---> 92 na_rep=self.na_rep)
93
94 def to_string(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
431 justify=justify)
432
--> 433 return fmt_obj.get_result()
434
435
/Library/Python/2.7/site-packages/pandas/core/format.pyc in get_result(self)
528
529 # this is pretty arbitrary for now
--> 530 has_large_values = (np.abs(self.values) > 1e8).any()
531
532 if too_long and has_large_values:
FloatingPointError: invalid value encountered in absolute
In [38]: ts.shift(-1)
Out[38]: ---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
/Users/myusername/Repository/work/SQS/analysis/campaign/tv2/data/<ipython-input-38-314ec815a7c5> in <module>()
----> 1 ts.shift(-1)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in pretty(self, obj)
353 if callable(obj_class._repr_pretty_):
354 return obj_class._repr_pretty_(obj, self, cycle)
--> 355 return _default_pprint(obj, self, cycle)
356 finally:
357 self.end_group()
/Library/Python/2.7/site-packages/ipython-0.13.dev-py2.7.egg/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
473 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
474 # A user-provided repr.
--> 475 p.text(repr(obj))
476 return
477 p.begin_group(1, '<')
/Library/Python/2.7/site-packages/pandas/core/series.pyc in __repr__(self)
696 result = self._get_repr(print_header=True,
697 length=len(self) > 50,
--> 698 name=True)
699 else:
700 result = '%s' % ndarray.__repr__(self)
/Library/Python/2.7/site-packages/pandas/core/series.pyc in _get_repr(self, name, print_header, length, na_rep, float_format)
756 length=length, na_rep=na_rep,
757 float_format=float_format)
--> 758 return formatter.to_string()
759
760 def __str__(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in to_string(self)
99
100 fmt_index, have_header = self._get_formatted_index()
--> 101 fmt_values = self._get_formatted_values()
102
103 maxlen = max(len(x) for x in fmt_index)
/Library/Python/2.7/site-packages/pandas/core/format.pyc in _get_formatted_values(self)
90 return format_array(self.series.values, None,
91 float_format=self.float_format,
---> 92 na_rep=self.na_rep)
93
94 def to_string(self):
/Library/Python/2.7/site-packages/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
431 justify=justify)
432
--> 433 return fmt_obj.get_result()
434
435
/Library/Python/2.7/site-packages/pandas/core/format.pyc in get_result(self)
528
529 # this is pretty arbitrary for now
--> 530 has_large_values = (np.abs(self.values) > 1e8).any()
531
532 if too_long and has_large_values:
FloatingPointError: invalid value encountered in absolute
I would add this as a comment, but I don't have the privilege to do that yet :)
It works for me in python and iPython 0.12; iPython 0.13 is still in development (see http://ipython.org/ ), and, since the errors you're getting seem to involve formatting in the iPython 0.13 egg, I suspect that might be the cause. Try with iPython 0.12 instead-- if it works, file a bug report with iPython and then probably stick with 0.12 until 0.13 is (more) stable.