Pandas ticker to ohlc - python
`rows` is a list of dicts returned from MySQL.
rows example
[{'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605515L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605549L, 'price': Decimal('1081.55000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605547L, 'price': Decimal('1081.33000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605545L, 'price': Decimal('1081.30000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605543L, 'price': Decimal('1081.29000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605541L, 'price': Decimal('1080.46000000'), 'type': 1, 'amount': Decimal('26.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605517L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 22), 'tid': 648605601L, 'price': Decimal('1079.69000000'), 'type': -1, 'amount': Decimal('70.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 25), 'tid': 648605686L, 'price': Decimal('1079.72000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605765L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605753L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('106.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605751L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605749L, 'price': Decimal('1079.67000000'), 'type': -1, 'amount': Decimal('430.00000000')}, 
{'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605747L, 'price': Decimal('1079.70000000'), 'type': -1, 'amount': Decimal('66.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605745L, 'price': Decimal('1079.74000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605785L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605774L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605771L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605827L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605842L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 32), 'tid': 648605973L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606114L, 'price': Decimal('1079.44000000'), 'type': 1, 'amount': Decimal('24.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606116L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 42), 'tid': 648606258L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('56.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 45), 'tid': 648606345L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 46), 'tid': 648606392L, 'price': Decimal('1079.69000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': 
datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606418L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606420L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('36.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606422L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('94.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606499L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606478L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606476L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('34.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606474L, 'price': Decimal('1079.55000000'), 'type': -1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606666L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606650L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606648L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 1), 'tid': 648606820L, 'price': Decimal('1079.03000000'), 'type': -1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606825L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('30.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606836L, 'price': Decimal('1079.02000000'), 'type': -1, 'amount': Decimal('22.00000000')}, {'date': 
datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606945L, 'price': Decimal('1078.58000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606943L, 'price': Decimal('1078.61000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606941L, 'price': Decimal('1078.63000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606939L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606926L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('428.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606984L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606982L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606971L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606957L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('74.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606955L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606953L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606951L, 'price': Decimal('1078.42000000'), 'type': -1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606992L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': 
datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606995L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648607023L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 8), 'tid': 648607047L, 'price': Decimal('1078.86000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607113L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607115L, 'price': Decimal('1078.03000000'), 'type': -1, 'amount': Decimal('148.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 12), 'tid': 648607192L, 'price': Decimal('1079.00000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607218L, 'price': Decimal('1078.99000000'), 'type': 1, 'amount': Decimal('98.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607220L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607222L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('342.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607224L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('512.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607250L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607252L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607254L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': Decimal('106.00000000')}, {'date': 
datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607256L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607431L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607429L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607427L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 23), 'tid': 648607518L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 24), 'tid': 648607544L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('344.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 25), 'tid': 648607593L, 'price': Decimal('1078.79000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607631L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('430.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607623L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('18.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607621L, 'price': Decimal('1078.79000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 29), 'tid': 648607695L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('776.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607803L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607805L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': 
datetime.datetime(2017, 3, 21, 13, 28, 36), 'tid': 648607905L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 37), 'tid': 648607940L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 42), 'tid': 648608110L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608211L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608213L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608534L, 'price': Decimal('1080.29000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608536L, 'price': Decimal('1080.30000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 2), 'tid': 648608683L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 3), 'tid': 648608733L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('360.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 7), 'tid': 648608838L, 'price': Decimal('1080.90000000'), 'type': 1, 'amount': Decimal('82.00000000')}]
If I don't call set_index, I get a TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
# Build a DataFrame from the MySQL rows, index it by the trade timestamp and
# resample into 1-minute bars (OHLC of price, summed amount).
if rows:
df = pd.DataFrame(rows)
print df.head()
# TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
# (that TypeError is what resample raises on the default RangeIndex, hence
# the set_index("date") call below)
df = df.set_index("date")
print df.head()
# NOTE(review): 'price' and 'amount' in the rows example are Decimal objects,
# so these columns are presumably object dtype rather than numeric, which
# would explain "No numeric types to aggregate" -- confirm with df.dtypes.
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
print resample_data
Result :
Connected to pydev debugger (build 162.1967.10)
amount date price tid type
0 2.00000000 2017-03-21 11:15:12 1075.83000000 648370156 -1
1 10.00000000 2017-03-21 11:15:15 1076.00000000 648370241 -1
2 10.00000000 2017-03-21 11:15:17 1075.83000000 648370297 -1
3 10.00000000 2017-03-21 11:15:17 1075.83000000 648370311 1
4 8.00000000 2017-03-21 11:15:19 1076.13000000 648370370 1
amount price tid type
date
2017-03-21 11:15:12 2.00000000 1075.83000000 648370156 -1
2017-03-21 11:15:15 10.00000000 1076.00000000 648370241 -1
2017-03-21 11:15:17 10.00000000 1075.83000000 648370297 -1
2017-03-21 11:15:17 10.00000000 1075.83000000 648370311 1
2017-03-21 11:15:19 8.00000000 1076.13000000 648370370 1
/Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py:45: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...)..apply(<func>)
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1580, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 964, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py", line 45, in <module>
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/generic.py", line 4216, in resample
limit=limit)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 582, in _maybe_process_deprecations
r = r.aggregate(how)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 320, in aggregate
result, how = self._aggregate(arg, *args, **kwargs)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 549, in _aggregate
result = _agg(arg, _agg_1dim)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 500, in _agg
result[fname] = func(fname, agg_how)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 483, in _agg_1dim
return colg.aggregate(how, _level=(_level or 0) + 1)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 2652, in aggregate
return getattr(self, func_or_funcs)(*args, **kwargs)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in ohlc
lambda x: x._cython_agg_general('ohlc'))
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 3103, in _apply_to_column_groupbys
return func(self)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in <lambda>
lambda x: x._cython_agg_general('ohlc'))
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 808, in _cython_agg_general
raise DataError('No numeric types to aggregate')
pandas.core.base.DataError: No numeric types to aggregate
Process finished with exit code 1
I am new to pandas.
How do I solve this error?
Also, if a minute has no trades, I want to fill the NaN values of that
minute's OHLC with the previous close price. How can I do that?
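You need to set an index using your dates, and the columns you aggregate must be numeric. In your rows, 'price' and 'amount' are Decimal objects, so pandas stores them as object dtype and raises "No numeric types to aggregate"; convert them first, e.g. df[['price', 'amount']] = df[['price', 'amount']].astype(float).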
Code:
# Minimal reproducible example: turn a tick table into 1-minute OHLC bars
# for 'price' together with the summed 'amount' per bar.
from io import StringIO

import pandas as pd

# Sample ticks. read_csv parses 'amount' and 'price' as floats (numeric
# columns are required for the aggregation) and 'date' as datetimes.
df = pd.read_csv(StringIO(
u"""amount date price tid type
6.00000000 2017-03-21t10:46:32 1059.26000000 648313975 -1
4.00000000 2017-03-21t10:46:37 1059.42000000 648314094 -1
2.00000000 2017-03-21t10:46:37 1059.42000000 648314096 -1
2.00000000 2017-03-21t10:46:41 1059.26000000 648314176 -1
32.00000000 2017-03-21t10:46:41 1059.26000000 648314189 -1
"""), sep=r'\s+', parse_dates=['date'])
print(df)

# resample() only works on a datetime-like index, so index by 'date' first.
# The `how=` keyword of resample() is deprecated (see the FutureWarning in the
# question) and was removed in later pandas releases; pass the per-column
# aggregations to .agg() on the resampler instead.
resample_data = df.set_index('date').resample("1min").agg(
    {"price": "ohlc", "amount": "sum"})
print(resample_data)
Results:
amount date price tid type
0 6.0 2017-03-21 10:46:32 1059.26 648313975 -1
1 4.0 2017-03-21 10:46:37 1059.42 648314094 -1
2 2.0 2017-03-21 10:46:37 1059.42 648314096 -1
3 2.0 2017-03-21 10:46:41 1059.26 648314176 -1
4 32.0 2017-03-21 10:46:41 1059.26 648314189 -1
price amount
open high low close amount
date
2017-03-21 10:46:00 1059.26 1059.42 1059.26 1059.26 46.0
Related
How to change all datetime objects in a list to standard YYYY-MM-DD HH:MM:SS
When I query MySQL with Python and the query has datetime fields then I get this list as a result. [{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}] How do I either get the query to return the date fields in YYYY-MM-DD HH:MM:SS format? Or how do I convert them in the returned list. When I try to change them by enumerating over the results python throw as error that the dictionary has changed.
The datetime.datetime() objects you're getting are the standard representation of these objects - if you were expecting strings instead, you could simply convert them with value.strftime('%Y-%m-%d %H:%M:%S'), but keep in mind that the datetime object is a more flexible way of keeping the data around. I'd recommend only formatting the date in a specific way if you're writing it to the screen or a file format that expects a string. Example: data = [{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}] for rec in data: rec['date_str'] = rec['_cdate'].strftime('%Y-%m-%d %H:%M:%S') That would add a 'date_str' field to every record with the format you require. Of course, you could also modify it to overwrite the original value.
How to group items by month and year using itertools.groupby()
Problem: I am trying to take a sorted list and group it based on the month and year but having trouble returning the grouped value correctly... Assuming this data, we have a title and date/time list that has been ordered by datetime lst = [ {'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)}, {'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)}, {'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)}, {'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)}, {'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)} ] Then to group it, run it through itertools.groupby() from itertools import groupby def loop_tupe(): diction = {} for key,group in groupby(lst, key=lambda x: (x['date_time'].month, x['date_time'].year)): for element in group: append_value(diction, key, element) return diction After grouping it by the month and year the returned result looks like { (3, 2020): {'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)}, (10, 2020): [ {'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)} ], (11, 2020): {'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)}, (1, 2021): {'title': '18 January 2021', 
'date_time': datetime.datetime(2021, 1, 18, 20, 0)}, (3, 2021): {'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)} } It has been grouped correctly, however the dates are within a tuple whereas I would need them as one "value", and while it's in this format I am unable to loop over it in the way I would with the original list. I realise it has something to do with the way I'm using the anonymous function within the groupby() (and maybe how the return result is created perhaps?) but I'm unsure how else to apply a month and year grouping within it. Question: What can I do to group my original data by month & year while also keeping its format relatively similar to the list going in? Edit The append_value function that I'm using def append_value(dict_obj, key, value): if key in dict_obj: if not isinstance(dict_obj[key], list): dict_obj[key] = [dict_obj[key]] dict_obj[key].append(value) else: dict_obj[key] = value Edit 2 So far this is the closest I'm getting to a solution. I have changed the function used in groupby to take the datetime and change it into a string to be compared. 
(I've left the print in there to visualise) def loop_str(to_sort): output={} for key,group in groupby(to_sort, key=lambda item: item['date_time'].strftime('%B %Y')): for element in group: append_value(output,key,element) return output Doing so gives me this output { 'March 2020': { 'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0) }, 'October 2020': [ {'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)} ], 'November 2020': { 'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27) }, 'January 2021': { 'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0) }, 'March 2021': { 'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0) } } This is closer to what I need however unless I'm not seeing something it seems that this output could be a mix of dicts and lists which will be more difficult to loop over within a django template?
You can make the groupby key the string you want by formatting the date. Then you can just use it in a dict comprehension. It is easier to create the data structure if the values are consistently lists. It will probably also be easier to use it. from itertools import groupby import datetime lst = [ {'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)}, {'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)}, {'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)}, {'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)}, {'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)} ] groups = groupby(lst, key=lambda x: (x['date_time'].strftime("%B %Y"))) {k: list(g) for k, g in groups} Result: {'March 2020': [{'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)}], 'October 2020': [{'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)}, {'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)}, {'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)}, {'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)}, {'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)}], 'November 2020': [{'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)}], 'January 2021': [{'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0)}], 'March 2021': [{'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)}]}
Normalize JSON API data to columns
I'm trying to get data from our Hubspot CRM database and convert it to a dataframe using pandas. I'm still a beginner in python, but I can't get json_normalize to work. The output from the database is i JSON format like this: {'archived': False, 'archived_at': None, 'associations': None, 'created_at': datetime.datetime(2019, 12, 21, 17, 56, 24, 739000, tzinfo=tzutc()), 'id': 'xxx', 'properties': {'createdate': '2019-12-21T17:56:24.739Z', 'email': 'xxxxx#xxxxx.com', 'firstname': 'John', 'hs_object_id': 'xxx', 'lastmodifieddate': '2020-04-22T04:37:40.274Z', 'lastname': 'Hansen'}, 'updated_at': datetime.datetime(2020, 4, 22, 4, 37, 40, 274000, tzinfo=tzutc())}, {'archived': False, 'archived_at': None, 'associations': None, 'created_at': datetime.datetime(2019, 12, 21, 17, 52, 38, 485000, tzinfo=tzutc()), 'id': 'bbb', 'properties': {'createdate': '2019-12-21T17:52:38.485Z', 'email': 'bbb#bbb.dk', 'firstname': 'John2', 'hs_object_id': 'bbb', 'lastmodifieddate': '2020-05-19T07:18:28.384Z', 'lastname': 'Hansen2'}, 'updated_at': datetime.datetime(2020, 5, 19, 7, 18, 28, 384000, tzinfo=tzutc())}, {'archived': False, 'archived_at': None, 'associations': None, etc. Trying to put it into a dataframe using this code: import hubspot import pandas as pd import json from pandas.io.json import json_normalize import os client = hubspot.Client.create(api_key='################') all_contacts = contacts_client = client.crm.contacts.get_all() df=pd.io.json.json_normalize(all_contacts,'properties') df.head df.to_csv ('All contacts.csv') But i keep getting an error that i can't resolve. I have also tried the pd.dataframe(all_contacts) and pf.dataframe.from_dict(all_contacts)
The all_contacts variable is a list of dictionary-like elements. So to create the dataframe I have used list comprehension to create a tuple that only contains the 'properties' for each dictionary-like element. import datetime import pandas as pd from dateutil.tz import tzutc data = ({'archived': False, 'archived_at': None, 'associations': None, 'created_at': datetime.datetime(2019, 12, 21, 17, 56, 24, 739000, tzinfo=tzutc()), 'id': 'xxx', 'properties': {'createdate': '2019-12-21T17:56:24.739Z', 'email': 'xxxxx#xxxxx.com', 'firstname': 'John', 'hs_object_id': 'xxx', 'lastmodifieddate': '2020-04-22T04:37:40.274Z', 'lastname': 'Hansen'}, 'updated_at': datetime.datetime(2020, 4, 22, 4, 37, 40, 274000, tzinfo=tzutc())}, {'archived': False, 'archived_at': None, 'associations': None, 'created_at': datetime.datetime(2019, 12, 21, 17, 52, 38, 485000, tzinfo=tzutc()), 'id': 'bbb', 'properties': { 'createdate': '2019-12-21T17:52:38.485Z', 'email': 'bbb#bbb.dk', 'firstname': 'John2', 'hs_object_id': 'bbb', 'lastmodifieddate': '2020-05-19T07:18:28.384Z', 'lastname': 'Hansen2'}, 'updated_at': datetime.datetime(2020, 5, 19, 7, 18, 28, 384000, tzinfo=tzutc())}) df = pd.DataFrame([row['properties'] for row in data]) print(df) OUTPUT: createdate email ... lastmodifieddate lastname 0 2019-12-21T17:56:24.739Z xxxxx#xxxxx.com ... 2020-04-22T04:37:40.274Z Hansen 1 2019-12-21T17:52:38.485Z bbb#bbb.dk ... 2020-05-19T07:18:28.384Z Hansen2 [2 rows x 6 columns]
How to calculate the hourly average of disk_available per hostname in Python
result data : <QuerySet [{'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 354}, {'disk_available': 27, 'hostname': '2', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 273}, {'disk_available': 19, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 12}, {'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1945}, {'disk_available': 19, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 53}, {'disk_available': 1, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 27, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 291}]>
SocketClient.objects.annotate(day=TruncDay('create')).values('day').annotate(c=Count('id')).values('day', 'disk_available', 'hostname', 'c').order_by('hostname') . From the results above, I want to display the average disk_available for each hostname, per hour.
Grouping data on year
mydata = [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'}, {'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'}, {'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'}, {'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'}, {'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'}, {'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'}, {'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'}, {'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'}, {'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'}, {'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'}, {'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'}, {'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'}, {'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'}, {'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'}, {'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'}, {'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'}, {'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'}, {'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'}, {'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'}, {'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'}, {'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'}, {'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'},] In 'mydata', I get list of sequential monthly data. I wrote some code to group them on year. 
partial_req_data = dict([(k,[f for f in v]) for k,v in itertools.groupby(mydata, key=lambda x : x.get('year'))]) Now I further need some efficient code to fill the missing months with {}, i.e. empty dict. There are bad ways to do that, but am looking for good ones. required_data = {"2009": [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009' }, {'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'}, {'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'}, {'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'}, {'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'}, {'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'}, {'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'}, {'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'}, {'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'}, {}, {}, {}], "2008": [{'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'}, {'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'}, {'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'}, {'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'}, {'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'}, {'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'}, {'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'}, {'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'}, {'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'}, {'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'}, {'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'}, {'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},] "2007": [{}, {}, {}, {}, {}, {}, {}, {}, {}, 
{}, {}, {'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'}] }
import datetime from itertools import groupby from pprint import pprint required_data={} for k,g in groupby(mydata,key=lambda x: x.get('year')): partial={} for datum in g: partial[datum.get('date').month]=datum required_data[k]=[partial.get(m,{}) for m in range(1,13)] pprint(required_data) For each year k, partial is a dict whose keys are months. The trick is to use partial.get(m,{}) since this will return the datum when it exists, or {} when it does not. Note that itertools.groupby only groups consecutive items, so this relies on mydata already being ordered by year (as it is in the question); otherwise, sort it by 'year' first.