Pandas ticker to ohlc - python

rows is a list of dict from mysql.
rows example
[{'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605515L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605549L, 'price': Decimal('1081.55000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605547L, 'price': Decimal('1081.33000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605545L, 'price': Decimal('1081.30000000'), 'type': 1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605543L, 'price': Decimal('1081.29000000'), 'type': 1, 'amount': Decimal('20.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605541L, 'price': Decimal('1080.46000000'), 'type': 1, 'amount': Decimal('26.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 20), 'tid': 648605517L, 'price': Decimal('1080.04000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 22), 'tid': 648605601L, 'price': Decimal('1079.69000000'), 'type': -1, 'amount': Decimal('70.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 25), 'tid': 648605686L, 'price': Decimal('1079.72000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605765L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605753L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('106.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605751L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605749L, 'price': Decimal('1079.67000000'), 'type': -1, 'amount': Decimal('430.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605747L, 'price': Decimal('1079.70000000'), 'type': -1, 'amount': Decimal('66.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 26), 'tid': 648605745L, 'price': Decimal('1079.74000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605785L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605774L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 27), 'tid': 648605771L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605827L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 28), 'tid': 648605842L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 32), 'tid': 648605973L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606114L, 'price': Decimal('1079.44000000'), 'type': 1, 'amount': Decimal('24.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 37), 'tid': 648606116L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 42), 'tid': 648606258L, 'price': Decimal('1079.45000000'), 'type': 1, 'amount': Decimal('56.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 45), 'tid': 648606345L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 46), 'tid': 648606392L, 'price': Decimal('1079.69000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606418L, 'price': Decimal('1079.60000000'), 'type': -1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606420L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('36.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 48), 'tid': 648606422L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('94.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606499L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('80.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606478L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606476L, 'price': Decimal('1079.31000000'), 'type': -1, 'amount': Decimal('34.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 50), 'tid': 648606474L, 'price': Decimal('1079.55000000'), 'type': -1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606666L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606650L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 27, 55), 'tid': 648606648L, 'price': Decimal('1079.17000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 1), 'tid': 648606820L, 'price': Decimal('1079.03000000'), 'type': -1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606825L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('30.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 2), 'tid': 648606836L, 'price': Decimal('1079.02000000'), 'type': -1, 'amount': Decimal('22.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606945L, 'price': Decimal('1078.58000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606943L, 'price': Decimal('1078.61000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606941L, 'price': Decimal('1078.63000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606939L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 5), 'tid': 648606926L, 'price': Decimal('1078.88000000'), 'type': -1, 'amount': Decimal('428.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606984L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606982L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606971L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606957L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('74.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606955L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606953L, 'price': Decimal('1078.15000000'), 'type': -1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 6), 'tid': 648606951L, 'price': Decimal('1078.42000000'), 'type': -1, 'amount': Decimal('16.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606992L, 'price': Decimal('1078.05000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648606995L, 'price': Decimal('1078.58000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 7), 'tid': 648607023L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('4.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 8), 'tid': 648607047L, 'price': Decimal('1078.86000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607113L, 'price': Decimal('1078.06000000'), 'type': -1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 10), 'tid': 648607115L, 'price': Decimal('1078.03000000'), 'type': -1, 'amount': Decimal('148.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 12), 'tid': 648607192L, 'price': Decimal('1079.00000000'), 'type': -1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607218L, 'price': Decimal('1078.99000000'), 'type': 1, 'amount': Decimal('98.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607220L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': Decimal('42.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607222L, 'price': Decimal('1079.03000000'), 'type': 1, 'amount': Decimal('342.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 13), 'tid': 648607224L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('512.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607250L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('44.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607252L, 'price': Decimal('1078.98000000'), 'type': 1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607254L, 'price': Decimal('1079.00000000'), 'type': 1, 'amount': Decimal('106.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 14), 'tid': 648607256L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607431L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('28.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607429L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 20), 'tid': 648607427L, 'price': Decimal('1079.01000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 23), 'tid': 648607518L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('8.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 24), 'tid': 648607544L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('344.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 25), 'tid': 648607593L, 'price': Decimal('1078.79000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607631L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('430.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607623L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('18.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 26), 'tid': 648607621L, 'price': Decimal('1078.79000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 29), 'tid': 648607695L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('776.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607803L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 32), 'tid': 648607805L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('10.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 36), 'tid': 648607905L, 'price': Decimal('1079.16000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 37), 'tid': 648607940L, 'price': Decimal('1079.31000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 42), 'tid': 648608110L, 'price': Decimal('1079.46000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608211L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('12.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 46), 'tid': 648608213L, 'price': Decimal('1079.88000000'), 'type': -1, 'amount': Decimal('6.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608534L, 'price': Decimal('1080.29000000'), 'type': 1, 'amount': Decimal('14.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 28, 57), 'tid': 648608536L, 'price': Decimal('1080.30000000'), 'type': 1, 'amount': Decimal('2.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 2), 'tid': 648608683L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('40.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 3), 'tid': 648608733L, 'price': Decimal('1080.59000000'), 'type': 1, 'amount': Decimal('360.00000000')}, {'date': datetime.datetime(2017, 3, 21, 13, 29, 7), 'tid': 648608838L, 'price': Decimal('1080.90000000'), 'type': 1, 'amount': Decimal('82.00000000')}]
if I didn't use set_index ,it will have an TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
if rows:
df = pd.DataFrame(rows)
print df.head()
# TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
df = df.set_index("date")
print df.head()
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
print resample_data
Result :
Connected to pydev debugger (build 162.1967.10)
amount date price tid type
0 2.00000000 2017-03-21 11:15:12 1075.83000000 648370156 -1
1 10.00000000 2017-03-21 11:15:15 1076.00000000 648370241 -1
2 10.00000000 2017-03-21 11:15:17 1075.83000000 648370297 -1
3 10.00000000 2017-03-21 11:15:17 1075.83000000 648370311 1
4 8.00000000 2017-03-21 11:15:19 1076.13000000 648370370 1
amount price tid type
date
2017-03-21 11:15:12 2.00000000 1075.83000000 648370156 -1
2017-03-21 11:15:15 10.00000000 1076.00000000 648370241 -1
2017-03-21 11:15:17 10.00000000 1075.83000000 648370297 -1
2017-03-21 11:15:17 10.00000000 1075.83000000 648370311 1
2017-03-21 11:15:19 8.00000000 1076.13000000 648370370 1
/Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py:45: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...)..apply(<func>)
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1580, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 964, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/Users/wyx/bitcoin_workspace/fibo-strategy/ticker.py", line 45, in <module>
resample_data = df.resample("1min", how={"price": "ohlc", "amount": "sum"})
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/generic.py", line 4216, in resample
limit=limit)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 582, in _maybe_process_deprecations
r = r.aggregate(how)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/tseries/resample.py", line 320, in aggregate
result, how = self._aggregate(arg, *args, **kwargs)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 549, in _aggregate
result = _agg(arg, _agg_1dim)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 500, in _agg
result[fname] = func(fname, agg_how)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/base.py", line 483, in _agg_1dim
return colg.aggregate(how, _level=(_level or 0) + 1)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 2652, in aggregate
return getattr(self, func_or_funcs)(*args, **kwargs)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in ohlc
lambda x: x._cython_agg_general('ohlc'))
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 3103, in _apply_to_column_groupbys
return func(self)
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 1128, in <lambda>
lambda x: x._cython_agg_general('ohlc'))
File "/Users/wyx/bitcoin_workspace/fibo-strategy/.env/lib/python2.7/site-packages/pandas/core/groupby.py", line 808, in _cython_agg_general
raise DataError('No numeric types to aggregate')
pandas.core.base.DataError: No numeric types to aggregate
Process finished with exit code 1
I am a rookie for pandas.
How solve the error?
And if I want to use the last close price to fill the NaN of next
min ohlc. How to do that?

You need to set an index using your dates.
Code:
from io import StringIO
df = pd.read_csv(StringIO(
u"""amount date price tid type
6.00000000 2017-03-21t10:46:32 1059.26000000 648313975 -1
4.00000000 2017-03-21t10:46:37 1059.42000000 648314094 -1
2.00000000 2017-03-21t10:46:37 1059.42000000 648314096 -1
2.00000000 2017-03-21t10:46:41 1059.26000000 648314176 -1
32.00000000 2017-03-21t10:46:41 1059.26000000 648314189 -1
"""), sep='\s+', parse_dates='date'.split())
print(df)
resample_data = df.set_index('date').resample(
"1min", how={"price": "ohlc", "amount": "sum"})
print(resample_data)
Results:
amount date price tid type
0 6.0 2017-03-21 10:46:32 1059.26 648313975 -1
1 4.0 2017-03-21 10:46:37 1059.42 648314094 -1
2 2.0 2017-03-21 10:46:37 1059.42 648314096 -1
3 2.0 2017-03-21 10:46:41 1059.26 648314176 -1
4 32.0 2017-03-21 10:46:41 1059.26 648314189 -1
price amount
open high low close amount
date
2017-03-21 10:46:00 1059.26 1059.42 1059.26 1059.26 46.0

Related

How to change all datetime objects in a list to standard YYYY-MM-DD HH:MM:SS

When I query MySQL with Python and the query has datetime fields then I get this list as a result.
[{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}]
How do I either get the query to return the date fields in YYYY-MM-DD HH:MM:SS format? Or how do I convert them in the returned list. When I try to change them by enumerating over the results python throw as error that the dictionary has changed.
The datetime.datetime() objects you're getting are the standard representation of these objects - if you were expecting strings instead, you could simple convert them with datetime.strftime('%Y-%m-%d %H:%M:%S', value) but keep in mind that the datetime object is a more flexible way of keeping the data around. I'd recommend only formatting the date in a specific way if you're writing it to the screen or a file format that expects a string.
Example:
data = [{'_id': 1, 'name': 'index', '_cdate': datetime.datetime(2020, 10, 27, 9, 4, 34), 'title': 'DataExtract'}, {'_id': 2, 'name': 'topmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 17), 'title': 'topmenu'}, {'_id': 3, 'name': 'functions_common', '_cdate': datetime.datetime(2020, 11, 4, 19, 52, 50), 'title': 'common functions'}, {'_id': 4, 'name': 'leftmenu', '_cdate': datetime.datetime(2020, 11, 4, 19, 53, 56), 'title': 'Left Menu'}, {'_id': 5, 'name': 'todo', '_cdate': datetime.datetime(2020, 11, 7, 8, 49, 38), 'title': 'Todo'}, {'_id': 6, 'name': 'cron_publish', '_cdate': datetime.datetime(2020, 12, 2, 19, 30, 11), 'title': 'Run Publish reports'}, {'_id': 7, 'name': 'test', '_cdate': datetime.datetime(2020, 12, 2, 22, 32, 54), 'title': 'test'}, {'_id': 8, 'name': 'help', '_cdate': datetime.datetime(2020, 12, 5, 7, 12, 44), 'title': 'Help'}, {'_id': 9, 'name': 'api', '_cdate': datetime.datetime(2020, 12, 5, 21, 22, 13), 'title': 'API'}, {'_id': 10, 'name': 'ben', '_cdate': datetime.datetime(2021, 10, 4, 11, 37, 3), 'title': 'List of Reports'}]
for rec in data:
rec['date_str'] = datetime.datetime.strftime('%Y-%m-%d %H:%M:%S', rec['_cdate'])
That would add 'date_str' field to every record with the format you require. Of course, you could also modify it to overwrite the original value.

How to group items by month and year using itertools.groupby()

Problem: I am trying to take a sorted list and group it based on the month and year but having trouble returning the grouped value correctly...
Assuming this data, we have a title and date/time list that has been ordered by datetime
lst = [
{'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)},
{'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)},
{'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)},
{'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)},
{'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)},
{'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)},
{'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)},
{'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)},
{'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)}
]
Then to group it, run it through itertools.groupby()
from itertools import groupby
def loop_tupe():
diction = {}
for key,group in groupby(lst, key=lambda x: (x['date_time'].month, x['date_time'].year)):
for element in group:
append_value(diction, key, element)
return diction
After grouping it by the month and year the returned result looks like
{
(3, 2020): {'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)},
(10, 2020): [
{'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)},
{'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)},
{'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)},
{'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)},
{'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)}
],
(11, 2020): {'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)},
(1, 2021): {'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0)},
(3, 2021): {'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)}
}
It has been grouped correctly, however the dates are within a tuple whereas I would need them as one "value", and while it's in this format I am unable to loop over it in the way I would with the original list.
I realise it has something to do with the way I'm using the anonymous function within the groupby() (and maybe how the return result is created perhaps?) but I'm unsure how else to apply a month and year grouping within it.
Question: What can I do to group my original data by month & year while also keeping its format relatively similar to the list going in?
Edit
The append_value function that I'm using
def append_value(dict_obj, key, value):
if key in dict_obj:
if not isinstance(dict_obj[key], list):
dict_obj[key] = [dict_obj[key]]
dict_obj[key].append(value)
else:
dict_obj[key] = value
Edit 2
So far this is the closest I'm getting to a solution.
I have changed the function used in groupby to take the datetime and change it into a string to be compared. (I've left the print in there to visualise)
def loop_str(to_sort):
output={}
for key,group in groupby(to_sort, key=lambda item: item['date_time'].strftime('%B %Y')):
for element in group:
append_value(output,key,element)
return output
Doing so gives me this output
{
'March 2020': {
'title': 'in the past', 'date_time': datetime.datetime(2020, 3, 18, 0, 0)
},
'October 2020': [
{'title': 'Just another event', 'date_time': datetime.datetime(2020, 10, 1, 19, 7)},
{'title': 'earlier today 9am', 'date_time': datetime.datetime(2020, 10, 21, 9, 0)},
{'title': 'greater than .now()', 'date_time': datetime.datetime(2020, 10, 21, 23, 0)},
{'title': 'another one', 'date_time': datetime.datetime(2020, 10, 30, 10, 0)},
{'title': 'Me testing the latest event', 'date_time': datetime.datetime(2020, 10, 30, 12, 0)}
],
'November 2020': {
'title': '18 Nov 20', 'date_time': datetime.datetime(2020, 11, 18, 20, 27)
},
'January 2021': {
'title': '18 January 2021', 'date_time': datetime.datetime(2021, 1, 18, 20, 0)
},
'March 2021': {
'title': '18 March 21', 'date_time': datetime.datetime(2021, 3, 18, 20, 0)
}
}
This is closer to what I need however unless I'm not seeing something it seems that this output could be a mix of dicts and lists which will be more difficult to loop over within a django template?
You can make the groupby key the string you want my formatting the date. Then you can just use it in a dict comprehension. It is easier to create the data structure if the values are constantly lists. It will probably also be easier to use it.
from itertools import groupby
import datetime
lst = [
{'title': 'in the past','date_time': datetime.datetime(2020, 3, 18, 0, 0)},
{'title': 'Just another event','date_time': datetime.datetime(2020, 10, 1, 19, 7)},
{'title': 'earlier today 9am','date_time': datetime.datetime(2020, 10, 21, 9, 0)},
{'title': 'greater than .now()','date_time': datetime.datetime(2020, 10, 21, 23, 0)},
{'title': 'another one','date_time': datetime.datetime(2020, 10, 30, 10, 0)},
{'title': 'Me testing the latest event','date_time': datetime.datetime(2020, 10, 30, 12, 0)},
{'title': '18 Nov 20','date_time': datetime.datetime(2020, 11, 18, 20, 27)},
{'title': '18 January 2021','date_time': datetime.datetime(2021, 1, 18, 20, 0)},
{'title': '18 March 21','date_time': datetime.datetime(2021, 3, 18, 20, 0)}
]
groups = groupby(lst, key=lambda x: (x['date_time'].strftime("%B %Y")))
{k: list(g) for k, g in groups}
Result:
{'March 2020': [{'title': 'in the past',
'date_time': datetime.datetime(2020, 3, 18, 0, 0)}],
'October 2020': [{'title': 'Just another event',
'date_time': datetime.datetime(2020, 10, 1, 19, 7)},
{'title': 'earlier today 9am',
'date_time': datetime.datetime(2020, 10, 21, 9, 0)},
{'title': 'greater than .now()',
'date_time': datetime.datetime(2020, 10, 21, 23, 0)},
{'title': 'another one',
'date_time': datetime.datetime(2020, 10, 30, 10, 0)},
{'title': 'Me testing the latest event',
'date_time': datetime.datetime(2020, 10, 30, 12, 0)}],
'November 2020': [{'title': '18 Nov 20',
'date_time': datetime.datetime(2020, 11, 18, 20, 27)}],
'January 2021': [{'title': '18 January 2021',
'date_time': datetime.datetime(2021, 1, 18, 20, 0)}],
'March 2021': [{'title': '18 March 21',
'date_time': datetime.datetime(2021, 3, 18, 20, 0)}]}

Normalize JSON API data to columns

I'm trying to get data from our Hubspot CRM database and convert it to a dataframe using pandas. I'm still a beginner in python, but I can't get json_normalize to work.
The output from the database is i JSON format like this:
{'archived': False,
'archived_at': None,
'associations': None,
'created_at': datetime.datetime(2019, 12, 21, 17, 56, 24, 739000, tzinfo=tzutc()),
'id': 'xxx',
'properties': {'createdate': '2019-12-21T17:56:24.739Z',
'email': 'xxxxx#xxxxx.com',
'firstname': 'John',
'hs_object_id': 'xxx',
'lastmodifieddate': '2020-04-22T04:37:40.274Z',
'lastname': 'Hansen'},
'updated_at': datetime.datetime(2020, 4, 22, 4, 37, 40, 274000, tzinfo=tzutc())}, {'archived': False,
'archived_at': None,
'associations': None,
'created_at': datetime.datetime(2019, 12, 21, 17, 52, 38, 485000, tzinfo=tzutc()),
'id': 'bbb',
'properties': {'createdate': '2019-12-21T17:52:38.485Z',
'email': 'bbb#bbb.dk',
'firstname': 'John2',
'hs_object_id': 'bbb',
'lastmodifieddate': '2020-05-19T07:18:28.384Z',
'lastname': 'Hansen2'},
'updated_at': datetime.datetime(2020, 5, 19, 7, 18, 28, 384000, tzinfo=tzutc())}, {'archived': False,
'archived_at': None,
'associations': None,
etc.
Trying to put it into a dataframe using this code:
import hubspot
import pandas as pd
import json
from pandas.io.json import json_normalize
import os
client = hubspot.Client.create(api_key='################')
all_contacts = contacts_client = client.crm.contacts.get_all()
df=pd.io.json.json_normalize(all_contacts,'properties')
df.head
df.to_csv ('All contacts.csv')
But i keep getting an error that i can't resolve.
I have also tried the
pd.dataframe(all_contacts)
and
pf.dataframe.from_dict(all_contacts)
The all_contacts variable is a list of dictionary-like elements. So to create the dataframe I have used list comprehension to create a tuple that only contains the 'properties' for each dictionary-like element.
import datetime
import pandas as pd
from dateutil.tz import tzutc
data = ({'archived': False,
'archived_at': None,
'associations': None,
'created_at': datetime.datetime(2019, 12, 21, 17, 56, 24, 739000, tzinfo=tzutc()),
'id': 'xxx',
'properties': {'createdate': '2019-12-21T17:56:24.739Z',
'email': 'xxxxx#xxxxx.com',
'firstname': 'John',
'hs_object_id': 'xxx',
'lastmodifieddate': '2020-04-22T04:37:40.274Z',
'lastname': 'Hansen'},
'updated_at': datetime.datetime(2020, 4, 22, 4, 37, 40, 274000, tzinfo=tzutc())},
{'archived': False,
'archived_at': None,
'associations': None,
'created_at': datetime.datetime(2019, 12, 21, 17, 52, 38, 485000, tzinfo=tzutc()),
'id': 'bbb',
'properties': {
'createdate': '2019-12-21T17:52:38.485Z',
'email': 'bbb#bbb.dk',
'firstname': 'John2',
'hs_object_id': 'bbb',
'lastmodifieddate': '2020-05-19T07:18:28.384Z',
'lastname': 'Hansen2'},
'updated_at': datetime.datetime(2020, 5, 19, 7, 18, 28, 384000, tzinfo=tzutc())})
df = pd.DataFrame([row['properties'] for row in data])
print(df)
OUTPUT:
createdate email ... lastmodifieddate lastname
0 2019-12-21T17:56:24.739Z xxxxx#xxxxx.com ... 2020-04-22T04:37:40.274Z Hansen
1 2019-12-21T17:52:38.485Z bbb#bbb.dk ... 2020-05-19T07:18:28.384Z Hansen2
[2 rows x 6 columns]

how to calculate the average disk_available, based on the hostname hourly , python

result data :
<QuerySet [{'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 354}, {'disk_available': 27, 'hostname': '2', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 273}, {'disk_available': 19, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 12}, {'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1945}, {'disk_available': 19, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 53}, {'disk_available': 1, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 27, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 291}]>
SocketClient.objects.annotate(day=TruncDay('create')).values('day').annotate(c=Count('id')).values('day', 'disk_available', 'hostname', 'c').order_by('hostname')
.
from the results above I want to do that , I want to display the average result of hostname hourly

Grouping data on year

mydata = [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'},
{'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
{'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
{'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
{'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
{'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
{'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
{'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
{'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
{'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
{'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
{'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
{'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
{'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
{'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
{'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
{'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
{'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
{'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},
{'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'},]
In 'mydata', I get list of sequential monthly data. I wrote some code to group them on year.
partial_req_data = dict([(k,[f for f in v]) for k,v in itertools.groupby(mydata, key=lambda x : x.get('year'))])
Now I further need some efficient code to fill the missing months with {}, i.e. empty dict. There are bad ways to do that, but am looking for good ones.
required_data = {"2009": [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009' },
{'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
{'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
{'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
{'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
{'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
{'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
{}, {}, {}],
"2008": [{'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
{'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
{'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
{'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
{'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
{'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
{'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
{'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
{'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
{'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
{'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
{'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},]
"2007": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
{'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'}]
}
import datetime
from itertools import groupby
from pprint import pprint
required_data={}
for k,g in groupby(mydata,key=lambda x: x.get('year')):
partial={}
for datum in g:
partial[datum.get('date').month]=datum
required_data[k]=[partial.get(m,{}) for m in range(1,13)]
pprint(required_data)
For each year k, partial is a dict whose keys are months.
The trick is to use partial.get(m,{}) since this will return the datum when it exists, or {} when it does not.

Categories