*js=['{"id":42352,"user_id":11770,"recipient_id":29936,"exchange_rate_list_id":39298,"send_amount_cents":"73860000","send_amount_currency":"KRW","commission_cents":"3000000","commission_currency":"KRW","receive_amount_cents":"3000000","receive_amount_currency":"PHP","save_amount_cents":"3336382","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:10:26.751Z","updated_at":"2016-10-28T09:10:26.751Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"708000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42362,"user_id":995,"recipient_id":13068,"exchange_rate_list_id":39306,"send_amount_cents":"37500000","send_amount_currency":"KRW","commission_cents":"1875000","commission_currency":"KRW","receive_amount_cents":"1509500","receive_amount_currency":"PHP","save_amount_cents":"3411736","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T10:22:35.831Z","updated_at":"2016-10-28T10:22:35.831Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"472000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42351,"user_id":3563,"recipient_id":29935,"exchange_rate_list_id":39298,"send_amount_cents":"8703000","send_amount_currency":"KRW","commission_cents":"436000","commission_currency":"KRW","receive_amount_cents":"350000","receive_amount_currency":"PHP","save_amount_cents":"4413495","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:08:41.488Z","updated_at":"2016-10-28T09:08:41.488Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"283000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42359,"user_id":2657,"recipient_id":27757,"exchange_rate_list_id":39302,"send_amount_cents":"9937000","send_amount_currency":"KRW","commission_cents":"497000","commission_currency":"KRW","receive_amount_cents":"400000","receive_amount_currency":"PHP","save_amount_cents":"4369830","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:47:35.891Z","updated_at":"2016-10-28T09:47:35.891Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"283000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42341,"user_id":4472,"recipient_id":29931,"exchange_rate_list_id":39290,"send_amount_cents":"49727000","send_amount_currency":"KRW","commission_cents":"2487000","commission_currency":"KRW","receive_amount_cents":"2000000","receive_amount_currency":"PHP","save_amount_cents":"2987161","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T07:45:21.924Z","updated_at":"2016-10-28T07:45:21.924Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"472000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42342,"user_id":4471,"recipient_id":17086,"exchange_rate_list_id":39292,"send_amount_cents":"25000000","send_amount_currency":"KRW","commission_cents":"1250000","commission_currency":"KRW","receive_amount_cents":"1005500","receive_amount_currency":"PHP","save_amount_cents":"3846653","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T08:07:36.936Z","updated_at":"2016-10-28T08:07:36.936Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"354000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42343,"user_id":4471,"recipient_id":12681,"exchange_rate_list_id":39292,"send_amount_cents":"6000000","send_amount_currency":"KRW","commission_cents":"300000","commission_currency":"KRW","receive_amount_cents":"241300","receive_amount_currency":"PHP","save_amount_cents":"4506244","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T08:09:24.871Z","updated_at":"2016-10-28T08:09:24.871Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"236000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42198,"user_id":9950,"recipient_id":29834,"exchange_rate_list_id":39165,"send_amount_cents":"7453000","send_amount_currency":"KRW","commission_cents":"373000","commission_currency":"KRW","receive_amount_cents":"300000","receive_amount_currency":"PHP","save_amount_cents":"4451416","save_amount_currency":"KRW","status":0,"created_at":"2016-10-27T10:58:31.712Z","updated_at":"2016-10-27T10:58:31.712Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"0","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42199,"user_id":2001,"recipient_id":29835,"exchange_rate_list_id":39166,"send_amount_cents":"4969000","send_amount_currency":"KRW","commission_cents":"249000","commission_currency":"KRW","receive_amount_cents":"200000","receive_amount_currency":"PHP","save_amount_cents":"4537501","save_amount_currency":"KRW","status":0,"created_at":"2016-10-27T11:00:02.677Z","updated_at":"2016-10-27T11:00:02.677Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"188000","external_fee_currency":"KRW","sender_country":"KR"}']*
I have a list of JSON-formatted strings named js.
But when I do
pd.read_json(js)
I get the following error:
TypeError                                 Traceback (most recent call last)
in ()
----> 1 pd.read_json(js)

//anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
    209     obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
    210                       keep_default_dates, numpy, precise_float,
--> 211                       date_unit).parse()
    212
    213     if typ == 'series' or obj is None:

//anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in parse(self)
    277
    278         else:
--> 279             self._parse_no_numpy()
    280
    281         if self.obj is None:

//anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in _parse_no_numpy(self)
    494         if orient == "columns":
    495             self.obj = DataFrame(
--> 496                 loads(json, precise_float=self.precise_float), dtype=None)
    497         elif orient == "split":
    498             decoded = dict((str(k), v)

TypeError: Expected String or Unicode
I got it to work by doing
df = pd.DataFrame()
for j in js:
    data = pd.read_json(j, typ='list')
    df = df.append(data, ignore_index=True)
which took forever to execute.
My question is: if I can read the list one item at a time and append each to an empty DataFrame, why can't I just read the whole list at once without getting an error? Is there any way to fix this problem? Thanks.
import pandas as pd
js=['{"id":42352,"user_id":11770,"recipient_id":29936,"exchange_rate_list_id":39298,"send_amount_cents":"73860000","send_amount_currency":"KRW","commission_cents":"3000000","commission_currency":"KRW","receive_amount_cents":"3000000","receive_amount_currency":"PHP","save_amount_cents":"3336382","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:10:26.751Z","updated_at":"2016-10-28T09:10:26.751Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"708000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42362,"user_id":995,"recipient_id":13068,"exchange_rate_list_id":39306,"send_amount_cents":"37500000","send_amount_currency":"KRW","commission_cents":"1875000","commission_currency":"KRW","receive_amount_cents":"1509500","receive_amount_currency":"PHP","save_amount_cents":"3411736","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T10:22:35.831Z","updated_at":"2016-10-28T10:22:35.831Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"472000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42351,"user_id":3563,"recipient_id":29935,"exchange_rate_list_id":39298,"send_amount_cents":"8703000","send_amount_currency":"KRW","commission_cents":"436000","commission_currency":"KRW","receive_amount_cents":"350000","receive_amount_currency":"PHP","save_amount_cents":"4413495","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:08:41.488Z","updated_at":"2016-10-28T09:08:41.488Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"283000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42359,"user_id":2657,"recipient_id":27757,"exchange_rate_list_id":39302,"send_amount_cents":"9937000","send_amount_currency":"KRW","commission_cents":"497000","commission_currency":"KRW","receive_amount_cents":"400000","receive_amount_currency":"PHP","save_amount_cents":"4369830","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T09:47:35.891Z","updated_at":"2016-10-28T09:47:35.891Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"283000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42341,"user_id":4472,"recipient_id":29931,"exchange_rate_list_id":39290,"send_amount_cents":"49727000","send_amount_currency":"KRW","commission_cents":"2487000","commission_currency":"KRW","receive_amount_cents":"2000000","receive_amount_currency":"PHP","save_amount_cents":"2987161","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T07:45:21.924Z","updated_at":"2016-10-28T07:45:21.924Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"472000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42342,"user_id":4471,"recipient_id":17086,"exchange_rate_list_id":39292,"send_amount_cents":"25000000","send_amount_currency":"KRW","commission_cents":"1250000","commission_currency":"KRW","receive_amount_cents":"1005500","receive_amount_currency":"PHP","save_amount_cents":"3846653","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T08:07:36.936Z","updated_at":"2016-10-28T08:07:36.936Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"354000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42343,"user_id":4471,"recipient_id":12681,"exchange_rate_list_id":39292,"send_amount_cents":"6000000","send_amount_currency":"KRW","commission_cents":"300000","commission_currency":"KRW","receive_amount_cents":"241300","receive_amount_currency":"PHP","save_amount_cents":"4506244","save_amount_currency":"KRW","status":0,"created_at":"2016-10-28T08:09:24.871Z","updated_at":"2016-10-28T08:09:24.871Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"236000","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42198,"user_id":9950,"recipient_id":29834,"exchange_rate_list_id":39165,"send_amount_cents":"7453000","send_amount_currency":"KRW","commission_cents":"373000","commission_currency":"KRW","receive_amount_cents":"300000","receive_amount_currency":"PHP","save_amount_cents":"4451416","save_amount_currency":"KRW","status":0,"created_at":"2016-10-27T10:58:31.712Z","updated_at":"2016-10-27T10:58:31.712Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"0","external_fee_currency":"KRW","sender_country":"KR"}',
'{"id":42199,"user_id":2001,"recipient_id":29835,"exchange_rate_list_id":39166,"send_amount_cents":"4969000","send_amount_currency":"KRW","commission_cents":"249000","commission_currency":"KRW","receive_amount_cents":"200000","receive_amount_currency":"PHP","save_amount_cents":"4537501","save_amount_currency":"KRW","status":0,"created_at":"2016-10-27T11:00:02.677Z","updated_at":"2016-10-27T11:00:02.677Z","transfer_list_id":null,"purpose":"living_expenses","external_fee_cents":"188000","external_fee_currency":"KRW","sender_country":"KR"}']
a = pd.read_json('[{}]'.format(','.join(js)))
print(a)
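For context: pd.read_json expects a single JSON string, a path, or a file-like object, not a Python list of strings, which is why passing js directly raises TypeError: Expected String or Unicode. Joining the strings into one JSON array, as above, gives the parser valid input. An alternative sketch that also avoids the slow row-by-row append is to parse each element with the standard json module and build the DataFrame in one call:

import json

import pandas as pd

# Parse each JSON string into a dict, then construct the frame once.
records = [json.loads(s) for s in js]
df = pd.DataFrame(records)
print(df.head())

Note that read_json tries to infer dtypes by default, so the quoted numeric fields (like send_amount_cents) may come back as integers, while the json.loads route keeps them as strings unless you cast them with astype.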
Related
I have the following pandas series:
>>>df.A.head()
0 {"Date_": "2022-06-01T01:00:00+05:30", "submit...
1 {"Growth": [{"textField": "", "Change_Size": "...
2 {"submit": true, "HSI_Tag": "xyz...
3 {"submit": true, "HSI_Tag": "xyz...
4 {"submit": true, "roleList": "xy...
Name: A, dtype: object
Every item in the Series is a serialized JSON object. I would like to turn every item into a dictionary. I am trying to do the following, but I get an error:
for i in range(len(df.A)):
    df.A.iloc[i] = json.loads(df.A.iloc[i])
The error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-27-9b4e8d4e6d76> in <module>
1 for i in range(len(df.A)):
----> 2 df.A.iloc[i] = json.loads(df.A.iloc[i])
C:\ANACONDA3\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
188 key = com.apply_if_callable(key, self.obj)
189 indexer = self._get_setitem_indexer(key)
--> 190 self._setitem_with_indexer(indexer, value)
191
192 def _validate_key(self, key, axis):
C:\ANACONDA3\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value)
640 # setting for extensionarrays that store dicts. Need to decide
641 # if it's worth supporting that.
--> 642 value = self._align_series(indexer, Series(value))
643
644 elif isinstance(value, ABCDataFrame):
C:\ANACONDA3\lib\site-packages\pandas\core\indexing.py in _align_series(self, indexer, ser, multiindex_indexer)
774
775 elif is_scalar(indexer):
--> 776 ax = self.obj._get_axis(1)
777
778 if ser.index.equals(ax):
C:\ANACONDA3\lib\site-packages\pandas\core\generic.py in _get_axis(self, axis)
376
377 def _get_axis(self, axis):
--> 378 name = self._get_axis_name(axis)
379 return getattr(self, name)
380
C:\ANACONDA3\lib\site-packages\pandas\core\generic.py in _get_axis_name(cls, axis)
373 pass
374 raise ValueError('No axis named {0} for object type {1}'
--> 375 .format(axis, type(cls)))
376
377 def _get_axis(self, axis):
ValueError: No axis named 1 for object type <class 'type'>
How can I fix it?
I managed to do it eventually with apply and a lambda like this:
df.A = df.A.apply(lambda x: json.loads(x))
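As a side note, df.A.apply(json.loads) does the same thing without the lambda. And if the end goal is to work with the JSON fields as regular columns rather than as dictionaries stored in a Series, here is a minimal sketch (assuming the parsed objects are reasonably flat) using pandas.json_normalize, which is available at the top level since pandas 1.0:

import json

import pandas as pd

# Parse each serialized JSON string into a dict ...
parsed = df.A.apply(json.loads)

# ... then expand the dicts into one column per key (nested keys become dotted names).
expanded = pd.json_normalize(parsed.tolist())

expanded has one row per element of df.A, in the same order.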
I am really a beginner with Python, but I am trying to use IBM's sentiment analyzer to build a dataset. I get a JSON response which I want to put into a table. So far what I have is:
response = natural_language_understanding.analyze(
    text=df_text,
    features=Features(sentiment=SentimentOptions(targets=['Pericles']))).get_result()
print(json.dumps(response, indent=2))
respj = json.dumps(response['sentiment'])
respj
which prints
'{"targets": [{"text": "Pericles", "score": -0.939436, "label": "negative"}], "document": {"score": -0.903556, "label": "negative"}}'
Now, it is at this point that I would really like to make a pandas table with this data. Ideally, I would like all the above information formatted like -> Text | Text Score | Document Score
I don't really need the positive/negative label, but it doesn't hurt to have it. How would I accomplish this? Right now, when I try
json_df = pd.read_json(respj)
json_df.head()
I get
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-b06d8a1caf3f> in <module>
----> 1 json_df = pd.read_json(respj)
2 json_df.head()
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
212 else:
213 kwargs[new_arg_name] = new_arg_value
--> 214 return func(*args, **kwargs)
215
216 return cast(F, wrapper)
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/io/json/_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
606 return json_reader
607
--> 608 result = json_reader.read()
609 if should_close:
610 filepath_or_buffer.close()
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/io/json/_json.py in read(self)
729 obj = self._get_object_parser(self._combine_lines(data.split("\n")))
730 else:
--> 731 obj = self._get_object_parser(self.data)
732 self.close()
733 return obj
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/io/json/_json.py in _get_object_parser(self, json)
751 obj = None
752 if typ == "frame":
--> 753 obj = FrameParser(json, **kwargs).parse()
754
755 if typ == "series" or obj is None:
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/io/json/_json.py in parse(self)
855
856 else:
--> 857 self._parse_no_numpy()
858
859 if self.obj is None:
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/io/json/_json.py in _parse_no_numpy(self)
1086
1087 if orient == "columns":
-> 1088 self.obj = DataFrame(
1089 loads(json, precise_float=self.precise_float), dtype=None
1090 )
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
433 )
434 elif isinstance(data, dict):
--> 435 mgr = init_dict(data, index, columns, dtype=dtype)
436 elif isinstance(data, ma.MaskedArray):
437 import numpy.ma.mrecords as mrecords
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype)
252 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
253 ]
--> 254 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
255
256
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype)
62 # figure out the index, if necessary
63 if index is None:
---> 64 index = extract_index(arrays)
65 else:
66 index = ensure_index(index)
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages/pandas/core/internals/construction.py in extract_index(data)
366
367 if have_dicts:
--> 368 raise ValueError(
369 "Mixing dicts with non-Series may lead to ambiguous ordering."
370 )
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering
If anyone can give me some tips on how to build the table I am trying to make, I would really appreciate it. Also, if anyone can explain the error I am getting right now, that would be great too. I think I get the basic premise: it's because the JSON already contains two incompatible "tables". Thank you for any help.
You don't need to dump the response['sentiment'] as a JSON string if you just want to turn it into a DataFrame. Use pandas.json_normalize instead.
It seems that response['sentiment'] looks something like
>>> response['sentiment']
{
    "targets": [{"text": "Pericles",
                 "score": -0.939436,
                 "label": "negative"}],
    "document": {"score": -0.903556,
                 "label": "negative"}
}
Then, you just need
df = pd.json_normalize(response['sentiment'],
                       record_path='targets',
                       meta=[['document', 'score'], ['document', 'label']])
Output
>>> df
text score label document.score document.label
0 Pericles -0.939436 negative -0.903556 negative
Optionally, you can rename the columns afterwards as you wish using DataFrame.rename:
cols_mapping = {
    'text': 'Text',
    'score': 'Text Score',
    'label': 'Text Label',
    'document.score': 'Document Score',
    'document.label': 'Document Label'
}
df = df.rename(columns=cols_mapping)
>>> df
Text Text Score Text Label Document Score Document Label
0 Pericles -0.939436 negative -0.903556 negative
I believe this should work for you:
import pandas as pd

# Assuming j holds the sentiment payload, i.e. j = response['sentiment']
targets = {k: [t[k] for t in j['targets']] for k in j['targets'][0].keys()}
doc_scores = [j['document']['score']] * len(j['targets'])
pd.DataFrame({'document_score': doc_scores, **targets})
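For reference (this output is not part of the original answer), with the sentiment payload shown earlier this sketch would produce a single-row frame roughly like:

   document_score      text     score     label
0       -0.903556  Pericles -0.939436  negative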
I want to read a JSON file into my Jupyter notebook as a pandas DataFrame.
macOS 10.12, Python 3.7, pandas 0.24.2
My dataset: https://open.fda.gov/apis/drug/label/download/
Similar question with the same error message (I have tried to use the solution from there, but it gives me the same error): Read JSON to pandas dataframe - ValueError: Mixing dicts with non-Series may lead to ambiguous ordering
import json
import pandas as pd
data = json.load(open('drug-label-0001-of-0008.json'))
df = pd.DataFrame(data)
As this answer says, I am not doing a double conversion: Pandas vs JSON library to read a JSON file in Python
His code just works, but mine gets an error:
import pandas as pd
pd_example = pd.read_json('some_json_file.json')
My code is similar but I get the following error:
import pandas as pd
df = pd.read_json('drug-label-0008-of-0008.json')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-77b3c3e486fc> in <module>
----> 1 df = pd.read_json('drug-label-0008-of-0008.json')
~/anaconda3/lib/python3.7/site-packages/pandas/io/json/json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
425 return json_reader
426
--> 427 result = json_reader.read()
428 if should_close:
429 try:
~/anaconda3/lib/python3.7/site-packages/pandas/io/json/json.py in read(self)
535 )
536 else:
--> 537 obj = self._get_object_parser(self.data)
538 self.close()
539 return obj
~/anaconda3/lib/python3.7/site-packages/pandas/io/json/json.py in _get_object_parser(self, json)
554 obj = None
555 if typ == 'frame':
--> 556 obj = FrameParser(json, **kwargs).parse()
557
558 if typ == 'series' or obj is None:
~/anaconda3/lib/python3.7/site-packages/pandas/io/json/json.py in parse(self)
650
651 else:
--> 652 self._parse_no_numpy()
653
654 if self.obj is None:
~/anaconda3/lib/python3.7/site-packages/pandas/io/json/json.py in _parse_no_numpy(self)
869 if orient == "columns":
870 self.obj = DataFrame(
--> 871 loads(json, precise_float=self.precise_float), dtype=None)
872 elif orient == "split":
873 decoded = {str(k): v for k, v in compat.iteritems(
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
390 dtype=dtype, copy=copy)
391 elif isinstance(data, dict):
--> 392 mgr = init_dict(data, index, columns, dtype=dtype)
393 elif isinstance(data, ma.MaskedArray):
394 import numpy.ma.mrecords as mrecords
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype)
210 arrays = [data[k] for k in keys]
211
--> 212 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
213
214
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype)
49 # figure out the index, if necessary
50 if index is None:
---> 51 index = extract_index(arrays)
52 else:
53 index = ensure_index(index)
~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in extract_index(data)
318
319 if have_dicts:
--> 320 raise ValueError('Mixing dicts with non-Series may lead to '
321 'ambiguous ordering.')
322
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
You can just use Python's built-in JSON handling capabilities:
import json
with open("drug-label-0008-of-0008.json", "r") as read_file:
data = json.load(read_file)
"When you have a single JSON structure inside a json file, use read_json because it loads the JSON directly into a DataFrame. With json.loads, you've to load it into a python dictionary/list, and then into a DataFrame - an unnecessary two step process. Pandas vs JSON library to read a JSON file in Python "
I recently updated to pandas 0.20.1 and tried to use the new to_json(orient='table') feature:
import pandas as pd
pd.__version__
# '0.20.1'
a = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
a.to_json('a.json', orient='table')
But how can I read this JSON file back into a DataFrame?
I tried pd.read_json('a.json', orient='table'), but it raised a ValueError:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-22-7527b25107ef> in <module>()
----> 1 pd.read_json('a.json', orient='table')
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines)
352 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
353 keep_default_dates, numpy, precise_float,
--> 354 date_unit).parse()
355
356 if typ == 'series' or obj is None:
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in parse(self)
420
421 else:
--> 422 self._parse_no_numpy()
423
424 if self.obj is None:
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in _parse_no_numpy(self)
650 else:
651 self.obj = DataFrame(
--> 652 loads(json, precise_float=self.precise_float), dtype=None)
653
654 def _process_converter(self, f, filt=None):
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_dict(self, data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
5592 # figure out the index, if necessary
5593 if index is None:
-> 5594 index = extract_index(arrays)
5595 else:
5596 index = _ensure_index(index)
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in extract_index(data)
5643
5644 if have_dicts:
-> 5645 raise ValueError('Mixing dicts with non-Series may lead to '
5646 'ambiguous ordering.')
5647
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
So is there a way I can read that JSON file? Thanks in advance.
PS: the JSON file looks like this:
{"schema": {"pandas_version":"0.20.0","fields":[{"type":"integer","name":"index"},{"type":"integer","name":"a"},{"type":"integer","name":"b"}],"primaryKey":["index"]}, "data": [{"index":0,"a":1,"b":4},{"index":1,"a":2,"b":5},{"index":2,"a":3,"b":6}]}
Apparently the new method writes some metadata (such as the pandas version) into the JSON along with the dataset. Hence, consider using the built-in json module to read in this nested object and extract the value at the data key:
import json
...
with open('a.json', 'r') as f:
    json_obj = json.loads(f.read())

df = pd.DataFrame(json_obj['data']).set_index('index')
df.index.name = None
print(df)
#    a  b
# 0  1  4
# 1  2  5
# 2  3  6
Should you intend to use the type and name fields, run dictionary and list comprehensions on those parts of the nested JSON. Note that here, "integer" has to be sliced down to "int". The dtype argument cannot be used, since the column names are not set until after that step:
with open('a.json', 'r') as f:
    json_obj = json.loads(f.read())

df = pd.DataFrame(json_obj['data'],
                  columns=[t['name'] for t in json_obj['schema']['fields']])
df = df.astype(dtype={t['name']: t['type'][:3]
                      for t in json_obj['schema']['fields']}).set_index('index')
df.index.name = None
print(df)
#    a  b
# 0  1  4
# 1  2  5
# 2  3  6
Here is a function I have developed from Parfait's answer:
def table_to_df(table):
    df = pd.DataFrame(table['data'],
                      columns=[t['name'] for t in table['schema']['fields']])
    for t in table['schema']['fields']:
        if t['type'] == "datetime":
            df[t['name']] = pd.to_datetime(df[t['name']], infer_datetime_format=True)
    df.set_index(table['schema']['primaryKey'], inplace=True)
    return df
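A usage sketch (not part of the original answer), assuming the table-schema JSON was written by to_json(orient='table') as in the question:

import json

import pandas as pd

with open('a.json', 'r') as f:
    table = json.load(f)

df = table_to_df(table)
print(df)
#        a  b
# index
# 0      1  4
# 1      2  5
# 2      3  6

For what it's worth, newer pandas versions (0.23 and later, if memory serves) can read this format directly with pd.read_json('a.json', orient='table').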
This question already has answers here:
Convert stringified list of dictionaries back to a list of dictionaries
(3 answers)
What is deserialize and serialize in JSON?
(4 answers)
Closed 3 years ago.
I'm trying to convert a list of JSON objects to a pandas DataFrame. However, the JSON has unquoted True/False and None values, which appears to make read_json error out. Is there a way to make the pandas.read_json method handle Boolean and None values?
Updated with real code (my actual JSON comes from a web service, but I'm not able to post the real content):
import pandas as pd
x = '[{"A": "some text","B": True,"C":7},{"A": "more text","B":False,"C":8},{"A":None,"B":False,"C":9}]'
pd.read_json(x)
ValueError: Expected object or value
If I quote the Nones and Booleans, it seems to work:
import pandas as pd
x = '[{"A": "some text","B": "True","C":7},{"A": "more text","B":"False","C":8},{"A":"None","B":"False","C":9}]'
pd.read_json(x)
Of course, they are then strings rather than Booleans and NaNs.
Updated with error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-42-c251da58cd34> in <module>()
1 import pandas as pd
2 x = '[{"A": "some text","B": True,"C":7},{"A": "more text","B":False,"C":8},{"A":None,"B":False,"C":9}]'
----> 3 pd.read_json(x)
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
196 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
197 keep_default_dates, numpy, precise_float,
--> 198 date_unit).parse()
199
200 if typ == 'series' or obj is None:
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in parse(self)
264
265 else:
--> 266 self._parse_no_numpy()
267
268 if self.obj is None:
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in _parse_no_numpy(self)
481 if orient == "columns":
482 self.obj = DataFrame(
--> 483 loads(json, precise_float=self.precise_float), dtype=None)
484 elif orient == "split":
485 decoded = dict((str(k), v)
ValueError: Expected object or value
So, if I treat the JSON as a Python object rather than JSON, I get a different error. This code
x = [{"A": "some text","B": "True","C":7},{"A": "more text","B":"False","C":8},{"A":"None","B":"False","C":9}]
pd.read_json(x)
yields
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-59-076202b1f4ce> in <module>()
1 x = [{"A": "some text","B": True,"C":7},{"A": "more text","B":False,"C":8},{"A":"None","B":False,"C":9}]
----> 2 pd.read_json(x)
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit)
196 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
197 keep_default_dates, numpy, precise_float,
--> 198 date_unit).parse()
199
200 if typ == 'series' or obj is None:
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in parse(self)
264
265 else:
--> 266 self._parse_no_numpy()
267
268 if self.obj is None:
C:\Users\Chris\AppData\Local\Continuum\Miniconda3\lib\site-packages\pandas\io\json.py in _parse_no_numpy(self)
481 if orient == "columns":
482 self.obj = DataFrame(
--> 483 loads(json, precise_float=self.precise_float), dtype=None)
484 elif orient == "split":
485 decoded = dict((str(k), v)
TypeError: Expected String or Unicode
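The question was closed as a duplicate, so no answer appears here, but for completeness here is a minimal sketch of the approach the linked duplicates point to: parse the Python-literal string with ast.literal_eval from the standard library, then build the frame from the resulting list of dicts.

import ast

import pandas as pd

x = '[{"A": "some text","B": True,"C":7},{"A": "more text","B":False,"C":8},{"A":None,"B":False,"C":9}]'

# ast.literal_eval understands Python literals (True/False/None), unlike a JSON parser.
records = ast.literal_eval(x)
df = pd.DataFrame(records)
print(df.dtypes)  # B comes out as bool; A's None is treated as a missing value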