Rename JSON key names - Python

I receive a JSON object like this:
{
    "Question Communicating": "Natural language",
    "interpretation_type": "recognition",
    "output1": "test",
    "Question Learning": "Reinforcement",
    "output2": "test2",
    "output3": "something"
}
My question is: is it possible to rename every 'outputX' key to 'output'?
I don't know how many 'outputX' keys will be in the JSON, but I need all of them renamed to 'output'.
So it will end up like this:
{
    "Question Communicating": "Natural language",
    "interpretation_type": "recognition",
    "output": "test",
    "Question Learning": "Reinforcement",
    "output": "test2",
    "output": "something"
}

Trying to use duplicate keys in a JSON object is not recommended. You can see the problems that arise when you serialize and deserialize duplicate keys, or try to force them into a dictionary. The duplicate keys are not retained.
>>> from json import dumps, loads
>>> json = '{"a": "x", "a": "y"}'
>>> loads(json)
{'a': 'y'}
>>> json = {'a': 'x', 'a': 'y'}
>>> dumps(json)
'{"a": "y"}'
>>> json = {'a': 'x', 'a': 'y'}
>>> json
{'a': 'y'}
Instead, you could try grouping the values of all keys that start with "output" into a list: ["test", "test2", "something"].
from json import dumps

d = {
    "Question Communicating": "Natural language",
    "interpretation_type": "recognition",
    "output1": "test",
    "Question Learning": "Reinforcement",
    "output2": "test2",
    "output3": "something"
}

result = {}
for k, v in d.items():
    if k.startswith("output"):
        result.setdefault("output", []).append(v)
    else:
        result[k] = v

print(dumps(result, indent=4))
Output JSON:
{
    "Question Communicating": "Natural language",
    "interpretation_type": "recognition",
    "output": [
        "test",
        "test2",
        "something"
    ],
    "Question Learning": "Reinforcement"
}

One possibility is to use a data structure that allows duplicate keys, such as webob.multidict.MultiDict.
import webob.multidict
import json

class MultiDictEncoder(json.JSONEncoder):

    def default(self, o):
        if isinstance(o, webob.multidict.MultiDict):
            return o
        else:
            return super().default(o)

    def encode(self, o):
        if isinstance(o, webob.multidict.MultiDict):
            # Just a proof of concept. No attempt is made
            # to properly encode keys or values.
            return ('{'
                    + ', '.join(f'"{k}": "{v}"' for k, v in o.items())
                    + '}')
        else:
            return super().encode(o)

with open("tmp1.json") as f:
    input_data = json.load(f)

output_data = webob.multidict.MultiDict()
for k, v in input_data.items():
    if k.startswith("output"):
        k = 'output'
    output_data.add(k, v)

with open("tmp2.json", 'w') as f:
    print(json.dumps(output_data, cls=MultiDictEncoder), file=f)
For some reason, using json.dump while testing this produced an error involving circular references; I don't know whether that is a problem with how I defined MultiDictEncoder.default. Either way, the resulting tmp2.json does have duplicate output keys.
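If pulling in webob just for this feels heavy, another option is to keep the renamed pairs in a plain list of tuples and assemble the JSON text yourself. The sketch below is only a proof of concept under that assumption; the encode_pairs helper is made up for illustration, not a standard-library function.
import json

def encode_pairs(pairs):
    # Build a JSON object string from (key, value) pairs,
    # allowing the same key to appear more than once.
    return "{" + ", ".join(
        f"{json.dumps(k)}: {json.dumps(v)}" for k, v in pairs
    ) + "}"

with open("tmp1.json") as f:
    input_data = json.load(f)

# Rename every output* key to plain "output" while keeping order.
pairs = [("output" if k.startswith("output") else k, v)
         for k, v in input_data.items()]

with open("tmp2.json", "w") as f:
    f.write(encode_pairs(pairs))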

Related

Convert nested JSON list API data into CSV using Python

I want to convert sample JSON data into a CSV file using Python. I am retrieving the JSON data from an API.
Since my JSON has nested objects, it cannot be converted directly to CSV. I don't want to do any hard-coding, and I want the Python code to be fully dynamic.
So I have written a function that flattens my JSON data, but I cannot work out how to iterate over all records, find the relevant column names, and then write the data to a CSV file.
In the sample JSON file I have included only 2 records, but in reality there are 100.
The sample JSON looks like this:
[
    {
        "id": "Random_Company_57",
        "unid": "75",
        "fieldsToValues": {
            "Email": "None",
            "occupation": "SO1 Change",
            "manager": "None",
            "First Name": "Bells",
            "employeeID": "21011.0",
            "loginRequired": "true",
            "superUser": "false",
            "ldapSuperUser": "false",
            "archived": "true",
            "password": "None",
            "externalUser": "false",
            "Username": "Random_Company_57",
            "affiliation": "",
            "Phone": "+16 22 22 222",
            "unidDominoKey": "",
            "externalUserActive": "false",
            "secondaryOccupation": "SO1 Change",
            "retypePassword": "None",
            "Last Name": "Christmas"
        },
        "hierarchyFieldAccess": [],
        "userHierarchies": [
            {
                "hierarchyField": "Company",
                "value": "ABC Company"
            },
            {
                "hierarchyField": "Department",
                "value": "gfds"
            },
            {
                "hierarchyField": "Project",
                "value": "JKL-SDFGHJW"
            },
            {
                "hierarchyField": "Division",
                "value": "Silver RC"
            },
            {
                "hierarchyField": "Site",
                "value": "SQ06"
            }
        ],
        "locale": {
            "id": 1,
            "dateFormat": "dd/MM/yyyy",
            "languageTag": "en-UA"
        },
        "roles": [
            "User"
        ],
        "readAccessRoles": [],
        "preferredLanguage": "en-AU",
        "prefName": "Christmas Bells",
        "startDate": "None",
        "firstName": "Bells",
        "lastName": "Christmas",
        "fullName": "Christmas Bells",
        "lastModified": "2022-02-22T03:47:41.632Z",
        "email": "None",
        "docNo": "None",
        "virtualSuperUser": false
    },
    {
        "id": "xyz.abc#safe.net",
        "unid": "98",
        "fieldsToValues": {
            "Email": "xyz.abc#safe.net",
            "occupation": "SO1 Change",
            "manager": "None",
            "First Name": "Bells",
            "employeeID": "21011.0",
            "loginRequired": "false",
            "superUser": "false",
            "ldapSuperUser": "false",
            "archived": "false",
            "password": "None",
            "externalUser": "false",
            "Username": "xyz.abc#safe.net",
            "affiliation": "",
            "Phone": "+16 2222 222 222",
            "unidDominoKey": "",
            "externalUserActive": "false",
            "secondaryOccupation": "SO1 Change",
            "retypePassword": "None",
            "Last Name": "Christmas"
        },
        "hierarchyFieldAccess": [],
        "userHierarchies": [
            {
                "hierarchyField": "Company",
                "value": "ABC Company"
            },
            {
                "hierarchyField": "Department",
                "value": "PUHJ"
            },
            {
                "hierarchyField": "Project",
                "value": "RPOJ-SDFGHJW"
            },
            {
                "hierarchyField": "Division",
                "value": "Silver RC"
            },
            {
                "hierarchyField": "Site",
                "value": "SQ06"
            }
        ],
        "locale": {
            "id": 1,
            "dateFormat": "dd/MM/yyyy",
            "languageTag": "en-UA"
        },
        "roles": [
            "User"
        ],
        "readAccessRoles": [],
        "preferredLanguage": "en-AU",
        "prefName": "Christmas Bells",
        "startDate": "None",
        "firstName": "Bells",
        "lastName": "Christmas",
        "fullName": "Christmas Bells",
        "lastModified": "2022-03-16T05:04:13.085Z",
        "email": "xyz.abc#safe.net",
        "docNo": "None",
        "virtualSuperUser": false
    }
]
What I have tried:
def flattenjson(b, delim):
    val = {}
    for i in b.keys():
        if isinstance(b[i], dict):
            get = flattenjson(b[i], delim)
            for j in get.keys():
                val[i + delim + j] = get[j]
        else:
            val[i] = b[i]
    print(val)
    return val

json = [{Sample JSON string mentioned above}]
flattenjson(json, "__")
I don't know whether this is the right way to deal with this problem or not.
My final aim is to write all of the above JSON data to a CSV file.
Based on this answer, you could loop through your list of JSON records and flatten each one with the given function (do they always have the same structure?), then build a DataFrame and write it to CSV. That's the easiest way I can think of;
try this:
import pandas as pd
import json
from collections.abc import MutableMapping

def flatten(dictionary, parent_key=False, separator='__'):
    items = []
    for key, value in dictionary.items():
        new_key = str(parent_key) + separator + key if parent_key else key
        if isinstance(value, MutableMapping):
            items.extend(flatten(value, new_key, separator).items())
        elif isinstance(value, list):
            for k, v in enumerate(value):
                items.extend(flatten({str(k): v}, new_key).items())
        else:
            items.append((new_key, value))
    return dict(items)

with open('your_json.json') as f:
    data = json.load(f)  # data is the example you provided (a list of dicts)

all_records = []
for jsn in data:
    tmp = flatten(jsn)
    all_records.append(tmp)

df = pd.DataFrame(all_records)
df.to_csv('json_to_csv.csv')
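If pandas is already in play, pandas.json_normalize may be a shortcut worth trying: it flattens nested dicts in one call, although list-valued fields such as userHierarchies stay as raw lists unless you also use its record_path/meta arguments. A rough sketch, assuming the same your_json.json file as above:
import json
import pandas as pd

with open('your_json.json') as f:
    data = json.load(f)

# Nested dicts become flattened columns like fieldsToValues__Email;
# list-valued fields are kept as-is in a single column.
df = pd.json_normalize(data, sep='__')
df.to_csv('json_to_csv.csv', index=False)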

"TypeError: list indices must be integers or slices, not str" when trying to change keys

I want to remove some problematic $oid keys, and everything else that contains $, in a JSON file. I wrote:
import json

with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
    data = [json.loads(line) for line in handle]

for k, v in data[0].items():
    # check if key has dict value
    if type(v) == dict:
        # find id with $
        r = list(data[k].keys())[0]
        # change value if $ occurs
        if r[0] == '$':
            data[k] = data[k][r]

print(data)
But I get TypeError: list indices must be integers or slices, not str. I know it is because the JSON dictionaries are made readable for Python, but how do I fix it?
Edit: the .json file on my computer looks like this:
{
    "_id": {
        "$oid": "5e7511c45cb29ef48b8cfcff"
    },
    "description": "some text",
    "startDate": {
        "$date": "5e7511c45cb29ef48b8cfcff"
    },
    "completionDate": {
        "$date": "2021-01-05T14:59:58.046Z"
    }
}
I believe this is because your k is a str and you try to index data[k]?
It would be better if you showed the format of the JSON as well.
Updating with an answer.
This should work for the given JSON. But if you want to do this for a larger file, looping can be tricky, especially because you're trying to modify the keys of a dictionary.
import json

line = '{"_id": { "$oid": "5e7511c45cb29ef48b8cfcff" }, "description": "some text", "startDate": { "$date": "5e7511c45cb29ef48b8cfcff"},"completionDate": {"$date": "2021-01-05T14:59:58.046Z"}}'
data = [json.loads(line)]

for k, v in data[0].items():
    if type(v) == dict:
        for k2, v2 in data[0][k].items():
            if k2[0] == '$':
                formatted = k2[1:]
                del data[0][k][k2]
                data[0][k][formatted] = v2

print(data)
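If you would rather avoid deleting and re-adding keys while iterating over the dictionary, one alternative is to rebuild each nested dict with a comprehension. A minimal sketch under the same single-level-of-nesting assumption as the example (with a shortened input line for brevity):
import json

line = '{"_id": {"$oid": "5e7511c45cb29ef48b8cfcff"}, "description": "some text"}'
data = [json.loads(line)]

for d in data:
    for k, v in d.items():
        if isinstance(v, dict):
            # Rebuild the nested dict with the leading '$' stripped from each key,
            # instead of deleting and re-inserting keys mid-iteration.
            d[k] = {key.lstrip('$'): val for key, val in v.items()}

print(data)
# [{'_id': {'oid': '5e7511c45cb29ef48b8cfcff'}, 'description': 'some text'}]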
# import json
# with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
#     data = [json.loads(line) for line in handle]

data = [
    {
        "_id": {
            "$oid": "5e7511c45cb29ef48b8cfcff"
        },
        "description": "some text",
        "startDate": {
            "$date": "5e7511c45cb29ef48b8cfcff"
        },
        "completionDate": {
            "$date": "2021-01-05T14:59:58.046Z"
        }
    }
]

for d in data:
    for k, v in d.items():
        # check if key has dict value
        del_keys = set()
        if type(v) == dict:
            # find id with $
            del_keys.update([i for i in v if i.startswith("$")])
        [v.pop(key) for key in del_keys]

print(data)
# [{'_id': {}, 'description': 'some text', 'startDate': {}, 'completionDate': {}}]
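If the goal is instead to collapse wrappers like {"$oid": ...} down to their bare value, which is what the original data[k] = data[k][r] attempt seems to be aiming for, a recursive variant along these lines might be closer. This is only a sketch, reusing the data list defined in the snippet above:
def collapse_dollar_keys(obj):
    # Replace single-key wrappers like {"$oid": "..."} with the bare value
    # and recurse into everything else.
    if isinstance(obj, dict):
        if len(obj) == 1:
            (key, value), = obj.items()
            if key.startswith('$'):
                return collapse_dollar_keys(value)
        return {k: collapse_dollar_keys(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [collapse_dollar_keys(v) for v in obj]
    return obj

cleaned = [collapse_dollar_keys(d) for d in data]
print(cleaned)
# [{'_id': '5e7511c45cb29ef48b8cfcff', 'description': 'some text',
#   'startDate': '5e7511c45cb29ef48b8cfcff', 'completionDate': '2021-01-05T14:59:58.046Z'}]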

Read dictionary from a file and print the keys

Basically I am trying to write Python code that reads a dictionary from a file. It should print the keys of the dictionary shown below.
{
    "Name": "namename",
    "Surname": "klsajdak",
    "Mhtrwo": "lsdkaslkd",
    "Phone": ["545454545454", "4554545454545"],
    "Age": 84,
    "kids": {
        "Name": "Zero",
        "Age": 0
    }
}
my_dict = open("9listes.txt", "r")
for key, value in my_dict.items():
    print("Key : {}".format(key))
You can achieve this by using json.load():
import json

with open('9listes.txt') as f:
    my_dict = json.load(f)  # `my_dict` is the `dict` you need

# To print "key" & "value", uncomment below lines:
# for key, value in my_dict.items():
#     print("Key: {}, Value: {}".format(key, value))
Refer to the json.load() documentation for more details.
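Since the example file also has a nested "kids" object, a small recursive helper could print those inner keys too. A sketch, assuming the same 9listes.txt file; the print_keys helper is just an illustration:
import json

def print_keys(d, prefix=""):
    # Print every key, descending into nested dictionaries.
    for key, value in d.items():
        print("Key : {}{}".format(prefix, key))
        if isinstance(value, dict):
            print_keys(value, prefix + key + ".")

with open('9listes.txt') as f:
    print_keys(json.load(f))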

Python: How to set a value from JSON as the index in a list?

I'm trying to read a dataset and use the integer value in the JSON file as the index into the list. This is the example JSON file:
[
    {
        "index_id": "1234",
        "text": "hello world"
    },
    {
        "index_id": "5678",
        "text": "roses are red"
    }
]
Right now, I have just tried reading the JSON file and putting everything into a defaultdict(list), which messes things up. Assume I read everything into L1.
If I try to get L1[1234] this gives an error, as 1234 is not a valid index in L1; the indexes are 0 and 1.
If L1 is printed:
[{u'1234': u'hello world'}, {u'5678': u'roses are red'}]
I understand that the list stores my potential index value as a value, and in Unicode (which makes it worse).
So how do I turn L1 into something, or write a method, so that looking up L1[1234] pulls up 'hello world', like this:
[{1234: u'hello world'}, {5678: u'roses are red'}]
Thank you
Edited: Changed the JSON.
Assuming you have a list of dicts, you could do something like this:
json_lst = [
    {"1234": "hello world"},
    {"5678": "roses are red"}
]

result = {int(k): v for element in json_lst for k, v in element.items()}
print(result[1234])
Output
hello world
The above dictionary comprehension is equivalent to the following nested loops:
result = {}
for element in json_lst:
    for k, v in element.items():
        result[int(k)] = v
Or try merging list of dictionaries:
>>> [i['1234'] for i in L1 if '1234' in i][0]
'hello world'
>>>
Whole thing:
>>> L1 = [{
...     "1234": "hello world"
... }, {
...     "5678": "roses are red"
... }]
>>> [i['1234'] for i in L1 if '1234' in i][0]
'hello world'
>>>
I think you can read this in as a Python dictionary, where 1234 and 5678 are the "keys" and the respective strings are the values.
For example:
{
    1234: 'hello world',
    5678: 'roses are red'
}
You can index into it as you have mentioned, L1[1234], and you will get 'hello world'.
You can read a bit about dictionaries here.
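Assuming the input really is the edited JSON from the question (a list of objects with index_id and text fields), one way to build such an int-keyed dictionary directly is sketched below; the raw string just stands in for the loaded file:
import json

raw = '''[
    {"index_id": "1234", "text": "hello world"},
    {"index_id": "5678", "text": "roses are red"}
]'''

data = json.loads(raw)

# Key each record's text by its integer index_id.
L1 = {int(item["index_id"]): item["text"] for item in data}
print(L1[1234])  # hello world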
Change your JSON like this:
L1 = {
    "1234": "hello world",
    "5678": "roses are red"
}
# call it with the key quoted, i.e. as a string
print(L1["1234"])
Or create a function:
jsonList = [
    {"1234": "hello world"},
    {"5678": "roses are red"}
]

def L1(key):
    key = str(key)
    for i in jsonList:
        if key in i:
            return i[key]

print(L1(5678))
In case you are reading from a JSON file: when the JSON is loaded, the type of the data is a dictionary, and you can directly read the keys of the loaded data.
Still, if you want to build a defaultdict out of it, please refer to the code below.
My sample.json file
{
    "1234": {
        "id": "blabla",
        "iscategorical": "0"
    },
    "5678": {
        "id": "valore"
    },
    "8975": "value",
    "6985": {
        "id": "valore"
    }
}
Code in separate python file:
import json
import io
from collections import defaultdict

with io.open('sample.json') as data_file:
    data_loaded = json.load(data_file)

print(data_loaded)
print(type(data_loaded))

l1 = defaultdict(list)
for key in data_loaded:
    l1[key] = data_loaded[key]

print(l1)
print(l1['1234'])

load duplicate keys from nested json file as dictionary of list

I have a JSON file in this format:
{
    "details": {
        "hawk_branch": {
            "tandem": {
                "value": "4210bnd72"
            }
        },
        "uclif_branch": {
            "tandem": {
                "value": "e2nc712nma89",
                "value": "23s24212",
                "value": "12338cm82"
            }
        }
    }
}
The problem is, I need to keep all the values; however, when I use json.load to load this file I only get one value, which makes sense since a dict can keep only unique keys.
Here is the expected output:
{ "hawk_branch": ["4210bnd72"] }
{ "uclif_branch": ["e2nc712nma89" , "23s24212", "12338cm82"] }
I have read this answer, Python json parser allow duplicate keys, which suggests using object_pairs_hook like this:
def parse_object_pairs(pairs):
    return pairs

# f is file
json.load(f, object_pairs_hook=parse_object_pairs)
but it returns the entire JSON file as a list.
I think it's possible to do this using a lambda as the object_pairs_hook, but I can't understand how to use it.
Can someone please guide me?
You can use a custom duplicate key resolver function that turns the values of the value keys into a list:
def value_resolver(pairs):
    if all(k == 'value' for k, _ in pairs):
        return [v for _, v in pairs]
    return dict(pairs)
so that:
json.load(f, object_pairs_hook=value_resolver)
returns:
{'details': {'hawk_branch': {'tandem': ['4210bnd72']}, 'uclif_branch': {'tandem': ['e2nc712nma89', '23s24212', '12338cm82']}}}
And to dump the new data structure back to the original JSON format by converting lists to dicts with duplicate value keys, you can use a custom json.JSONEncoder subclass:
class restore_value(json.JSONEncoder):
    def encode(self, o):
        if isinstance(o, dict):
            return '{%s}' % ', '.join(
                ': '.join((json.encoder.py_encode_basestring(k), self.encode(v)))
                for k, v in o.items()
            )
        if isinstance(o, list):
            return '{%s}' % ', '.join('"value": %s' % self.encode(v) for v in o)
        return super().encode(o)
so that:
d = {'details': {'hawk_branch': {'tandem': ['4210bnd72']}, 'uclif_branch': {'tandem': ['e2nc712nma89', '23s24212', '12338cm82']}}}
print(json.dumps(d, cls=restore_value))
would output:
{"details": {"hawk_branch": {"tandem": {"value": "4210bnd72"}}, "uclif_branch": {"tandem": {"value": "e2nc712nma89", "value": "23s24212", "value": "12338cm82"}}}}
