creating df to generate json in the given format - python

I am trying to build a DataFrame that produces the JSON below.
JSON data:
{
    "name": "flare",
    "children": [
        {
            "name": "K1",
            "children": [
                {"name": "Exact", "size": 4},
                {"name": "synonyms", "size": 14}
            ]
        },
        {
            "name": "K2",
            "children": [
                {"name": "Exact", "size": 10},
                {"name": "synonyms", "size": 20}
            ]
        },
        {
            "name": "K3",
            "children": [
                {"name": "Exact", "size": 0},
                {"name": "synonyms", "size": 5}
            ]
        },
        {
            "name": "K4",
            "children": [
                {"name": "Exact", "size": 13},
                {"name": "synonyms", "size": 15}
            ]
        },
        {
            "name": "K5",
            "children": [
                {"name": "Exact", "size": 0},
                {"name": "synonyms", "size": 0}
            ]
        }
    ]
}
input data:
name Exact synonyms
K1 4 14
K2 10 20
K3 0 5
K4 13 15
K5 0 0
I tried creating a DataFrame with the values from the JSON, but I could not get the desired JSON out of df.to_json. Please help.

You need to reshape the data with set_index + stack and then use groupby with apply to build the nested lists of dicts:
import json

df = (df.set_index('name')
        .stack()
        .reset_index(level=1)
        .rename(columns={'level_1':'name', 0:'size'})
        .groupby(level=0).apply(lambda x: x.to_dict(orient='records'))
        .reset_index(name='children')
      )
print (df)
name children
0 K1 [{'name': 'Exact', 'size': 4}, {'name': 'synon...
1 K2 [{'name': 'Exact', 'size': 10}, {'name': 'syno...
2 K3 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
3 K4 [{'name': 'Exact', 'size': 13}, {'name': 'syno...
4 K5 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
#convert output to dict
j = { "name": "flare", "children": df.to_dict(orient='records')}
#for nice output - easier check
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(j)
{ 'children': [ { 'children': [ {'name': 'Exact', 'size': 4},
{'name': 'synonyms', 'size': 14}],
'name': 'K1'},
{ 'children': [ {'name': 'Exact', 'size': 10},
{'name': 'synonyms', 'size': 20}],
'name': 'K2'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 5}],
'name': 'K3'},
{ 'children': [ {'name': 'Exact', 'size': 13},
{'name': 'synonyms', 'size': 15}],
'name': 'K4'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 0}],
'name': 'K5'}],
'name': 'flare'}
#convert data to json and write to file
with open('data.json', 'w') as outfile:
json.dump(j, outfile)
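If you prefer to skip the reshape, the same structure can also be built straight from the wide DataFrame with a plain loop. A minimal sketch, assuming the name/Exact/synonyms columns shown in the question:

import json
import pandas as pd

# the wide input table from the question
df = pd.DataFrame({'name': ['K1', 'K2', 'K3', 'K4', 'K5'],
                   'Exact': [4, 10, 0, 13, 0],
                   'synonyms': [14, 20, 5, 15, 0]})

children = []
for _, row in df.iterrows():
    children.append({
        'name': row['name'],
        'children': [{'name': 'Exact', 'size': int(row['Exact'])},
                     {'name': 'synonyms', 'size': int(row['synonyms'])}]
    })

j = {'name': 'flare', 'children': children}
with open('data.json', 'w') as outfile:
    json.dump(j, outfile, indent=2)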

Related

Python: Change a JSON value

Let's say I have the following JSON file named output.
{'fields': [{'name': 2, 'type': 'Int32'},
            {'name': 12, 'type': 'string'},
            {'name': 9, 'type': 'datetimeoffset'}],
 'type': 'struct'}
If the type key has the value datetimeoffset, I would like to change it to dateTime, and if it has the value Int32, I would like to change it to integer, and so on; I have multiple values to replace.
The expected output is
{'fields': [{'name': 2, 'type': 'integer'},
            {'name': 12, 'type': 'string'},
            {'name': 9, 'type': 'dateTime'}],
 'type': 'struct'}
Can anyone help with this in Python?
You can try this out:
substitute = {"Int32": "integer", "datetimeoffset": "dateTime"}
x = {'fields': [
        {'name': 2, 'type': 'Int32'},
        {'name': 12, 'type': 'string'},
        {'name': 9, 'type': 'datetimeoffset'}
    ], 'type': 'struct'}

for i in range(len(x['fields'])):
    if x['fields'][i]["type"] in substitute:
        x['fields'][i]['type'] = substitute[x['fields'][i]['type']]
print(x)
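The same substitution reads a bit more naturally if you loop over the field dicts directly instead of indexing; a minimal variation on the snippet above, with the same mapping and data:

substitute = {"Int32": "integer", "datetimeoffset": "dateTime"}
x = {'fields': [{'name': 2, 'type': 'Int32'},
                {'name': 12, 'type': 'string'},
                {'name': 9, 'type': 'datetimeoffset'}],
     'type': 'struct'}

for field in x['fields']:
    # look the current type up in the mapping; keep it unchanged if it is not listed
    field['type'] = substitute.get(field['type'], field['type'])

print(x)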
You can use the following code. Include in the equivalences dict the values you want to replace:
import json

data = {
    'fields': [
        {'name': 2, 'type': 'Int32'},
        {'name': 12, 'type': 'string'},
        {'name': 9, 'type': 'datetimeoffset'},
    ],
    'type': 'struct'
}
equivalences = {"datetimeoffset": "dateTime", "Int32": "integer"}

# Replace values based on the equivalences dict
for i, item in enumerate(data["fields"]):
    if item["type"] in equivalences:
        data["fields"][i]["type"] = equivalences[item["type"]]

print(json.dumps(data, indent=2))
The output is:
{
"fields": [
{
"name": 2,
"type": "integer"
},
{
"name": 12,
"type": "string"
},
{
"name": 9,
"type": "dateTime"
}
],
"type": "struct"
}
A simple but ugly way: serialize to a string, run str.replace, and parse it back. Note that this rewrites the substrings anywhere in the document, including keys and unrelated values:
import json

json_ = {'fields': [{'name': 2, 'type': 'Int32'},
                    {'name': 12, 'type': 'string'},
                    {'name': 9, 'type': 'datetimeoffset'}], 'type': 'struct'}

result = json.loads(json.dumps(json_).replace("datetimeoffset", "dateTime").replace("Int32", "integer"))

How to extract group count from dictionary?

I need to get the count of items in each group that has the same 'id' and 'name'.
Input:
myd = {
"Items": [
{
"id": 1,
"name": "ABC",
"value": 666
},
{
"id": 1,
"name": "ABC",
"value": 89
},
{
"id": 2,
"name": "DEF",
"value": 111
},
{
"id": 3,
"name": "GHI",
"value": 111
}
]
}
Expected output:
The count of {'id':1, 'name': 'ABC' } is 2
The count of {'id':2, 'name': 'DEF' } is 1
The count of {'id':3, 'name': 'GHI' } is 1
I can get the total number of items with len(myd['Items']), but how do I get the count for each combination of id and name?
You can use a collections.OrderedDict keyed by the ('id', 'name') tuple. That way the dictionaries with the same 'id' and 'name' are grouped together, in order of first appearance:
myd = {'Items': [
{'id':1, 'name': 'ABC', 'value': 666},
{'id':1, 'name': 'ABC', 'value': 89},
{'id':2, 'name': 'DEF', 'value': 111 },
{'id':3, 'name': 'GHI', 'value': 111 }]
}
from collections import OrderedDict

od = OrderedDict()
for d in myd['Items']:
    # collect one entry per item so len() gives the size of each (id, name) group
    od.setdefault((d['id'], d['name']), []).append(d['value'])

for ks, v in od.items():
    print("The count of {{'id': {}, 'name': {}}} is {}".format(ks[0], ks[1], len(v)))
Output:
The count of {'id': 1, 'name': ABC} is 2
The count of {'id': 2, 'name': DEF} is 1
The count of {'id': 3, 'name': GHI} is 1
This is a good candidate for groupby and itemgetter usage:
from itertools import groupby
from operator import itemgetter
myd = {'Items': [
{'id': 1, 'name': 'ABC', 'value': 666},
{'id': 1, 'name': 'ABC', 'value': 89},
{'id': 2, 'name': 'DEF', 'value': 111},
{'id': 3, 'name': 'GHI', 'value': 111}]
}
grouper = itemgetter('id', 'name')
for i, v in groupby(sorted(myd['Items'], key=grouper), key=grouper):
    print(f"the count for {dict(id=i[0], name=i[1])} is {len(list(v))}")
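If you only need the counts and not the grouped items themselves, collections.Counter over the (id, name) pairs is a compact alternative (a minimal sketch reusing the same myd as above):

from collections import Counter

counts = Counter((d['id'], d['name']) for d in myd['Items'])
for (id_, name), count in counts.items():
    print(f"The count of {{'id': {id_}, 'name': {name!r}}} is {count}")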

How to get the count for a particular key in the dictionary

The content of my dictionary is below.
I need to know, for BusinessArea, how many records each distinct name value appears in, and the same for Designation.
test=
[ { 'masterid': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Accounting', 'parentname': 'Finance'}, { 'id': '3', 'name': 'Research', 'parentname': 'R & D' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }] },
{ 'masterid': '2', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research', 'parentname': '' }, { 'id': '3', 'name': 'Accounting', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]},
{ 'masterid': '3', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Engineering' }, { 'id': '3', 'name': 'Engineering', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer', 'parentname': '' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]}]
I want to count, for every name under BusinessArea and Designation, how many masterid records it appears in.
The expected output is below:
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "L2",
"count": "3"
}
]
}
]
Try this (note that it counts every occurrence, so a name repeated within one record is counted more than once, which is why the output below differs slightly from the expected output):
res = [{'name': 'BusinessArea', 'values': []}, {'name': 'Designation', 'values': []}]

listbus = sum([i['BusinessArea'] for i in test], [])
listdes = sum([i['Designation'] for i in test], [])

res[0]['values'] = [{'name': i, 'count': 0} for i in set(k['name'] for k in listbus)]
res[1]['values'] = [{'name': i, 'count': 0} for i in set(k['name'] for k in listdes)]

for i in listbus:
    for k in range(len(res[0]['values'])):
        if i['name'] == res[0]['values'][k]['name']:
            res[0]['values'][k]['count'] += 1

for i in listdes:
    for k in range(len(res[1]['values'])):
        if i['name'] == res[1]['values'][k]['name']:
            res[1]['values'][k]['count'] += 1
>>> print(res)
[{'name': 'BusinessArea', 'values': [{'name': 'Accounting', 'count': 2}, {'name': 'Research', 'count': 2}, {'name': 'Engineering', 'count': 2}]}, {'name': 'Designation', 'values': [{'name': 'L1', 'count': 3}, {'name': 'L2', 'count': 6}]}]
You could count unique names using a nested collections.defaultdict:
from collections import defaultdict
from json import dumps
keys = ["BusinessArea", "Designation"]
group_counts = defaultdict(lambda: defaultdict(int))
for group in test:
    for key in keys:
        names = [item["name"] for item in group[key]]
        unique_names = list(dict.fromkeys(names))
        for name in unique_names:
            group_counts[key][name] += 1
print(dumps(group_counts, indent=2))
Which will give you these counts:
{
"BusinessArea": {
"Accounting": 2,
"Research": 2,
"Engineering": 1
},
"Designation": {
"L1": 3,
"L2": 3
}
}
Then you could modify the result to get the list of dicts you expect:
result = [
    {
        "name": name,
        "values": [{"name": value, "count": count} for value, count in counts.items()],
    }
    for name, counts in group_counts.items()
]
print(dumps(result, indent=2))
Which gives you this:
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": 2
},
{
"name": "Research",
"count": 2
},
{
"name": "Engineering",
"count": 1
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": 3
},
{
"name": "L2",
"count": 3
}
]
}
]
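One small difference from the expected output in the question: there the counts appear as strings. If that form is required, wrapping count with str() in the comprehension above produces it (a minor, assumed tweak):

result = [
    {
        "name": name,
        "values": [{"name": value, "count": str(count)} for value, count in counts.items()],
    }
    for name, counts in group_counts.items()
]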

How do I isolate dicts from a list if the id is not found in a second list of dicts (in python)?

I have two lists of dicts
list1 =
[
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 =
[
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I want to return the dicts from list2 whose id is not found in list1.
For example, it should return
[
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I tried a few suggestions here on stack overflow but haven't been able to get it right.
Use a list comprehension that iterates through list2, checking each id against the ids collected from list1:
list1 = [
{'name': "Maria",
'id': "16a",
},
{'name': "Tania",
'id': "13b",
},
{'name': "Steve",
'id': "5a",
}
]
list2 = [
{'name': "Eric",
'id': "16a",
},
{'name': "Mike",
'id': "7b",
},
{'name': "Steve",
'id': "57a",
}
]
list1_ids = [y['id'] for y in list1]
result = [x for x in list2 if x['id'] not in list1_ids]
# [{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
This should do, though note that it rebuilds the list of list1 ids for every element of list2; for larger lists, hoist that out (or use a set) as in the other answers:
[d2 for d2 in list2 if d2['id'] not in [d1['id'] for d1 in list1]]
Output:
[{'id': '7b', 'name': 'Mike'}, {'id': '57a', 'name': 'Steve'}]
You can also do it using the filter function:
list1 = [
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 = [
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
IDs = set(value["id"] for value in list1)
output = list(filter(lambda elem: elem["id"] not in IDs, list2))
print(output)
Output:
[{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
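For larger lists, a set of ids plus a list comprehension combines the previous approaches and keeps each membership test cheap (a minimal sketch using the same list1 and list2 as above):

ids_in_list1 = {d['id'] for d in list1}
result = [d for d in list2 if d['id'] not in ids_in_list1]
print(result)  # [{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]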

n-depth tree: set parent value based on children values

In an n-depth dict where values are set only at the deepest level of the hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What could be the approach to recursively set each parent's value based on the result of an operation performed on its children's values (e.g. sum)?
I finally managed to do it using the iterative level-order traversal pattern (BFS); I was missing just a couple of details.
This approach works because the traversal order is guaranteed: popping the stack visits nodes from the deepest level upwards, so by the time we reach a node with children, all of its descendants have already been calculated.
The solution:
def reverseTraversal(obj):
    def parentOperation(node):
        # combine the children's values (here: a sum)
        out = 0
        for child in node['children']:
            out = out + child['value']
        return out

    if obj is None:
        return None

    # level-order (BFS) pass, collecting every node on a stack
    queue = []
    stack = []
    queue.append(obj)
    while len(queue) > 0:
        temp = queue.pop(0)
        stack.append(temp)
        if 'children' in temp and len(temp['children']) > 0:
            for child in temp['children']:
                queue.append(child)

    # pop the stack so children are processed before their parents
    while len(stack) > 0:
        node = stack.pop()
        if 'children' in node and len(node['children']) > 0:
            node['value'] = parentOperation(node)

    return obj

# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
Given your data structure and a list of the values to fill in (in the order the None values are visited), you can use next in a recursion:
def update(d, targets):
    return {
        a: ([update(i, targets) for i in b] if isinstance(b, list)
            else update(b, targets) if isinstance(b, dict)
            else next(targets) if not b
            else b)
        for a, b in d.items()
    }
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}
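A plain recursive post-order traversal also answers the original question directly and avoids both the queue/stack and the precomputed targets list. A minimal sketch, where tree stands for the nested dict from the question and the parent operation is assumed to be a sum:

def fill_values(node):
    # compute the children first, then derive the parent's value from them
    children = node.get('children')
    if children:
        node['value'] = sum(fill_values(child) for child in children)
    return node['value']

fill_values(tree)  # 'tree' is the nested dict shown in the question
print(tree)        # every parent 'value' is now the sum of its children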
