n-depth tree: set parent value based on children values - python

In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What could be the approach to recursively set each parent value based on the result of an operation perfomed with its children value (i.e. sum)?

I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the depth iteration order is guaranteed, so once we are getting to a node wich has children, all its sub-level children are already calculated.
The solution:
def reverseTraversal(obj):
def parentOperation(node):
out = 0
for child in node['children']:
out = out + child['value']
return out
if obj is None:
return
queue = []
stack = []
queue.append(obj)
while len(queue) > 0:
temp = queue.pop(0)
stack.append(temp)
if 'children' in temp and len(temp['children']) > 0:
for child in temp['children']:
queue.append(child)
while len(stack)>0:
node = stack.pop()
if 'children' in node and len(node['children']) > 0:
node['value'] = parentOperation(node)
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}

Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
return {a:[update(i, targets) for i in b] if isinstance(b, list) else update(b, targets) if isinstance(b, dict) else next(targets) if not b else b for a, b in d.items()}
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

Related

Adding new pairs to a json file

I have a json file I need to add pairs to, I convert it into a dict, but now I need to put my new values in a specific place.
This is some of the json file I convert:
"rootObject": {
"id": "6ff0010c-00fe-485b-b695-4ddd6aca4dcd",
"type": "IDO_GEAR",
"children": [
{
"id": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab",
"type": "IDO_SYSTEM_LOADCASE",
"children": [],
"childList": "SYSTEMLOADCASE",
"properties": [
{
"name": "IDCO_IDENTIFICATION",
"value": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab"
},
{
"name": "IDCO_DESIGNATION",
"value": "Lastfall 1"
},
{
"name": "IDSLC_TIME_PORTION",
"value": 100
},
{
"name": "IDSLC_DISTANCE_PORTION",
"value": 100
},
{
"name": "IDSLC_OPERATING_TIME_IN_HOURS",
"value": 1
},
{
"name": "IDSLC_OPERATING_TIME_IN_SECONDS",
"value": 3600
},
{
"name": "IDSLC_OPERATING_REVOLUTIONS",
"value": 1
},
{
"name": "IDSLC_OPERATING_DISTANCE",
"value": 1
},
{
"name": "IDSLC_ACCELERATION",
"value": 9.81
},
{
"name": "IDSLC_EPSILON_X",
"value": 0
},
{
"name": "IDSLC_EPSILON_Y",
"value": 0
},
{
"name": "IDSLC_EPSILON_Z",
"value": 0
},
{
"name": "IDSLC_CALCULATION_WITH_OWN_WEIGHT",
"value": "CO_CALCULATION_WITHOUT_OWN_WEIGHT"
},
{
"name": "IDSLC_CALCULATION_WITH_TEMPERATURE",
"value": "CO_CALCULATION_WITH_TEMPERATURE"
},
{
"name": "IDSLC_FLAG_FOR_LOADCASE_CALCULATION",
"value": "LB_CALCULATE_LOADCASE"
},
{
"name": "IDSLC_STATUS_OF_LOADCASE_CALCULATION",
"value": false
}
I want to add somthing like ENTRY_ONE and ENTRY_TWO like this:
"rootObject": {
"id": "6ff0010c-00fe-485b-b695-4ddd6aca4dcd",
"type": "IDO_GEAR",
"children": [
{
"id": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab",
"type": "IDO_SYSTEM_LOADCASE",
"children": [],
"childList": "SYSTEMLOADCASE",
"properties": [
{
"name": "IDCO_IDENTIFICATION",
"value": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab"
},
{
"name": "IDCO_DESIGNATION",
"value": "Lastfall 1"
},
{
"name": "IDSLC_TIME_PORTION",
"value": 100
},
{
"name": "IDSLC_DISTANCE_PORTION",
"value": 100
},
{
"name": "ENTRY_ONE",
"value": 100
},
{
"name": "ENTRY_TWO",
"value": 55
},
{
"name": "IDSLC_OPERATING_TIME_IN_HOURS",
"value": 1
},
{
"name": "IDSLC_OPERATING_TIME_IN_SECONDS",
"value": 3600
},
{
"name": "IDSLC_OPERATING_REVOLUTIONS",
"value": 1
},
{
"name": "IDSLC_OPERATING_DISTANCE",
"value": 1
},
{
"name": "IDSLC_ACCELERATION",
"value": 9.81
},
{
"name": "IDSLC_EPSILON_X",
"value": 0
},
{
"name": "IDSLC_EPSILON_Y",
"value": 0
},
{
"name": "IDSLC_EPSILON_Z",
"value": 0
},
{
"name": "IDSLC_CALCULATION_WITH_OWN_WEIGHT",
"value": "CO_CALCULATION_WITHOUT_OWN_WEIGHT"
},
{
"name": "IDSLC_CALCULATION_WITH_TEMPERATURE",
"value": "CO_CALCULATION_WITH_TEMPERATURE"
},
{
"name": "IDSLC_FLAG_FOR_LOADCASE_CALCULATION",
"value": "LB_CALCULATE_LOADCASE"
},
{
"name": "IDSLC_STATUS_OF_LOADCASE_CALCULATION",
"value": false
}
How can I add the entries so that they are under the IDCO_IDENTIFICATION tag, and not under the rootObject?
The way I see your json file, it WOULD be under rootObject as EVERYTHING is under that key. There's quite a few closing brackets and braces missing.
So I can only assume you are meaning you want it directly under IDCO_IDENTIFICATION (which is nested under rootObject). But that doesn't match what you have as your example output either. You have the new ENTRY_ONE and ENTRY_TWO within the properties, within the children, within the rootObject, not "under" IDCO_IDENTIFICATION. So I'm going to follow what you are asking for from your example output.
import json
with open('C:/test.json') as f:
data = json.load(f)
new_elements = [{"name":"ENTRY_ONE", "value":100},
{"name":"ENTRY_TWO", "value":55}]
for each in new_elements:
data['rootObject']['children'][0]['properties'].append(each)
with open('C:/test.json', 'w') as f:
json.dump(data, f)
Output:
import pprint
pprint.pprint(data)
{'rootObject': {'children': [{'childList': 'SYSTEMLOADCASE',
'children': [],
'id': '1dd94d1a-e52d-40b3-a82b-6db02a8fbbab',
'properties': [{'name': 'IDCO_IDENTIFICATION',
'value': '1dd94d1a-e52d-40b3-a82b-6db02a8fbbab'},
{'name': 'IDCO_DESIGNATION',
'value': 'Lastfall 1'},
{'name': 'IDSLC_TIME_PORTION',
'value': 100},
{'name': 'IDSLC_DISTANCE_PORTION',
'value': 100},
{'name': 'IDSLC_OPERATING_TIME_IN_HOURS',
'value': 1},
{'name': 'IDSLC_OPERATING_TIME_IN_SECONDS',
'value': 3600},
{'name': 'IDSLC_OPERATING_REVOLUTIONS',
'value': 1},
{'name': 'IDSLC_OPERATING_DISTANCE',
'value': 1},
{'name': 'IDSLC_ACCELERATION',
'value': 9.81},
{'name': 'IDSLC_EPSILON_X',
'value': 0},
{'name': 'IDSLC_EPSILON_Y',
'value': 0},
{'name': 'IDSLC_EPSILON_Z',
'value': 0},
{'name': 'IDSLC_CALCULATION_WITH_OWN_WEIGHT',
'value': 'CO_CALCULATION_WITHOUT_OWN_WEIGHT'},
{'name': 'IDSLC_CALCULATION_WITH_TEMPERATURE',
'value': 'CO_CALCULATION_WITH_TEMPERATURE'},
{'name': 'IDSLC_FLAG_FOR_LOADCASE_CALCULATION',
'value': 'LB_CALCULATE_LOADCASE'},
{'name': 'IDSLC_STATUS_OF_LOADCASE_CALCULATION',
'value': False},
{'name': 'ENTRY_ONE',
'value': 100},
{'name': 'ENTRY_TWO',
'value': 55}],
'type': 'IDO_SYSTEM_LOADCASE'}],
'id': '6ff0010c-00fe-485b-b695-4ddd6aca4dcd',
'type': 'IDO_GEAR'}}

Convert from path list to Flare json format?

I have data in python that looks like this:
[['a', 'b', 'c', 50],
['a', 'b', 'd', 100],
['a', 'b', 'e', 67],
['a', 'g', 'c', 12],
['q', 'k', 'c', 11],
['q', 'b', 'p', 11]]
where each element of the list is a complete hierarchical path, and the last element is the size of the path. To do a visualization in D3, I need the data to be in the flare data format - seen here:
https://github.com/d3/d3-hierarchy/blob/master/test/data/flare.json
So a short piece would look like this
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{"name": "c", "value": 50},
{"name": "d", "value": 100},
{"name": "e", "value": 67},
]
},
{
"name": "g",
"children": [
{"name": "c", "value": 12},
]
},
and so forth...
From what I've been looking up, I think the solution is recursive, and would use the json library on a Python dictionary, but I can't seem to get it to work. Any help is greatly appreciated.
Here's a solution using recursion:
def add_to_flare(n, flare):
children = flare["children"]
if len(n) == 2:
children.append({"name": n[0], "value": n[1]})
else:
for c in children:
if c["name"] == n[0]:
add_to_flare(n[1:], c)
return
children.append({"name": n[0], "children": []})
add_to_flare(n[1:], children[-1])
flare = {"name": "root", "children": []}
for i in data:
add_to_flare(i, flare)
To display it nicely, we can use the json library:
import json
print(json.dumps(flare, indent=1))
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{
"name": "c",
"value": 50
},
{
"name": "d",
"value": 100
},
{
"name": "e",
"value": 67
}
]
},
{
"name": "g",
"children": [
{
"name": "c",
"value": 12
}
]
}
]
},
{
"name": "q",
"children": [
{
"name": "k",
"children": [
{
"name": "c",
"value": 11
}
]
},
{
"name": "b",
"children": [
{
"name": "p",
"value": 11
}
]
}
]
}
]
}
Try this:
master = []
for each in your_list:
head = master
for i in range(len(each)):
names = [e['name'] for e in head]
if i == len(each) - 2:
head.append({'name': each[i], 'value': each[i+1]})
break
if each[i] in names:
head = head[names.index(each[i])]['children']
else:
head.append({'name': each[i], 'children': []})
head = head[-1]['children']
results:
[{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}], 'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}], 'name': 'k'},
{'children': [{'name': 'p', 'value': 11}], 'name': 'b'}],
'name': 'q'}]
Please note that name and children are flipped in this dictionary since it's unordered. But the resulting structure is the same.
put it in root to get your target:
my_dict = {'name':'root', 'children': master}
Assuming your list of lists is stored in variable l, you can do:
o = []
for s in l:
c = o
for i, n in enumerate(['root'] + s[:-1]):
for d in c:
if n == d['name']:
break
else:
c.append({'name': n})
d = c[-1]
if i < len(s) - 1:
if 'children' not in d:
d['children'] = []
c = d['children']
else:
d['value'] = s[-1]
so that o[0] becomes:
{'children': [{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}],
'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}],
'name': 'k'},
{'children': [{'name': 'p', 'value': 11}],
'name': 'b'}],
'name': 'q'}],
'name': 'root'}

How do I isolate dicts from a list if the id is not found in a second list of dicts (in python)?

I have two lists of dicts
list1 =
[
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 =
[
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
id: "57a",
}
]
I want to be able to return a list of dicts from list2, if the same id is not found in list1
For example, it should return
[
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I tried a few suggestions here on stack overflow but haven't been able to get it right.
Use a list-comprehension that iterates through list2 checking the id with ids in list1:
list1 = [
{'name': "Maria",
'id': "16a",
},
{'name': "Tania",
'id': "13b",
},
{'name': "Steve",
'id': "5a",
}
]
list2 = [
{'name': "Eric",
'id': "16a",
},
{'name': "Mike",
'id': "7b",
},
{'name': "Steve",
'id': "57a",
}
]
list1_ids = [y['id'] for y in list1]
result = [x for x in list2 if x['id'] not in list1_ids]
# [{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
This should do:
[d2 for d2 in list2 if d2['id'] not in [d1['id'] for d1 in list1]]
Output:
[{'id': '7b', 'name': 'Mike'}, {'id': '57a', 'name': 'Steve'}]
You can also do it using filter function:
list1 = [
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 = [
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
IDs = set(value["id"] for value in list1)
output = list(filter(lambda elem: elem["id"] not in IDs, list2))
print(output)
Output:
[{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]

creating df to generate json in the given format

I am trying to generate a df to produce this below json.
Json data:
{
"name": "flare",
"children": [
{
"name": "K1",
"children": [
{"name": "Exact", "size": 4},
{"name": "synonyms", "size": 14}
]
},
{
"name": "K2",
"children": [
{"name": "Exact", "size": 10},
{"name": "synonyms", "size": 20}
]
},
{
"name": "K3",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 5}
]
},
{
"name": "K4",
"children": [
{"name": "Exact", "size": 13},
{"name": "synonyms", "size": 15}
]
},
{
"name": "K5",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 0}
]
}
]
}
input data:
name Exact synonyms
K1 4 14
K2 10 20
K3 0 5
K4 13 15
K5 0 0
I tried creating df with values in the json but I was not able to get the desired json on df.to_json, please help.
You need reshape data by set_index + stack and then use groupby with apply for nested list of dict:
import json
df = (df.set_index('name')
.stack()
.reset_index(level=1)
.rename(columns={'level_1':'name', 0:'size'})
.groupby(level=0).apply(lambda x: x.to_dict(orient='records'))
.reset_index(name='children')
)
print (df)
name children
0 K1 [{'name': 'Exact', 'size': 4}, {'name': 'synon...
1 K2 [{'name': 'Exact', 'size': 10}, {'name': 'syno...
2 K3 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
3 K4 [{'name': 'Exact', 'size': 13}, {'name': 'syno...
4 K5 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
#convert output to dict
j = { "name": "flare", "children": df.to_dict(orient='records')}
#for nice output - easier check
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(j)
{ 'children': [ { 'children': [ {'name': 'Exact', 'size': 4},
{'name': 'synonyms', 'size': 14}],
'name': 'K1'},
{ 'children': [ {'name': 'Exact', 'size': 10},
{'name': 'synonyms', 'size': 20}],
'name': 'K2'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 5}],
'name': 'K3'},
{ 'children': [ {'name': 'Exact', 'size': 13},
{'name': 'synonyms', 'size': 15}],
'name': 'K4'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 0}],
'name': 'K5'}],
'name': 'flare'}
#convert data to json and write to file
with open('data.json', 'w') as outfile:
json.dump(j, outfile)

Python - Adding fields and labels to nested json file

I have a dataframe as follows:
Name_ID | URL | Count | Rating
------------------------------------------------
ABC | www.example.com/ABC | 10 | 5
123 | www.example.com/123 | 9 | 4
XYZ | www.example.com/XYZ | 5 | 2
ABC111 | www.example.com/ABC111 | 5 | 2
ABC121 | www.example.com/ABC121 | 5 | 2
222 | www.example.com/222 | 5 | 3
abc222 | www.example.com/abc222 | 4 | 2
ABCaaa | www.example.com/ABCaaa | 4 | 2
I am trying to create a JSON as follows:
{
"name": "sampledata",
"children": [
{
"name": 9,
"children": [
{
"name": 4,
"children": [
{
"name": "123",
"size": 100
}
]
}
]
},
{
"name": 10,
"children": [
{
"name": 5,
"children": [
{
"name": "ABC",
"size": 100
}
]
}
]
},
{
"name": 4,
"children": [
{
"name": 2,
"children": [
{
"name": "abc222",
"size": 50
},
{
"name": "ABCaaa",
"size": 50
}
]
}
]
},
{
"name": 5,
"children": [
{
"name": 2,
"children": [
{
"name": "ABC",
"size": 16
},
{
"name": "ABC111",
"size": 16
},
{
"name": "ABC121",
"size": 16
}
]
},
{
"name": 3,
"children": [
{
"name": "222",
"size": 50
}
]
}
]
}
]
}
In order to do that:
I am trying to add labels such as "name" and "children" to the json while creating it.
I tried something like
results = [{"name": i, "children": j} for i,j in results.items()]
But it won't label it properly I believe.
Also, add another field with the label `"size"which I am planning to calculate based on the formula:
(Rating*Count*10000)/number_of_children_to_the_immediate_parent
Here is my dirty code:
import pandas as pd
from collections import defaultdict
import json
data =[('ABC', 'www.example.com/ABC', 10 , 5), ('123', 'www.example.com/123', 9, 4), ('XYZ', 'www.example.com/XYZ', 5, 2), ('ABC111', 'www.example.com/ABC111', 5, 2), ('ABC121', 'www.example.com/ABC121', 5, 2), ('222', 'www.example.com/222', 5, 3), ('abc222', 'www.example.com/abc222', 4, 2), ('ABCaaa', 'www.example.com/ABCaaa', 4, 2)]
df = pd.DataFrame(data, columns=['Name', 'URL', 'Count', 'Rating'])
gp = df.groupby(['Count'])
dict_json = {"name": "flare"}
children = []
for name, group in gp:
temp = {}
temp["name"] = name
temp["children"] = []
rgp = group.groupby(['Rating'])
for n, g in rgp:
temp2 = {}
temp2["name"] = n
temp2["children"] = g.reset_index().T.to_dict().values()
for t in temp2["children"]:
t["size"] = (t["Rating"] * t["Count"] * 10000) / len(temp2["children"])
t["name"] = t["Name"]
del t["Count"]
del t["Rating"]
del t["URL"]
del t["Name"]
del t["index"]
temp["children"].append(temp2)
children.append(temp)
dict_json["children"] = children
print json.dumps(dict_json, indent=4)
Though the above code does print what I need, I am looking for more efficient and cleaner way to do the same, mainly because the actual dataset might be even more nested and complicated. Any help/suggestion will be much appreciated.
Quite an interesting problem and a great question!
You can improve your approach by reorganizing the code inside the loops and using list comprehensions. No need to delete things and introduce temp variables inside loops:
dict_json = {"name": "flare"}
children = []
for name, group in gp:
temp = {"name": name, "children": []}
rgp = group.groupby(['Rating'])
for n, g in rgp:
temp["children"].append({
"name": n,
"children": [
{"name": row["Name"],
"size": row["Rating"] * row["Count"] * 10000 / len(g)}
for _, row in g.iterrows()
]
})
children.append(temp)
dict_json["children"] = children
Or, a "wrapped" version:
dict_json = {
"name": "flare",
"children": [
{
"name": name,
"children": [
{
"name": n,
"children": [
{
"name": row["Name"],
"size": row["Rating"] * row["Count"] * 10000 / len(g)
} for _, row in g.iterrows()
]
} for n, g in group.groupby(['Rating'])
]
} for name, group in gp
]
}
I'm getting the following dictionary printed for you sample input dataframe:
{
"name": "flare",
"children": [
{
"name": 4,
"children": [
{
"name": 2,
"children": [
{
"name": "abc222",
"size": 40000
},
{
"name": "ABCaaa",
"size": 40000
}
]
}
]
},
{
"name": 5,
"children": [
{
"name": 2,
"children": [
{
"name": "XYZ",
"size": 33333
},
{
"name": "ABC111",
"size": 33333
},
{
"name": "ABC121",
"size": 33333
}
]
},
{
"name": 3,
"children": [
{
"name": "222",
"size": 150000
}
]
}
]
},
{
"name": 9,
"children": [
{
"name": 4,
"children": [
{
"name": "123",
"size": 360000
}
]
}
]
},
{
"name": 10,
"children": [
{
"name": 5,
"children": [
{
"name": "ABC",
"size": 500000
}
]
}
]
}
]
}
If I understand correctly what you wan't to do is put a groupby into a nested json, if that is the case then you could use pandas groupby and cast it into a nested list of lists as so:
lol = pd.DataFrame(df.groupby(['Count','Rating'])\
.apply(lambda x: list(x['Name_ID']))).reset_index().values.tolist()
lol should look something like this:
[['10', '5', ['ABC']],
['4', '2', ['abc222', 'ABCaaa']],
['5', '2', ['XYZ ', 'ABC111', 'ABC121']],
['5', '3', ['222 ']],
['9', '4', ['123 ']]]
after that you could loop over lol to put it into a dict, but since you want to set nested items you'l have to use autovivification (check it out):
class autovividict(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
d = autovividict()
for l in lol:
d[l[0]][l[1]] = l[2]
now you can use the json pack for printing and exporting:
print json.dumps(d,indent=2)
In case you need more than one groupby, you could concat your groups with pandas, cast to lol, remove any nans, and then loop, let me know if a full example can help.
setup
from io import StringIO
import pandas as pd
txt = """Name_ID,URL,Count,Rating
ABC,www.example.com/ABC,10,5
123,www.example.com/123,9,4
XYZ,www.example.com/XYZ,5,2
ABC111,www.example.com/ABC111,5,2
ABC121,www.example.com/ABC121,5,2
222,www.example.com/222,5,3
abc222,www.example.com/abc222,4,2
ABCaaa,www.example.com/ABCaaa,4,2"""
df = pd.read_csv(StringIO(txt))
size
pre-calculate it
df['size'] = df.Count.mul(df.Rating) \
.mul(10000) \
.div(df.groupby(
['Count', 'Rating']).Name_ID.transform('count')
).astype(int)
solution
create recursive function
def h(d):
if isinstance(d, pd.Series): d = d.to_frame().T
rec_cond = d.index.nlevels > 1 or d.index.nunique() > 1
return {'name': str(d.index[0]), 'size': str(d['size'].iloc[0])} if not rec_cond else \
[dict(name=str(n), children=h(g.xs(n))) for n, g in d.groupby(level=0)]
demo
import json
my_dict = dict(name='flare', children=h(df.set_index(['Count', 'Rating', 'Name_ID'])))
json.dumps(my_dict)
'{"name": "flare", "children": [{"name": "4", "children": [{"name": "2", "children": [{"name": "ABCaaa", "children": {"name": "ABCaaa", "size": "40000"}}, {"name": "abc222", "children": {"name": "abc222", "size": "40000"}}]}]}, {"name": "5", "children": [{"name": "2", "children": [{"name": "ABC111", "children": {"name": "ABC111", "size": "33333"}}, {"name": "ABC121", "children": {"name": "ABC121", "size": "33333"}}, {"name": "XYZ", "children": {"name": "XYZ", "size": "33333"}}]}, {"name": "3", "children": {"name": "222", "size": "150000"}}]}, {"name": "9", "children": [{"name": "4", "children": {"name": "123", "size": "360000"}}]}, {"name": "10", "children": [{"name": "5", "children": {"name": "ABC", "size": "500000"}}]}]}'
my_dict
{'children': [{'children': [{'children': [{'children': {'name': 'ABCaaa',
'size': '40000'},
'name': 'ABCaaa'},
{'children': {'name': 'abc222', 'size': '40000'}, 'name': 'abc222'}],
'name': '2'}],
'name': '4'},
{'children': [{'children': [{'children': {'name': 'ABC111', 'size': '33333'},
'name': 'ABC111'},
{'children': {'name': 'ABC121', 'size': '33333'}, 'name': 'ABC121'},
{'children': {'name': 'XYZ', 'size': '33333'}, 'name': 'XYZ'}],
'name': '2'},
{'children': {'name': '222', 'size': '150000'}, 'name': '3'}],
'name': '5'},
{'children': [{'children': {'name': '123', 'size': '360000'}, 'name': '4'}],
'name': '9'},
{'children': [{'children': {'name': 'ABC', 'size': '500000'}, 'name': '5'}],
'name': '10'}],
'name': 'flare'}

Categories