List of dicts to multilevel dict based on depth info

List of dicts to multilevel dict based on depth info - python

I have some data, more or less like this:
[
{"tag": "A", "level":0},
{"tag": "B", "level":1},
{"tag": "D", "level":2},
{"tag": "F", "level":3},
{"tag": "G", "level":4},
{"tag": "E", "level":2},
{"tag": "H", "level":3},
{"tag": "I", "level":3},
{"tag": "C", "level":1},
{"tag": "J", "level":2},
]
I want to turn it into a multilevel dict based on depth level (key "level"):
{
"A": {"level": 0, "children": {
"B": {"level": 1, "children": {
"D": {"level": 2, "children": {
"F": {"level": 3, "children": {
"G": {"level": 4, "children": {}}}}}},
"E": {"level": 2, "children": {
"H": {"level": 3, "children": {}},
"I": {"level": 3, "children": {}}}}}},
"C": {"level": 1, "children": {
"J": {"level": 2, "children": {}}}}}}
}
All I can come up with right now is this little piece of code... which obviously breaks after few items:
def list2multilevel(list):
children = {}
parent = list.pop(0)
tag = parent.get("Tag")
level = parent.get("Level")
for child in list:
ctag = child.get("Tag")
clevel = child.get("Level")
if clevel == level + 1:
children.update(list2multilevel(list))
elif clevel <= level:
print(clevel, level)
break
return {tag: children}
Originally sat down to it on Friday and it was supposed to be just a small exercise....

data = [
{"tag": "A", "level": 0},
{"tag": "B", "level": 1},
{"tag": "D", "level": 2},
{"tag": "F", "level": 3},
{"tag": "G", "level": 4},
{"tag": "E", "level": 2},
{"tag": "H", "level": 3},
{"tag": "I", "level": 3},
{"tag": "C", "level": 1},
{"tag": "J", "level": 2},
]
root = {'level': -1, 'children': {}}
parents = {-1: root}
for datum in data:
level = datum['level']
parents[level] = parents[level - 1]['children'][datum['tag']] = {
'level': datum['level'],
'children': {},
}
result = root['children']
print(result)
output:
{'A': {'level': 0, 'children': {'B': {'level': 1, 'children': {'D': {'level': 2, 'children': {'F': {'level': 3, 'children': {'G': {'level': 4, 'children': {}}}}}}, 'E': {'level': 2, 'children': {'H': {'level': 3, 'children': {}}, 'I': {'level': 3, 'children': {}}}}}}, 'C': {'level': 1, 'children': {'J': {'level': 2, 'children': {}}}}}}}
restriction:
level >= 0
Any level cannot be bigger than +1 of max level appeared before.
explanation:
parents is a dictionary to remember last element for each level.
root is a starting point(dummy element).
logic:
Start with -1 level which indicates the root.
Make an item and register it into parent's children.
Update same item to parents dictionary.
Repeat.
Extract root['children'].

Other solution using recursion (same restrictions as with Boseong Choi's answer):
data = [
{"tag": "A", "level": 0},
{"tag": "B", "level": 1},
{"tag": "D", "level": 2},
{"tag": "F", "level": 3},
{"tag": "G", "level": 4},
{"tag": "E", "level": 2},
{"tag": "H", "level": 3},
{"tag": "I", "level": 3},
{"tag": "C", "level": 1},
{"tag": "J", "level": 2},
]
def make_node(dic):
node = dic.copy()
node["children"] = {}
tag = node.pop("tag")
return tag, node
def add_child(parent, child, tag):
assert child["level"] > parent["level"]
if child["level"] == parent["level"] + 1:
parent["children"][tag] = child
return True
for node in parent["children"].values():
if add_child(node, child, tag):
return True
return False
def parse(lst):
assert lst[0]["level"] == 0
root_tag, root = make_node(lst[0])
for item in lst[1:]:
tag, node = make_node(item)
add_child(root, node, tag)
print(parse(data))

You can use recursion:
from itertools import groupby as gb
data = [{'tag': 'A', 'level': 0}, {'tag': 'B', 'level': 1}, {'tag': 'D', 'level': 2}, {'tag': 'F', 'level': 3}, {'tag': 'G', 'level': 4}, {'tag': 'E', 'level': 2}, {'tag': 'H', 'level': 3}, {'tag': 'I', 'level': 3}, {'tag': 'C', 'level': 1}, {'tag': 'J', 'level': 2}]
def to_tree(d, s = 0):
v = [list(b) for _, b in gb(d, key=lambda x:x['level'] == s)]
if len(v) == 1:
return {i['tag']:{'level':s, 'children':{}} for i in v[0]}
return {v[i][0]['tag']:{'level':s, 'children':to_tree(v[i+1], s+1)} for i in range(0, len(v), 2)}
import json
print(json.dumps(to_tree(data), indent=4))
Output:
{
"A": {
"level": 0,
"children": {
"B": {
"level": 1,
"children": {
"D": {
"level": 2,
"children": {
"F": {
"level": 3,
"children": {
"G": {
"level": 4,
"children": {}
}
}
}
}
},
"E": {
"level": 2,
"children": {
"H": {
"level": 3,
"children": {}
},
"I": {
"level": 3,
"children": {}
}
}
}
}
},
"C": {
"level": 1,
"children": {
"J": {
"level": 2,
"children": {}
}
}
}
}
}
}

Related

How to split a list of dicts by key

I have the following list of dicts:
list_of_dicts = [
{"type": "X", "hour": 22},
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
{"type": "X", "hour": 23},
{"type": "A", "measure": "3"},
{"type": "X", "hour": 24},
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"}
]
How can I split it into a dict where keys are the 'hour' values from 'type' = 'X' dicts and values are the other dicts between two 'type' = 'X' dicts? That's what I want to obtain using this example, but the interval between two 'type' = 'X' dicts can be variable.
dict_of_dicts = {
22: [
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
],
23:[
{"type": "A", "measure": "3"}
],
24:[
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"},
]
}
Thanks in advance!

This code should do the trick.
It creates new elements in a result dictionary, every time it comes across a dictionary which contains the "hour" key.
res = {}
cur_key = None
for d in list_of_dicts:
if "hour" in d:
cur_key = d["hour"]
res[cur_key] = []
elif cur_key is not None:
res[cur_key].append(d)

Here is another approach using list comprehension and a neat little trick (borrowed from here) to make a while loop work inside it:
list_of_dicts = [
{"type": "X", "hour": 22},
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
{"type": "X", "hour": 23},
{"type": "A", "measure": "3"},
{"type": "X", "hour": 24},
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"}
]
def while_generator(lst):
i = 0
while i < len(lst) and lst[i]['type'] != 'X':
yield lst[i]
i += 1
dict_of_dicts = {
d['hour']: [e for e in while_generator(list_of_dicts[i+1:])]
for i, d in enumerate(list_of_dicts) if d['type'] == 'X'
}
print(dict_of_dicts)
Prints:
{
22: [
{'type': 'A', 'measure': '1'},
{'type': 'B', 'measure': '2'}
],
23: [
{'type': 'A', 'measure': '3'}
],
24: [
{'type': 'A', 'measure': '4'},
{'type': 'B', 'measure': '5'},
{'type': 'C', 'measure': '6'}
]
}

Keep track of the current 'type X' list to add other dictionaries to it.
list_of_dicts = [
{"type": "X", "hour": 22},
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
{"type": "X", "hour": 23},
{"type": "A", "measure": "3"},
{"type": "X", "hour": 24},
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"}
]
dict_of_dicts = dict()
for d in list_of_dicts:
if 'hour' in d: dict_of_dicts[d['hour']] = subList = []
else: subList.append(d)
print(dict_of_dicts)
{ 22: [{'type': 'A', 'measure': '1'},
{'type': 'B', 'measure': '2'}],
23: [{'type': 'A', 'measure': '3'}],
24: [{'type': 'A', 'measure': '4'},
{'type': 'B', 'measure': '5'},
{'type': 'C', 'measure': '6'}]}
It could do it in a comprehension like this, but it's a bit convoluted:
dict_of_dicts = { d['hour']:sl.append([]) or sl[-1]
for sl in [[]] for d in list_of_dicts
if 'hour' in d or sl[-1].append(d) }

Python recursive aggregation

I am working with a nested data structure which needs to be flattened. The values need to be aggregated so totals are produced across each level of the nested data. I'm trying to do this recursively but it's not clear how best to achieve this?
The following is an example of the data I'm working with.
def get_result():
return {
"a1": {
"b1": {
"c1": {
"d1": 1,
"d2": 1,
},
"c2": {
"d3": 1,
}
},
"b2": {
"c3": {
"d4": 1
}
}
},
"a2": {}
}
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
}
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]

You can use recursion
from collections import defaultdict
def agg(data):
result = defaultdict(int)
agg_sum = 0
for k, v in data.items():
if isinstance(v, dict):
d, sub = agg(v)
if sub:
result.update(d)
result[k] += sub
agg_sum += sub
else:
result[k] += v
agg_sum += v
return result, agg_sum

You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
return d if not isinstance(d, dict) else sum(map(get_aggr, d.values()))
def aggr_keys(d):
for a, b in d.items():
yield {'key':a, 'total':get_aggr(b)}
yield from (() if not isinstance(b, dict) else aggr_keys(b))
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]

Edit nested dictionary duplicate values in same keys

I have a list of dicts with the same structure
[{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
What I want is duplicate keys of "Program Name" that have the same value (ex: "Bulldozer" appearing 2x) to be renamed as "Bulldozer (1)", "Bulldozer (2)" and so on.

An Efficient way is to use defaultdict to count the "Program Name", the time complexity
is O(n):
from collections import defaultdict
l = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4}, {"Program Name": "Bulldozer", "Level": 4}, {"Program Name": "Robot", "Level": 1}]
tmp = defaultdict(int)
for i in l:
i["Program Name"] = f'{i["Program Name"]} ({tmp[i["Program Name"]]})' if tmp[i["Program Name"]] else i["Program Name"]
tmp[i["Program Name"].split()[0]] += 1
print(l)
Result:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}, {'Program Name': 'Bulldozer (2)', 'Level': 4}, {'Program Name': 'Robot (1)', 'Level': 1}]

Hope this helps:
input = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
def update_input(input):
existing_program_names = {}
for i, d in enumerate(input):
current_list_program_name = d['Program Name']
try:
existing_program_names[current_list_program_name] += 1
except KeyError:
# Program name not in storage yet add it
existing_program_names.update({current_list_program_name: 0})
if existing_program_names[current_list_program_name] > 0 :
ID = existing_program_names[current_list_program_name]
input[i]['Program Name'] = current_list_program_name + ' ({ID})'.format(ID=ID)
else:
pass
return input
output = update_input(input)
yields:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}]

You can try this too:
data = [
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3}
]
Approach: 01
import pandas as pd
c = pd.DataFrame(data)
c['group_code'] = c.groupby(['Program Name']).cumcount() + 1
c['Program Name'] = ["{0} ({1})".format(x, y) for (x, y) in c[[
'Program Name', 'group_code']].values]
output = c[['Program Name', 'Level']].to_dict(orient='records')
print(output)
Approach: 02
temp = {}
for item in data:
temp.update(
{
item['Program Name']: temp[item['Program Name']] + 1 if temp.get(item['Program Name']) else 1
}
)
item['Program Name'] = item['Program Name'] + ' (' + str(temp[item['Program Name']]) + ')'
print(data)
output:
[
{"Program Name": "Bulldozer (1)", "Level": 3},
{"Program Name": "Robot (1)", "Level": 1},
{"Program Name": "Bulldozer (2)", "Level": 4},
{"Program Name": "Rozer (1)", "Level": 3},
{"Program Name": "Robot (2)", "Level": 1},
{"Program Name": "Rozer (2)", "Level": 3},
{"Program Name": "Bulldozer (3)", "Level": 3},
{"Program Name": "Robot (3)", "Level": 1},
{"Program Name": "Bulldozer (4)", "Level": 4},
{"Program Name": "Rozer (3)", "Level": 3},
{"Program Name": "Robot (4)", "Level": 1},
{"Program Name": "Rozer (4)", "Level": 3}
]
I would recommend you to use pandas(approach 01) if you have huge amount of data.

Thanks to #jizhihaoSAMA I have managed to find a solution to my problem with a small edit
machines = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
tmp = defaultdict(int)
for i in machines:
name = i["Program Name"].strip(f' ({tmp[i["Program Name"]]})')
i["Program Name"] = f'{name} ({tmp[i["Program Name"]]})' if tmp[name] else i["Program Name"]
tmp[name] += 1
This disables problems with spaces in the program name or any complexed names.

Convert from path list to Flare json format?

I have data in python that looks like this:
[['a', 'b', 'c', 50],
['a', 'b', 'd', 100],
['a', 'b', 'e', 67],
['a', 'g', 'c', 12],
['q', 'k', 'c', 11],
['q', 'b', 'p', 11]]
where each element of the list is a complete hierarchical path, and the last element is the size of the path. To do a visualization in D3, I need the data to be in the flare data format - seen here:
https://github.com/d3/d3-hierarchy/blob/master/test/data/flare.json
So a short piece would look like this
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{"name": "c", "value": 50},
{"name": "d", "value": 100},
{"name": "e", "value": 67},
]
},
{
"name": "g",
"children": [
{"name": "c", "value": 12},
]
},
and so forth...
From what I've been looking up, I think the solution is recursive, and would use the json library on a Python dictionary, but I can't seem to get it to work. Any help is greatly appreciated.

Here's a solution using recursion:
def add_to_flare(n, flare):
children = flare["children"]
if len(n) == 2:
children.append({"name": n[0], "value": n[1]})
else:
for c in children:
if c["name"] == n[0]:
add_to_flare(n[1:], c)
return
children.append({"name": n[0], "children": []})
add_to_flare(n[1:], children[-1])
flare = {"name": "root", "children": []}
for i in data:
add_to_flare(i, flare)
To display it nicely, we can use the json library:
import json
print(json.dumps(flare, indent=1))
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{
"name": "c",
"value": 50
},
{
"name": "d",
"value": 100
},
{
"name": "e",
"value": 67
}
]
},
{
"name": "g",
"children": [
{
"name": "c",
"value": 12
}
]
}
]
},
{
"name": "q",
"children": [
{
"name": "k",
"children": [
{
"name": "c",
"value": 11
}
]
},
{
"name": "b",
"children": [
{
"name": "p",
"value": 11
}
]
}
]
}
]
}

Try this:
master = []
for each in your_list:
head = master
for i in range(len(each)):
names = [e['name'] for e in head]
if i == len(each) - 2:
head.append({'name': each[i], 'value': each[i+1]})
break
if each[i] in names:
head = head[names.index(each[i])]['children']
else:
head.append({'name': each[i], 'children': []})
head = head[-1]['children']
results:
[{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}], 'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}], 'name': 'k'},
{'children': [{'name': 'p', 'value': 11}], 'name': 'b'}],
'name': 'q'}]
Please note that name and children are flipped in this dictionary since it's unordered. But the resulting structure is the same.
put it in root to get your target:
my_dict = {'name':'root', 'children': master}

Assuming your list of lists is stored in variable l, you can do:
o = []
for s in l:
c = o
for i, n in enumerate(['root'] + s[:-1]):
for d in c:
if n == d['name']:
break
else:
c.append({'name': n})
d = c[-1]
if i < len(s) - 1:
if 'children' not in d:
d['children'] = []
c = d['children']
else:
d['value'] = s[-1]
so that o[0] becomes:
{'children': [{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}],
'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}],
'name': 'k'},
{'children': [{'name': 'p', 'value': 11}],
'name': 'b'}],
'name': 'q'}],
'name': 'root'}

n-depth tree: set parent value based on children values

In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What could be the approach to recursively set each parent value based on the result of an operation perfomed with its children value (i.e. sum)?

I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the depth iteration order is guaranteed, so once we are getting to a node wich has children, all its sub-level children are already calculated.
The solution:
def reverseTraversal(obj):
def parentOperation(node):
out = 0
for child in node['children']:
out = out + child['value']
return out
if obj is None:
return
queue = []
stack = []
queue.append(obj)
while len(queue) > 0:
temp = queue.pop(0)
stack.append(temp)
if 'children' in temp and len(temp['children']) > 0:
for child in temp['children']:
queue.append(child)
while len(stack)>0:
node = stack.pop()
if 'children' in node and len(node['children']) > 0:
node['value'] = parentOperation(node)
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}

Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
return {a:[update(i, targets) for i in b] if isinstance(b, list) else update(b, targets) if isinstance(b, dict) else next(targets) if not b else b for a, b in d.items()}
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

List of dicts to multilevel dict based on depth info - python

Related

How to split a list of dicts by key

Python recursive aggregation

Edit nested dictionary duplicate values in same keys

Convert from path list to Flare json format?

n-depth tree: set parent value based on children values

Categories

Resources