How to split a list of dicts by key - python

I have the following list of dicts:
list_of_dicts = [
{"type": "X", "hour": 22},
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
{"type": "X", "hour": 23},
{"type": "A", "measure": "3"},
{"type": "X", "hour": 24},
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"}
]
How can I split it into a dict where keys are the 'hour' values from 'type' = 'X' dicts and values are the other dicts between two 'type' = 'X' dicts? That's what I want to obtain using this example, but the interval between two 'type' = 'X' dicts can be variable.
dict_of_dicts = {
22: [
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
],
23:[
{"type": "A", "measure": "3"}
],
24:[
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"},
]
}
Thanks in advance!

This code should do the trick.
It creates new elements in a result dictionary, every time it comes across a dictionary which contains the "hour" key.
# Group every dict under the "hour" of the most recent marker dict.
res = {}
cur_key = None
for d in list_of_dicts:
    if "hour" in d:
        # A dict carrying "hour" opens a new, initially empty, group.
        cur_key = d["hour"]
        res[cur_key] = []
    elif cur_key is not None:
        # Dicts seen before the first marker have no group and are skipped.
        res[cur_key].append(d)

Here is another approach using list comprehension and a neat little trick (borrowed from here) to make a while loop work inside it:
list_of_dicts = [
{"type": "X", "hour": 22},
{"type": "A", "measure": "1"},
{"type": "B", "measure": "2"},
{"type": "X", "hour": 23},
{"type": "A", "measure": "3"},
{"type": "X", "hour": 24},
{"type": "A", "measure": "4"},
{"type": "B", "measure": "5"},
{"type": "C", "measure": "6"}
]
def while_generator(lst):
    """Yield the leading elements of *lst* until a 'type' == 'X' dict is met.

    The 'X' element itself is not yielded. Iterating directly (instead of
    indexing) means any iterable of dicts is accepted, not only sequences.
    """
    for element in lst:
        if element['type'] == 'X':
            break
        yield element
# Map each marker's hour to the run of dicts that directly follows it.
dict_of_dicts = {
    marker['hour']: list(while_generator(list_of_dicts[pos + 1:]))
    for pos, marker in enumerate(list_of_dicts)
    if marker['type'] == 'X'
}
print(dict_of_dicts)
Prints:
{
22: [
{'type': 'A', 'measure': '1'},
{'type': 'B', 'measure': '2'}
],
23: [
{'type': 'A', 'measure': '3'}
],
24: [
{'type': 'A', 'measure': '4'},
{'type': 'B', 'measure': '5'},
{'type': 'C', 'measure': '6'}
]
}

Keep track of the current 'type X' list to add other dictionaries to it.
list_of_dicts = [
    {"type": "X", "hour": 22},
    {"type": "A", "measure": "1"},
    {"type": "B", "measure": "2"},
    {"type": "X", "hour": 23},
    {"type": "A", "measure": "3"},
    {"type": "X", "hour": 24},
    {"type": "A", "measure": "4"},
    {"type": "B", "measure": "5"},
    {"type": "C", "measure": "6"},
]

dict_of_dicts = dict()
# subList always refers to the list of the most recent 'hour' dict.
# It starts as None so that dicts appearing before the first marker are
# skipped instead of raising NameError.
subList = None
for d in list_of_dicts:
    if 'hour' in d:
        dict_of_dicts[d['hour']] = subList = []
    elif subList is not None:
        subList.append(d)
print(dict_of_dicts)
{ 22: [{'type': 'A', 'measure': '1'},
{'type': 'B', 'measure': '2'}],
23: [{'type': 'A', 'measure': '3'}],
24: [{'type': 'A', 'measure': '4'},
{'type': 'B', 'measure': '5'},
{'type': 'C', 'measure': '6'}]}
You could also do it in a comprehension like this, but it's a bit convoluted:
# `for sl in [[]]` binds sl once to a fresh list that collects the sub-lists.
# For a dict with 'hour': the filter passes, sl.append([]) (which returns
# None) adds a new empty sub-list and `or sl[-1]` makes it the dict value.
# For any other dict: sl[-1].append(d) stores it in the most recent sub-list
# and returns None, so the filter is falsy and no key is produced for it.
# NOTE(review): a leading dict without 'hour' hits sl[-1] on an empty list
# (IndexError).
dict_of_dicts = { d['hour']:sl.append([]) or sl[-1]
for sl in [[]] for d in list_of_dicts
if 'hour' in d or sl[-1].append(d) }

Related

Python Group and aggregate unidirectionally a list of dictionaries by multiple keys

I am building a tree selector and need to structure my data as a tree of grouped items. I have the input below, which is a list of dictionaries.
data = [
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I2'},
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R1', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I4'},
{'region': 'R2', 'group': 'G2', 'category': 'C3', 'item': 'I5'},
]
I want to get the following output
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{ "name": "I2"},{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2","items": [{"name": "I3"},{"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}
After some research I came up with this solution:
from collections import OrderedDict
# Level 1: collect the item entries per (region, group, category).
# setdefault is called positionally: keyword arguments are only accepted by
# OrderedDict.setdefault, not by the plain dict.setdefault.
d = OrderedDict()
for aggr in data:
    d.setdefault(
        (aggr['region'], aggr['group'], aggr['category']),
        list(),
    ).append({"name": aggr['item']})

# Level 2: collect the category entries per (region, group).
d1 = OrderedDict()
for k, v in d.items():
    d1.setdefault(
        (k[0], k[1]),
        list(),
    ).append({"name": k[2], "items": v})

# Level 3: collect the group entries per region.
d2 = OrderedDict()
for k, v in d1.items():
    d2.setdefault(
        k[0],
        list(),
    ).append({"name": k[1], "categories": v})

result = {"regions": [{"name": k, "groups": v} for k, v in d2.items()]}
It's working but I believe it's not the most pythonic solution. I did not manage to simplify it.
Any help to propose another solution or improvement on above codes will be appreciated
As long as the items are sorted, like in your example, you could use groupby from itertools in a recursive function, like:
from itertools import groupby
from operator import itemgetter
def plural(word):
    """Return a simple English plural of *word*.

    A trailing 'y' becomes 'ies' unless it follows a vowel ("category" ->
    "categories", but "key" -> "keys"); every other word gets an 's'.
    An empty string yields "s" instead of raising IndexError.
    """
    # word[-2:-1] is the letter before the final one ('' for short words).
    if word.endswith('y') and word[-2:-1].lower() not in ('a', 'e', 'i', 'o', 'u'):
        return f"{word[:-1]}ies"
    return f"{word}s"
def grouping(records, *keys):
    """Nest *records* by *keys*, outermost key first.

    records: iterable of dicts in which rows sharing a key value are already
        adjacent (groupby only merges consecutive rows).
    keys: field names; the last one names the leaf entries.

    Returns a list of {"name": value, <plural of next key>: [...]} dicts;
    for the last key it returns a list of {"name": value} dicts.
    """
    if len(keys) == 1:
        return [{"name": record[keys[0]]} for record in records]
    return [
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, itemgetter(keys[0]))
    ]
result = {"regions": grouping(data, "region", "group", "category", "item")}
If the sorting isn't guaranteed, then you could adjust grouping in the following way
def grouping(records, *keys):
    """Nest *records* by *keys*, outermost key first, sorting as needed.

    records: iterable of dicts; the input order does not matter because each
        level is sorted by its own key before grouping.
    keys: field names; the last one names the leaf entries.

    Returns a list of {"name": value, <plural of next key>: [...]} dicts;
    for the last key it returns a list of {"name": value} dicts.
    """
    if len(keys) == 1:
        return [{"name": record[keys[0]]} for record in records]
    key_func = itemgetter(keys[0])
    # groupby only merges adjacent rows, so bring equal keys together first.
    records = sorted(records, key=key_func)
    return [
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, key_func)
    ]
or sort the data beforehand
keys = ["region", "group", "category", "item"]
data = sorted(data, key=itemgetter(*keys))
result = {"regions": grouping(data, *keys)}
Result of first version for data as provided in the question:
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I2"}, {"name": "I1"}]
}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}, {"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}

Parse List in nested dictionary Python

data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
This is my Python object. How can I identify which keys have a list as their value? In this example, traversing with 'print(data["cars"]["model1"]["company_details"]["some_list"])' gives me the list; since it is a small dictionary that was easy, but how can I identify the same thing if I encounter a list as the value of some other key in the future?
Example:
data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": ["US", "UK", "IND"],
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
Can anyone please suggest/guide me to understand how to identify the key's value is a list.
The final goal is to remove the duplicates in the list if any exists?
Thank you very much:)
You can have a recursive function that goes to any depth and make the items of the list unique like below:
def removeDuplicatesFromList(di):
    """Remove duplicate items from every list value in *di*, in place.

    Nested dicts are processed recursively at any depth. The first occurrence
    of each item is kept and the original order is preserved. Returns None;
    *di* itself is modified.
    """
    for key, val in di.items():
        if isinstance(val, dict):
            removeDuplicatesFromList(val)
        elif isinstance(val, list):
            try:
                # dict keys are unique and keep insertion order.
                di[key] = list(dict.fromkeys(val))
            except TypeError:
                # Unhashable items (e.g. nested lists): compare by equality.
                unique = []
                for item in val:
                    if item not in unique:
                        unique.append(item)
                di[key] = unique
In [9]: removeDuplicatesFromList(data)
In [10]: data
Out[10]:
{'persons': {'1': {'name': 'siddu'}, '2': {'name': 'manju'}},
'cars': {'model1': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}},
'model2': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}}}}

List of dicts to multilevel dict based on depth info

I have some data, more or less like this:
[
{"tag": "A", "level":0},
{"tag": "B", "level":1},
{"tag": "D", "level":2},
{"tag": "F", "level":3},
{"tag": "G", "level":4},
{"tag": "E", "level":2},
{"tag": "H", "level":3},
{"tag": "I", "level":3},
{"tag": "C", "level":1},
{"tag": "J", "level":2},
]
I want to turn it into a multilevel dict based on depth level (key "level"):
{
"A": {"level": 0, "children": {
"B": {"level": 1, "children": {
"D": {"level": 2, "children": {
"F": {"level": 3, "children": {
"G": {"level": 4, "children": {}}}}}},
"E": {"level": 2, "children": {
"H": {"level": 3, "children": {}},
"I": {"level": 3, "children": {}}}}}},
"C": {"level": 1, "children": {
"J": {"level": 2, "children": {}}}}}}
}
All I can come up with right now is this little piece of code... which obviously breaks after few items:
def list2multilevel(list):
    """Consume the first item of *list* and its descendants into a nested dict.

    list: items shaped like {"tag": ..., "level": int} in depth-first order.
        (The parameter name shadows the builtin; it is kept for callers.)
        The list is consumed in place: the parent and all of its descendants
        are popped, later siblings are left for the caller.

    Returns {tag: {child_tag: {...}, ...}} for the first item.
    """
    parent = list.pop(0)
    tag = parent["tag"]
    level = parent["level"]
    children = {}
    # Everything deeper than this node, up to the next item at the same or a
    # shallower level, belongs to this subtree. Each recursive call consumes
    # one child together with that child's descendants.
    while list and list[0]["level"] > level:
        children.update(list2multilevel(list))
    return {tag: children}
Originally sat down to it on Friday and it was supposed to be just a small exercise....
data = [
    {"tag": "A", "level": 0},
    {"tag": "B", "level": 1},
    {"tag": "D", "level": 2},
    {"tag": "F", "level": 3},
    {"tag": "G", "level": 4},
    {"tag": "E", "level": 2},
    {"tag": "H", "level": 3},
    {"tag": "I", "level": 3},
    {"tag": "C", "level": 1},
    {"tag": "J", "level": 2},
]

# Dummy element at level -1 whose children are the real top-level nodes.
root = {'level': -1, 'children': {}}
# parents[level] is the most recently created node at that level.
parents = {-1: root}
for datum in data:
    level = datum['level']
    node = {
        'level': datum['level'],
        'children': {},
    }
    # Attach to the latest node one level up, then remember this node as the
    # current parent candidate for the next level down.
    parents[level - 1]['children'][datum['tag']] = node
    parents[level] = node
result = root['children']
print(result)
output:
{'A': {'level': 0, 'children': {'B': {'level': 1, 'children': {'D': {'level': 2, 'children': {'F': {'level': 3, 'children': {'G': {'level': 4, 'children': {}}}}}}, 'E': {'level': 2, 'children': {'H': {'level': 3, 'children': {}}, 'I': {'level': 3, 'children': {}}}}}}, 'C': {'level': 1, 'children': {'J': {'level': 2, 'children': {}}}}}}}
restriction:
level >= 0
Any level cannot be bigger than +1 of max level appeared before.
explanation:
parents is a dictionary to remember last element for each level.
root is a starting point(dummy element).
logic:
Start with -1 level which indicates the root.
Make an item and register it into parent's children.
Update same item to parents dictionary.
Repeat.
Extract root['children'].
Other solution using recursion (same restrictions as with Boseong Choi's answer):
data = [
{"tag": "A", "level": 0},
{"tag": "B", "level": 1},
{"tag": "D", "level": 2},
{"tag": "F", "level": 3},
{"tag": "G", "level": 4},
{"tag": "E", "level": 2},
{"tag": "H", "level": 3},
{"tag": "I", "level": 3},
{"tag": "C", "level": 1},
{"tag": "J", "level": 2},
]
def make_node(dic):
    """Build a tree node from an input record without modifying the record.

    Returns (tag, node), where node is a shallow copy of *dic* with the
    "tag" entry removed and an empty "children" dict added.
    """
    node = dic.copy()
    node["children"] = {}
    tag = node.pop("tag")
    return tag, node
def add_child(parent, child, tag):
    """Attach *child* under *tag* somewhere below *parent*.

    The child goes directly under *parent* when it is exactly one level
    deeper. Otherwise the search descends into the most recently added child
    only, because in depth-first input a node belongs to the latest open
    branch rather than to the first branch with a matching level.

    Returns True if the child was attached, False if no node one level above
    it exists on the latest branch.
    """
    assert child["level"] > parent["level"]
    if child["level"] == parent["level"] + 1:
        parent["children"][tag] = child
        return True
    if not parent["children"]:
        return False
    # Dicts keep insertion order, so the last value is the latest branch.
    latest = list(parent["children"].values())[-1]
    return add_child(latest, child, tag)
def parse(lst):
    """Build the nested tree for *lst* and return it as {root_tag: root_node}.

    lst: non-empty list of {"tag": ..., "level": int} records in depth-first
        order whose first record is the level-0 root.

    Raises ValueError when a record cannot be attached, instead of silently
    dropping it.
    """
    assert lst[0]["level"] == 0
    root_tag, root = make_node(lst[0])
    for item in lst[1:]:
        tag, node = make_node(item)
        if not add_child(root, node, tag):
            raise ValueError(f"no parent found for {tag!r} at level {node['level']}")
    # The original built the tree but never returned it.
    return {root_tag: root}
print(parse(data))
You can use recursion:
from itertools import groupby as gb
data = [{'tag': 'A', 'level': 0}, {'tag': 'B', 'level': 1}, {'tag': 'D', 'level': 2}, {'tag': 'F', 'level': 3}, {'tag': 'G', 'level': 4}, {'tag': 'E', 'level': 2}, {'tag': 'H', 'level': 3}, {'tag': 'I', 'level': 3}, {'tag': 'C', 'level': 1}, {'tag': 'J', 'level': 2}]
def to_tree(d, s=0):
    """Convert a depth-first list of {'tag', 'level'} dicts into a nested dict.

    d: iterable of records; the first record and every later record whose
        'level' equals *s* become nodes of this level, and the records
        between two such nodes are the descendants of the earlier one.
    s: the level assigned to the nodes built by this call.

    Returns {tag: {'level': s, 'children': {...}}, ...}; {} for empty input.
    Unlike the groupby-based pairing, this keeps consecutive siblings that
    have no children and accepts a trailing childless node.
    """
    d = list(d)
    tree = {}
    start = 0
    while start < len(d):
        # Find the next record at this level; everything before it is the
        # subtree of d[start].
        end = start + 1
        while end < len(d) and d[end]['level'] != s:
            end += 1
        tree[d[start]['tag']] = {
            'level': s,
            'children': to_tree(d[start + 1:end], s + 1),
        }
        start = end
    return tree
import json
print(json.dumps(to_tree(data), indent=4))
Output:
{
"A": {
"level": 0,
"children": {
"B": {
"level": 1,
"children": {
"D": {
"level": 2,
"children": {
"F": {
"level": 3,
"children": {
"G": {
"level": 4,
"children": {}
}
}
}
}
},
"E": {
"level": 2,
"children": {
"H": {
"level": 3,
"children": {}
},
"I": {
"level": 3,
"children": {}
}
}
}
}
},
"C": {
"level": 1,
"children": {
"J": {
"level": 2,
"children": {}
}
}
}
}
}
}

Convert from path list to Flare json format?

I have data in python that looks like this:
[['a', 'b', 'c', 50],
['a', 'b', 'd', 100],
['a', 'b', 'e', 67],
['a', 'g', 'c', 12],
['q', 'k', 'c', 11],
['q', 'b', 'p', 11]]
where each element of the list is a complete hierarchical path, and the last element is the size of the path. To do a visualization in D3, I need the data to be in the flare data format - seen here:
https://github.com/d3/d3-hierarchy/blob/master/test/data/flare.json
So a short piece would look like this
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{"name": "c", "value": 50},
{"name": "d", "value": 100},
{"name": "e", "value": 67},
]
},
{
"name": "g",
"children": [
{"name": "c", "value": 12},
]
},
and so forth...
From what I've been looking up, I think the solution is recursive, and would use the json library on a Python dictionary, but I can't seem to get it to work. Any help is greatly appreciated.
Here's a solution using recursion:
def add_to_flare(n, flare):
    """Insert one path into a flare-style tree, in place.

    n: sequence [name_1, ..., name_k, value] with at least two elements; the
        last name becomes a leaf {"name": ..., "value": ...}.
    flare: a node dict with a "children" list; modified in place.

    Existing branch nodes are reused. A leaf that happens to share a name
    with a path segment is not descended into (it has no "children").
    """
    children = flare["children"]
    if len(n) == 2:
        children.append({"name": n[0], "value": n[1]})
        return
    for c in children:
        if c["name"] == n[0] and "children" in c:
            add_to_flare(n[1:], c)
            return
    # No branch with this name yet: create it and continue below it.
    branch = {"name": n[0], "children": []}
    children.append(branch)
    add_to_flare(n[1:], branch)
flare = {"name": "root", "children": []}
for i in data:
add_to_flare(i, flare)
To display it nicely, we can use the json library:
import json
print(json.dumps(flare, indent=1))
{
"name": "root",
"children": [
{
"name": "a",
"children": [
{
"name": "b",
"children": [
{
"name": "c",
"value": 50
},
{
"name": "d",
"value": 100
},
{
"name": "e",
"value": 67
}
]
},
{
"name": "g",
"children": [
{
"name": "c",
"value": 12
}
]
}
]
},
{
"name": "q",
"children": [
{
"name": "k",
"children": [
{
"name": "c",
"value": 11
}
]
},
{
"name": "b",
"children": [
{
"name": "p",
"value": 11
}
]
}
]
}
]
}
Try this:
master = []
for each in your_list:
    # head is the children list the current path segment has to live in.
    head = master
    for i, name in enumerate(each):
        if i == len(each) - 2:
            # The second-to-last element is the leaf name, the last its value.
            head.append({'name': name, 'value': each[i + 1]})
            break
        # Reuse the first existing node with this name, otherwise create one.
        node = next((e for e in head if e['name'] == name), None)
        if node is None:
            node = {'name': name, 'children': []}
            head.append(node)
        head = node['children']
results:
[{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}], 'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}], 'name': 'k'},
{'children': [{'name': 'p', 'value': 11}], 'name': 'b'}],
'name': 'q'}]
Please note that name and children are flipped in this dictionary since it's unordered. But the resulting structure is the same.
put it in root to get your target:
my_dict = {'name':'root', 'children': master}
Assuming your list of lists is stored in variable l, you can do:
o = []
for s in l:
    # c is the list of sibling nodes to search at the current depth.
    c = o
    # Prefixing 'root' makes every path hang under one shared root node.
    for i, n in enumerate(['root'] + s[:-1]):
        for d in c:
            if n == d['name']:
                break
        else:
            # No sibling with this name yet: create it.
            c.append({'name': n})
            d = c[-1]
        if i < len(s) - 1:
            # Intermediate segment: descend into (or create) its children.
            if 'children' not in d:
                d['children'] = []
            c = d['children']
        else:
            # Final segment: store the size as the leaf value.
            d['value'] = s[-1]
so that o[0] becomes:
{'children': [{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}],
'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}],
'name': 'k'},
{'children': [{'name': 'p', 'value': 11}],
'name': 'b'}],
'name': 'q'}],
'name': 'root'}

n-depth tree: set parent value based on children values

In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What approach could be used to recursively set each parent's value based on the result of an operation performed on its children's values (e.g. sum)?
I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the depth iteration order is guaranteed, so by the time we reach a node which has children, all of its sub-level children have already been calculated.
The solution:
def reverseTraversal(obj):
    """Set every parent's 'value' to the sum of its children's values, in place.

    obj: a node dict that may hold a 'children' list of further node dicts,
        or None.

    Returns the same (mutated) *obj* so that `obj = reverseTraversal(obj)`
    keeps the tree; returns None when *obj* is None.
    """
    from collections import deque

    def parentOperation(node):
        return sum(child['value'] for child in node['children'])

    if obj is None:
        return None

    # Breadth-first pass: record the nodes level by level.
    queue = deque([obj])
    stack = []
    while queue:
        temp = queue.popleft()
        stack.append(temp)
        queue.extend(temp.get('children') or [])

    # Unwinding the stack visits deeper levels first, so every child's value
    # is final by the time its parent is computed.
    while stack:
        node = stack.pop()
        if node.get('children'):
            node['value'] = parentOperation(node)
    return obj
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
    """Return a copy of the nested dict *d* with None values filled in.

    d: dict whose values may be lists of dicts, dicts, or plain values.
    targets: an iterator; each None value met, in dict iteration order and
        depth first, is replaced by next(targets).

    Only None is treated as missing, so legitimate falsy values such as 0
    are kept.
    """
    filled = {}
    for a, b in d.items():
        if isinstance(b, list):
            filled[a] = [update(i, targets) for i in b]
        elif isinstance(b, dict):
            filled[a] = update(b, targets)
        elif b is None:
            filled[a] = next(targets)
        else:
            filled[a] = b
    return filled
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

Categories