convert hierarchical data to a specific json format in python

convert hierarchical data to a specific json format in python - python

I have a dataframe like below. Each topic has several sub-topics.
pd.DataFrame({'topic': ['A', 'A', 'A', 'B', 'B'],
'sub-topic': ['A1', 'A2', 'A3', 'B1', 'B3' ],
'value': [2,12,44,21,1]})
topic sub-topic value
0 A A1 2
1 A A2 12
2 A A3 44
3 B B1 21
4 B B3 1
I need to convert it to Json format like below. Within first layer, for example topic A, the value is the sum of all its sub-topics.
{'A': {
'value': 58,
'children': {
'A1': {'value': 2},
'A2': {'value': 12},
'A3': {'value': 44}
},
},
'B': {
'value': 22,
'children': {
'B1': {'value': 21},
'B3': {'value': 1}
}
}
}
Does anyone know how I can convert the data to this specific json? I have no clue how I should approach that. Thanks a lot in advance.

Use cusom function in GroupBy.apply, last use Series.to_dict or Series.to_json:
def f(x):
d = {'value': x['value'].sum(),
'children': x.set_index('sub-topic')[['value']].to_dict('index')}
return (d)
#for dictonary
out = df.groupby('topic').apply(f).to_dict()
#for json
#out = df.groupby('topic').apply(f).to_json()
print (out)
{
'A': {
'value': 58,
'children': {
'A1': {
'value': 2
},
'A2': {
'value': 12
},
'A3': {
'value': 44
}
}
},
'B': {
'value': 22,
'children': {
'B1': {
'value': 21
},
'B3': {
'value': 1
}
}
}
}

Related

Sort nested list of dict by it's dict value

I have the following structure:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'c': 'third',
'price': 3
},
{
'b': 'second',
'price': 2
},
{
'a': 'first',
'price': 1
}
]
},
2: {
'list': [
{
'f': 'sixth',
'price': 6
},
{
'e': 'fifth',
'price': 5
},
{
'd': 'fourth',
'price': 4
}
]
}
}
}
}
}
I need to order each list by price, ascending. The result should be:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'a': 'first',
'price': 1
},
{
'b': 'second',
'price': 2
},
{
'c': 'third',
'price': 3
},
]
},
2: {
'list': [
{
'd': 'fourth',
'price': 4
},
{
'e': 'fifth',
'price': 5
},
{
'f': 'sixth',
'price': 6
}
]
}
}
}
}
}
None of the questions I've found fits my needs because of this particular structure.
Is there a way to order it without having to access each previous keys? Because on my project I have cases with more nested keys before the list, so I need a dynamic solution for sorting it.
I mean, I don't know the exactly path to the list, only the list key.

Make a function to recursively traverse your dict looking for lists, and sort each one based on your criteria:
def find_and_sort_lists(d):
for value in d.values():
if isinstance(value, list):
value.sort(key = lambda nested_d: nested_d['price'])
if isinstance(value, dict):
find_and_sort_lists(value)
If it's a requirement to sort only lists whose key is actually 'list', you can use the following:
def find_and_sort_lists(d):
for key, value in d.items():
if key == 'list' and isinstance(value, list):
value.sort(key = lambda nested_d: nested_d['price'])
if isinstance(value, dict):
find_and_sort_lists(value)

loop over a nested dictionary to create a new one

I've got a nested dictionary like that:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
Expected result:
[
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a1
},
"time": "timexxxxx",
"fields": {
x: 1
}
},
{
"measurements": "XXXXX",
"tags": {
"MPC": b,
"host": a2
},
"time": "timexxxxx",
"fields": {
x: 1
}
}
]
that is what I'm trying, however it's being overwritten
for k,v in d.items():
metrics['measurements'] = "XXXXX"
if isinstance(v,dict):
for j,h in v.items():
metrics['tags'] = {'MPC':j,'host':k}
metrics['time'] = "timexxxxx"
for value in h:
metrics['fields'] = {j:h}
and I'm getting:
{'fields': {'b1': ['x1', 2]},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}
Could you give me some pointers on how to deal with this?
Thanks

see below
import pprint
d = {'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
data = []
for k, v in d.items():
entry = {"measurements": "XXXXX"}
entry['tags'] = {'MPC': list(v.keys())[0],"host": k}
entry["time"] = "timexxxxx"
values= list(v.values())
entry["fields"] = {values[0][0]:values[0][1]}
data.append(entry)
pprint.pprint(data)
output
[{'fields': {'x': 1},
'measurements': 'XXXXX',
'tags': {'MPC': 'b', 'host': 'a1'},
'time': 'timexxxxx'},
{'fields': {'x1': 2},
'measurements': 'XXXXX',
'tags': {'MPC': 'b1', 'host': 'a2'},
'time': 'timexxxxx'}]

This code can help you:
d={'a1': {'b': ['x', 1]}, 'a2': {'b1': ['x1', 2]}}
def convert(dictionary):
return [
{
"measurements": "XXXXX",
"tags": {
"MPC": list(value.keys())[0],
"host": key
},
"time": "timexxxxx",
"fields": dict(value.values())
} for key, value in dictionary.items()
]
print(convert(d))
Results in [{'measurements': 'XXXXX', 'tags': {'MPC': 'b', 'host': 'a1'}, 'time': 'timexxxxx', 'fields': {'x': 1}}, {'measurements': 'XXXXX', 'tags': {'MPC': 'b1', 'host': 'a2'}, 'time': 'timexxxxx', 'fields': {'x1': 2}}]

You can do it like this
#Empty List
li=[]
#Add Items in list
for i in range(2):
d = {}
d["measurment"] = "XXXXX"
d["tags"] = {1: "x"}
d["time"] = "timexxx"
d["field"] = {2: "y"}
li.append(d)
#Print list elements
for i in li:
for key, value in i.items():
print(key, ":", value)
print()

Python recursive aggregation

I am working with a nested data structure which needs to be flattened. The values need to be aggregated so totals are produced across each level of the nested data. I'm trying to do this recursively but it's not clear how best to achieve this?
The following is an example of the data I'm working with.
def get_result():
return {
"a1": {
"b1": {
"c1": {
"d1": 1,
"d2": 1,
},
"c2": {
"d3": 1,
}
},
"b2": {
"c3": {
"d4": 1
}
}
},
"a2": {}
}
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
}
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]

You can use recursion
from collections import defaultdict
def agg(data):
result = defaultdict(int)
agg_sum = 0
for k, v in data.items():
if isinstance(v, dict):
d, sub = agg(v)
if sub:
result.update(d)
result[k] += sub
agg_sum += sub
else:
result[k] += v
agg_sum += v
return result, agg_sum

You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
return d if not isinstance(d, dict) else sum(map(get_aggr, d.values()))
def aggr_keys(d):
for a, b in d.items():
yield {'key':a, 'total':get_aggr(b)}
yield from (() if not isinstance(b, dict) else aggr_keys(b))
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]

Python: formatting json output

I need to convert this DataFrame to json file.
Code:
def new_json(df):
drec = dict()
ncols = df.values.shape[1]
for line in df.values:
d = drec
for j, col in enumerate(line[:-1]):
if not col in d.keys():
if j != ncols-2:
d[col] = {}
d = d[col]
else:
d[col] = line[-1]
else:
if j!= ncols-2:
d = d[col]
return drec
a=new_json(df)
print(a)
result:
{'a': {'a2': {'a21': 'new', 'a22': 'old'}, 'a3': {'a31': 'content'}, 'a4': {'a41': 'old'}}, 'b': {'b1': {'b11': 'content', 'b12': 'new', 'b13': 'new'}}, 'c': {'c1': {'c11': 'content'}, 'c2': {'c21': 'content'}, 'c3': {'c31': 'old'}}}
Is it possible to modify the result in this json format?
{
'a': {
'a2': {
'a21': 'new',
'a22': 'old'
},
'a3': {
'a31': 'content'
},
'a4': {
'a41': 'old'
}
},
'b': {
'b1': {
'b11': 'content',
'b12': 'new',
'b13': 'new'
}
},
'c': {
'c1': {
'c11': 'content'
},
'c2': {
'c21': 'content'
},
'c3': {
'c31': 'old'
}
}
}

How can I create aggregate expressions of this list of dicts?

I have a list of dictionaries that expresses periods+days for a class in a student information system. Here's the data I'd like to aggregate:
[
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'A',
'sort_order': 1
}
},
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'B',
'sort_order': 2
}
},
{
'period': {
'name': '1',
'sort_order': 1
},
'day': {
'name': 'C',
'sort_order': 1
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'A',
'sort_order': 1
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'B',
'sort_order': 2
}
},
{
'period': {
'name': '3',
'sort_order': 3
},
'day': {
'name': 'C',
'sort_order': 2
}
},
{
'period': {
'name': '4',
'sort_order': 4
},
'day': {
'name': 'D',
'sort_order': 3
}
}
]
The aggregated string I'd like the above to reduce to is 1,3(A-C) 4(D). Notice that objects that aren't "adjacent" (determined by the object's sort_order) to each other are delimited by , and "adjacent" records are delimited by a -.
EDIT
Let me try to elaborate on the aggregation process. Each "class meeting" object contains a period and day. There are usually ~5 periods per day, and the days alternate cyclically between A,B,C,D, etc. So if I have a class that occurs 1st period on an A day, we might express that as 1(A). If a class occurs on 1st and 2nd period on an A day, the raw form of that might be 1(A),2(A), but it can be shortened to 1-2(A).
Some classes might not be in "adjacent" periods or days. A class might occur on 1st period and 3rd period on an A day, so its short form would be 1,3(A). However, if that class were on 1st, 2nd, and 3rd period on an A day, it could be written as 1-3(A). This also applies to days, so if a class occurs on 1st,2nd, and 3rd period, on A,B, and C day, then we could write it 1-3(A-C).
Finally, if a class occurs on 1st,2nd, and 3rd period and on A,B, and C day, but also on 4th period on D day, its short form would be 1-3(A-C) 4(D).
What I've tried
The first step that occurs to me to perform is to "group" the meeting objects into related sub-lists with the following function:
def _to_related_lists(list):
"""Given a list of section meeting dicts, return a list of lists, where each sub-list is list of
related section meetings, either related by period or day"""
related_list = []
sub_list = []
related_values = set()
for index, section_meeting_object in enumerate(list):
# starting with empty values list
if not related_values:
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
elif section_meeting_object['period']['name'] in related_values or section_meeting_object['day']['name'] in related_values:
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
else:
# no related values found in current section_meeting_object
related_list.append(sub_list)
sub_list = []
related_values = set()
related_values.add(section_meeting_object['period']['name'])
related_values.add(section_meeting_object['day']['name'])
sub_list.append(section_meeting_object)
related_list.append(sub_list)
return related_list
Which returns:
[
[{
'period': {
'sort_order': 1,
'name': '1'
},
'day': {
'sort_order': 1,
'name': 'A'
}
}, {
'period': {
'sort_order': 1,
'name': '1'
},
'day': {
'sort_order': 2,
'name': 'B'
}
}, {
'period': {
'sort_order': 2,
'name': '2'
},
'day': {
'sort_order': 1,
'name': 'A'
}
}, {
'period': {
'sort_order': 2,
'name': '2'
},
'day': {
'sort_order': 2,
'name': 'B'
}
}],
[{
'period': {
'sort_order': 4,
'name': '4'
},
'day': {
'sort_order': 3,
'name': 'C'
}
}]
]
If the entire string 1-3(A-C) 4(D) is the aggregate expression I'd like in the end, let's call 1-3(A-C) and 4(D) "sub-expressions". Each related sub-list would be a "sub-expression", so I was thinking I'd somehow iterate through every sublist and create the sub-expression, but I"m not exactly sure how to do that.

First, let us define your list as d_list.
d_list = [
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'C'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'C'}},
{'period': {'sort_order': 4, 'name': '4'}, 'day': {'sort_order': 3, 'name': 'D'}},
]
Note that I use the python native module string to define that B is between A and C. Thus what you may want to do is
import string
agg0 = {}
for d in d_list:
name = d['period']['name']
if name not in agg0:
agg0[name] = []
day = d['day']
agg0[name].append(day['name'])
agg1 = {}
for k,v in agg0.items():
pos_in_alph = [string.ascii_lowercase.index(el.lower()) for el in v]
allowed_indexes = [max(pos_in_alph),min(pos_in_alph)]
agg1[k] = [el for el in v if string.ascii_lowercase.index(el.lower()) in allowed_indexes]
agg = {}
for k,v in agg1.items():
w = tuple(v)
if w not in agg:
agg[w] = {'ks':[],'gr':len(agg0[k])>2}
agg[w]['ks'].append(k)
print agg[w]
str_ = ''
for k,v in sorted(agg.items(), key=lambda item:item[0], reverse=False):
str_ += ' {pnames}({dnames})'.format(pnames=('-' if v['gr'] else ',').join(sorted(v['ks'])),
dnames='-'.join(k))
print(str_.strip())
which outputs 1-3(A-C) 4(D)
Following #NathanJones's comment, note that if d_list were defined as
d_list = [
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'A'}},
##{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 1, 'name': '1'}, 'day': {'sort_order': 1, 'name': 'C'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 1, 'name': 'A'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'B'}},
{'period': {'sort_order': 3, 'name': '3'}, 'day': {'sort_order': 2, 'name': 'C'}},
{'period': {'sort_order': 4, 'name': '4'}, 'day': {'sort_order': 3, 'name': 'D'}},
]
The code above would print 1,3(A-C) 4(D)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

convert hierarchical data to a specific json format in python - python

Related

Sort nested list of dict by it's dict value

loop over a nested dictionary to create a new one

Python recursive aggregation

Python: formatting json output

How can I create aggregate expressions of this list of dicts?

Categories

Resources