Convert from path list to Flare JSON format in Python

I have data in python that looks like this:
[['a', 'b', 'c', 50],
['a', 'b', 'd', 100],
['a', 'b', 'e', 67],
['a', 'g', 'c', 12],
['q', 'k', 'c', 11],
['q', 'b', 'p', 11]]
where each element of the list is a complete hierarchical path and the last element is the size of that path. To do a visualization in D3, I need the data to be in the Flare data format, as seen here:
https://github.com/d3/d3-hierarchy/blob/master/test/data/flare.json
So a short piece would look like this:
{
  "name": "root",
  "children": [
    {
      "name": "a",
      "children": [
        {
          "name": "b",
          "children": [
            {"name": "c", "value": 50},
            {"name": "d", "value": 100},
            {"name": "e", "value": 67}
          ]
        },
        {
          "name": "g",
          "children": [
            {"name": "c", "value": 12}
          ]
        },
and so forth...
From what I've been looking up, I think the solution is recursive, and would use the json library on a Python dictionary, but I can't seem to get it to work. Any help is greatly appreciated.

Here's a solution using recursion:
def add_to_flare(n, flare):
    children = flare["children"]
    if len(n) == 2:
        # Leaf: only the last path element and its size remain.
        children.append({"name": n[0], "value": n[1]})
    else:
        # Descend into an existing child with the same name, if any.
        for c in children:
            if c["name"] == n[0]:
                add_to_flare(n[1:], c)
                return
        # Otherwise create the child and recurse into it.
        children.append({"name": n[0], "children": []})
        add_to_flare(n[1:], children[-1])

flare = {"name": "root", "children": []}
for i in data:
    add_to_flare(i, flare)
To display it nicely, we can use the json library:
import json
print(json.dumps(flare, indent=1))
{
 "name": "root",
 "children": [
  {
   "name": "a",
   "children": [
    {
     "name": "b",
     "children": [
      {
       "name": "c",
       "value": 50
      },
      {
       "name": "d",
       "value": 100
      },
      {
       "name": "e",
       "value": 67
      }
     ]
    },
    {
     "name": "g",
     "children": [
      {
       "name": "c",
       "value": 12
      }
     ]
    }
   ]
  },
  {
   "name": "q",
   "children": [
    {
     "name": "k",
     "children": [
      {
       "name": "c",
       "value": 11
      }
     ]
    },
    {
     "name": "b",
     "children": [
      {
       "name": "p",
       "value": 11
      }
     ]
    }
   ]
  }
 ]
}
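If the structure also needs to end up on disk so D3 can fetch it (for example with d3.json), json.dump writes it straight to a file; a minimal sketch, with flare.json as a made-up file name:
import json

# Write the nested structure to a file for D3 to load.
with open("flare.json", "w") as f:
    json.dump(flare, f, indent=2)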

Try this:
master = []
for each in your_list:
    head = master
    for i in range(len(each)):
        names = [e['name'] for e in head]
        if i == len(each) - 2:
            head.append({'name': each[i], 'value': each[i+1]})
            break
        if each[i] in names:
            head = head[names.index(each[i])]['children']
        else:
            head.append({'name': each[i], 'children': []})
            head = head[-1]['children']
results:
[{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}], 'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}], 'name': 'k'},
{'children': [{'name': 'p', 'value': 11}], 'name': 'b'}],
'name': 'q'}]
Note that name and children appear in a different order here only because the pretty-printer sorts dictionary keys for display; the resulting structure is the same.
Put it under a root node to get your target:
my_dict = {'name':'root', 'children': master}
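If you serialize my_dict with json.dumps rather than pretty-printing it, the keys come out in the insertion order used above, so the text matches the target shape; a quick check:
import json

# sort_keys defaults to False, so "name" stays before "children".
print(json.dumps(my_dict, indent=2))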

Assuming your list of lists is stored in variable l, you can do:
o = []
for s in l:
    c = o
    for i, n in enumerate(['root'] + s[:-1]):
        for d in c:
            if n == d['name']:
                break
        else:
            c.append({'name': n})
            d = c[-1]
        if i < len(s) - 1:
            if 'children' not in d:
                d['children'] = []
            c = d['children']
        else:
            d['value'] = s[-1]
so that o[0] becomes:
{'children': [{'children': [{'children': [{'name': 'c', 'value': 50},
{'name': 'd', 'value': 100},
{'name': 'e', 'value': 67}],
'name': 'b'},
{'children': [{'name': 'c', 'value': 12}],
'name': 'g'}],
'name': 'a'},
{'children': [{'children': [{'name': 'c', 'value': 11}],
'name': 'k'},
{'children': [{'name': 'p', 'value': 11}],
'name': 'b'}],
'name': 'q'}],
'name': 'root'}

Related

Pandas to JSON Within Groups

I have the following pandas dataframe. I want to output a JSON object nested by State first and then by City. The Code, Name, and Rank columns become triplets that make up a list of dictionaries.
MWE
import pandas as pd
df = pd.DataFrame({
    'State': ['PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'PA', 'NY', 'NY', 'NY', 'NY', 'NY', 'NY', 'ME', 'ME', 'ME'],
    'City': ['Philadelphia', 'Philadelphia', 'Philadelphia', 'Philadelphia', 'Scranton', 'Scranton', 'Williamsport', 'Buffalo', 'Buffalo', 'Buffalo', 'Buffalo', 'Albany', 'Albany', 'Portland', 'Portland', 'Ogunquit'],
    'Code': [10, 20, 30, 40, 50, 60, 10, 20, 30, 40, 50, 10, 20, 30, 40, 30],
    'Name': ['A', 'B', 'C', 'D', 'E', 'F', 'A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D', 'C'],
    'Rank': [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 1, 2, 3, 4, 3]
})
df
I got this far, but it is not close to what I want:
df.groupby(['State', 'City']).apply(lambda x: x[['Code', 'Name', 'Rank']].to_json(orient='records', indent = 4))
Desired Output
[
{
"State": "PA",
"City": "Philadelphia",
"List": [
{
"Code": 10,
"Name": "A",
"Rank": 1
},
{
"Code": 20,
"Name": "B",
"Rank": 2
},
{
"Code": 30,
"Name": "C",
"Rank": 3
},
{
"Code": 40,
"Name": "D",
"Rank": 4
}
]
},
{
"State": "PA",
"City": "Scranton",
"List": [
{
"Code": 50,
"Name": "E",
"Rank": 5
},
{
"Code": 60,
"Name": "F",
"Rank": 6
}
]
},
{
"State": "PA",
"City": "Williamsport",
"List": [
{
"Code": 10,
"Name": "A",
"Rank": 1
}
]
},
{
"State": "NY",
"City": "Albany",
"List": [
{
"Code": 10,
"Name": "A",
"Rank": 1
},
{
"Code": 20,
"Name": "B",
"Rank": 2
}
]
},
{
"State": "NY",
"City": "Buffalo",
"List": [
{
"Code": 20,
"Name": "B",
"Rank": 2
},
{
"Code": 30,
"Name": "C",
"Rank": 3
},
{
"Code": 40,
"Name": "D",
"Rank": 4
},
{
"Code": 50,
"Name": "E",
"Rank": 5
}
]
},
{
"State": "ME",
"City": "Portland",
"List": [
{
"Code": 30,
"Name": "C",
"Rank": 3
},
{
"Code": 40,
"Name": "D",
"Rank": 4
}
]
},
{
"State": "ME",
"City": "Ogunquit",
"List": [
{
"Code": 30,
"Name": "C",
"Rank": 3
}
]
}
]
IIUC, you can try:
df["List"] = df[["Code", "Name", "Rank"]].to_dict("records")
grouped = df.groupby(["State", "City"])["List"].apply(list).reset_index()
json_obj = grouped.to_json(orient="records")
>>> json_obj
'[{"State":"ME",
"City":"Ogunquit",
"List":[{"Code":30,"Name":"C","Rank":3}]},
{"State":"ME",
"City":"Portland",
"List":[{"Code":30,"Name":"C","Rank":3},
{"Code":40,"Name":"D","Rank":4}]},
{"State":"NY",
"City":"Albany",
"List":[{"Code":10,"Name":"A","Rank":1},
{"Code":20,"Name":"B","Rank":2}]},
{"State":"NY",
"City":"Buffalo",
"List":[{"Code":20,"Name":"B","Rank":2},
{"Code":30,"Name":"C","Rank":3},
{"Code":40,"Name":"D","Rank":4},
{"Code":50,"Name":"E","Rank":5}]},
{"State":"PA",
"City":"Philadelphia",
"List":[{"Code":10,"Name":"A","Rank":1},
{"Code":20,"Name":"B","Rank":2},
{"Code":30,"Name":"C","Rank":3},
{"Code":40,"Name":"D","Rank":4}]},
{"State":"PA",
"City":"Scranton",
"List":[{"Code":50,"Name":"E","Rank":5},
{"Code":60,"Name":"F","Rank":6}]},
{"State":"PA",
"City":"Williamsport",
"List":[{"Code":10,"Name":"A","Rank":1}]}]'
Try:
df.groupby(["State", "City"]).apply(
    lambda x: x[["Code", "Name", "Rank"]].to_dict("records")
).reset_index(name="List").to_json(orient="records")
Output (shown parsed into Python objects for readability):
[{'State': 'ME',
'City': 'Ogunquit',
'List': [{'Code': 30, 'Name': 'C', 'Rank': 3}]},
{'State': 'ME',
'City': 'Portland',
'List': [{'Code': 30, 'Name': 'C', 'Rank': 3},
{'Code': 40, 'Name': 'D', 'Rank': 4}]},
{'State': 'NY',
'City': 'Albany',
'List': [{'Code': 10, 'Name': 'A', 'Rank': 1},
{'Code': 20, 'Name': 'B', 'Rank': 2}]},
{'State': 'NY',
'City': 'Buffalo',
'List': [{'Code': 20, 'Name': 'B', 'Rank': 2},
{'Code': 30, 'Name': 'C', 'Rank': 3},
{'Code': 40, 'Name': 'D', 'Rank': 4},
{'Code': 50, 'Name': 'E', 'Rank': 5}]},
{'State': 'PA',
'City': 'Philadelphia',
'List': [{'Code': 10, 'Name': 'A', 'Rank': 1},
{'Code': 20, 'Name': 'B', 'Rank': 2},
{'Code': 30, 'Name': 'C', 'Rank': 3},
{'Code': 40, 'Name': 'D', 'Rank': 4}]},
{'State': 'PA',
'City': 'Scranton',
'List': [{'Code': 50, 'Name': 'E', 'Rank': 5},
{'Code': 60, 'Name': 'F', 'Rank': 6}]},
{'State': 'PA',
'City': 'Williamsport',
'List': [{'Code': 10, 'Name': 'A', 'Rank': 1}]}]
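Two small notes on the answers above. groupby sorts the group keys, which is why ME comes before PA in these outputs; passing sort=False keeps the groups in order of first appearance in the dataframe. And to_json returns a compact string, so if you want indented text like the desired output, one option (an assumption about the formatting wanted) is to build Python objects and hand them to json.dumps. A sketch combining both:
import json

records = (
    df.groupby(["State", "City"], sort=False)
      .apply(lambda x: x[["Code", "Name", "Rank"]].to_dict("records"))
      .reset_index(name="List")
      .to_dict(orient="records")
)
print(json.dumps(records, indent=4))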

Merge lists of complex dicts with arbitrary keys

I have this code:
dotteds = ["apple.orange.banana", "a.b.c", "a.b.d"]
name = "name"
avtype = "type"
fields = "fields"
main_dictionary_list = []
for x in dotteds:
    split_name = x.split('.')
    if len(split_name) > 1:
        value = {name: split_name[-1], avtype: 'string'}
        dicts = []
        for y in split_name:
            dicts.append({name: y, avtype: {name: y, avtype: "record", fields: []}})
        dicts[-1] = value
        value = value['name'] + split_name[-1]
        for z in reversed(range(len(dicts))):
            if z != 0:
                dicts[z - 1]['type']['fields'].append(dicts[z])
        main_dictionary_list.append(dicts[0])
    else:
        dicts = []
        value = {name: split_name[-1], avtype: 'string'}
        dicts.append(value)
        main_dictionary_list.append(dicts[0])
print(main_dictionary_list)
Which gives me an output like this:
[{
'name': 'apple',
'type': {
'name': 'apple',
'type': 'record',
'fields': [{
'name': 'orange',
'type': {
'name': 'orange',
'type': 'record',
'fields': [{
'name': 'banana',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'c',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'd',
'type': 'string'
}
]
}
}
]
}
}
]
Ideally I need:
[{
'name': 'apple',
'type': {
'name': 'apple',
'type': 'record',
'fields': [{
'name': 'orange',
'type': {
'name': 'orange',
'type': 'record',
'fields': [{
'name': 'banana',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'c',
'type': 'string'
},
{
'name': 'd',
'type': 'string'
}
]
}
}
]
}
}
]
I need to be able to handle any number of combinations:
apple.orange.banana, a.b.c, a.b.d, a.b.q.e.a.s.d, etc.
I cannot figure out how to merge entries that share the same name at each level; the output is intended to be in Avro format.
I have also tried turning the dotted values into a nested dictionary, which is a bit of trouble on its own.
You can use recursion with collections.defaultdict:
from collections import defaultdict

def group(vals, last=None):
    if any(len(i) == 1 for i in vals):
        return [{'name': last, 'type': {'name': last, 'type': 'record',
                 'fields': [{'name': i[0], 'type': 'string'} if len(i) == 1
                            else group([i], i[0])[0] for i in vals]}}]
    _d = defaultdict(list)
    for i in vals:
        _d[i[0]].append(i[1:])
    return [{'name': a, 'type': group(b, last=a)} if last is None else
            {'name': last, 'type': 'record', 'fields': group(b, last=a)}
            for a, b in _d.items()]

import json
vals = ['apple.orange.banana', 'a.b.c', 'a.b.d']
print(json.dumps(group([i.split('.') for i in vals]), indent=4))
Output:
[
{
"name": "apple",
"type": [
{
"name": "apple",
"type": "record",
"fields": [
{
"name": "orange",
"type": {
"name": "orange",
"type": "record",
"fields": [
{
"name": "banana",
"type": "string"
}
]
}
}
]
}
]
},
{
"name": "a",
"type": [
{
"name": "a",
"type": "record",
"fields": [
{
"name": "b",
"type": {
"name": "b",
"type": "record",
"fields": [
{
"name": "c",
"type": "string"
},
{
"name": "d",
"type": "string"
}
]
}
}
]
}
]
}
]
vals = ['asd.2', 'asd.3', 'asd.5.3.4']
print(json.dumps(group([i.split('.') for i in vals]), indent=4))
Output:
[
{
"name": "asd",
"type": [
{
"name": "asd",
"type": {
"name": "asd",
"type": "record",
"fields": [
{
"name": "2",
"type": "string"
},
{
"name": "3",
"type": "string"
},
{
"name": "5",
"type": "record",
"fields": [
{
"name": "5",
"type": "record",
"fields": [
{
"name": "3",
"type": {
"name": "3",
"type": "record",
"fields": [
{
"name": "4",
"type": "string"
}
]
}
}
]
}
]
}
]
}
}
]
}
]
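If the end goal is an Avro schema file, the merged fields can be wrapped in a top-level record and written out with the json module. This is only a sketch; the record name Root and the file name schema.avsc are placeholders, and whether the nested shape validates as Avro is a separate question:
import json

schema = {
    "type": "record",
    "name": "Root",  # placeholder record name
    "fields": group([i.split('.') for i in vals]),
}
with open("schema.avsc", "w") as f:
    json.dump(schema, f, indent=4)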

How do I isolate dicts from a list if the id is not found in a second list of dicts (in python)?

I have two lists of dicts
list1 =
[
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 =
[
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I want to return the dicts from list2 whose id is not found in list1.
For example, it should return:
[
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I tried a few suggestions here on Stack Overflow but haven't been able to get it right.
Use a list comprehension that iterates through list2, checking each id against the ids in list1:
list1 = [
{'name': "Maria",
'id': "16a",
},
{'name': "Tania",
'id': "13b",
},
{'name': "Steve",
'id': "5a",
}
]
list2 = [
{'name': "Eric",
'id': "16a",
},
{'name': "Mike",
'id': "7b",
},
{'name': "Steve",
'id': "57a",
}
]
list1_ids = [y['id'] for y in list1]
result = [x for x in list2 if x['id'] not in list1_ids]
# [{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
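For larger lists, a set makes the membership test constant-time; the same logic with only the container swapped (a minor variant, much like the filter answer further down):
list1_ids = {y['id'] for y in list1}
result = [x for x in list2 if x['id'] not in list1_ids]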
This should do:
[d2 for d2 in list2 if d2['id'] not in [d1['id'] for d1 in list1]]
Output:
[{'id': '7b', 'name': 'Mike'}, {'id': '57a', 'name': 'Steve'}]
You can also do it using the filter function:
list1 = [
{"name": "Maria",
"id": "16a",
},
{"name": "Tania",
"id": "13b",
},
{"name": "Steve",
"id": "5a",
}
]
list2 = [
{"name": "Eric",
"id": "16a",
},
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
IDs = set(value["id"] for value in list1)
output = list(filter(lambda elem: elem["id"] not in IDs, list2))
print(output)
Output:
[{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]

n-depth tree: set parent value based on children values

In an n-depth dict where values are set only at the deepest level of the hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What would be an approach to recursively set each parent's value based on the result of an operation performed on its children's values (e.g. their sum)?
I finally managed to do it using the iterative level-order traversal pattern (BFS); I was missing just a couple of details.
This approach works because the traversal order is guaranteed: by the time a node with children is popped off the stack, all of its descendants have already been calculated.
The solution:
def reverseTraversal(obj):
    def parentOperation(node):
        # Sum the (already computed) values of the direct children.
        out = 0
        for child in node['children']:
            out = out + child['value']
        return out

    if obj is None:
        return None
    queue = []
    stack = []
    queue.append(obj)
    # Level-order traversal: push every visited node onto a stack.
    while len(queue) > 0:
        temp = queue.pop(0)
        stack.append(temp)
        if 'children' in temp and len(temp['children']) > 0:
            for child in temp['children']:
                queue.append(child)
    # Unwind the stack: the deepest nodes are processed first.
    while len(stack) > 0:
        node = stack.pop()
        if 'children' in node and len(node['children']) > 0:
            node['value'] = parentOperation(node)
    return obj

# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
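The same aggregation can also be written as a short post-order recursion. This is only a sketch of an alternative, not part of the answer above, and it assumes every leaf already carries a numeric value:
def fill_values(node):
    # Post-order: compute every child first, then derive the parent from them.
    if 'children' in node and node['children']:
        node['value'] = sum(fill_values(child) for child in node['children'])
    return node['value']

fill_values(obj)  # mutates the dict in place, like the BFS version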
Given your data structure and a list of values to update, you can use next in a recursive dict comprehension:
def update(d, targets):
    return {a: [update(i, targets) for i in b] if isinstance(b, list)
               else update(b, targets) if isinstance(b, dict)
               else next(targets) if not b else b
            for a, b in d.items()}

targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

Creating a df to generate JSON in the given format

I am trying to generate a df that produces the JSON below.
JSON data:
{
"name": "flare",
"children": [
{
"name": "K1",
"children": [
{"name": "Exact", "size": 4},
{"name": "synonyms", "size": 14}
]
},
{
"name": "K2",
"children": [
{"name": "Exact", "size": 10},
{"name": "synonyms", "size": 20}
]
},
{
"name": "K3",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 5}
]
},
{
"name": "K4",
"children": [
{"name": "Exact", "size": 13},
{"name": "synonyms", "size": 15}
]
},
{
"name": "K5",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 0}
]
}
]
}
input data:
name Exact synonyms
K1 4 14
K2 10 20
K3 0 5
K4 13 15
K5 0 0
I tried creating a df with the values from the JSON, but I was not able to get the desired JSON out of df.to_json. Please help.
You need to reshape the data with set_index + stack and then use groupby with apply to build the nested lists of dicts:
import json

df = (df.set_index('name')
        .stack()
        .reset_index(level=1)
        .rename(columns={'level_1': 'name', 0: 'size'})
        .groupby(level=0).apply(lambda x: x.to_dict(orient='records'))
        .reset_index(name='children')
      )
print(df)
name children
0 K1 [{'name': 'Exact', 'size': 4}, {'name': 'synon...
1 K2 [{'name': 'Exact', 'size': 10}, {'name': 'syno...
2 K3 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
3 K4 [{'name': 'Exact', 'size': 13}, {'name': 'syno...
4 K5 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
#convert output to dict
j = { "name": "flare", "children": df.to_dict(orient='records')}
#for nice output - easier check
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(j)
{ 'children': [ { 'children': [ {'name': 'Exact', 'size': 4},
{'name': 'synonyms', 'size': 14}],
'name': 'K1'},
{ 'children': [ {'name': 'Exact', 'size': 10},
{'name': 'synonyms', 'size': 20}],
'name': 'K2'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 5}],
'name': 'K3'},
{ 'children': [ {'name': 'Exact', 'size': 13},
{'name': 'synonyms', 'size': 15}],
'name': 'K4'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 0}],
'name': 'K5'}],
'name': 'flare'}
# convert data to JSON and write to file
with open('data.json', 'w') as outfile:
    json.dump(j, outfile)
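If the dataframe still holds the original wide columns (name, Exact, synonyms), the same structure can also be built directly with a plain comprehension; a sketch under that assumption, which can then be written out with json.dump exactly as above:
j = {
    "name": "flare",
    "children": [
        {"name": row["name"],
         "children": [{"name": "Exact", "size": row["Exact"]},
                      {"name": "synonyms", "size": row["synonyms"]}]}
        for row in df.to_dict(orient="records")
    ],
}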
