I have a list of paths:
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
And I want to parse it with Python into a dict (or a list of dicts) that looks like:
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
]
}
I tried to write a recursive function, but without success. Example:
def path2dict(path, depth):
    """Build a ``{'text', 'children'}`` node from a slash-separated path.

    The node's text is always the first component of ``path``. Every
    component from index ``depth + 1`` onward becomes a direct child,
    each built by a recursive call on that single component. Because a
    single component contains no further ``/``, those children end up
    as leaves for any ``depth >= 0`` (the result is flat, not nested).
    """
    parts = path.split('/')
    next_depth = depth + 1
    children = []
    for part in parts[next_depth:]:
        children.append(path2dict(part, next_depth))
    return {'text': parts[0], 'children': children}
# Sample input: three slash-separated paths sharing the "root" prefix.
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
depth = 0
# Each path is converted independently, so one dict is printed per path
# (the paths are not merged into a single tree here).
for path in paths:
    d = path2dict(path, depth)
    print(d)
Sorry for not building on your existing solution, but here is a different approach:
def stage1(paths):
    """Merge slash-separated paths into one nested dict.

    Each path component becomes a key; its value is the dict of the
    components that follow it. Leaves map to an empty dict.
    """
    tree = {}
    for path in paths:
        node = tree
        for name in path.split('/'):
            # setdefault returns the existing sub-dict, or inserts and
            # returns a fresh one, so we can descend in a single step.
            node = node.setdefault(name, {})
    return tree
def stage2(dct):
    """Turn a nested dict into a list of ``{'text', 'children'}`` nodes.

    Every key of ``dct`` yields one node whose ``children`` list is the
    recursive conversion of the key's value. An empty dict yields ``[]``.
    """
    nodes = []
    for name, subtree in dct.items():
        nodes.append({'text': name, 'children': stage2(subtree)})
    return nodes
# Stage 1: merge all paths into one nested dict keyed by path component.
after_stage1 = stage1(paths)
# after_stage1 is
# {
#     'root': {
#         'child1': {
#             'file1': {},
#             'file2': {}
#         },
#         'child2': {
#             'file1': {}
#         }
#     }
# }
# Stage 2: convert the nested dict into a list of {'text', 'children'} nodes.
after_stage2 = stage2(after_stage1)
# after_stage2 contains exactly what you need
You can use itertools.groupby:
from itertools import groupby
import json
# Sample input: slash-separated paths that share common prefixes.
d = ['root/child1/file1', 'root/child1/file2', 'root/child2/file1']
def create_paths(paths):
    """Build a list of ``{'text', 'children'}`` nodes from pre-split paths.

    Args:
        paths: iterable of non-empty sequences of path components, e.g.
            ``[['root', 'child1', 'file1'], ['root', 'child2']]``.

    Returns:
        One node per distinct first component, sorted by that component.
        Each node's ``children`` is the same structure built from the
        remainders of the paths that start with it. A path that ends at
        a node contributes no child, so a path that is a prefix of
        another one (``['root']`` next to ``['root', 'child']``) neither
        hides the deeper entries nor triggers an ``IndexError``.
    """
    def first(parts):
        return parts[0]

    nodes = []
    # groupby only merges adjacent items, so sort by the grouping key first.
    for name, group in groupby(sorted(paths, key=first), key=first):
        # Keep only non-empty remainders: an empty one means "the path
        # stops here" and must not be recursed into.
        remainders = [rest for _, *rest in group if rest]
        nodes.append({'text': name, 'children': create_paths(remainders)})
    return nodes
# Split every path into its components, build the tree, and pretty-print it as JSON.
print(json.dumps(create_paths([i.split('/') for i in d]), indent=4))
Output:
[
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
]
}
]
I am trying to generate custom JSON in python using the following code
# Tree under construction: maps each node name to the dict of its children.
root={}
# Input paths, each listed from the top-level node downwards.
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert one path into the nested-dict tree ``root`` (in place).

    ``paths`` is the sequence of node names from the top down. Missing
    nodes are created as empty dicts; existing ones are reused. A falsy
    ``paths`` leaves ``root`` untouched. Returns ``None``.
    """
    if not paths:
        return
    node = root
    for name in paths:
        # Descend into the existing child, creating it on first sight.
        node = node.setdefault(name, {})
# Merge every path into the shared `root` tree.
for p in Levels:
    append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into JSON-ready nodes with leaf counts.

    Args:
        d: nested dict mapping node names to the dict of their children
            (the structure ``append_path`` builds).

    Returns:
        A ``(nodes, count)`` tuple. ``nodes`` is a list with one
        ``{"name": "<key>(<n>)", "children": [...]}`` dict per key, where
        ``<n>`` is the number of leaf nodes beneath that key. ``count``
        is the sum of those numbers. For an empty dict (a leaf) the
        result is ``([{}], 1)``: a single empty placeholder child, and
        the leaf counts as one.
    """
    if not d:
        return ([{}], 1)

    nodes = []
    leaf_total = 0
    for name, subtree in d.items():
        children, leaves = convert(subtree)
        leaf_total += leaves
        # `children` is never empty (a leaf yields [{}]), so no fallback
        # branch is needed here.
        nodes.append({
            "name": "{}({})".format(name, leaves),
            "children": children,
        })
    return (nodes, leaf_total)
# Print results: convert() returns (nodes, count); only the node list is serialized.
import json
print(json.dumps(convert(root)[0], indent=2))
and the OUTPUT is
[
{
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
},
{
"name": "L3(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L3(1)",
"children": [
{}
]
},
{
"name": "L1(1)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L1(1)",
"children": [
{}
]
},
{
"name": "L4(1)",
"children": [
{}
]
}
]
}
]
}
]
My dataset has changed a little bit
Levels=[[['L1','L1','L2'],[10,20,30]],
[[['L1','L1','L3'],[10,15,20]],
[[['L1','L2'],[20,10]],
[[['L2','L2','L3'],[20,20,30]],
[[['L2','L2','L1'],[10,20,30]]
[[['L3','L2'],[10,20]]
[[['L4','L2','L1'],[10,20,10]]
[[['L4','L2','L4'],[20,40,50]]]
and the output that I want is the average of the levels along with the count
[
{
"name": "L1(3)#(13)", // taking avg of 10,10,20
"children": [
{
"name": "L1(2)#(17)", // taking avg of 20,15
"children": [
{
"name": "L2(1)#(30)",
"children": [
{}
]
},
{
"name": "L3(1)#(20)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)#10",
"children": [
{}
]
}
]
},
{
"name": "L2(2)#(15)", // avg of 20,10
"children": [
{
"name": "L2(2)#(20)", // avg of 20,20
"children": [
{
"name": "L3(1)#(30)",
"children": [
{}
]
},
{
"name": "L1(1)#(30)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)#(15)",// avg of 10,20
"children": [
{
"name": "L2(2)#(30)", // avg of 20,40
"children": [
{
"name": "L1(1)# (10)",
"children": [
{}
]
},
{
"name": "L4(1)#(50)",
"children": [
{}
]
}
]
}
]
}
]
How can I change my code to add this information?
Preface
Before moving onto the solution, here are some points I want to mention:
Make use of Python's object-oriented programming features! This makes the data structure much clearer to both yourself and future readers.
Using a custom class also makes it easier for us to store the metadata – i.e. the number of instances of a node and its total value – while constructing the intermediate tree structure, rather than while converting it. This is also more efficient because with the latter method, a simple naive traversal algorithm would make duplicate visits to the nodes!
If you want your output to (reliably) maintain the order in which the paths were inserted, you should use an OrderedDict (from collections) instead of an ordinary dict ({}).
It is more logical to output an empty list for nodes with no children than a list with a single empty dict:
// Before
"children": [
{}
]
// After
"children": []
The reason is that any software that parses this data later can then safely assume that every object has the "name" and "children" fields, which an empty dict does not have.
The list boundaries and elements in the Levels array seem to be poorly formed; did you mean:
Levels = [
[['L1','L1','L2'],[10,20,30]],
[['L1','L1','L3'],[10,15,20]],
[['L1','L2'],[20,10]],
[['L2','L2','L3'],[20,20,30]],
[['L2','L2','L1'],[10,20,30]],
[['L3','L2'],[10,20]],
[['L4','L2','L1'],[10,20,10]],
[['L4','L2','L4'],[20,40,50]],
]
While on the subject of the data, since the nodes and values obey 1-to-1 mappings (within each path), it would be more appropriate to use a list of tuples rather than a list of two parallel lists:
Levels = [
[('L1', 10), ('L1', 20), ('L2', 30)],
[('L1', 10), ('L1', 15), ('L3', 20)],
[('L1', 20), ('L2', 10)],
[('L2', 20), ('L2', 20), ('L3', 30)],
[('L2', 10), ('L2', 20), ('L1', 30)],
[('L3', 10), ('L2', 20)],
[('L4', 10), ('L2', 20), ('L1', 10)],
[('L4', 20), ('L2', 40), ('L4', 50)]
]
There seems to be a mistake in your expected output:
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)", <--- this should be #(20)
"children": [
{}
]
}
]
},
Implementations
For your current data format (pair of lists):
# A dictionary here corresponds to an array of nodes in JSON
# the "name" fields serve as the keys and "children" as the values
class data_node(OrderedDict):
    """Ordered mapping of child name -> child ``data_node`` with running stats.

    Besides its children, every node tracks how many inserted paths
    passed through it (``instances``) and the sum of the values attached
    to it on those paths (``total``). Children keep insertion order.
    """

    def __init__(self, **kwargs):
        super(data_node, self).__init__(**kwargs)
        self.instances = 0  # number of inserted paths that visited this node
        self.total = 0      # sum of the values attached to this node

    def insert(self, names, values):
        """Insert one path below this node, updating counts and totals.

        Args:
            names: sequence of node names, from the top level downwards.
            values: sequence of numbers parallel to ``names``; the i-th
                value is added to the i-th node on the path.

        Insertion stops as soon as either sequence is exhausted, so
        mismatched lengths cannot raise ``IndexError`` half-way through
        and leave ancestors already updated. Surplus entries in the
        longer sequence are ignored.
        """
        node = self
        for name, value in zip(names, values):
            child = node.get(name)
            if child is None:
                # create the child node only if it doesn't exist yet
                child = node[name] = data_node()
            child.instances += 1
            child.total += value
            node = child

    def convert(self):
        """Return the children as a list of JSON-ready dicts.

        Each entry is ``{"name": "<name>(<instances>)#(<mean>)",
        "children": [...]}``. The mean uses floor division so that the
        result is the same on Python 2 and Python 3 (e.g. 35 // 2 == 17).
        A node without children yields an empty ``children`` list.
        """
        return [
            {
                "name": "{}({})#({})".format(
                    name,
                    child.instances,
                    child.total // child.instances  # floored mean
                ),
                "children": child.convert()
            }
            for name, child in self.items()
        ]
# Build the tree from the (names, values) pairs, then dump it as JSON.
tree = data_node()
for path in Levels:
    # path[0] holds the node names, path[1] the parallel values
    tree.insert(path[0], path[1])
# The call form of print works on both Python 2 and Python 3.
print(json.dumps(tree.convert(), indent=2))
For my proposed data format (list of tuples):
# only the insertion method differs
# all other parts of the class are unchanged
def insert(self, path):
    """Insert one path given as a sequence of ``(name, value)`` pairs.

    The first pair updates (or creates) the matching child of this
    node; the remaining pairs are inserted recursively below that
    child. An empty path does nothing.
    """
    if path:
        head, rest = path[0], path[1:]
        name, value = head
        # reuse the existing child, or start a fresh one
        node = self.get(name, data_node())
        node.instances += 1
        node.total += value
        node.insert(rest)
        self[name] = node
...
# Each entry of Levels is already a complete list of (name, value) pairs.
for path in Levels:
    tree.insert(path) # simpler function signature
EDIT:
If there is a reason that you want the leaf node format to be [{}] instead of just [], only a simple change would be needed:
# in convert()
{
"name": ..., # as before
# again exploiting the truthy-ness property of arrays
"children": child.convert() or [{}]
}
Output
Both implementations give the correct JSON output, according to my comments in the preface:
[
{
"name": "L1(3)#(13)",
"children": [
{
"name": "L1(2)#(17)",
"children": [
{
"name": "L2(1)#(30)",
"children": []
},
{
"name": "L3(1)#(20)",
"children": []
}
]
},
{
"name": "L2(1)#(10)",
"children": []
}
]
},
{
"name": "L2(2)#(15)",
"children": [
{
"name": "L2(2)#(20)",
"children": [
{
"name": "L3(1)#(30)",
"children": []
},
{
"name": "L1(1)#(30)",
"children": []
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(20)",
"children": []
}
]
},
{
"name": "L4(2)#(15)",
"children": [
{
"name": "L2(2)#(30)",
"children": [
{
"name": "L1(1)#(10)",
"children": []
},
{
"name": "L4(1)#(50)",
"children": []
}
]
}
]
}
]