Make dict from list of paths - python

I have a list of paths:
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
And I want to parse it with Python into a dict (or list of dicts) that looks like:
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
I tried to write some recursive function, but no success. Example:
def path2dict(path, depth):
d = {}
text = path.split('/')[0]
d['text'] = text
depth = depth + 1
d['children'] = [path2dict(p, depth) for p in path.split('/')[depth:]]
return d
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
depth = 0
for path in paths:
d = path2dict(path, depth)
print(d)

Sorry for not using your existing solution, but I have some other:
def stage1(paths):
    """Fold '/'-separated paths into a nested dict keyed by path component.

    Each component maps to a dict of its children; a component with nothing
    below it maps to ``{}``. Paths that share a prefix share the
    corresponding sub-dicts.

    :param paths: iterable of strings such as ``"root/child1/file1"``
    :return: the nested dict (empty when ``paths`` is empty)
    """
    result = {}
    for path in paths:
        current = result
        for part in path.split('/'):
            # setdefault returns the existing child or the freshly inserted
            # one, so one call both creates the level and descends into it.
            current = current.setdefault(part, {})
    return result
def stage2(dct):
    """Turn a nested dict of dicts into a list of text/children nodes.

    Every key becomes ``{'text': key, 'children': [...]}`` where the children
    are produced by recursing into the key's value. An empty dict yields an
    empty list, which is what terminates the recursion at the leaves.

    :param dct: nested dict whose values are dicts (possibly empty)
    :return: list of node dicts, in the iteration order of ``dct``
    """
    return [
        {
            'text': key,
            'children': stage2(value)
        }
        for key, value in dct.items()
    ]
after_stage1 = stage1(paths)
# after_stage1 is
# {
# 'root': {
# 'child1': {
# 'file1': {},
# 'file2': {}
# },
# 'child2': {
# 'file1': {}
# }
# }
# }
after_stage2 = stage2(after_stage1)
# after_stage2 contains exactly what you need

You can use itertools.groupby:
from itertools import groupby
import json
d = ['root/child1/file1', 'root/child1/file2', 'root/child2/file1']
def create_paths(paths):
    """Build a list of ``{'text', 'children'}`` nodes from pre-split paths.

    :param paths: list of non-empty component lists, e.g.
        ``[['root', 'child1', 'file1'], ...]``
    :return: list of node dicts; siblings appear in sorted order of their
        name, and a node with nothing below it has ``'children': []``
    """
    # groupby only merges adjacent items, so sort by the first component.
    ordered = sorted(paths, key=lambda x: x[0])
    nodes = []
    for head, group in groupby(ordered, key=lambda x: x[0]):
        # Keep only the paths that continue below ``head``. A name that is
        # both an end point and a parent (e.g. 'a' and 'a/b') contributes an
        # empty remainder, which must neither hide its siblings' descendants
        # nor be indexed in the recursive call.
        tails = [rest for _, *rest in group if rest]
        nodes.append({'text': head, 'children': create_paths(tails)})
    return nodes
print(json.dumps(create_paths([i.split('/') for i in d]), indent=4))
Output:
[
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
]
}
]

Related

Yaql expression. How can I make two list into json object

I'm trying to form a JSON object from two lists, ["vn1","vn2","vn3"] and [6,4,5], using the YAQL expression below:
yaql> dict(data=>dict(["name","id"].zip(["vn1","vn2","vn3"],[6,4,5])))
{
"data": {
"name": "vn1",
"id": "vn2"
}
}
I would like below output
{
"data": [
{
"name": "vn1",
"id": 6
},
{
"name": "vn2",
"id": 4
},
{
"name": "vn3",
"id": 5
}
] }
Your json object resembles a dictionary of lists:
# field names for every record, and the two parallel value lists
label = ['name', 'id']
v = ["vn1", "vn2", "vn3"]
k = [6, 4, 5]
# Pair the lists element-wise; each (name, id) pair becomes one record.
# zip stops at the shorter list instead of raising IndexError.
json_dict = {
    'data': [{label[0]: name, label[1]: ident} for name, ident in zip(v, k)]
}
check it matches your needed json object:
json_object = { "data": [ { "name": "vn1", "id": 6 }, { "name": "vn2", "id": 4 }, { "name": "vn3", "id": 5 } ] }
>>> json_dict == json_object
True

Adding nodes to json in python

I am trying to generate custom JSON in python using the following code
root={}
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert one path into the nested-dict tree ``root``, in place.

    :param root: dict mapping a node name to the dict of its children
    :param paths: sequence of node names ordered from the top level down;
        an empty (or ``None``) value leaves ``root`` untouched
    :return: ``None``

    Missing levels are created as empty dicts and existing ones are reused.
    The walk is iterative, so path length is not bounded by the recursion
    limit and no list slices are copied.
    """
    node = root
    for name in paths or ():
        node = node.setdefault(name, {})
for p in Levels:
append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into a JSON-ready list with leaf counts.

    :param d: dict mapping a node name to the dict of its children
    :return: ``(nodes, leaves)`` where ``nodes`` is a list of
        ``{'name': '<key>(<leaves below key>)', 'children': [...]}`` dicts and
        ``leaves`` is the number of leaf paths below ``d``. An empty dict
        counts as one leaf and yields ``([{}], 1)``.
    """
    if not d:
        return ([{}], 1)
    templist = []
    noofchildren = 0
    for k, v in d.items():
        # The recursive result is never an empty list (a leaf gives [{}]),
        # so it can be used as the children value directly.
        temp, children = convert(v)
        noofchildren += children
        templist.append({'name': k + "(" + str(children) + ")", 'children': temp})
    return (templist, noofchildren)
# Print results
import json
print(json.dumps(convert(root)[0], indent=2))
and the OUTPUT is
[
{
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
},
{
"name": "L3(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L3(1)",
"children": [
{}
]
},
{
"name": "L1(1)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L1(1)",
"children": [
{}
]
},
{
"name": "L4(1)",
"children": [
{}
]
}
]
}
]
}
]
My dataset has changed a little bit
Levels=[[['L1','L1','L2'],[10,20,30]],
[[['L1','L1','L3'],[10,15,20]],
[[['L1','L2'],[20,10]],
[[['L2','L2','L3'],[20,20,30]],
[[['L2','L2','L1'],[10,20,30]]
[[['L3','L2'],[10,20]]
[[['L4','L2','L1'],[10,20,10]]
[[['L4','L2','L4'],[20,40,50]]]
and the output that I want is the average of the levels along with the count
[
{
"name": "L1(3)#(13)", // taking avg of 10,10,20
"children": [
{
"name": "L1(2)#(17)", // taking avg of 20,15
"children": [
{
"name": "L2(1)#(30)",
"children": [
{}
]
},
{
"name": "L3(1)#(20)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)#10",
"children": [
{}
]
}
]
},
{
"name": "L2(2)#(15)", // avg of 20,10
"children": [
{
"name": "L2(2)#(20)", // avg of 20,20
"children": [
{
"name": "L3(1)#(30)",
"children": [
{}
]
},
{
"name": "L1(1)#(30)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)#(15)",// avg of 10,20
"children": [
{
"name": "L2(2)#(30)", // avg of 20,40
"children": [
{
"name": "L1(1)# (10)",
"children": [
{}
]
},
{
"name": "L4(1)#(50)",
"children": [
{}
]
}
]
}
]
}
]
How can I change my code to add this information?
Preface
Before moving onto the solution, here are some points I want to mention:
Make use of Python's object-oriented programming features! This makes the data structure much clearer to both yourself and future readers.
Using a custom class also makes it easier for us to store the metadata – i.e. the number of instances of a node and its total value – while constructing the intermediate tree structure, rather than while converting it. This is also more efficient because with the latter method, a simple naive traversal algorithm would make duplicate visits to the nodes!
If you want your output to (reliably) maintain the order in which the paths were inserted, you should use an OrderedDict (from collections) instead of an ordinary dict ({}).
It is more logical to output an empty list for nodes with no children than a list with a single empty dict:
// Before
"children": [
{}
]
// After
"children": []
The reason being that any software which will parse this data later can safely assume that all objects have the "name" and "children" fields, which an empty dict does not.
The list boundaries and elements in the Levels array seem to be poorly formed; did you mean:
Levels = [
[['L1','L1','L2'],[10,20,30]],
[['L1','L1','L3'],[10,15,20]],
[['L1','L2'],[20,10]],
[['L2','L2','L3'],[20,20,30]],
[['L2','L2','L1'],[10,20,30]],
[['L3','L2'],[10,20]],
[['L4','L2','L1'],[10,20,10]],
[['L4','L2','L4'],[20,40,50]],
]
While on the subject of the data, since the nodes and values obey 1-to-1 mappings (within each path), it would be more appropriate to use a list of tuples rather than a list of two parallel lists:
Levels = [
[('L1', 10), ('L1', 20), ('L2', 30)],
[('L1', 10), ('L1', 15), ('L3', 20)],
[('L1', 20), ('L2', 10)],
[('L2', 20), ('L2', 20), ('L3', 30)],
[('L2', 10), ('L2', 20), ('L1', 30)],
[('L3', 10), ('L2', 20)],
[('L4', 10), ('L2', 20), ('L1', 10)],
[('L4', 20), ('L2', 40), ('L4', 50)]
]
There seems to be a mistake in your expected output:
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)", <--- this should be #(20)
"children": [
{}
]
}
]
},
Implementations
For your current data format (pair of lists):
from collections import OrderedDict


# A dictionary here corresponds to an array of nodes in JSON
# the "name" fields serve as the keys and "children" as the values
class data_node(OrderedDict):
    """Ordered mapping from a child's name to its ``data_node``.

    Besides its children, each node records how many inserted paths passed
    through it (``instances``) and the sum of the values they carried at
    this level (``total``).
    """

    def __init__(self, **kwargs):
        super(data_node, self).__init__(**kwargs)
        self.instances = 0
        self.total = 0

    def insert(self, names, values):
        """Add one path below this node.

        :param names: node names from this level downwards
        :param values: the value attached to each name, in the same order

        Insertion stops as soon as either list is exhausted, so lists of
        unequal length are truncated to the shorter one.
        """
        # An empty list is falsy, so no explicit len() check is needed.
        if not names or not values:
            return
        # fetch the child node, creating it if it doesn't exist
        child = self.setdefault(names[0], data_node())
        # add the value to the total
        # and increment the instance counter
        child.instances += 1
        child.total += values[0]
        # recursive call on the child
        # with the rest of the path
        child.insert(names[1:], values[1:])

    def convert(self):
        """Return the children as a JSON-ready list of name/children dicts.

        Each name has the form ``<name>(<instances>)#(<mean>)``.
        """
        return [
            {
                "name": "{}({})#({})".format(
                    name,
                    child.instances,
                    # Floor division gives the integer means shown in the
                    # sample output on both Python 2 and Python 3.
                    child.total // child.instances
                ),
                "children": child.convert()
            }
            for name, child in self.items()
        ]
# Build the tree from the (names, values) pairs, then dump it as JSON.
tree = data_node()
for path in Levels:
    tree.insert(path[0], path[1])
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(json.dumps(tree.convert(), indent=2))
For my proposed data format (list of tuples):
# only the insertion method differs
# all other parts of the class are unchanged
def insert(self, path):
    """Add one path, given as a list of ``(name, value)`` tuples, below this node.

    An empty path is a no-op. For the first tuple the matching child is
    fetched (or created), its counters are updated, and the rest of the
    path is inserted below it.
    """
    if not path:
        return
    name, value = path[0]
    # setdefault stores a newly created child straight away, so no separate
    # assignment back into the mapping is needed afterwards.
    child = self.setdefault(name, data_node())
    child.instances += 1
    child.total += value
    child.insert(path[1:])
...
for path in Levels:
tree.insert(path) # simpler function signature
EDIT:
If there is a reason that you want the leaf node format to be [{}] instead of just [], only a simple change would be needed:
# in convert()
{
"name": ..., # as before
# again exploiting the truthy-ness property of arrays
"children": child.convert() or [{}]
}
Output
Both implementations give the correct JSON output, according to my comments in the preface:
[
{
"name": "L1(3)#(13)",
"children": [
{
"name": "L1(2)#(17)",
"children": [
{
"name": "L2(1)#(30)",
"children": []
},
{
"name": "L3(1)#(20)",
"children": []
}
]
},
{
"name": "L2(1)#(10)",
"children": []
}
]
},
{
"name": "L2(2)#(15)",
"children": [
{
"name": "L2(2)#(20)",
"children": [
{
"name": "L3(1)#(30)",
"children": []
},
{
"name": "L1(1)#(30)",
"children": []
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(20)",
"children": []
}
]
},
{
"name": "L4(2)#(15)",
"children": [
{
"name": "L2(2)#(30)",
"children": [
{
"name": "L1(1)#(10)",
"children": []
},
{
"name": "L4(1)#(50)",
"children": []
}
]
}
]
}
]

custom json formatting in python

I have the following code to generate json representation of list of lists.
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert one path into the nested-dict tree ``root``, in place.

    :param root: dict mapping a node name to the dict of its children
    :param paths: sequence of node names ordered from the top level down;
        an empty (or ``None``) value leaves ``root`` untouched
    :return: ``None``

    Missing levels are created as empty dicts and existing ones are reused.
    The walk is iterative, so path length is not bounded by the recursion
    limit and no list slices are copied.
    """
    node = root
    for name in paths or ():
        node = node.setdefault(name, {})
# Start from an empty tree: this snippet never defines ``root`` itself, so
# the loop would otherwise fail with a NameError.
root = {}
for p in Levels:
    append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into a list of name/children dicts.

    :param d: dict mapping a node name to the dict of its children
    :return: one ``{'name': key, 'children': ...}`` dict per key; a key whose
        value is empty gets the placeholder children list ``[{}]``
    """
    return [
        {'name': k, 'children': convert(v) if v else [{}]}
        for k, v in d.items()
    ]
# Print results
import json
print(json.dumps(convert(root), indent=4))
Output:
[
"name": "L1",
"children": [
{
"name": "L1",
"children":[
{
"name":"L3",
"children":[{}]
},
{
"name":"L1",
"children":[{}]
}]
},
{
"name":"L2",
"children":[{}]
}
]
for the levels
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
I also need to encode the count of each level
For example, the root L1 (count 3) has two first-level children, L1(2) and L2(1), and L1(2) in turn has the children L2(1) and L3(1) on the next level:
L1(3)-->L1(2)-->L2(1)
-->L3(1)
-->L2(1)
How can I encode this count in my json output.
I want my final output to look like this
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children":[
root={}
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert one path into the nested-dict tree ``root``, in place.

    :param root: dict mapping a node name to the dict of its children
    :param paths: sequence of node names ordered from the top level down;
        an empty (or ``None``) value leaves ``root`` untouched
    :return: ``None``

    Missing levels are created as empty dicts and existing ones are reused.
    The walk is iterative, so path length is not bounded by the recursion
    limit and no list slices are copied.
    """
    node = root
    for name in paths or ():
        node = node.setdefault(name, {})
for p in Levels:
append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into a JSON-ready list with leaf counts.

    :param d: dict mapping a node name to the dict of its children
    :return: ``(nodes, leaves)`` where ``nodes`` is a list of
        ``{'name': '<key>(<leaves below key>)', 'children': [...]}`` dicts and
        ``leaves`` is the number of leaf paths below ``d``. An empty dict
        counts as one leaf and yields ``([{}], 1)``.
    """
    if not d:
        return ([{}], 1)
    templist = []
    noofchildren = 0
    for k, v in d.items():
        # The recursive result is never an empty list (a leaf gives [{}]),
        # so it can be used as the children value directly.
        temp, children = convert(v)
        noofchildren += children
        templist.append({'name': k + "(" + str(children) + ")", 'children': temp})
    return (templist, noofchildren)
# Print results
import json
print(json.dumps(convert(root)[0], indent=2))
OUTPUT
[
{
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
},
{
"name": "L3(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L3(1)",
"children": [
{}
]
},
{
"name": "L1(1)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L1(1)",
"children": [
{}
]
},
{
"name": "L4(1)",
"children": [
{}
]
}
]
}
]
}
]

convert file path list to tree

There is a python file path list like below:
file_path_list = ["test/dir1/log.txt", "test/dir1/dir2/server.txt", "test/manage/img.txt"]
I want to convert it to a tree. The expected result is below:
tree_data = [
{
"path": "test",
"children": [
{
"path": "dir1",
"children": [
{
"path": "log.txt"
},
{
"path": "dir2",
"children": [
{
"path": "server.txt"
}
]
}
]
},
{
"path": "manage",
"children": [
{
"path": "img.txt",
}
]
}
]
}
]
What's the best way to convert?
Update: my code is below, but I don't think it is very good.
def list2tree(file_path):
    """Convert a list of '/'-separated file paths into a tree.

    :param file_path: iterable of path strings such as ``"test/dir1/log.txt"``
    :return: a one-element list holding a synthetic ``"root"`` node; every
        node is ``{"path": name}`` and gains a ``"children"`` list only once
        something is placed below it

    Components are matched against all existing siblings, so the input does
    not have to be sorted or grouped by directory, and a path may extend a
    node that was first added as a leaf.
    """
    tree_data = [{
        "path": "root",
        "children": []
    }]
    for f in file_path:
        node_path = tree_data[0]
        for p in f.split("/"):
            # Descending into a node is what turns it into a directory.
            siblings = node_path.setdefault("children", [])
            for candidate in siblings:
                if candidate["path"] == p:
                    node_path = candidate
                    break
            else:
                # no sibling with this name yet: create new node
                new_node = {"path": p}
                siblings.append(new_node)
                node_path = new_node
    return tree_data
I think this way is not the best. any ideas? Thank you very much!
One way is to split the strings at '/' and put them in a defaultdict of defaultdicts, see defaultdict of defaultdict, nested.

Python to parent/child JSON

I'm trying to use Python to turn data from a CSV into a JSON with the format found here:
https://gist.github.com/mbostock/1093025 so that I can modify some http://d3js.org/ examples.
I have found some posts on how to do similar transformations, but nothing exactly like the nested {'name': name, 'children': []} format.
For the test.csv:
Team,Task,Country,ID
Team A,Processing,CA,5
Team A,Review,CA,45
Team B,Processing,US,76
Team B,Processing,CA,676
Team B,Support,US,2345
Team C,Processing,US,67
Team C,Review,US,734
Team C,Support,US,34
Output should look like:
{
"name": "Flare",
"children": [
{
"name": "Team A",
"children": [
{
"name": "Processing",
"children": [
{"name": "CA", "size": 5}
]
},
{
"name": "Review",
"children": [
{"name": "CA", "size": 45}
]
}
]
},
{
"name": "Team B",
"children": [
{
"name": "Processing",
"children": [
{"name": "US", "size": 76},
{"name": "CA", "size": 676}
]
},
{
"name": "Support",
"children": [
{"name": "US", "size": 2345}
]
}
]
},
{
"name": "Team C",
"children": [
{
"name": "Processing",
"children": [
{"name": "US", "size": 67}
]
},
{
"name": "Review",
"children": [
{"name": "US", "size": 734}
]
},
{
"name": "Support",
"children": [
{"name": "US", "size": 34}
]
}
]
}
]
}
This is as far as I have been able to get (I know it's pretty bad):
import csv
import json
children = []
#create a list of lists from CSV
reader = csv.reader(open('//corp.bloomberg.com/pn-dfs/AllLinks/Users/jdesilvio/Desktop/test.csv', 'rb'))
reader.next()
for row in reader:
children.append(row)
#create tree root
tree = {'name': "flare", 'children': []}
#create a generic subtree
subtree = {'name': 0, 'children': []}
for i in children:
#if the first element in i doesn't equal name, then we know that it's a different group
if i[0] != subtree['name']:
#so we append the current group
tree['children'].append({'name': subtree['name'], 'children': subtree['children']})
#start a new group
subtree['children'] = []
#and rename the subtree
subtree['name'] = i[0]
else:
#then start appending pieces to the new group
subtree['children'].append(i[1:len(i)])
#remove the generic starting name
tree['children'] = tree['children'][1:]
print json.dumps(tree, indent=1)
Which yields:
{
"name": "flare",
"children": [
{
"name": "Team A",
"children": [
[
"Review",
"CA",
"45"
]
]
},
{
"name": "Team B",
"children": [
[
"Processing",
"CA",
"676"
],
[
"Support",
"US",
"2345"
]
]
}
]
}
This looks like it is headed in the right direction, but even if I was able to get the first level nested, I'm not sure how to nest more levels in a generic way.
Populating a tree is the clearest solution. However, using a dict for traversal is not a good idea. I suggest creating a helper class for the tree nodes, using it to populate the data, and then converting the result to JSON:
import csv
import json
class Node(object):
    """One node of a name/children tree.

    A node created with a ``size`` is serialized as ``{'name', 'size'}``;
    any other node is serialized as ``{'name', 'children'}``.
    """

    def __init__(self, name, size=None):
        self.name = name
        self.children = []
        self.size = size

    def child(self, cname, size=None):
        """Return the child named ``cname``, creating it if it is absent.

        ``size`` is used only when the child is created; an existing child
        is returned unchanged.
        """
        # Return at the first match instead of collecting every match.
        for existing in self.children:
            if existing.name == cname:
                return existing
        created = Node(cname, size)
        self.children.append(created)
        return created

    def as_dict(self):
        """Return this subtree as nested plain dicts ready for ``json.dumps``."""
        res = {'name': self.name}
        if self.size is None:
            res['children'] = [c.as_dict() for c in self.children]
        else:
            res['size'] = self.size
        return res
# Read the CSV row by row, filing each row under Team > Task > Country.
root = Node('Flare')
with open('/tmp/test.csv', 'r') as f:
    reader = csv.reader(f)
    # Skip the header row. The built-in next() works on Python 2.6+ and 3
    # (reader.next() is Python 2 only); the default avoids StopIteration
    # on an empty file.
    next(reader, None)
    for row in reader:
        grp1, grp2, grp3, size = row
        # csv yields strings; the target JSON carries numeric sizes.
        root.child(grp1).child(grp2).child(grp3, int(size))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(json.dumps(root.as_dict(), indent=4))

Categories