convert file path list to tree - python

There is a python file path list like below:
file_path_list = ["test/dir1/log.txt", "test/dir1/dir2/server.txt", "test/manage/img.txt"]
I want to convert it to a tree. the expect result is below:
tree_data = [
{
"path": "test",
"children": [
{
"path": "dir1",
"children": [
{
"path": "log.txt"
},
{
"path": "dir2",
"children": [
{
"path": "server.txt"
}
]
}
]
},
{
"path": "manage",
"children": [
{
"path": "img.txt",
}
]
}
]
}
]
What's the best way to convert?
update: my code is below, but I think it's not well.
def list2tree(file_path):
"""Convert list to tree."""
tree_data = [{
"path": "root",
"children": []
}]
for f in file_path:
node_path = tree_data[0]
pathes = f.split("/")
for i, p in enumerate(pathes):
length = len(node_path["children"])
if not length or node_path["children"][length - 1]["path"] != p:
# create new node
new_node = {
"path": p,
}
if i != len(pathes) - 1: # middle path
new_node["children"] = list()
node_path["children"].append(new_node)
node_path = new_node
else:
node_path = node_path["children"][length - 1]
return tree_data
I think this way is not the best. any ideas? Thank you very much!

One way is to split the strings at '/' and put them in a defaultdict of defaultdicts, see defaultdict of defaultdict, nested.

Related

building json tree from list of file paths

I have list of file paths and need them to be organized in a tree structure like the following.
{
"label": "VP Accounting",
"children": [
{
"label": "iWay",
"children": [
{
"label": "Universidad de Especialidades del Espíritu Santo"
},
{
"label": "Marmara University"
},
{
"label": "Baghdad College of Pharmacy"
}
]
},
{
"label": "KDB",
"children": [
{
"label": "Latvian University of Agriculture"
},
{
"label": "Dublin Institute of Technology"
}
]
},
what I did so far is the following
output = {}
current = {}
for path in paths :
current = output
for segment in path.split("/") :
if segment != '':
if segment not in current:
current[segment] = {}
current = current[segment]
The output is a tree like structure but I can not add the keys ["label", "children"]
The idea is to maintain the dictionary that your code is building as an auxiliary helper structure (which is called helper below, and is simplified to a flat dictionary), and to create the other (desired) structure in parallel (having the lists).
Note that the top level should really be a list as it is not guaranteed all entries will start with the same folder ("segment").
Here is your code adapted to create the children lists and add the labels:
output = []
root = { "children": output }
helper = {}
for path in paths:
current = root
subpath = ""
for segment in path.split("/"):
if "children" not in current:
current["children"] = []
subpath += "/" + segment
if subpath not in helper:
helper[subpath] = { "label": segment }
current["children"].append(helper[subpath])
current = helper[subpath]
print(output)

Make dict from list of paths

I have a list of paths:
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
And I want to parse it ith python into dict (or list of dicts) that looks like:
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file2",
"children": []
}
]
}
I tried to write some recursive function, but no success. Example:
def path2dict(path, depth):
d = {}
text = path.split('/')[0]
d['text'] = text
depth = depth + 1
d['children'] = [path2dict(p, depth) for p in path.split('/')[depth:]]
return d
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
depth = 0
for path in paths:
d = path2dict(path, depth)
print(d)
Sorry for not using your existing solution, but I have some other:
def stage1(paths):
result = {}
for path in paths:
split = path.split('/')
current = result
for part in split:
current.setdefault(part, {})
current = current[part]
return result
def stage2(dct):
return [
{
'text': key,
'children': stage2(value)
}
for key, value in dct.items()
]
after_stage1 = stage1(paths)
# after_stage1 is
# {
# 'root': {
# 'child1': {
# 'file1': {},
# 'file2': {}
# },
# 'child2': {
# 'file1': {}
# }
# }
# }
after_stage2 = stage2(after_stage1)
# after_stage2 contains exactly what you need
You can use itertools.groupby:
from itertools import groupby
import json
d = ['root/child1/file1', 'root/child1/file2', 'root/child2/file1']
def create_paths(paths):
_vals = [[a, [c for _, *c in b]] for a, b in groupby(sorted(paths, key=lambda x:x[0]), key=lambda x:x[0])]
return [{'text':a, 'children':[] if not b[0] else create_paths(b)} for a, b in _vals]
print(json.dumps(create_paths([i.split('/') for i in d]), indent=4))
Output:
[
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
]
}
]

Adding nodes to json in python

I am trying to generate custom JSON in python using the following code
root={}
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
if paths:
child = root.setdefault(paths[0], {})
append_path(child, paths[1:])
for p in Levels:
append_path(root, p)
def convert(d):
templist=[]
noofchildren=0
if(len(d.items())==0):
return ([{}],1)
for k,v in d.items():
temp,children=convert(v)
noofchildren+=children
if(temp):
templist.append({"name":k+"("+str(children)+")",'children':temp})
else:
templist.append({'name': k+"("+str(children)+")", 'children':[{}]})
return (templist,noofchildren)
# Print results
import json
print(json.dumps(convert(root)[0], indent=2))
and the OUTPUT is
[
{
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
},
{
"name": "L3(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L3(1)",
"children": [
{}
]
},
{
"name": "L1(1)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L1(1)",
"children": [
{}
]
},
{
"name": "L4(1)",
"children": [
{}
]
}
]
}
]
}
]
My dataset has changed a little bit
Levels=[[['L1','L1','L2'],[10,20,30]],
[[['L1','L1','L3'],[10,15,20]],
[[['L1','L2'],[20,10]],
[[['L2','L2','L3'],[20,20,30]],
[[['L2','L2','L1'],[10,20,30]]
[[['L3','L2'],[10,20]]
[[['L4','L2','L1'],[10,20,10]]
[[['L4','L2','L4'],[20,40,50]]]
and the output that I want is the average of the levels along with the count
[
{
"name": "L1(3)#(13)", // taking avg of 10,10,20
"children": [
{
"name": "L1(2)#(17)", // taking avg of 20,15
"children": [
{
"name": "L2(1)#(30)",
"children": [
{}
]
},
{
"name": "L3(1)#(20)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)#10",
"children": [
{}
]
}
]
},
{
"name": "L2(2)#(15)", // avg of 20,10
"children": [
{
"name": "L2(2)#(20)", // avg of 20,20
"children": [
{
"name": "L3(1)#(30)",
"children": [
{}
]
},
{
"name": "L1(1)#(30)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)#(15)",// avg of 10,20
"children": [
{
"name": "L2(2)#(30)", // avg of 20,40
"children": [
{
"name": "L1(1)# (10)",
"children": [
{}
]
},
{
"name": "L4(1)#(50)",
"children": [
{}
]
}
]
}
]
}
]
How can i change my code to add this information?
Preface
Before moving onto the solution, here are some points I want to mention:
Make use of Python's object-oriented programming features! This makes the data structure much clearer to both yourself and future readers.
Using a custom class also makes it easier for us to store the metadata – i.e. the number of instances of a node and its total value – while constructing the intermediate tree structure, rather than while converting it. This is also more efficient because with the latter method, a simple naive traversal algorithm would make duplicate visits to the nodes!
If you want your output to (reliably) maintain the order in which the paths were inserted, you should use an OrderedDict (from collections) instead of an ordinary dict ({}).
It is more logical to output an empty list for nodes with no children than a list with a single empty dict:
// Before
"children": [
{}
]
// After
"children": []
The reason being that any software which will parse this data later can safely assume that all objects have the "name" and "children" fields, which an empty dict does not.
The list boundaries and elements in the Levels array seem to be poorly formed; did you mean:
Levels = [
[['L1','L1','L2'],[10,20,30]],
[['L1','L1','L3'],[10,15,20]],
[['L1','L2'],[20,10]],
[['L2','L2','L3'],[20,20,30]],
[['L2','L2','L1'],[10,20,30]],
[['L3','L2'],[10,20]],
[['L4','L2','L1'],[10,20,10]],
[['L4','L2','L4'],[20,40,50]],
]
While on the subject of the data, since the nodes and values obey 1-to-1 mappings (within each path), it would be more appropriate to use a list of tuples rather than a list of two parallel lists:
Levels = [
[('L1', 10), ('L1', 20), ('L2', 30)],
[('L1', 10), ('L1', 15), ('L3', 20)],
[('L1', 20), ('L2', 10)],
[('L2', 20), ('L2', 20), ('L3', 30)],
[('L2', 10), ('L2', 20), ('L1', 30)],
[('L3', 10), ('L2', 20)],
[('L4', 10), ('L2', 20), ('L1', 10)],
[('L4', 20), ('L2', 40), ('L4', 50)]
]
There seems to be a mistake in your expected output:
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(10)", <--- this should be #(20)
"children": [
{}
]
}
]
},
Implementations
For your current data format (pair of lists):
# A dictionary here corresponds to an array of nodes in JSON
# the "name" fields serve as the keys and "children" as the values
class data_node(OrderedDict):
def __init__(self, **kwargs):
super(data_node, self).__init__(**kwargs)
self.instances = 0
self.total = 0
def insert(self, names, values):
# Python lists are truthy, so no need for len(path) == 0
if not (names or values):
return
# create the child node if it doesn't exist
child = self.get(names[0], data_node())
# add the value to the total
# and increment the instance counter
child.instances += 1
child.total += values[0]
# recursive call on the child
# with the rest of the path
child.insert(names[1:], values[1:])
self[names[0]] = child
def convert(self):
return [
{
"name": "{}({})#({})".format(
name,
child.instances,
child.total / child.instances # mean
),
"children": child.convert()
}
for name, child in self.items()
]
tree = data_node()
for path in Levels:
tree.insert(path[0], path[1])
print json.dumps(tree.convert(), indent=2)
For my proposed data format (list of tuples):
# only the insertion method differs
# all other parts of the class are unchanged
def insert(self, path):
if not path:
return
name, value = path[0]
child = self.get(name, data_node())
child.instances += 1
child.total += value
child.insert(path[1:])
self[name] = child
...
for path in Levels:
tree.insert(path) # simpler function signature
EDIT:
If there is a reason that you want the leaf node format to be [{}] instead of just [], only a simple change would be needed:
# in convert()
{
"name": ..., # as before
# again exploiting the truthy-ness property of arrays
"children": child.convert() or [{}]
}
Output
Both implementations give the correct JSON output, according my comments in the preface:
[
{
"name": "L1(3)#(13)",
"children": [
{
"name": "L1(2)#(17)",
"children": [
{
"name": "L2(1)#(30)",
"children": []
},
{
"name": "L3(1)#(20)",
"children": []
}
]
},
{
"name": "L2(1)#(10)",
"children": []
}
]
},
{
"name": "L2(2)#(15)",
"children": [
{
"name": "L2(2)#(20)",
"children": [
{
"name": "L3(1)#(30)",
"children": []
},
{
"name": "L1(1)#(30)",
"children": []
}
]
}
]
},
{
"name": "L3(1)#(10)",
"children": [
{
"name": "L2(1)#(20)",
"children": []
}
]
},
{
"name": "L4(2)#(15)",
"children": [
{
"name": "L2(2)#(30)",
"children": [
{
"name": "L1(1)#(10)",
"children": []
},
{
"name": "L4(1)#(50)",
"children": []
}
]
}
]
}
]

How to extract values from json which has multiple hierarchies inside using Python

Below is the json content, how to extract values for "GBL_ACTIVE_CPU" using python.
{
"test": "00.00.004",
"Metric Payload": [
{
"ClassName": "test",
"SystemId": "test",
"uri": "http://test/testmet",
"MetaData": [
{
"FieldName": "GBL_ACTIVE_CPU",
"DataType": "STRING",
"Label": "test",
"Unit": "string"
}
],
"Instances": [
{
"InstanceNo": "0",
"GBL_ACTIVE_CPU": "4"
}
]
]
}
I tried below code, but doesn't work. Any help is appreciated:
result = json.loads(jsonoutput)
print(result)
node = result["Metric Payload"]["Instances"]["GBL_ACTIVE_CPU"]
print(node)
I get below error:
TypeError: list indices must be integers or slices, not str
In JSON "Instances" is a list. You are accessing it like a dict. So it have 2 ways on is static other is dynamic.
If you like to use static way:-
result = json.loads(jsonoutput)
print(result)
node = result["Metric Payload"][0]["Instances"][0]["GBL_ACTIVE_CPU"]
print(node)
If you like to use dynamic way:-
result = json.loads(jsonoutput)
print(result)
for metric in result["Metric Payload"]:
for inst in metric["Instances"]:
node = inst["GBL_ACTIVE_CPU"]
print(node)

Build a graph recursively - breadth first

i want to build a "graph" in the following style:
{
"name":cersei
"children": [
{
"name": "baratheon",
"children": [
{
"name": "cersei",
"children": []
},
{
"name": "baratheon",
"children": [],
}
],
},
{
"name": "joffrey",
"children": [
{
"name": "robert",
"children": []
},
{
"name": "cersei",
"children": []
}
]
}
]
}
But i build this via depth-first. That means the first element of "children" is fully build, after that the second element of "children" is build. This is the recurse-function:
def recurse(dicts, depth):
if depth >=0:
dicts["children"] = []
child_elements = [] //do something to get your child-elements
for child in child_elements:
if depth >=0:
child_dict = dict(name=word[0])
dicts["children"].append(child_dict)
recurse(child_dict, depth-1)
How can i change the code that it builds the whole "level" first and appends the childrens-children later? I've got the problem, that i don't know how to call the level 1 dictionary, because its kind of a dictionary in the dictionary...
Kind regards and thanks for your help, FFoDWindow.
--------------------****UPDATE****---------------------------
I solved the issue myself. Actually it was pretty simple. I only had to save the temporarly build "children"-elements in an extra list. Now the tree is build breadth-first. Here is my recurse function:
def recurse( input_dict, level_list, depth):
next_level_list = []
for dictionary in input_dict:
child_elements = [...] //get the data for your children
for child_element in child_elements:
dictionary["children"].append = dict(name = child_element)
next_level_list.append(dictionary["children"][-1])
if depth >=0:
recurse(input_dict, next_level_list, depth-1)

Categories