I'm trying to convert a filesystem tree to JSON using Python.
Imagine that i have the following tree :
plans/
|-- p1/
| |-- p1_1.pdf
| |-- p1_2.pdf
| `-- test/
| `-- test.jpg
|-- p2/
| |-- p2_1.pdf
| |-- p2_2.pdf
| `-- test2/
|
`-- resume.pdf
I would like to have a json output like that :
[
{
"name": "p1",
"type": "folder",
"path": "/plans/p1",
"tag": "org",
"children": [
{
"name": "p1_1.pdf",
"type": "file",
"path": "/plans/p1/p1_1.pdf",
"tag": "org"
},
{
"name": "p1_2.pdf",
"type": "file",
"path": "/plans/p1/p1_2.pdf",
"tag": "org"
},
{
"name": "test",
"type": "folder",
"path": "/plans/p1/test",
"tag": "org",
"children": [
{
"name": "test.jpg",
"type": "file",
"path": "/plans/p1/test/test.jpg",
"tag": "org"
}
]
}
]
},
{
"name": "p2",
"type": "folder",
"path": "/plans/p2",
"tag": "org",
"children": [
{
"name": "p2_1.pdf",
"type": "file",
"path": "/plans/p2/p2_1.pdf",
"tag": "org"
},
{
"name": "p2_2.pdf",
"type": "file",
"path": "/plans/p2/p2_2.pdf",
"tag": "org"
},
{
"name": "test2",
"type": "folder",
"path": "/plans/p2/test2",
"tag": "org",
"children": [
]
}
]
},
{
"name": "resume.pdf",
"type": "file",
"path": "/plans/resume.pdf",
"tag": "org"
}
]
I'm currently using the os.walk() Python function to go through the tree, creating lists of dicts to generate a "dumpable" list using json.dumps(), but I didn't know how to do it recursively.
Here a quick code draft:
def tree_to_json(rootdir):
    """Walk rootdir with os.walk() and dump a JSON list of entries.

    NOTE(review): this is the flawed draft the question describes -- every
    entry is appended to the top-level `main` list, so the output is flat.
    The `child` lists are created empty and nothing ever appends to them,
    and the desired "tag" key is missing from the dicts.
    """
    main = []
    for path, dirs, files in os.walk(rootdir):
        for curdir in dirs:
            child = []
            # BUG: `child` stays empty forever; nested entries are appended
            # to `main` instead of to their parent's "children" list.
            new_dir = {"name": curdir,
                       "type": "folder",
                       "path": path + os.sep + curdir,
                       "children": child}
            main.append(new_dir)
        for curfile in files:
            new_file = {"name": curfile,
                        "type": "file",
                        "path": path + os.sep + curfile}
            main.append(new_file)
    return json.dumps(main, sort_keys=True, indent=2, separators=(',', ': '))
As anything in programming there are many ways to solve. Here is one solution:
import json
from os import walk, path
def file_to_dict(fpath):
    """Return the JSON-ready description of the single file at *fpath*."""
    return dict(
        name=path.basename(fpath),
        type='file',
        path=fpath,
        tag='org',
    )
def folder_to_dict(rootpath):
    """Return the JSON-ready description of folder *rootpath*.

    The "children" list starts empty; the caller fills it in.
    """
    return dict(
        name=path.basename(rootpath),
        type='folder',
        path=rootpath,
        tag='org',
        children=[],
    )
def tree_to_dict(rootpath):
    """Recursively describe the folder *rootpath* as a nested dict.

    Direct files become leaf dicts; each sub-folder is handled by a
    recursive call, so only the top level of *rootpath* is taken from
    walk() here.

    Fix vs. the original: generators have no ``.next()`` method in
    Python 3 -- use the ``next()`` builtin instead.
    """
    root_dict = folder_to_dict(rootpath)
    root, folders, files = next(walk(rootpath))
    root_dict['children'] = [file_to_dict(path.sep.join([root, fpath])) for fpath in files]
    root_dict['children'] += [tree_to_dict(path.sep.join([root, folder])) for folder in folders]
    return root_dict
def tree_to_json(rootdir, pretty_print=True):
    """Return the JSON representation of the tree rooted at *rootdir*.

    The root folder itself is not included: its sub-folders and files
    form the top-level JSON list, matching the desired output.

    Fixes vs. the original (Python 2 -> 3):
    - next(walk(...)) instead of walk(...).next();
    - json.dumps() no longer accepts an ``encoding`` keyword -- it
      returns text (str) already, so the argument is dropped;
    - print is a function.
    """
    root, folders, files = next(walk(rootdir))
    root_dict = [tree_to_dict(path.sep.join([root, folder])) for folder in folders]
    root_dict += [file_to_dict(path.sep.join([root, fpath])) for fpath in files]
    if pretty_print:
        js = json.dumps(root_dict, indent=4)
    else:
        js = json.dumps(root_dict)
    return js

print(tree_to_json('/tmp/tree'))
And here is the output:
[
{
"path": "/tmp/tree/p1",
"tag": "org",
"type": "folder",
"name": "p1",
"children": [
{
"path": "/tmp/tree/p1/p1_1.pdf",
"tag": "org",
"type": "file",
"name": "p1_1.pdf"
},
{
"path": "/tmp/tree/p1/p1_2.pdf",
"tag": "org",
"type": "file",
"name": "p1_2.pdf"
},
{
"path": "/tmp/tree/p1/test",
"tag": "org",
"type": "folder",
"name": "test",
"children": [
{
"path": "/tmp/tree/p1/test/test.jpg",
"tag": "org",
"type": "file",
"name": "test.jpg"
}
]
}
]
},
{
"path": "/tmp/tree/p2",
"tag": "org",
"type": "folder",
"name": "p2",
"children": [
{
"path": "/tmp/tree/p2/p2_1.pdf",
"tag": "org",
"type": "file",
"name": "p2_1.pdf"
},
{
"path": "/tmp/tree/p2/p2_2.pdf",
"tag": "org",
"type": "file",
"name": "p2_2.pdf"
},
{
"path": "/tmp/tree/p2/test2",
"tag": "org",
"type": "folder",
"name": "test2",
"children": []
}
]
},
{
"path": "/tmp/tree/resume.pdf",
"tag": "org",
"type": "file",
"name": "resume.pdf"
}
]
Related
I have a nested dictionary like below:
[
{
"name": "A",
"flag": "folder",
"children": [
{
"name": "A1",
"flag": "folder",
"children": [
{
"name": "A1x",
"flag": "file",
"children": []
},
{
"name": "A1y",
"flag": "file",
"children": []
}
]
}
]
}
]
From this dict, I would like to generate a dataframe as below:
Is there any nice way to make this?
With the following nested dictionary which expands on yours for demonstration purposes:
data = [
{
"name": "A",
"flag": "folder",
"children": [
{
"name": "A1",
"flag": "folder",
"children": [
{
"name": "A1x",
"flag": "file",
"children": [{"name": "A1xx", "flag": "file", "children": []}],
},
{
"name": "A1y",
"flag": "file",
"children": [{"name": "A1yy", "flag": "file", "children": []}],
},
],
},
{
"name": "A2",
"flag": "folder",
"children": [
{
"name": "A2x",
"flag": "file",
"children": [{"name": "A2xx", "flag": "file", "children": []}],
},
{
"name": "A2y",
"flag": "file",
"children": [{"name": "A2yy", "flag": "file", "children": []}],
},
],
},
],
},
{
"name": "B",
"flag": "folder",
"children": [
{
"name": "B1",
"flag": "folder",
"children": [
{
"name": "B1x",
"flag": "file",
"children": [{"name": "B1xx", "flag": "file", "children": []}],
},
{
"name": "B1y",
"flag": "file",
"children": [{"name": "B1yy", "flag": "file", "children": []}],
},
],
},
{
"name": "B2",
"flag": "folder",
"children": [
{
"name": "B2x",
"flag": "file",
"children": [{"name": "B2xx", "flag": "file", "children": []}],
},
{
"name": "B2y",
"flag": "file",
"children": [{"name": "B2yy", "flag": "file", "children": []}],
},
],
},
],
},
]
Here is one way to do it by defining two short helper functions:
import pandas as pd
def traverse(data, new_data=None):
    """Flatten a nested name/flag/children dict into a list of lists.

    Args:
        data: target dict with "name", "flag" and "children" keys.
        new_data: accumulator; a fresh [[]] is created when not given.

    Returns:
        The accumulator: each inner list is one root-to-leaf run of
        alternating name/flag values; empty lists separate branches.
    """
    if not new_data:
        new_data = [[]]
    current = new_data[-1]
    current.extend([data["name"], data["flag"]])
    for child in data["children"]:
        traverse(child, new_data)
        new_data.append([])
    return new_data
def make_rows(flat_data):
    """Shape flattened traversal data into a forward-filled DataFrame.

    Args:
        flat_data: list of lists produced by traverse(); an empty sublist
            marks the end of one root-to-leaf branch.

    Returns:
        Dataframe with one row per branch; the shared prefix of each new
        branch is padded with None and then forward-filled from the row
        above (two cells -- name and flag -- per level).

    Fix vs. the original: ``fillna(method="ffill")`` is deprecated in
    modern pandas; ``.ffill()`` is the supported equivalent.
    """
    rows = [[]]
    for item in flat_data:
        if item:
            rows[-1] += item
        else:
            # Start the next branch, pre-padded for the levels it shares
            # with the previous row.
            rows.append([None] * (len(rows[-1]) // 2))
    return pd.DataFrame(rows).dropna(how="all").ffill()
And then:
# Flatten each top-level entry into branch rows and stack the results.
df = pd.concat([make_rows(traverse(item)) for item in data]).reset_index(drop=True)
# Two columns (name, flag) per nesting level.
df.columns = pd.MultiIndex.from_product(
    [[f"Level {i}" for i in range(int(df.shape[1] / 2))], ["name", "flag"]]
)
print(df)
# Output
Level 0 Level 1 Level 2 Level 3
name flag name flag name flag name flag
0 A folder A1 folder A1x file A1xx file
1 A folder A1 folder A1y file A1yy file
2 A folder A2 folder A2x file A2xx file
3 A folder A2 folder A2y file A2yy file
4 B folder B1 folder B1x file B1xx file
5 B folder B1 folder B1y file B1yy file
6 B folder B2 folder B2x file B2xx file
7 B folder B2 folder B2y file B2yy file
I'm trying to merge 2 json files in Python. Here are the files:
test1.json
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
}
}
}
test2.json
{
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I have this code to merge the two files
import json
with open("test1.json", "r") as data1_file:
    data1 = json.load(data1_file)
with open("test2.json", "r") as data2_file:
    data2 = json.load(data2_file)
# BUG (the behavior this question asks about): dict.update() replaces
# top-level keys wholesale, so data2["data"] overwrites data1["data"]
# instead of being merged into it.  The fix given in the answer below is:
#     data1['data'].update(data2['data'])
data1.update(data2)
with open("out.json", "w") as out_file:
    json.dump(data1, out_file, indent=4)
The output I'm getting is this. It only has test2.json contents under "data".
{
"version": "1.0",
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I want the output to have contents of both files under "data" like below
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
},
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
How can I achieve this? Thanks!
You need to merge the "sub-dictionary" data1['data'], not data1 itself. In the current code, you are updating data1 with data2, so that data2['data'] overwrites data1['data'].
So replace data1.update(data2) with:
data1['data'].update(data2['data'])
I think this is what you are looking for:
https://stackoverflow.com/a/7205107/8786297
def merge(a, b, path=None):
    """Recursively merge dict *b* into dict *a* in place and return *a*.

    Nested dicts are merged key by key; equal leaf values are kept as-is;
    differing leaf values raise an Exception naming the conflicting path.
    """
    if path is None:
        path = []
    for key, b_val in b.items():
        if key not in a:
            a[key] = b_val
            continue
        a_val = a[key]
        if isinstance(a_val, dict) and isinstance(b_val, dict):
            merge(a_val, b_val, path + [str(key)])
        elif a_val != b_val:
            raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
    return a
I am working with one of my requirement
My requirement: I need to pick and print only 3rd "id" from "syrap" list from the nested json file. I am not getting desired output. Any help will be appreciated.
Test file:
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
Expected output in a csv:
0001,donut,abc,0303,1003
My code:
import requests
import json
import csv
f = open('testdata.json')
data = json.load(f)
f.close()
# NOTE(review): 'wb+' is a binary mode; in Python 3 csv.writer needs a
# text-mode handle (open(..., 'w', newline='')).
f = csv.writer(open('testout.csv', 'wb+'))
# NOTE(review): the loaded JSON is a single object (dict), so this loop
# iterates its keys, not records -- see the answers below.
for item in data:
    # BUG: item[type] uses the builtin `type` as the key -- should be the
    # string 'type'.
    # BUG: item['batters'] is a dict, so the [0] subscripts will fail --
    # the [0] indexing should be dropped.
    # BUG: the closing ] of the writerow list is missing -- this statement
    # is a syntax error as written.
    f.writerow([item['id'], item[type], item['batters'][0]['process'],
                item['batters'][0]['mix'],
                item['batters'][0]['syrap'][0]['id'],
                item['batters'][0]['syrap'][1]['id'],
                item['batters'][0]['syrap'][2]['id'])
Here is some sample code showing how you can iterate through json content parsed as a dictionary:
import json
# Sample document embedded as a string; "batters" is an object holding the
# "syrap" list this question wants to index into.
json_str = '''{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
'''
# Parse once, then navigate plain dicts/lists.
jsondict = json.loads(json_str)
syrap_node = jsondict['batters']['syrap']
for item in syrap_node:
    # f-string: requires Python 3.6+.
    print (f'id:{item["id"]} type: {item["type"]}')
Simply, data["batters"]["syrap"][2]["id"]
Much better way to achieve this would be
# Write the selected fields to CSV with csv.DictWriter.
#
# Fixes vs. the original (Python 2 -> 3):
# - csv needs a text-mode handle; 'wb+' is binary and raises a TypeError
#   in Python 3.  newline='' avoids blank lines on Windows.
# - `print item` is Python-2 statement syntax; print() is a function.
with open('testout.csv', 'w', newline='') as f:
    fnames = ['id', 'type', 'process', 'mix', 'syrap']
    writer = csv.DictWriter(f, fieldnames=fnames)
    writer.writeheader()
    # Assumes `data` (parsed JSON) is a list of records -- see the note
    # below for the single-object case.
    for item in data:
        print(item)
        writer.writerow({'id': item['id'], 'type': item['type'],
                         'process': item['batters']['process'],
                         'mix': item['batters']['mix'],
                         'syrap': item['batters']['syrap'][2]['id']})
You need to make sure that data is actually a list. If it is not a list, don't use a for loop.
Simply:
# Single-object case: when the parsed JSON is one dict (as in this
# question's test file), index `data` directly -- no loop.
writer.writerow({'id' : data['id'], 'type': data['type'],
                 'process' : data['batters']['process'],
                 'mix': data['batters']['mix'],
                 'syrap': data['batters']['syrap'][2]['id']})
I need some assistance on a somewhat simple issue.
I'm trying to convert the content of a json file from this:
{ "Timestamp": "Timestamp",
"name": "SVCHOST.EXE",
"icon": "binary_icon.png",
"Process": SVCHOST.EXE,
"Pid": "876",
"PPID": "500"],
"children": [Process details])
},
{ "Timestamp":"Timestamp",
"name": "LSAS.EXE",
"icon": "binary_icon.png",
"Process": "LSAS.EXE",
"Pid": "500",
"PPID": "4"],
"children": [Process details])
},
{ "Timestamp":"Timestamp",
"name": "SYSTEM",
"icon": "binary_icon.png",
"Process": "SYSTEM",
"Pid": "4",
"PPID": "0"],
"children": [Process details])
}
To this:
{
"name": "Root",
"children": [
{
"name": "4",
"children": [
{
"name": "500",
"children": [
{
"name": "876",
"children": []
}
]
}
]
}
}
To create a node tree graph in the end.
But after a lot of trial and error, and still not close to the output I need. I'm asking for some pointer, tips or tricks.
Any help is much appreciated.
Thanks,
Here is my most recent attempt.
import json
# Process records: each has a Pid and the Pid of its parent (PPID).
links = ({
    "Timestamp": "Timestamp",
    "name": "SVCHOST.EXE",
    "icon": "binary_icon.png",
    "Process": "SVCHOST.EXE",
    "Pid": "876",
    "PPID": "500",
    "children": "Process_details"
},
{
    "Timestamp":"Timestamp",
    "name": "LSAS.EXE",
    "icon": "binary_icon.png",
    "Process": "LSAS.EXE",
    "Pid": "500",
    "PPID": "4",
    "children": "Process_details"
},
{
    "Timestamp":"Timestamp",
    "name": "SYSTEM",
    "icon": "binary_icon.png",
    "Process": "SYSTEM",
    "Pid": "4",
    "PPID": "0",
    "children": "Process_details"
})
parent_proc_node = {}
root = {'name': 'Root', 'children': []}
# BUG: the data above is named `links`, but this loop iterates `procs`,
# which is never defined here (NameError as written).
for item in procs:
    parent_node = parent_proc_node.get(item['Pid'])
    if not parent_node:
        # NOTE(review): keyed by Pid but the node is named with the PPID --
        # the parent/child bookkeeping is inverted, which is why the
        # nesting comes out backwards (see the "current output" below).
        parent_proc_node[item['Pid']] = parent_node = {'name': item['PPID']}
        root['children'].append(parent_node)
    parent_proc_node[item['PPID']] = child_node = {'name': item['Pid']}
    parent_node.setdefault('children', []).append(child_node)
# Python-2 print statement (this question's code is Py2-era).
print json.dumps(root, indent=4)
Current output:
{
"name": "Root",
"children": [
{
"name": "500",
"children": [
{
"name": "876",
"children": [
{
"name": "500",
"children": [
{
"name": "4"
}
]
}
]
}
]
}
}
The output is close to what I want, but I'm still not able to correctly match parent processes with their children.
What am I doing wrong?
The correct output would be like this:
{
"name": "Root",
"children": [
{
"name": "4",
"children": [
{
"name": "500",
"children": [
{
"name": "876",
"children": [
{
"name": ""
}
]
}
]
}
]
}
}
Here's some code that does what I think you want. It processes the links (which I turned into a list, since JSON doesn't have tuples), converting it into the nested structure that you show as the final correct output. I've also added a couple of new records so that some parents have multiple children.
The trick is to first create a dictionary (ids) that captures the parent-child relationship of the process IDs.
import json
# Sample process records: the Pid/PPID pairs drive the tree construction
# below.  Two extra records (510, 600) give some parents multiple children.
links = [
    {
        "Timestamp": "Timestamp",
        "name": "SVCHOST.EXE",
        "icon": "binary_icon.png",
        "Process": "SVCHOST.EXE",
        "Pid": "876",
        "PPID": "500",
        "children": "Process_details"
    },
    {
        "Timestamp": "Timestamp",
        "name": "LSAS.EXE",
        "icon": "binary_icon.png",
        "Process": "LSAS.EXE",
        "Pid": "500",
        "PPID": "4",
        "children": "Process_details"
    },
    {
        "Timestamp": "Timestamp",
        "name": "LSAS.EXE",
        "icon": "binary_icon.png",
        "Process": "LSAS.EXE",
        "Pid": "510",
        "PPID": "4",
        "children": "Process_details"
    },
    {
        "Timestamp": "Timestamp",
        "name": "LSAS.EXE",
        "icon": "binary_icon.png",
        "Process": "LSAS.EXE",
        "Pid": "600",
        "PPID": "510",
        "children": "Process_details"
    },
    {
        "Timestamp": "Timestamp",
        "name": "SYSTEM",
        "icon": "binary_icon.png",
        "Process": "SYSTEM",
        "Pid": "4",
        "PPID": "0",
        "children": "Process_details"
    }
]
# Create a dict linking each pid to its parent
ids = {}
for d in links:
    # Use "0" as the ppid if "PPID" field is an empty string
    # (so orphan processes still hang off the synthetic root).
    ppid, pid = d["PPID"] or "0", d["Pid"]
    ids.setdefault(ppid, []).append(pid)
print(ids)
# Recursively nest each pid inside its parent's dict, reading from `ids`.
def insert(lst, ppid, name):
    """Append a node for *name* to *lst*, recursing into ids[ppid] if any."""
    if ppid not in ids:
        # Leaf process: mark the end of the chain with an empty-name child.
        lst.append({"name": name, "children": [{"name": ""}]})
        return
    kids = []
    lst.append({"name": name, "children": kids})
    for child_pid in ids[ppid]:
        insert(kids, child_pid, child_pid)
nested = []
# "0" is the PPID of the SYSTEM process, so it serves as the tree root.
insert(nested, "0", "Root")
print(json.dumps(nested[0], indent=4))
output
{'500': ['876'], '4': ['500', '510'], '510': ['600'], '0': ['4']}
{
"name": "Root",
"children": [
{
"name": "4",
"children": [
{
"name": "500",
"children": [
{
"name": "876",
"children": [
{
"name": ""
}
]
}
]
},
{
"name": "510",
"children": [
{
"name": "600",
"children": [
{
"name": ""
}
]
}
]
}
]
}
]
}
@PM 2Ring Sorry, please disregard the PPID 0 remarks — missing exception handling at my end. :)
Your example works perfectly for parents with child PIDs. However, if a PID has no parent, it is not added to the root node.
# Follow-up data set: ROUGEPROC has an empty PPID, i.e. no known parent.
procs = [{
    "Timestamp": "Timestamp",
    "name": "SVCHOST.EXE",
    "icon": "binary_icon.png",
    "Process": "SVCHOST.EXE",
    "Pid": "876",
    "PPID": "500",
    "children": "Process_details"
},
{
    "Timestamp":"Timestamp",
    "name": "LSAS.EXE",
    "icon": "binary_icon.png",
    "Process": "LSAS.EXE",
    "Pid": "500",
    "PPID": "4",
    "children": "Process_details"
},
{
    "Timestamp":"Timestamp",
    "name": "SYSTEM",
    "icon": "binary_icon.png",
    "Process": "SYSTEM",
    "Pid": "4",
    "PPID": "0",
    "children": "Process_details"
}
,
{
    "Timestamp":"Timestamp",
    "name": "ROUGEPROC",
    "icon": "binary_icon.png",
    "Process": "ROUGEPROC",
    "Pid": "4322",
    "PPID": "",
    "children": "Process_details"
}]
# Create a dict linking each pid to its parent
ids = {}
for d in procs:
    # NOTE(review): unlike the answer above, the empty PPID is NOT mapped
    # to "0", so ROUGEPROC ends up under the unreachable "" key -- which is
    # exactly why it is missing from the output shown below.
    ppid, pid = d["PPID"], d["Pid"]
    ids.setdefault(ppid, []).append(pid)
print(ids)
# Nest the data for each pid in its parent's dict
def insert(lst, ppid, name):
    # NOTE(review): only pids reachable from the starting key ("0") are
    # ever visited, so records grouped under another key -- e.g. the ""
    # PPID of ROUGEPROC above -- never appear in the output tree.
    if ppid in ids:
        children = []
        lst.append({"name": name, "children": children, "icon": "binary_icon.png"})
        for pid in ids[ppid]:
            insert(children, pid, pid)
    else:
        # Leaf pid: no children recorded in ids.
        children = []
        lst.append({"name": name, "children": children, "icon": "binary_icon.png"})
nested = []
# Start nesting from PPID "0"; orphan PPIDs (e.g. "") are never visited.
insert(nested, "0", "PPID")
# Re-root the result under a host-level node for the final report.
proc_report = {"name" :"HOSTNAME",
               "icon": "win_os_icon.png",
               "children": nested[0]["children"]
               }
print(json.dumps(proc_report, indent=4))
Output:
{'': ['4322'], '0': ['4'], '4': ['500'], '500': ['876']}
{
"children": [
{
"icon": "binary_icon.png",
"name": "4",
"children": [
{
"icon": "binary_icon.png",
"name": "500",
"children": [
{
"icon": "binary_icon.png",
"name": "876",
"children": []
}
]
}
]
}
],
"name": "HOSTNAME",
"icon": "win_os_icon.png"
}
I have this kind of Json tree for folder structure. Is there any way to compare it with same kind of Json tree to get differences (file missing or different file properties (date,crc,..)) and return this as a list with names of different/missing files.
{
"testfolder": {
"children": {
"content.json": {
"last_modified_timestamp": 1485902084.0222416,
"created_timestamp": 1485193414.5027652,
"crc": "7c71cf7ff765ddd78fffcac2eed56ae2",
"type": "file",
"size": 961
},
"config.json": {
"last_modified_timestamp": 1484831126.4821935,
"created_timestamp": 1484830625.6165457,
"crc": "bff5d42e18df483841aa10df8b38cdd4",
"type": "file",
"size": 132
}
}
},
"__init__.py": {
"last_modified_timestamp": 1481651800.7150106,
"created_timestamp": 1481651800.7150106,
"crc": "d41d8cd98f00b204e9800998ecf8427e",
"type": "file",
"size": 0
},
"test.json": {
"last_modified_timestamp": 1486126931.2528062,
"created_timestamp": 1486126732.7074502,
"crc": "8a30d9b3834ef46ad3b996edb06c72bf",
"type": "file",
"size": 1675
},
"test": {
"children": {
"test.txt.txt": {
"last_modified_timestamp": 1486126927.9266162,
"created_timestamp": 1486126865.9750726,
"crc": "b5301fdbf2ba41520b255a651c7017b1",
"type": "file",
"size": 5
}
}
}
}
Thank you for help!
def jsondiff(local, online, path='', todo=None):
    """Compare two folder-tree dicts and list entries needing sync.

    Returns the paths of entries that are missing from *online*, or whose
    local "last_modified_timestamp" is newer than the online one.  Folders
    are recognized by their "children" key and recursed into.

    Fixes vs. the original:
    - ``todo=[]`` was a mutable default argument, so results accumulated
      across calls (and across recursion levels); use None and create a
      fresh list per call instead.
    - ``dict.has_key()`` was removed in Python 3; use the ``in`` operator.
    """
    if todo is None:
        todo = []
    for key in local:
        if key not in online:
            if 'children' in local[key]:
                # NOTE(review): json_path_print() is not defined in this
                # snippet -- presumably it collects every path under a
                # missing folder; confirm against the full source.
                todo = todo + json_path_print(local[key]["children"], path + key + "/")
            else:
                todo.append(path + key)
        else:
            if 'children' in local[key]:
                todo = todo + jsondiff(local[key]["children"], online[key]["children"], path + key + "/")
            else:
                if local[key]["last_modified_timestamp"] > online[key]["last_modified_timestamp"]:
                    todo.append(path + key)
    return todo
Solved it if anyone need solution