I'm trying to use Python to turn data from a CSV file into nested (parent-child) JSON. I can build the tree from the columns in my .csv file, but I also want to attach metadata to each node; the header names of the CSV file could serve as that metadata.
my text.csv
Team,Task,Country,ID,ID2
Team C,Processing,US,67,34
Team C,Review,US,734,56
Team C,Support,US,34,43
I want the following output:
{"name": "flare",
"test": "flare",
"children":
[
{
"name": "Team C",
"test": "TEAM",
"children": [{
"name": "Processing",
"test": "Task",
"children": [{
"name": "US",
"test": "[ID: 67, ID2: 34]",
"size": 1983
},
{
"name": "Review",
"test": "Task",
"size": 1675
},
{
"name": "Support",
"test": "Task",
"size": 2042
},
]
},
]
},
]
};
Python Code:
import csv
import json
class Node(object):
    """One node of the name/children tree that is serialised to JSON.

    A node created with a ``size`` is rendered as a leaf (``name`` + ``size``);
    a node without one is rendered with its ``children`` list.
    """

    def __init__(self, name, size=None):
        self.name = name
        self.children = []
        self.size = size

    def child(self, cname, size=None):
        """Return the child named *cname*, creating and appending it if absent.

        *size* is only used when the child is created; an existing child is
        returned unchanged.
        """
        # Stop at the first match instead of building a list of every match.
        for existing in self.children:
            if existing.name == cname:
                return existing
        created = Node(cname, size)
        self.children.append(created)
        return created

    def as_dict(self):
        """Return this subtree as nested dicts suitable for ``json.dumps``."""
        res = {'name': self.name}
        if self.size is None:
            res['children'] = [c.as_dict() for c in self.children]
        else:
            res['size'] = self.size
        return res
# Build the tree from the CSV rows and print it as indented JSON.
root = Node('Flare')
with open('/tmp/test.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row (works on Python 2.6+ and 3)
    for row in reader:
        # Each row must have exactly four columns: three group levels and a size.
        grp1, grp2, grp3, size = row
        root.child(grp1).child(grp2).child(grp3, size)
print(json.dumps(root.as_dict(), indent=4))
where "test" is some data that I want to be printed in the .json. Kindly help me for the same. Stuck and wasted many hours but couldn't find any.
TIA :)
Additional information could be passed to each child by adding another optional argument to your child, e.g. extra. This could be in the form of a dictionary, and if present is added to the child's dictionary. For example:
import csv
import json
class Node(object):
    """Tree node with optional ``size`` (leaf) and ``extra`` metadata.

    ``extra`` is a dict whose items are merged into the node's own dict
    when the tree is exported with :meth:`as_dict`.
    """

    def __init__(self, name, size=None, extra=None):
        self.name = name
        self.children = []
        self.size = size
        self.extra = extra

    def child(self, cname, size=None, extra=None):
        """Return the child named *cname*, creating and appending it if absent.

        *size* and *extra* are only used when the child is created; an
        existing child is returned unchanged.
        """
        # Stop at the first match instead of building a list of every match.
        for existing in self.children:
            if existing.name == cname:
                return existing
        created = Node(cname, size, extra)
        self.children.append(created)
        return created

    def as_dict(self):
        """Return this subtree as nested dicts suitable for ``json.dumps``."""
        res = {'name': self.name}
        if self.size is None:
            res['children'] = [c.as_dict() for c in self.children]
        else:
            res['size'] = self.size
        # Metadata is merged last, so it is present on leaves and inner nodes alike.
        if self.extra:
            res.update(self.extra)
        return res
# Build the tree row by row; `extra` carries the metadata emitted under "test".
root = Node('Flare')
with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        # `id1` rather than `id`, so the builtin is not shadowed.
        team, task, country, id1, id2 = row
        (root.child(team, extra={'test': 'TEAM'})
             .child(task, extra={'test': 'Task'})
             .child(country, extra={'test': {'ID': id1, 'ID2': id2}})
             .child(id1, size=id2))
print(json.dumps(root.as_dict(), indent=4))
This would give you:
{
"name": "Flare",
"children": [
{
"test": "TEAM",
"name": "Team C",
"children": [
{
"test": "Task",
"name": "Processing",
"children": [
{
"test": {
"ID2": "34",
"ID": "67"
},
"name": "US",
"children": [
{
"name": "67",
"size": "34"
}
]
}
]
},
{
"test": "Task",
"name": "Review",
"children": [
{
"test": {
"ID2": "56",
"ID": "734"
},
"name": "US",
"children": [
{
"name": "734",
"size": "56"
}
]
}
]
},
{
"test": "Task",
"name": "Support",
"children": [
{
"test": {
"ID2": "43",
"ID": "34"
},
"name": "US",
"children": [
{
"name": "34",
"size": "43"
}
]
}
]
}
]
}
]
}
You should be able to adjust the call to include the information you want and the layout you want. These are examples.
Related
I have a dictionary returned by a response in AWS Lambdas which looks like below:
data = {
"id": 1,
"name": "Hey",
"category": null,
"salary": 1222,
"Roles": [
{
"attributes1": {
"key": "sum_assured",
"value": 192000
},
"attributes2": {
"key": "cash_value",
"value": 48576
},
"attributes3": {
"key": "expiry_date",
"value": null
}
}
]
}
Now, I want to format a few numeric columns such as salary with two decimals (1222.00) and convert all other non-string columns to strings; for "id", for example, I should get "1" as a string and not an int. Here, null is the actual NaN returned by the response.
Below is the code which I tried:
# Asker's first attempt: copy every top-level field of `data` into new_dict as a string.
new_dict = {}
for i, j in data.items():
print(i)
if (i == "salary"):
new_dict[i] = "%0.2f" % j
# NOTE: this is a plain `if`, not `elif`, so a second chain also runs for
# "salary"; for a numeric salary its final `else` replaces the formatted
# value with str(j).
if isinstance(j, str):
new_dict[i] = j
elif j is None:
new_dict[i] = ""
else:
new_dict[i] = str(j)
I know that I could maintain a list for such amount related columns and then I can do some kind of comparisons and replace it. When I test the value for salary inside the loop it gives me decimal places but in the response, it doesn't show up.
Output:
{
"id": "1",
"name": "Hey",
"category": "",
"salary": "1222",
"Roles": "[{'attributes4': {'key': 'sum_assured', 'value': 192000}, 'attributes5': {'key': 'cash_value', 'value': 48576}, 'attributes6': {'key': 'expiry_date', 'value': 'null'}}]"
}
Expected:
{
"id": "1",
"name": "Hey",
"category": "",
"salary": "1222",
"Roles": "[{"attributes4": {"key": "sum_assured", "value": "192000.00"}, "attributes5": {"key": "cash_value", "value": "48576.00"}, "attributes6": {"key": "expiry_date", "value": ""}}]"
}
Also, how could I format the records of the Roles array with some kind of indent in it? I wasn't able to do that.
Update 1:
I tried the below code for accessing the nested attributes as per my JSON but it gives me a key error:
# Asker's second attempt: descend into list-valued fields and format the nested values.
for i, j in formatted_json.items():
print(type(j))
if isinstance(j, list):
for k in j:
for key, value in k.items():
for l, m in value.items():
# NOTE: `m` is the nested value, so this compares the data itself with the
# names "salary"/'value' instead of testing the field name `l`.
if (m == "salary" or m == 'value'):
# NOTE: nothing in this snippet creates new_dict[key] before it is indexed
# with [l], which is consistent with the KeyError in the traceback.
new_dict[key][l] = "%0.2f" % m
elif isinstance(m, str):
new_dict[key][l] = m
elif m is None:
new_dict[key][l] = ""
else:
new_dict[key][l] = str(m)
elif (i == "salary" or i == 'value'):
new_dict[i] = "%0.2f" % j
elif isinstance(j, str):
new_dict[i] = j
elif j is None:
new_dict[i] = ""
else:
new_dict[i] = str(j)
Traceback (most recent call last):
File "/home/.../Downloads/pyspark/src/sample/json_to_string.py", line 392, in <module>
new_dict[key][l] = m
KeyError: 'attributes4'
Update 2:
The actual dataset is as below:
json_data = '''{
"policy_no": "",
"paid_to_date": "2016-09-28",
"proposal_no": null,
"agent_code": "1234",
"policy_status": "Death",
"policy_term": "15",
"policy_premium_payment_term": "7",
"policy_effective_date": "2013-09-28",
"last_premium_paid": 13790.85,
"last_premium_paid_date": "2016-06-17",
"pivc_flag_status": null,
"product_code": "",
"product_name": "Monthly Income Plan",
"application_date": "2013-09-16",
"underwriting_decesion_date": "2013-09-26",
"issuance_date": "2013-09-30",
"ols_policy": "N",
"next_premium_due_date": "2016-09-28",
"policy_year": null,
"premium_holiday_from_date": null,
"premium_holiday_to_date": null,
"product_type": "Non-Linked",
"proposer_client_code": "",
"life_insured_client_code": "",
"ppt_completion_date": "2020-09-28",
"ppt_left": 4,
"auto_termination_date": null,
"fund_value": null,
"loan_amt": 0,
"welcome_kit_current_status": null,
"dispatch_date": "2013-10-25",
"status_date": "2013-10-25",
"updated_on": "2013-10-26 19:51:51.983",
"surrender_value": null,
"loan_eligiability": null,
"courier_name": null,
"awb_no": "",
"status_id": 5,
"claim_registration_date": "2021-12-15",
"claim_approval_reject_date": "2021-12-15",
"claim_requirement_document": "HEART ATTACK",
"claim_requirement_status": "P",
"requirement_raised_on": "2021-12-15",
"requirement_closed_on": "2021-12-15",
"claim_status": "P",
"status_for_death_claims": "P",
"date_of_transaction": "2021-12-15",
"ecs_si_registration_date": null,
"ecs_si_status": "inactive",
"ecs_si_failure_reasons": null,
"ecs_si_status_as_per_rls": null,
"ecs_day": "0",
"payment_mode": "S",
"payment_mode_desc": "Semi Anually",
"payment_method": "S",
"payment_method_desc": "Cash/Cheque",
"payment_date": "2021-12-15",
"payment_amount": 13790.85,
"premium_base_amt": null,
"premium_tax_amt": null,
"due_premium": null,
"total_premium": 13790.85,
"advance_premium": null,
"assignee": [
{
"assignee_name": "",
"attributes1": {
"key": "assignee_client_id",
"value": ""
},
"attributes2": {
"key": "assignee_type",
"value": ""
}
}
],
"agents": [
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"agent_name": "",
"attributes2": {
"key": "agent_code",
"value": "1234"
},
"attributes3": {
"key": "email",
"value": ""
},
"attributes4": {
"key": "channel",
"value": "AGENCY"
},
"attributes5": {
"key": "subchannel",
"value": null
},
"attributes6": {
"key": "branch_name",
"value": ""
},
"attributes7": {
"key": "contact_no",
"value": ""
},
"attributes8": {
"key": "status",
"value": "T"
}
}
],
"bank_accounts": {
"attributes1": {
"key": "accountnumber",
"value": ""
},
"attributes2": {
"key": "accountholdername",
"value": ""
},
"attributes3": {
"key": "ifsccode",
"value": ""
},
"attributes4": {
"key": "micrcode",
"value": ""
},
"bankname": "",
"attributes5": {
"key": "branchname",
"value": ""
},
"attributes6": {
"key": "bank_address",
"value": ""
}
},
"Funds": [
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"attributes2": {
"key": "allocation_mode",
"value": "percentage"
},
"attributes3": {
"key": "allocation_value",
"value": null
},
"attributes4": {
"key": "fund_code",
"value": null
},
"fund_name": null,
"investment_strategy_name": null
}
],
"Investment_Strategies": [
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"attributes2": {
"key": "allocation_mode",
"value": "percentage"
},
"attributes3": {
"key": "allocation_value",
"value": null
},
"attributes4": {
"key": "fund_code",
"value": null
},
"fund_name": null,
"investment_strategy_name": null
}
],
"Roles": [
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"attributes2": {
"key": "beneficiary_percentage",
"value": "100.00"
},
"attributes3": {
"key": "relationship",
"value": "SPOUSE"
},
"role_name": "Nominee",
"attributes4": {
"key": "age",
"value": "NaN"
},
"attributes5": {
"key": "party_name",
"value": ""
},
"attributes6": {
"key": "gender",
"value": "F"
},
"attributes7": {
"key": "date_of_birth",
"value": "1953-03-20"
},
"attributes8": {
"key": "email",
"value": null
},
"attributes9": {
"key": "registred_mobile",
"value": null
}
},
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"attributes2": {
"key": "beneficiary_percentage",
"value": null
},
"attributes3": {
"key": "relationship",
"value": null
},
"role_name": "Insured",
"attributes4": {
"key": "age",
"value": 63
},
"attributes5": {
"key": "party_name",
"value": ""
},
"attributes6": {
"key": "gender",
"value": "M"
},
"attributes7": {
"key": "date_of_birth",
"value": "1950-06-20"
},
"attributes8": {
"key": "email",
"value": null
},
"attributes9": {
"key": "registred_mobile",
"value": null
}
},
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"attributes2": {
"key": "beneficiary_percentage",
"value": null
},
"attributes3": {
"key": "relationship",
"value": null
},
"role_name": "Owner",
"attributes4": {
"key": "age",
"value": 63
},
"attributes5": {
"key": "party_name",
"value": ""
},
"attributes6": {
"key": "gender",
"value": "M"
},
"attributes7": {
"key": "date_of_birth",
"value": "1950-06-20"
},
"attributes8": {
"key": "email",
"value": ""
},
"attributes9": {
"key": "registred_mobile",
"value": ""
}
}
],
"Benefits": [
{
"attributes1": {
"key": "policy_no",
"value": ""
},
"benefit_name": "Miii",
"attributes2": {
"key": "benefit_term",
"value": "7"
},
"attributes3": {
"key": "modal_premium",
"value": null
},
"attributes4": {
"key": "sum_assured",
"value": 192000
},
"attributes5": {
"key": "cash_value",
"value": 48576
},
"attributes6": {
"key": "expiry_date",
"value": null
},
"attributes7": {
"key": "status",
"value": "Death"
},
"attributes8": {
"key": "benefit_code",
"value": ""
},
"attributes9": {
"key": "benefit_type",
"value": "base"
}
}
]
}
'''
When you have a strong model in your data, and you want to process/validate the data you receive, it's better to do it in an object-oriented fashion. If you treat each data value as an object that contains all the logic for validating or correcting it, the task becomes simple.
See an example approach below:
(Assumed your null as None)
import json
# Your data object..
data = '''{
"id": 1,
"name": "Hey",
"category": null,
"salary": 1222,
"Roles": [
{
"attributes1": {
"key": "sum_assured",
"value": 192000
},
"attributes2": {
"key": "cash_value",
"value": 48576
},
"attributes3": {
"key": "expiry_date",
"value": null
}
}
]
}'''
# Create simple object models to represent the data.
class Role:
    """A single ``{"key": ..., "value": ...}`` attribute of a role entry."""

    def __init__(self, key, value):
        self.key = key
        # Only a missing value (None) falls back to ""; a legitimate 0 is kept.
        self.value = "" if value is None else value


class Policy:
    """Top-level record; all clean-up of the incoming fields happens here.

    Parameters mirror the keys of the decoded JSON object so an instance can
    be built with ``Policy(**data)``.
    """

    def __init__(self, id, name, category, salary, Roles):
        # write all your pre/post processing here
        self.id = id
        self.name = name
        self.category = category or ""
        # Format with two decimals; only None means "no salary" (0 -> "0.00").
        self.salary = format(salary, '0.2f') if salary is not None else ""
        # Flatten every role dict in the list, not just the first one.
        self.Roles = [
            Role(**attribute)
            for role in (Roles or [])
            for attribute in role.values()
        ]
# a json serialization helper function
def serial_default(obj):
    """``default`` hook for ``json.dumps``: export Policy/Role via their attributes.

    Returns the instance's attribute dict for Policy and Role objects.
    Raises TypeError for any other type, which is what the ``json`` module
    expects from a ``default`` hook that cannot handle a value (returning the
    object itself would make the encoder call the hook on it again).
    """
    if isinstance(obj, (Policy, Role)):
        return obj.__dict__
    raise TypeError(
        "Object of type %s is not JSON serializable" % type(obj).__name__
    )
# Decode the JSON text (rebinding `data` from str to dict), then pass its
# top-level keys to Policy as keyword arguments.
data = json.loads(data)
p = Policy(**data)
# Serialize the object to JSON; serial_default is passed as the `default` hook.
print(json.dumps(p, default=serial_default))
The print generates a valid version of your object as JSON like
{"id": 1, "name": "Hey", "category": "", "salary": "1222.00", "Roles": [{"key": "sum_assured", "value": 192000}, {"key": "cash_value", "value": 48576}, {"key": "expiry_date", "value": ""}]}
This is just an example to the approach. You can modify or improvise a similar approach and make things simpler.
Firstly, I have to agree with Kris's answer: this kind of conversion already looks a bit ugly, and with every added special case an object-oriented style makes more sense.
A few points:
null, None, ""
It seems that null won't be seen as None, so you're not getting into the if clause.
Check JSON to Dict for that
How can JSON data with null value be converted to a dictionary
Salary
According to your question you would expect
"salary": "1222.00"
correct?
If so you are currently overwriting your previously formatted number with
if isinstance(j, str):
new_dict[i] = j
Just change it to elif so you won't overwrite your conversion.
Roles
The objects within the Roles array are currently not iterated over, hence the numbers are not being formatted. For that you would need another loop over the Roles array.
The following is a recursive attempt that covers multiple nested collections.
import json
from collections import abc
dataRaw = '''
{
"id": 1,
"name": "Hey",
"category": null,
"salary": 1222,
"Roles": [
{
"attributes1": {
"key": "sum_assured",
"value": 192000
},
"attributes2": {
"key": "cash_value",
"value": 48576
},
"attributes3": {
"key": "expiry_date",
"value": null
}
}
]
}
'''
data = json.loads(dataRaw)
def processNested(nested):
    """Normalise the values of a nested list/dict structure in place.

    Rules applied while walking the structure:

    * ``{"key": ..., "value": ...}`` objects whose ``key`` is ``cash_value``
      or ``sum_assured`` get their non-empty ``value`` formatted with two
      decimals (``192000`` -> ``"192000.00"``).
    * Any other entry stored under a ``"value"`` key is converted to ``str``;
      ``None`` becomes ``""``.
    * Nested mappings and lists are processed recursively; every other field
      is left untouched.

    Returns the same (mutated) object that was passed in.
    """
    amount_keys = ("cash_value", "sum_assured")

    if isinstance(nested, list):
        for element in nested:
            # Scalars inside a list carry no key, so there is nothing to do.
            if isinstance(element, (list, abc.Mapping)):
                processNested(element)
        return nested

    # Only values are replaced below (no keys added or removed), so mutating
    # while iterating over items() is safe.
    for key, value in nested.items():
        if isinstance(value, abc.Mapping):
            raw = value.get("value")
            if value.get("key") in amount_keys and raw is not None and raw != "":
                # float() rather than int() so fractional amounts are not truncated.
                value["value"] = "%0.2f" % float(raw)
            else:
                processNested(value)
        elif isinstance(value, list):
            processNested(value)
        elif key == "value":
            nested[key] = "" if value is None else str(value)
    return nested
# Normalise the top-level fields in place (new_dict is an alias of data).
new_dict = data
for i, j in new_dict.items():
    if i == "salary" and j is not None:
        new_dict[i] = "%0.2f" % j
    elif isinstance(j, str):
        new_dict[i] = j
    elif j is None:
        # Covers a null salary too, which would otherwise fail in the "%" format.
        new_dict[i] = ""
    elif isinstance(j, (list, abc.Mapping)):
        # Any nested collection is processed, not only the "Roles" key.
        new_dict[i] = processNested(j)
    else:
        new_dict[i] = str(j)
print(new_dict)
I have a list of paths:
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
And I want to parse it with Python into a dict (or list of dicts) that looks like:
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file2",
"children": []
}
]
}
I tried to write some recursive function, but no success. Example:
def path2dict(path, depth):
# Asker's attempt: build one {'text', 'children'} dict from a single path string.
d = {}
# The node's text is always the first '/'-separated component of `path`.
text = path.split('/')[0]
d['text'] = text
depth = depth + 1
# NOTE: each `p` here is a single component (it no longer contains '/'), so
# the recursive calls cannot descend further.
d['children'] = [path2dict(p, depth) for p in path.split('/')[depth:]]
return d
paths = [
"root/child1/file1",
"root/child1/file2",
"root/child2/file1"
]
depth = 0
# Each path is converted and printed on its own; nothing merges the results
# of different paths into one tree.
for path in paths:
d = path2dict(path, depth)
print(d)
Sorry for not using your existing solution, but I have some other:
def stage1(paths):
    """Fold '/'-separated path strings into a tree of nested dicts.

    Every path component becomes a key whose value is the dict of its
    children, e.g. ``["a/b", "a/c"]`` -> ``{'a': {'b': {}, 'c': {}}}``.
    Empty components (from a leading, trailing or doubled '/') are skipped
    so they do not create nodes with an empty name.
    """
    result = {}
    for path in paths:
        current = result
        for part in path.split('/'):
            if not part:
                continue
            # setdefault returns the existing or the newly inserted sub-dict.
            current = current.setdefault(part, {})
    return result
def stage2(dct):
    """Convert a nested-dict tree into a list of text/children nodes.

    Each key of *dct* becomes one ``{'text': key, 'children': [...]}`` dict,
    where the children are produced by applying the same conversion to the
    key's value.
    """
    nodes = []
    for name, subtree in dct.items():
        node = {'text': name, 'children': stage2(subtree)}
        nodes.append(node)
    return nodes
# Stage 1: fold the path strings into nested dicts keyed by component name.
after_stage1 = stage1(paths)
# after_stage1 is
# {
# 'root': {
# 'child1': {
# 'file1': {},
# 'file2': {}
# },
# 'child2': {
# 'file1': {}
# }
# }
# }
# Stage 2: turn the nested dicts into {'text': ..., 'children': [...]} nodes.
after_stage2 = stage2(after_stage1)
# after_stage2 is a list with one node dict per top-level name (here only 'root')
You can use itertools.groupby:
from itertools import groupby
import json
d = ['root/child1/file1', 'root/child1/file2', 'root/child2/file1']
def create_paths(paths):
    """Build a list of text/children nodes from already-split paths.

    *paths* is an iterable of component lists such as
    ``['root', 'child1', 'file1']``. Paths are grouped by their first
    component; the remaining components of each group are converted
    recursively into that node's children. Empty component lists are
    ignored, so a path that is both a leaf and a prefix of a longer path
    (``['root']`` next to ``['root', 'x']``) keeps its children.
    """
    def first(parts):
        return parts[0]

    # groupby only merges adjacent items, hence the sort on the same key.
    ordered = sorted((parts for parts in paths if parts), key=first)
    nodes = []
    for head, group in groupby(ordered, key=first):
        tails = [tail for _, *tail in group]
        nodes.append({'text': head, 'children': create_paths(tails)})
    return nodes
print(json.dumps(create_paths([i.split('/') for i in d]), indent=4))
Output:
[
{
"text": "root",
"children": [
{
"text": "child1",
"children": [
{
"text": "file1",
"children": []
},
{
"text": "file2",
"children": []
}
]
},
{
"text": "child2",
"children": [
{
"text": "file1",
"children": []
}
]
}
]
}
]
I'm trying to use Python to turn data from a CSV into a JSON with the format found here:
https://gist.github.com/mbostock/1093025 so that I can modify some http://d3js.org/ examples.
I have found some posts on how to do similar transformations, but nothing exactly like the nested {'name': name, 'children' = []} format.
For the test.csv:
Team,Task,Country,ID
Team A,Processing,CA,5
Team A,Review,CA,45
Team B,Processing,US,76
Team B,Processing,CA,676
Team B,Support,US,2345
Team C,Processing,US,67
Team C,Review,US,734
Team C,Support,US,34
Output should look like:
{
"name": "Flare",
"children": [
{
"name": "Team A",
"children": [
{
"name": "Processing",
"children": [
{"name": "CA", "size": 5}
]
},
{
"name": "Review",
"children": [
{"name": "CA", "size": 45}
]
}
]
},
{
"name": "Team B",
"children": [
{
"name": "Processing",
"children": [
{"name": "US", "size": 76},
{"name": "CA", "size": 676}
]
},
{
"name": "Support",
"children": [
{"name": "US", "size": 2345}
]
}
]
},
{
"name": "Team C",
"children": [
{
"name": "Processing",
"children": [
{"name": "US", "size": 67}
]
},
{
"name": "Review",
"children": [
{"name": "US", "size": 734}
]
},
{
"name": "Support",
"children": [
{"name": "US", "size": 34}
]
}
]
}
]
}
This is as far as I have been able to get (I know it's pretty bad):
import csv
import json
# Asker's attempt: group consecutive CSV rows by their first column.
children = []
#create a list of lists from CSV
reader = csv.reader(open('//corp.bloomberg.com/pn-dfs/AllLinks/Users/jdesilvio/Desktop/test.csv', 'rb'))
# skip the header row (Python 2 iterator method)
reader.next()
for row in reader:
children.append(row)
#create tree root
tree = {'name': "flare", 'children': []}
#create a generic subtree
subtree = {'name': 0, 'children': []}
for i in children:
#if the first element in i doesn't equal name, then we know that it's a different group
if i[0] != subtree['name']:
#so we append the current group
tree['children'].append({'name': subtree['name'], 'children': subtree['children']})
#start a new group
subtree['children'] = []
#and rename the subtree
subtree['name'] = i[0]
# NOTE: the row that triggers the group change is not itself added to the
# new group; only later rows with the same name reach the else branch.
else:
#then start appending pieces to the new group
subtree['children'].append(i[1:len(i)])
# NOTE: a group is only appended to the tree when a row with a different name
# follows it, so the last group in the file is never appended.
#remove the generic starting name
tree['children'] = tree['children'][1:]
print json.dumps(tree, indent=1)
Which yields:
{
"name": "flare",
"children": [
{
"name": "Team A",
"children": [
[
"Review",
"CA",
"45"
]
]
},
{
"name": "Team B",
"children": [
[
"Processing",
"CA",
"676"
],
[
"Support",
"US",
"2345"
]
]
}
]
}
This looks like it is headed in the right direction, but even if I was able to get the first level nested, I'm not sure how to nest more levels in a generic way.
Populating a tree is the clearest solution. However, using a dict for traversal is not a good idea. I suggest creating a helper class for each tree node, using it to populate the data, and then converting the result to JSON:
import csv
import json
class Node(object):
    """Helper node used to populate the tree before converting it to JSON.

    A node created with a ``size`` is exported as a leaf (``name`` + ``size``);
    a node without one is exported with its ``children`` list.
    """

    def __init__(self, name, size=None):
        self.name = name
        self.children = []
        self.size = size

    def child(self, cname, size=None):
        """Return the child named *cname*, creating and appending it if absent.

        *size* is only used when the child is created; an existing child is
        returned unchanged.
        """
        # Short-circuit on the first match rather than collecting all matches.
        found = next((c for c in self.children if c.name == cname), None)
        if found is None:
            found = Node(cname, size)
            self.children.append(found)
        return found

    def as_dict(self):
        """Return this subtree as nested dicts suitable for ``json.dumps``."""
        res = {'name': self.name}
        if self.size is None:
            res['children'] = [c.as_dict() for c in self.children]
        else:
            res['size'] = self.size
        return res
# Populate the tree from the CSV rows, then print it as indented JSON.
root = Node('Flare')
with open('/tmp/test.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row (works on Python 2.6+ and 3)
    for row in reader:
        grp1, grp2, grp3, size = row
        # child() reuses an existing node, so repeated groups are merged.
        root.child(grp1).child(grp2).child(grp3, size)
print(json.dumps(root.as_dict(), indent=4))
I am working on a file representing a tree-like structure very similar to flare.json which is known for D3.js community. What's the best way to delete all the leaves of the tree in python? In other words, I want to remove all the keys that don't have a 'children' key in their value.
example :
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
},
{
"name": "optimization",
"children": [
{"name": "AspectRatioBanker", "size": 7074}
] ...
which should become:
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
},
{
"name": "graph",
},
{
"name": "optimization",
] ...
In other words, I'm just cutting the leaves of the tree. If a children list is empty, it should be removed.
I tried this only to remove the keys and it did not work:
def deleteLeaves(pTree):
# Asker's attempt: recurse into every child of a node that has a 'children' key.
# dict.has_key() exists only in Python 2.
if pTree.has_key('children'):
for child in pTree['children']:
deleteLeaves(child)
else:
# NOTE: `del pTree` only unbinds this function's local name; the dict is
# still referenced from its parent's 'children' list, so nothing is removed.
del pTree
This seems to approximate what you want:
def pruneLeaves(obj):
    """Remove leaf nodes from a nested dict/list tree in place.

    A dict is a leaf when it has no ``'children'`` key. Leaf dicts are
    removed from the list that contains them, and a dict entry whose value
    ends up prunable (an emptied list or a leaf dict) is deleted.

    Returns True when *obj* itself should be pruned by its container:
    a leaf dict or an empty list. Scalars are never pruned.
    """
    if isinstance(obj, dict):
        isLeaf = True
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which raises RuntimeError on a live dict view in Python 3.
        for key in list(obj):
            if key == 'children':
                isLeaf = False
            if pruneLeaves(obj[key]):
                del obj[key]
        return isLeaf
    elif isinstance(obj, list):
        # Keep only the elements that are not prunable, mutating the list in place.
        obj[:] = [element for element in obj if not pruneLeaves(element)]
        return not obj
    else:  # String values look like attributes in your dict, so never prune them
        return False
Tested with a truncated sample of your data:
data = {
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
}
]
}
]
}
# Prune in place, then show the result (print() works on Python 2 and 3).
pruneLeaves(data)
print(data)
And got these results:
{'name': 'flare', 'children': [{'name': 'analytics', 'children': [{'name': 'cluster'}, {'name': 'graph'}]}]}
I just edited the answer of #rchang to fix deletion of lists other than children.
def pruneLeaves(self,obj):
    """Remove leaf nodes from a nested dict/list tree in place.

    A dict is a leaf when it has no ``'children'`` key. Only the value stored
    under ``'children'`` is pruned; lists and dicts stored under any other key
    are left untouched.

    Returns True when *obj* itself should be pruned by its container:
    a leaf dict or an empty list. Scalars are never pruned.
    """
    if isinstance(obj, dict):
        if 'children' not in obj:
            return True
        # Drop the 'children' entry once pruning has emptied it.
        if self.pruneLeaves(obj['children']):
            del obj['children']
        return False
    elif isinstance(obj, list):
        # Keep only the elements that are not prunable, mutating the list in place.
        obj[:] = [element for element in obj if not self.pruneLeaves(element)]
        return not obj
    else:  # String values look like attributes in your dict, so never prune them
        return False
I have data in JSON format:
data = {"outfit":{"shirt":"red","pants":{"jeans":"blue","trousers":"khaki"}}}
I'm attempting to plot this data into a decision tree using InfoVis, because it looks pretty and interactive. The problem is that their graph takes JSON data in this format:
data = {id:"nodeOutfit",
name:"outfit",
data:{},
children:[{
id:"nodeShirt",
name:"shirt",
data:{},
children:[{
id:"nodeRed",
name:"red",
data:{},
children:[]
}],
}, {
id:"nodePants",
name:"pants",
data:{},
children:[{
id:"nodeJeans",
name:"jeans",
data:{},
children:[{
id:"nodeBlue",
name:"blue",
data:{},
children[]
},{
id:"nodeTrousers",
name:"trousers",
data:{},
children:[{
id:"nodeKhaki",
name:"khaki",
data:{},
children:[]
}
}
Note the addition of 'id', 'data' and 'children' to every key and value and calling every key and value 'name'. I feel like I have to write a recursive function to add these extra values. Is there an easy way to do this?
Here's what I want to do but I'm not sure if it's the right way. Loop through all the keys and values and replace them with the appropriate:
# Sketch only: the innermost loop has no statement yet, just the comments below.
# NOTE: the loop variables `list` and `dict` shadow the built-in types.
for name, list in data.iteritems():
for dict in list:
for key, value in dict.items():
#Need something here which changes the value for each key and values
#Not sure about the syntax to change "outfit" to name:"outfit" as well as
#adding id:"nodeOutfit", data:{}, and 'children' before the value
Let me know if I'm way off.
Here is their example http://philogb.github.com/jit/static/v20/Jit/Examples/Spacetree/example1.html
And here's the data http://philogb.github.com/jit/static/v20/Jit/Examples/Spacetree/example1.code.html
A simple recursive solution:
data = {"outfit":{"shirt":"red","pants":{"jeans":"blue","trousers":"khaki"}}}
import json
from collections import OrderedDict
def node(name, children):
    """Build one node with the keys id, name, data and children, in that order.

    The id is ``'node'`` followed by the capitalised text of *name*; *name*
    is converted with ``str`` for the id only, so non-string leaves such as
    numbers work too.
    """
    n = OrderedDict()
    n['id'] = 'node' + str(name).capitalize()
    n['name'] = name
    n['data'] = {}
    n['children'] = children
    return n


def convert(d):
    """Convert a nested dict (or a leaf value) into a list of nodes.

    Each key of a dict becomes a node whose children are the conversion of
    its value; any non-dict value becomes a single childless node. A list is
    always returned because a dict may have more than one key.
    """
    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(d, dict):
        return [node(k, convert(v)) for k, v in d.items()]
    return [node(d, [])]
print(json.dumps(convert(data), indent=True))
Note that convert returns a list, not a dict, as data could also have more than one key, not just 'outfit'.
output:
[
{
"id": "nodeOutfit",
"name": "outfit",
"data": {},
"children": [
{
"id": "nodeShirt",
"name": "shirt",
"data": {},
"children": [
{
"id": "nodeRed",
"name": "red",
"data": {},
"children": []
}
]
},
{
"id": "nodePants",
"name": "pants",
"data": {},
"children": [
{
"id": "nodeJeans",
"name": "jeans",
"data": {},
"children": [
{
"id": "nodeBlue",
"name": "blue",
"data": {},
"children": []
}
]
},
{
"id": "nodeTrousers",
"name": "trousers",
"data": {},
"children": [
{
"id": "nodeKhaki",
"name": "khaki",
"data": {},
"children": []
}
]
}
]
}
]
}
]