Related
Need some help please.
I have a similar json file:
{
"timestamp": "2022-09-20T08:16:00.000Z",
"metadata": {
"orgID": "6780",
"projectId": 0988,
}
},
{
"data":
"workers": [
{
"identifiers": {
"FullName": null,
"NINumber": null,
"CompID": null
},
"lastName": null,
"costCenter": null
},
{
"codes": [
{
"source": {
"name": "net_salary",
"value": 11500
},
"name": "net_salary",
"code": "rt_sa",
"value": 11500
},
{
"identifiers": {
"FullName": null,
"NINumber": null,
"CompID": null
},
"lastName": null,
"costCenter": null
},
{
"codes": [
{
"source": {
"name": "hiredate",
"value": 3.333
},
"name": "hiredate",
"code": "h_code",
"value": 3.333
},
I want to rename the keys inside each "source" object: "name" should become "fieldname" and "value" should become "fieldvalue".
However, I don't want to change the sibling keys outside "source" (name, code, value).
I tried this but it is not correct:
# Asker's attempt, kept verbatim because it is the code the question is about.
# NOTE(review): `json` is used but no import is visible in this snippet — presumably `import json` precedes it.
with open(r'C:\Users\Administrator\Documents\test\PJSON.json') as f:
payrolldata = json.load(f)
# Hard-coded path to a single "source" dict; no other worker/code entry is visited.
source = payrolldata[1]['data']['workers'][1]['codes'][1]['source']
print(source)
oldvalue = source.keys()
# str(...).replace() builds a new string from the key view; the dict behind `source` is not modified.
print(str(oldvalue).replace('name', 'newname').replace('value', 'value2'))
# This rebinds payrolldata to that string, so the structure loaded from the file is lost.
payrolldata = str(oldvalue).replace('name', 'newname').replace('value', 'newvalue2')
# Iterating a string yields single characters, so `d` is a str here, not a dict.
for d in payrolldata:
# `d.pop[...]` subscripts the attribute instead of calling it, and `d['newName':]` is a slice, not a key.
d['newName':] = d.pop["'name':"]
# Whatever payrolldata holds at this point overwrites the input file.
with open(r'C:\Users\Administrator\Documents\test\PJSON.json', "w") as f:
json.dump(payrolldata, f, indent=4)
I suggest you don't convert your dict into a string. Instead, apply something like this to the dict you read from the JSON file (with json.load):
def deep_replace_key(
    d,
    old_key: str,
    new_key: str,
    branch_name: str = None,
    replace: bool = False,
):
    """Rename ``old_key`` to ``new_key`` inside a nested dict/list structure, in place.

    Args:
        d: The structure to walk. Dicts and lists are traversed; any other
            value is left alone.
        old_key: Key name to look for.
        new_key: Name the key is given instead. An existing ``new_key`` in
            the same dict is overwritten.
        branch_name: When ``None`` (default) the key is renamed in every dict
            at every depth. Otherwise it is renamed only in a dict that is the
            direct value of a ``branch_name`` key (or an item of a list that
            is such a value).
        replace: Internal recursion flag saying whether the current node is
            eligible for renaming. Callers normally leave it at its default.

    Returns:
        The same object ``d`` that was passed in (mutated).
    """
    if branch_name is None:
        # No branch restriction: every dict is eligible.
        replace = True
    if isinstance(d, dict):
        # Walk a snapshot of the items because renaming mutates ``d``.
        for key, value in list(d.items()):
            if key == old_key and replace:
                d[new_key] = d.pop(old_key)
            # Always descend, including below a key that was just renamed;
            # otherwise nested occurrences of ``old_key`` would be missed.
            # A child is eligible only when it hangs directly off the branch.
            child_replace = bool(branch_name) and key == branch_name
            deep_replace_key(value, old_key, new_key, branch_name, child_replace)
    elif isinstance(d, list):
        # Lists are transparent: items inherit the eligibility of the list.
        for item in d:
            deep_replace_key(item, old_key, new_key, branch_name, replace)
    return d
Here is a working test for this code
import unittest
# test
class TestDeepReplaceKey(unittest.TestCase):
    """Exercise ``deep_replace_key`` with and without a branch restriction."""

    def test_deep_replace_key(self):
        """Rename inside "source" only, then rename everywhere."""

        def entry():
            # Two sibling branches with identical contents, so only the
            # branch name can explain a difference in the outcome.
            return {
                "source": {"name": "hiredate", "value": 3.333},
                "not_source": {"name": "hiredate", "value": 3.333},
            }

        tree = {"codes": [entry(), entry()]}

        # Restricted call: only the "source" branch may change.
        tree = deep_replace_key(tree, "name", "new_name", "source")
        first = tree["codes"][0]
        self.assertEqual(first["source"]["new_name"], "hiredate")
        self.assertEqual(first["not_source"]["name"], "hiredate")

        # Unrestricted call: the remaining "name" keys change as well.
        tree = deep_replace_key(tree, "name", "new_name")
        self.assertEqual(tree["codes"][0]["not_source"]["new_name"], "hiredate")
So you can see if I call deep_replace_key(d, "name", "new_name", "source", ) the change only happens in the source block.
If I omit mentioning "source" like this deep_replace_key(d, "name", "new_name", ) change happens everywhere.
I have 2 dictionaries:
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "26366", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "45165", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "48834", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
new_val = {'26366': '11616', '45165': '11613', '48834': '11618'}
I want to update values in "data" dictionary with the values from "new_val" dictionary.
So that 26366(in "data" dict) becomes 11616(from "new_val" dict), 45165 becomes 11613, and 48834 becomes 11618.
"data" dictionary nesting can be different (both up and down)
The key in the "data" dictionary can be different, not only "key", it can be "skill_id", "filter_id" and so on.
And get this result:
{
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
To return an updated dict without modifying the old one:
def updated_in_depth(d, replace):
    """Build a copy of ``d`` in which leaf values are swapped using ``replace``.

    Dicts and lists are rebuilt recursively (keys are kept as they are); any
    other value is looked up in ``replace`` and returned unchanged when it is
    not a key there. The input structure is not modified.
    """
    if isinstance(d, list):
        return [updated_in_depth(element, replace) for element in d]
    if not isinstance(d, dict):
        # Leaf: substitute when a mapping entry exists, else keep the value.
        return replace.get(d, d)
    rebuilt = {}
    for name, child in d.items():
        rebuilt[name] = updated_in_depth(child, replace)
    return rebuilt
Testing with your data and new_val:
>>> updated_in_depth(data, new_val)
{'filter': {'and': [{'or': [{'and': [
{'category': 'profile', 'key': 'languages', 'operator': 'IN', 'value': 'EN'},
{'category': 'skill', 'key': '11616', 'value': 100, 'operator': 'EQ'}]}]},
{'or': [{'category': 'skill', 'key': '11613', 'operator': 'NE'}]},
{'or': [{'category': 'skill', 'key': '11618', 'value': 80, 'operator': 'GT'}]},
{'or': [{'category': 'profile', 'key': 'gender', 'operator': 'EQ', 'value': 'FEMALE'}]}]}}
Use something like this:
# "and" and "or" hold lists, so each level needs a list index before the next key.
data['filter']['and'][0]['or'][0]['and'][1]['key'] = '11616'
To search for the keys recursively you can do:
from copy import deepcopy
def replace(d, new_vals, key_names=("key",)):
    """Substitute the values of selected fields throughout a nested structure, in place.

    Args:
        d: Nested combination of dicts and lists; other values are ignored.
        new_vals: Mapping from old field value to new field value.
        key_names: Names of the dict fields whose values may be substituted.
            Defaults to ``("key",)``; a single name may be given as a string.

    Returns:
        None. ``d`` is modified in place.
    """
    if isinstance(key_names, str):
        key_names = (key_names,)
    if isinstance(d, dict):
        for name in key_names:
            if name not in d:
                continue
            current = d[name]
            # A container cannot be used as a lookup key (it is unhashable);
            # its contents are handled by the recursion below instead.
            if isinstance(current, (dict, list)):
                continue
            d[name] = new_vals.get(current, current)
        for v in d.values():
            replace(v, new_vals, key_names)
    elif isinstance(d, list):
        for v in d:
            replace(v, new_vals, key_names)
# Work on a deep copy so `data` itself stays untouched; replace() edits its argument in place.
new_data = deepcopy(data)
replace(new_data, new_val)
print(new_data)
Prints:
{
"filter": {
"and": [
{
"or": [
{
"and": [
{
"category": "profile",
"key": "languages",
"operator": "IN",
"value": "EN",
},
{
"category": "skill",
"key": "11616",
"value": 100,
"operator": "EQ",
},
]
}
]
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{
"or": [
{
"category": "skill",
"key": "11618",
"value": 80,
"operator": "GT",
}
]
},
{
"or": [
{
"category": "profile",
"key": "gender",
"operator": "EQ",
"value": "FEMALE",
}
]
},
]
}
}
If you don't need copy of data you can omit the deepcopy:
# Without the copy, replace() rewrites the matching "key" values inside `data` itself.
replace(data, new_val)
print(data)
You can build a recursive function like this
def walk_dict(d, mapping=None):
    """Substitute the "key" field of every dict in a nested structure, in place.

    Args:
        d: Nested combination of dicts and lists; other values are ignored.
        mapping: Mapping from old "key" value to new "key" value. When omitted,
            the module-level ``new_val`` is used, as in the original snippet.

    Returns:
        None. ``d`` is modified in place.
    """
    if mapping is None:
        mapping = new_val
    if isinstance(d, list):
        for item in d:
            walk_dict(item, mapping)
    elif isinstance(d, dict):
        if 'key' in d:
            current = d['key']
            # Containers are unhashable and cannot be tested against the
            # mapping; the loop below still descends into them.
            if not isinstance(current, (dict, list)) and current in mapping:
                d['key'] = mapping[current]
        for v in d.values():
            walk_dict(v, mapping)
# walk_dict() returns nothing and edits `data` in place, so print `data` afterwards.
walk_dict(data)
print(data)
As many have advised, a recursive function will do the trick:
def a(d):
    """Return a copy of ``d`` with leaf values translated through ``new_val``.

    Dicts are rebuilt with the same keys and translated values, lists are
    rebuilt element by element, and any other value is replaced by
    ``new_val[value]`` when it is a key of the module-level ``new_val``.
    """
    if isinstance(d, dict):
        converted = {}
        for k in d:
            converted[k] = a(d[k])
        return converted
    if isinstance(d, list):
        return list(map(a, d))
    # Leaf (number, string, bool, ...): translate only when a mapping exists.
    if d in new_val:
        return new_val[d]
    return d
When a is called, it checks the type of the variable d:
if d is a list, it applies a to each element of the list and returns the updated list
if d is a dict, it applies a to all values and returns the updated dict
otherwise, it returns the mapped new value if the old one is found among the keys of new_val
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
class Replace:
    """Substitute string values inside a nested dict/list structure, in place."""

    def __init__(self, data):
        # The structure to edit; it is kept by reference, not copied.
        self.data = data

    def start(self, d):
        """Replace every string dict value that is a key of ``d`` by ``d[value]``.

        Args:
            d: Mapping from old string value to its replacement.

        Returns:
            The same object that was passed to the constructor (mutated).
            Strings that are direct list elements are left unchanged; only
            values stored under a dict key are substituted.
        """

        def replace(node):
            if isinstance(node, list):
                for item in node:
                    replace(item)
            elif isinstance(node, dict):
                for k, v in node.items():
                    if isinstance(v, (dict, list)):
                        # Lists must be descended into as well, otherwise
                        # anything below e.g. "and": [...] is never reached.
                        replace(v)
                    elif isinstance(v, str) and v in d:
                        # Re-assigning an existing key does not resize the
                        # dict, so this is safe while iterating items().
                        node[k] = d[v]

        replace(self.data)
        return self.data
# start() hands back the same object that was given to Replace(...); no copy is made.
new_data = Replace(data).start({'26366': '11616',
'45165': '11613',
'48834': '11618'})
print(new_data)
Want to get the MetaEntry data from the dictionary, but each time the keys differ with some random names.
for example: ['Client']
dictionary = {
"Tags":[],
"ObjectId":
"ab9c6448-85fe-eb11-b563-281878c3a7fe",
"Client": {
"MetaData": {
"MetaEntry": [
{
"Key": "status",
"Value": "Active"
},
{
"Key": "first_day_of_week",
"Value": "Monday"
},
{
"Key": "default_induction_expiry",
"Value": "0"
}
]
},
"RelatedLinks": [],
"Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics "
}
}
print(dictionary['Client']['MetaData']['MetaEntry'])
Here the 'Client' key changes from one response to the next, so the print call above will not work. Is there a way to get ['MetaData']['MetaEntry'] without hardcoding the 'Client' key?
If you have deeply nested dictionary, you can recursively traverse it to find all MetaEntry keys:
dictionary = {
"Tags": [],
"ObjectId": "ab9c6448-85fe-eb11-b563-281878c3a7fe",
"Client": {
"MetaData": {
"MetaEntry": [
{"Key": "status", "Value": "Active"},
{"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"},
]
},
"RelatedLinks": [],
"Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics ",
},
}
def find(d):
    """Yield the value of every "MetaEntry" key found in a nested dict/list.

    A dict that has "MetaEntry" yields that value and is not searched any
    further; other dicts and lists are searched recursively. Non-container
    values yield nothing.
    """
    if isinstance(d, dict):
        if "MetaEntry" in d:
            yield d["MetaEntry"]
            return
        children = d.values()
    elif isinstance(d, list):
        children = d
    else:
        return
    for child in children:
        yield from find(child)
# find() is a generator; each iteration receives one "MetaEntry" value from the structure.
for meta_entry in find(dictionary):
print(meta_entry)
Prints:
[
{"Key": "status", "Value": "Active"},
{"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"},
]
EDIT: To print current path:
def find(d, cur_path=None):
    """Yield ``(value, path)`` for every "MetaEntry" key in a nested dict/list.

    ``path`` is a new list of the dict keys and list indexes leading to the
    entry, ending with "MetaEntry". ``cur_path`` is the path of ``d`` itself
    and defaults to an empty path. A dict that has "MetaEntry" is not
    searched any further.
    """
    path = [] if cur_path is None else cur_path
    if isinstance(d, dict):
        if "MetaEntry" in d:
            yield d["MetaEntry"], path + ["MetaEntry"]
            return
        steps = d.items()
    elif isinstance(d, list):
        steps = enumerate(d)
    else:
        return
    for step, child in steps:
        # Build a fresh list per child so sibling paths never share state.
        yield from find(child, path + [step])
# Each item is a (value, path) pair; path lists the dict keys / list indexes leading to "MetaEntry".
for meta_entry, cur_path in find(dictionary):
print(cur_path)
print(meta_entry)
Prints:
['Client', 'MetaData', 'MetaEntry']
[{'Key': 'status', 'Value': 'Active'}, {'Key': 'first_day_of_week', 'Value': 'Monday'}, {'Key': 'default_induction_expiry', 'Value': '0'}]
the below code will handle the situation where 'Client' will be replaced by other string
data = {"Tags": [], "ObjectId": "ab9c6448-85fe-eb11-b563-281878c3a7fe", "kkk": {"MetaData": {
"MetaEntry": [{"Key": "status", "Value": "Active"}, {"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"}]}, "RelatedLinks": [], "Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics "}}
# Report every top-level key whose value is a dict that contains "MetaData".
for name, child in data.items():
    if not isinstance(child, dict):
        continue
    if 'MetaData' not in child:
        continue
    print(f'{name} points to metadata')
output
kkk points to metadata
Below is sample list data, I want to convert it into a dynamic dictionary.
result = [
{
"standard": "119",
"score": "0",
"type": "assignment",
"student": "4"
},
{
"standard": "119",
"score": "0",
"type": "assignment",
"student": "5"
},
{
"standard": "118",
"score": "0",
"type": "assessment",
"student": "4"
}
]
I want to create a function conv_to_nested_dict(keys, data) that groups the list of records into a nested dictionary, using the given list of keys dynamically.
For example, conv_to_nested_dict(['standard','student'], result) should give this output:
{
"118": {
"4": [{
"score": "0",
"type": "assessment"
}]
},
"119": {
"4": [{
"score": "0",
"type": "assignment"
}],
"5": [{
"score": "0",
"type": "assignment"
}]
}
}
conv_to_nested_dict(['standard','type'],result)
{
"118": {
"assessment": [{
"score": 0,
"student": "4"
}]
},
"119": {
"assignment": [{
"score": 0,
"student": "4"
},{
"score": 0,
"student": "5"
}]
}
}
This is a general idea.
def conf_to_nested_dict(keys, result, drop_keys=False):
    """Group a list of records into nested dicts keyed by the given fields.

    Args:
        keys: Field names to group by, outermost first. The value of the last
            field selects the list a record is appended to.
        result: Iterable of dict records; each must contain every field in
            ``keys`` (a missing field raises ``KeyError``).
        drop_keys: When False (default) the original record objects are
            stored. When True, each stored record is a copy without the
            grouping fields.

    Returns:
        A dict nested ``len(keys)`` levels deep whose innermost values are
        lists of records, in input order.
    """
    parent_keys = keys[:-1]
    grouped_fields = set(keys) if drop_keys else None
    tree = {}
    for record in result:
        node = tree
        for key in parent_keys:
            # Create the intermediate level on first use, then step into it.
            node = node.setdefault(record[key], {})
        bucket = node.setdefault(record[keys[-1]], [])
        if drop_keys:
            bucket.append(
                {k: v for k, v in record.items() if k not in grouped_fields}
            )
        else:
            bucket.append(record)
    return tree
#R is your structure
result is your source array, keys are the keys by which you want to group results. Iterate over results, for each record - create a tree structure based on key values ( record[key] ). For the last key - create a list and append the record to it.
I am working on a file representing a tree-like structure very similar to flare.json which is known for D3.js community. What's the best way to delete all the leaves of the tree in python? In other words, I want to remove all the keys that don't have a 'children' key in their value.
example :
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
},
{
"name": "optimization",
"children": [
{"name": "AspectRatioBanker", "size": 7074}
] ...
which should become:
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
},
{
"name": "graph",
},
{
"name": "optimization",
] ...
In other words, I'm just cutting the leaves of the tree. If a children list ends up empty, it should be removed.
I tried this only to remove the keys and it did not work:
# Asker's attempt, kept verbatim because it is the code the question is about.
def deleteLeaves(pTree):
# dict.has_key() is a Python 2 method; Python 3 dicts use `'children' in pTree` instead.
if pTree.has_key('children'):
for child in pTree['children']:
deleteLeaves(child)
else:
# `del pTree` only unbinds the local name; the parent's 'children' list still holds the dict.
del pTree
This seems to approximate what you want:
def pruneLeaves(obj):
    """Remove leaf nodes from a nested dict/list tree, in place.

    A dict counts as a leaf when it has no 'children' key. Leaf dicts are
    removed from the list that contains them, and a dict entry is deleted
    when its value is itself prunable (a leaf dict, or a list that is empty
    after pruning).

    Args:
        obj: A dict, a list, or any other value.

    Returns:
        True when the caller should remove ``obj`` from its container:
        for a dict, when it has no 'children' key; for a list, when it is
        empty after pruning; always False for other values.
    """
    if isinstance(obj, dict):
        isLeaf = 'children' not in obj
        # Iterate over a snapshot of the keys: deleting entries while looping
        # over the live keys view raises RuntimeError on Python 3.
        for key in list(obj):
            if pruneLeaves(obj[key]):
                del obj[key]
        return isLeaf
    if isinstance(obj, list):
        # Keep only the non-leaf elements, preserving order and list identity.
        obj[:] = [element for element in obj if not pruneLeaves(element)]
        return not obj
    # Scalars (names, sizes, ...) are attributes of a node, never pruned.
    return False
Tested with a truncated sample of your data:
data = {
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
}
]
}
]
}
# pruneLeaves() edits `data` in place; print() as a function works on Python 2 and 3.
pruneLeaves(data)
print(data)
And got these results:
{'name': 'flare', 'children': [{'name': 'analytics', 'children': [{'name': 'cluster'}, {'name': 'graph'}]}]}
I just edited @rchang's answer so that lists other than 'children' are no longer deleted.
def pruneLeaves(self, obj):
    """Remove leaf nodes from a nested dict/list tree, in place.

    A dict counts as a leaf when it has no 'children' key. Only the
    'children' entry of a dict is descended into; other entries (including
    other lists) are left as they are. The 'children' entry is deleted when
    it is prunable (empty after pruning).

    Args:
        obj: A dict, a list, or any other value.

    Returns:
        True when the caller should remove ``obj`` from its container:
        for a dict, when it has no 'children' key; for a list, when it is
        empty after pruning; always False for other values.
    """
    if isinstance(obj, dict):
        if 'children' not in obj:
            return True
        # Direct lookup instead of deleting inside a loop over obj.keys(),
        # which raises RuntimeError on Python 3.
        if self.pruneLeaves(obj['children']):
            del obj['children']
        return False
    if isinstance(obj, list):
        # Keep only the non-leaf elements, preserving order and list identity.
        obj[:] = [element for element in obj if not self.pruneLeaves(element)]
        return not obj
    # Scalars (names, sizes, ...) are attributes of a node, never pruned.
    return False