I found a method to iterate a python dictionary object recursively on this forum. However, I wish to extend that function so that I get a string similar to the structure of a file path. With my function below, I expect an output in the form of
/key1/value1
/key2/value2
/key3/key3a/value3a
/key4/key4a/key4a1/value4a1
/key4/key4a/key4a2/value4a2
/key4/key4a/key4a3/value4a3
/key4/key4b/key4b1/key4b1a/value4b1a
/key4/key4b/key4b1/key4b1b/value4b1b
/key4/key4b/key4b1/key4b1c/value4b1c
/key4/key4c/key4c1/key4c1a/value4c1a
/key4/key4c/key4c1/key4c1b/value4c1b
/key4/key4c/key4c1/key4c1c/value4c1c
Unfortunately, I hit a block. I cannot figure out how to achieve that. Below is the code that I came up with. Any help is greatly appreciated.
import sys
import collections
# Sample nested dictionary passed to print_dict() by the main guard of this snippet.
dict_object = {
'key1': 'value1',
'key2': 'value2',
'key3': {'key3a': 'value3a'},
'key4': {
'key4a': {
'key4a1': 'value4a1',
'key4a2': 'value4a2',
'key4a3': 'value4a3'
},
'key4b': {
'key4b1': {
'key4b1a': 'value4b1a',
'key4b1b': 'value4b1b',
'key4b1c': 'value4b1c'
},
# NOTE(review): as the braces are written, 'key4c' is a child of 'key4b',
# not a sibling of it directly under 'key4'.
'key4c': {
'key4c1': {
'key4c1a': 'value4c1a',
'key4c1b': 'value4c1b',
'key4c1c': 'value4c1c'
}
}
}
}
}
def print_dict(dictionary, path='', parent=''):
""" This function recursively prints nested dictionaries."""
# `path` is the prefix string built so far; `parent` is the key under which
# `dictionary` was found. Both default to ''.
# Sort the dictionary object by its keys
if isinstance(dictionary, dict):
dictionary = collections.OrderedDict(sorted(dictionary.items()))
else:
# NOTE(review): `operator` is not among the imports shown with this snippet,
# and sorted() returns a list, which has no iteritems() for the loop below.
dictionary = sorted(dictionary.items(), key=operator.itemgetter(1))
# Iterate each sorted dictionary key
for key, value in dictionary.iteritems():
if isinstance(value, dict):
# NOTE(review): `path` is reset to '' and then rebuilt from `parent` and `key`
# only, so the prefix received from the caller is discarded at this point.
path = ''
path = '%s/%s/%s' % (path, parent, key)
# Repeat this function for nested {} instances
print_dict(value, path, key)
else:
# Print the last node, i.e. PATH + KEY + VALUE
print '%s/%s/%s' % (path, key, value)
# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
print_dict(dict_object)
Your function appears overly complicated. Only actually print when you have an object that's not a dictionary, otherwise recurse for all values in a dictionary. I simplified path handling to just one string:
def print_dict(ob, path=''):
    """Print every leaf of a nested dictionary as a slash-separated path.

    Each non-dict value is printed on its own line as ``<path>/<value>``,
    where ``<path>`` is the chain of keys leading to that value. Keys are
    visited in sorted order at every nesting level.

    Args:
        ob: A dict to walk, or a leaf value to print.
        path: Prefix accumulated from the enclosing keys ('' at the top).
    """
    if not isinstance(ob, dict):
        # Single-argument call form: behaves the same on Python 2 and 3.
        print('{}/{}'.format(path, ob))
        return
    for key, value in sorted(ob.items()):
        print_dict(value, '{}/{}'.format(path, key))
I didn't bother with creating OrderedDict objects; all you need is iteration in sorted order.
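This produces the expected output (in the dictionary as posted, key4c is nested inside key4b, so those paths start with /key4/key4b/key4c):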
>>> print_dict(dict_object)
/key1/value1
/key2/value2
/key3/key3a/value3a
/key4/key4a/key4a1/value4a1
/key4/key4a/key4a2/value4a2
/key4/key4a/key4a3/value4a3
/key4/key4b/key4b1/key4b1a/value4b1a
/key4/key4b/key4b1/key4b1b/value4b1b
/key4/key4b/key4b1/key4b1c/value4b1c
/key4/key4b/key4c/key4c1/key4c1a/value4c1a
/key4/key4b/key4c/key4c1/key4c1b/value4c1b
/key4/key4b/key4c/key4c1/key4c1c/value4c1c
Related
I have a json file (an ansible fact file)
What I'm trying to do is based on an array of keys, if the key is in the file, replace the value... This is so we can replace values people don't want to be made public (IP Address for example).
So far, the Python I have can do it if it's a simple key/value pair, but not if it's nested.
So this would be OK and it will replace...
"ansible_diff_mode": false,
"ansible_distribution": "CentOS",
"ansible_distribution_file_parsed": true,
"ansible_distribution_file_path": "/etc/redhat-release",
"ansible_distribution_file_variety": "RedHat",
"ansible_distribution_major_version": "7",
"ansible_distribution_release": "Core",
However, It can't find these values...
"ansible_all_ipv4_addresses": [
"1.2.3.4"
],
"ansible_apparmor": {
"status": "disabled"
},
Here is the code I'm using, and appreciate any pointers...
import json
# Keys whose values are replaced wherever sanitiseDict() finds them.
# NOTE(review): 'ansible_all_ipv4_addressess' ends in a double "s"; the fact
# quoted with this snippet is spelled "ansible_all_ipv4_addresses".
keys_to_sanitise = ['ansible_all_ipv4_addressess','ansible_machine','ansible_bios_version',
'ansible_domain','environment']
# NOTE(review): the file is named .yaml but is parsed with json.load below.
factfile = 'host.yaml'
def sanitiseDict(d):
# Overwrite, in place, the value of every listed key present in d with 'EXCLUDED'.
for k in keys_to_sanitise:
if k in d.keys():
d.update({k: 'EXCLUDED'})
# Recurse into nested dicts only; values of other types (e.g. lists) are not descended into.
for v in d.values():
if isinstance(v, dict):
sanitiseDict(v)
return
with open(factfile, "r") as infile:
jdata = json.load(infile)
# NOTE(review): mydict is not used anywhere in this snippet.
mydict = {}
sanitiseDict(jdata)
print(json.dumps(jdata))
Well, for starters you have an extra s in 'ansible_all_ipv4_addressess'.
You can also clean up the syntax of sanitiseDict a bit, to get this, which reads a little better:
def sanitiseDict(d, keys=None):
    """Replace the values of sensitive keys in ``d`` with 'EXCLUDED', in place.

    The whole structure is walked: nested dicts are sanitised recursively,
    and lists (at any depth) are searched for further dicts, so a sensitive
    key inside a list of dicts is masked too.

    Args:
        d: The dict to sanitise. It is modified in place.
        keys: Iterable of key names to mask. Defaults to the module-level
            ``keys_to_sanitise`` when omitted.

    Returns:
        None.
    """
    if keys is None:
        keys = keys_to_sanitise
    for k in keys:
        if k in d:
            d[k] = 'EXCLUDED'
    # Work through the values with an explicit stack so that lists nested
    # inside lists are unpacked without needing a second recursive helper.
    pending = list(d.values())
    while pending:
        v = pending.pop()
        if isinstance(v, dict):
            sanitiseDict(v, keys)
        elif isinstance(v, list):
            pending.extend(v)
I am trying to convert a Json file that looks like
{
# "item_1":"value_11",
# "item_2":"value_12",
# "item_3":"value_13",
# "item_4":["sub_value_14", "sub_value_15"],
# "item_5":{
# "sub_item_1":"sub_item_value_11",
# "sub_item_2":["sub_item_value_12", "sub_item_value_13"]
# }
# }
TO something that looks like this:
{
# "node_item_1":"value_11",
# "node_item_2":"value_12",
# "node_item_3":"value_13",
# "node_item_4_0":"sub_value_14",
# "node_item_4_1":"sub_value_15",
# "node_item_5_sub_item_1":"sub_item_value_11",
# "node_item_5_sub_item_2_0":"sub_item_value_12",
# "node_item_5_sub_item_2_1":"sub_item_value_13"
# }
I am aware that you can't maintain the order of the JSON file when it is converted to CSV. I am considering a workaround: loading the JSON data into OrderedDict objects (which causes the keys to be added in the order that the input document lists them). However, I am new to working with JSON files, as well as to OrderedDict.
To split items into subgroups i used:
def reduce_item(key, value):
    """Flatten ``value`` into the module-level ``reduced_item`` mapping.

    Lists and dicts are expanded recursively; every leaf is stored under a
    key made of ``key`` plus the list indexes / dict keys on the way down,
    joined with underscores. Keys and leaf values are passed through
    ``to_string`` before being stored.

    Args:
        key: Key prefix for everything found inside ``value``.
        value: A list, a dict, or a leaf value.
    """
    global reduced_item

    # Reduction Condition 1
    # isinstance (not an exact type test) so that subclasses are expanded as
    # well, e.g. the OrderedDict objects produced by object_pairs_hook.
    if isinstance(value, list):
        for i, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(i), sub_item)

    # Reduction Condition 2
    elif isinstance(value, dict):
        for sub_key in value:
            reduce_item(key + '_' + to_string(sub_key), value[sub_key])

    # Base Condition
    else:
        reduced_item[to_string(key)] = to_string(value)
But how do I use OrderedDict along with the above code to show this output:
{
# "node_item_1":"value_11",
# "node_item_2":"value_12",
# "node_item_3":"value_13",
# "node_item_4_0":"sub_value_14",
# "node_item_4_1":"sub_value_15",
# "node_item_5_sub_item_1":"sub_item_value_11",
# "node_item_5_sub_item_2_0":"sub_item_value_12",
# "node_item_5_sub_item_2_1":"sub_item_value_13"
# }
I have the code below as well, but it does not split each item into subgroups based on the conditions of the splitting code above:
import json
from collections import OrderedDict
# Parse the JSON document, keeping object keys in document order by building
# every JSON object as an OrderedDict.
with open("/home/file/official.json", 'r') as source:
    metrics_types = json.load(source, object_pairs_hook=OrderedDict)

print(metrics_types)
That shows:
Any suggestions?
You can use a function that iterates through the given dict or list items and merges the keys from the dict output of the recursive calls:
def flatten(d):
    """Flatten nested dicts/lists into a single-level dict.

    Keys of nested dicts and indexes of nested lists are joined to the
    parent key with underscores, e.g. ``{"a": {"b": [1, 2]}}`` becomes
    ``{"a_b_0": 1, "a_b_1": 2}``.

    An empty nested dict or list is kept as a leaf value under its own key
    instead of vanishing from the result, so no input key is silently lost.

    Args:
        d: A dict, a list, or a leaf value.

    Returns:
        ``d`` itself when it is neither a dict nor a list; otherwise a new
        flat dict.
    """
    if not isinstance(d, (dict, list)):
        return d
    out = {}
    pairs = d.items() if isinstance(d, dict) else enumerate(d)
    for k, v in pairs:
        f = flatten(v)
        if not isinstance(f, dict):
            # Plain leaf value.
            out[k] = f
        elif f:
            for sub_key, leaf in f.items():
                out['%s_%s' % (k, sub_key)] = leaf
        else:
            # v was an empty container: keep it rather than drop the key.
            out[k] = v
    return out
so that given:
d = {
"item_1":"value_11",
"item_2":"value_12",
"item_3":"value_13",
"item_4":["sub_value_14", "sub_value_15"],
"item_5":{
"sub_item_1":"sub_item_value_11",
"sub_item_2":["sub_item_value_12", "sub_item_value_13"]
}
}
flatten(d) returns:
{'item_1': 'value_11',
'item_2': 'value_12',
'item_3': 'value_13',
'item_4_0': 'sub_value_14',
'item_4_1': 'sub_value_15',
'item_5_sub_item_1': 'sub_item_value_11',
'item_5_sub_item_2_0': 'sub_item_value_12',
'item_5_sub_item_2_1': 'sub_item_value_13'}
The above assumes that you're using Python 3.7 or later, where dict keys are guaranteed to be ordered. If you're using earlier versions, you can use OrderedDict in place of a regular dict.
consider the sample JSON below.
{
"widget": {
"test": "on",
"window": {
"title": "myWidget1",
"name": "main_window"
},
"image": {
"src": "Images/wid1.png",
"name": "wid1"
}
},
"os":{
"name": "ios"
}
}
Consider the case where we don't know the structure of the JSON or any of its keys. What I need to implement is a Python function which iterates through all the keys and sub-keys and prints each key. That is, knowing only the JSON file name, I should be able to iterate over all keys and sub-keys. The JSON can be of any structure. What I have tried is given below.
# NOTE(review): the path uses backslashes in a non-raw string literal, and
# neither `os` nor `json` is imported in the snippet as shown.
JSON_PATH = "D:\workspace\python\sampleJSON.json"
# NOTE(review): the value returned by expanduser() is discarded here.
os.path.expanduser(JSON_PATH)
def iterateAllKeys(e):
# Print each key of e, then attempt to descend into that key's children.
for key in e.iterkeys():
print key
# NOTE(review): .get() is called on the key itself rather than on e.
for child in key.get(key):
iterateAllKeys(child)
# Load the JSON document and walk it from the top level.
with open(JSON_PATH) as data_file:
data = json.load(data_file)
iterateAllKeys(data)
Here, the iterateAllKeys() function is supposed to print all the keys present in the JSON file. But if only the outer loop is present, ie
def iterateAllKeys(e):
# Outer loop only: prints the keys of e without descending into their values.
for key in e.iterkeys():
print key
It will print the keys "widget" and "os". But,
def iterateAllKeys(e):
# Prints each key of e, then tries to recurse into that key's children.
for key in e.iterkeys():
print key
# NOTE(review): .get() is invoked on `key` (one of e's keys), not on e.
for child in key.get(key):
iterateAllKeys(child)
returns an error - AttributeError: 'unicode' object has no attribute 'get'. My understanding is - since the value of 'child' is not a dict object, we cannot apply the 'key.get()'. But is there any alternate way by which I can iterate the JSON file without specifying any of the key names. Thank you.
You can use recursion to iterate through multi level dictionaries like this:
def iter_dict(dic):
    """Print every key found anywhere inside a nested dict/list structure.

    Keys are printed one per line, each parent key before the keys nested
    under it. Lists carry no keys of their own, but they are searched so
    that dicts stored inside them (at any depth) are reported as well.

    Args:
        dic: A dict, or a list that may contain further dicts/lists.
    """
    if isinstance(dic, list):
        for item in dic:
            if isinstance(item, (dict, list)):
                iter_dict(item)
        return
    for key in dic:
        print(key)
        value = dic[key]
        if isinstance(value, (dict, list)):
            iter_dict(value)
The keys of the first dictionary are iterated and every key is printed, if the item is an instance of dict class, we can use recursion to also iterate through the dictionaries we encounter as items.
You can do this thru auxiliary package like flatten_json.
pip install flatten_json
from flatten_json import flatten
# Print each key of the mapping returned by flatten(); `your_dict` stands for
# the parsed JSON data.
for key in flatten(your_dict).keys():
print(key)
Output:
widget_test
widget_window_title
widget_window_name
widget_image_src
widget_image_name
os_name
If you want to show only the last key rather than the whole path, you can do the following (this only works when the original keys contain no underscore themselves):
print(key.split('_')[-1])
First of all your last function:
def iterateAllKeys(e):
# Quoted from the question: prints each key of e, then tries to recurse.
for key in e.iterkeys():
print key
# NOTE(review): the lookup is performed on `key`, not on the dict `e`.
for child in key.get(key):
iterateAllKeys(child)
key is just the key_value of the dictionary. So if anything you should be using e.get(key) or e[key].
for child in e.get(key):
Now this would not solve your problem, one work-around is using try except, as follows:
def iterateAllKeys(e):
    """Print every key of a nested dict, marking leaf values with ---SKIP---.

    Each key is printed, then its value is walked the same way. A value
    that is not a mapping (string, number, list, ...) has no ``keys()``
    method; the resulting AttributeError is caught and reported as
    ``---SKIP---``.

    Args:
        e: The dict to walk.

    Raises:
        AttributeError: If ``e`` itself has no ``keys()`` method.
    """
    for key in e.keys():
        print(key)
        try:
            iterateAllKeys(e[key])
        except AttributeError:
            # Only the "value is not a dict" case is expected here; any other
            # exception (or Ctrl-C) is a real problem and must propagate.
            print("---SKIP---")
This is maybe not the best work-around, but it certainly works.
With your Data it prints the following:
widget
test
---SKIP---
window
name
---SKIP---
title
---SKIP---
image
src
---SKIP---
name
---SKIP---
os
name
---SKIP---
Using python on web2py server I have following problem.
How do I loop through JSON to find all the keys that have null values (or empty strings) and collect those keys to report what is missing?
This is my json example.
{
"version" : 1,
"general" : {
"scriptFrom" : "",
"scriptTo" : "1.1.2014",
"scriptComment" : "dada"
},
"nbworkersDays" : [{
"days" : ["1", "2"],
"data" : [{
"nbWorkersFrom" : 2,
"nbWorkersTo" : null,
"timeFrom" : "12:00",
"timeTo" : ""
}
,{
"nbWorkersFrom" : 2,
"nbWorkersTo" : 7,
"timeFrom" : "9:00",
"timeTo" : "14:00"
}
]
}
]}
I was thinking of retrieving a list of all such keys and, if a key is nested, reporting it as "first level.second level":
missingData= [scriptFrom, nbworkersDays.nbWorkersTo, nbworkersDays.timeTo]
any suggestions on how to solve this or how would you collect all the errors to report back to client
(I have a web app using web2py)
thank you
You can use a recursive function to iterate over the values of complex objects while remembering the path you're at (to report back):
#!/usr/bin/env python
# used to parse the json text
import json
# Read and parse the JSON document to be inspected.
with open('json.txt', 'r') as handle:
    d = json.load(handle)
# define list of what you consider as "missing"
missings = [None, ""]


def aggregate_missing_paths(aggregator, path, item):
    """Collect the paths of all missing values found inside ``item``.

    Lists and dicts are walked recursively. A dict key extends the path as
    ``path + '.' + key``; list elements keep the path of the list itself.
    Every value that equals an entry of ``missings`` has its path appended
    to ``aggregator``. Add further cases here if other container types
    need to be handled.

    Args:
        aggregator: List that receives the paths; it is modified in place.
        path: Dotted path of ``item`` itself.
        item: The value to inspect.
    """
    # Explicit loops rather than map(): on Python 3 map() is lazy, so using
    # it only for its side effects would never run the recursive calls.
    if isinstance(item, list):
        for element in item:
            aggregate_missing_paths(aggregator, path, element)
    elif isinstance(item, dict):
        for key, value in item.items():
            aggregate_missing_paths(aggregator, path + '.' + key, value)
    elif item in missings:
        # alter list to contain path to missing
        aggregator.append(path)
# Collect every missing path of the parsed document, starting at 'root'.
aggregator = []
aggregate_missing_paths(aggregator, 'root', d)
# Single-argument call form prints the same on Python 2 and Python 3.
print(aggregator)
Edit:
I added a version without the aggregator using a recursive generator:
#!/usr/bin/env python
import json
# Read and parse the JSON document to be inspected.
with open('json.txt', 'r') as handle:
    d = json.load(handle)
# Values that count as "missing".
missings = [None, ""]


def generate_missing_paths(path, item):
    """Yield the path of every missing value found inside ``item``.

    Lists and dicts are walked recursively. A dict key extends the path as
    ``path + '.' + key``; list elements keep the path of the list itself.
    A path is yielded for each value that equals an entry of ``missings``.

    Args:
        path: Dotted path of ``item`` itself.
        item: The value to inspect.

    Yields:
        str: The dotted path of each missing value, in traversal order.
    """
    if isinstance(item, list):
        for v in item:
            for current_path in generate_missing_paths(path, v):
                yield current_path
    elif isinstance(item, dict):
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for k, v in item.items():
            for current_path in generate_missing_paths(path + '.' + k, v):
                yield current_path
    elif item in missings:
        yield path
# Report each missing path on its own line.
for path in generate_missing_paths('root', d):
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(path)
I am trying to decode the following JSON file with Python:
{"node":[
{
"id":"12387",
"ip":"172.20.0.1",
"hid":"213",
"coord":{"dist":"12","lat":"-9.8257","lon":"65.0880"},
"status":{"speed":"90","direction":"N"},
"ts":"12387"}
]
}
By using:
# NOTE(review): the file object is opened without a matching close in this snippet.
json_data=open('sampleJSON')
jdata = json.load(json_data)
# Only the top-level key/value pairs are visited; a nested value is printed
# as a whole rather than walked.
for key, value in jdata.iteritems():
print "Key:"
print key
print "Value:"
print value
and i have as output:
Key:
node
Value:
[{u'status': {u'direction': u'N', u'speed': u'90'}, u'ip': u'172.20.0.1', u'ts': u'12387', u'coord': {u'lat': u'-9.8257', u'lon': u'65.0880', u'dist': u'12'}, u'hid': u'213', u'id': u'12387'}]
And I want to be able to print the keys and values of the nested objects "status" and "coord", and also the key/values of node: "hid", "id", "ip" and "ts".
How can I iterate through all the nested values?
Thank you in advance!
You can use a recursive function to print it all out. This could be improved, but here is the idea:
import json
# Parse the document inside a with-block so the file is closed again as soon
# as it has been read.
with open('data.json') as json_data:
    jdata = json.load(json_data)
def printKeyVals(data, indent=0):
# Recursively print nested lists/dicts; `indent` grows by one per nesting level
# and sets how many spaces precede each key.
# Written for Python 2: uses the print statement, including the trailing-comma
# form that leaves the cursor on the same line.
if isinstance(data, list):
# Bare print ends the current output line before the items are printed.
print
for item in data:
printKeyVals(item, indent+1)
elif isinstance(data, dict):
# Bare print ends the current output line before the keys are printed.
print
for k, v in data.iteritems():
# Trailing comma: the value (or the nested block) follows on the same line.
print " " * indent, k + ":",
printKeyVals(v, indent + 1)
else:
# Leaf value: print it and finish the line.
print data
OUTPUT
node:
status:
direction: N
speed: 90
ip: 172.20.0.1
ts: 12387
coord:
lat: -9.8257
lon: 65.0880
dist: 12
hid: 213
id: 12387
Otherwise, you could just use:
import pprint
pprint.pprint(jdata)
Without knowing more about your specific use case, it's hard to give a general answer, but..
If you have an arbitrarily nested structure, this is a good case for recursion.
simple sample code:
def print_object(object, indent=0):
# Recursively print a nested dict/list structure, indenting by four spaces per
# level of `indent`. Written for Python 2 (print statement, iteritems()).
# NOTE: the parameter name shadows the builtin `object`. The exact type
# comparisons below mean dict/list subclasses fall through to the final else.
if type(object)==dict:
for key, value in object.iteritems():
print " "*4*indent, "Key: ", key
print " "*4*indent, "Value: "
# The value is printed one level deeper than its key.
print_object(value, indent+1)
elif type(object)==list:
for value in object:
print_object(value,indent+1)
else:
# Leaf value.
print " "*4*indent, object
you really don't want to do strict type checking, but it works for quick-and-dirty sample code.
output:
Key: node
Value:
Key: status
Value:
Key: direction
Value:
N
Key: speed
Value:
90
Key: ip
Value:
172.20.0.1
Key: ts
Value:
12387
Key: coord
Value:
Key: lat
Value:
-9.8257
Key: lon
Value:
65.0880
Key: dist
Value:
12
Key: hid
Value:
213
Key: id
Value:
12387
It looks like the top level element of your JSON is a dictionary containing a list of dictionaries. If you want to keep this structure, try the code below.
from pprint import pprint
import json

# Parse the document inside a with-block so the file is closed again as soon
# as it has been read.
with open('sampleJSON') as json_data:
    jdata = json.load(json_data)

# The top-level object holds a list under 'node'; work with its first entry.
node = jdata['node'][0]
# items() exists on both Python 2 and 3; iteritems() is Python 2 only.
for key, value in node.items():
    pprint("Key:")
    pprint(key)
    pprint("Value:")
    pprint(value)