Extracting all paths from a multi-level dictionary - python

I have a dictionary like this:
# Sample nested mapping: each key is a directory name, each string value is
# the leaf stored inside that directory.
dirDict = {
    "DIR1": {
        "DIR11": {
            "DIR111": "Maki111",
            "DIR112": "Maki112",
        },
        "DIR12": "Maki12",
        "DIR13": {
            "DIR131": "Maki131",
        },
    },
}
Imagine this as a folder structure. I would like to get something similar to what os.walk would produce for a folder structure, something like this:
["DIR1/DIR11/DIR111/Maki111",
"DIR1/DIR11/DIR112/Maki112",
"DIR1/DIR12/Maki12",
"DIR1/DIR13/DIR131/Maki131"]
So it is basically all the paths to the dictionary values. I tried many approaches with recursive functions but I got lost.
Here is my latest trial:
# Asker's latest attempt, kept exactly as posted (it uses Python 2 print
# statements). According to the question it does not yet produce the desired
# list of paths; the print calls are debugging output only.
def walk(input_dict, path_string = "", result = ""):
for key, value in input_dict.items():
if isinstance(value, dict):
path_string += "/" + key
print "==== DICT ====", "\nkey: ", key, "\nvalue: ", value, "\n\t\tpath_string: ", path_string
result = walk(value, path_string)
print "\t\t\t\tresulting: ", result
elif isinstance(value, str):
print "==== NOT DICT ===="
path_string += "/" + value
print "\t\tpath_string: ", path_string, "\nvalue: ", value
return path_string
else:
path_string = "/" + key
result += "\n" + result
return result

Using Python 3:
dirDict = {
    "DIR1": {
        "DIR11": {
            "DIR111": "Maki111",
            "DIR112": "Maki112",
        },
        "DIR12": "Maki12",
        "DIR13": {
            "DIR131": "Maki131",
        },
    },
}


def recurse(d, prefix=None, sep='/'):
    """Yield one ``sep``-joined path for every leaf value of nested dict *d*.

    Args:
        d: Mapping whose values are either nested dicts or leaf values.
        prefix: List of keys already traversed; ``None`` means the root.
        sep: Separator placed between path components.

    Yields:
        Strings of the form ``key<sep>key<sep>...<sep>leaf``.
    """
    if prefix is None:
        prefix = []
    for key, value in d.items():
        if isinstance(value, dict):
            # Forward ``sep`` so nested levels honour the caller's separator
            # instead of silently falling back to the default.
            yield from recurse(value, prefix + [key], sep)
        else:
            # ``map(str, ...)`` lets non-string keys/leaves be joined too.
            yield sep.join(map(str, prefix + [key, value]))


print(list(recurse(dirDict)))
Output:
['DIR1/DIR13/DIR131/Maki131', 'DIR1/DIR11/DIR111/Maki111', 'DIR1/DIR11/DIR112/Maki112', 'DIR1/DIR12/Maki12']

def walk(d, path=''):
    """Return every leaf path of the nested dict *d* as a list of strings.

    Args:
        d: Mapping whose values are nested dicts or leaf values.
        path: Prefix already accumulated; each level appends ``key + '/'``.

    Returns:
        A list of ``'k1/k2/.../leaf'`` strings. An empty nested dict
        contributes its own path (``'k1/k2/'``); an empty top-level dict
        with no prefix yields an empty list.
    """
    if not d:
        # Always return a list: the caller ``extend``s with this value, and
        # extending with a bare string would splice in single characters.
        return [path] if path else []
    paths = []
    for k, v in d.items():
        child_path = path + str(k) + '/'
        if isinstance(v, dict):
            paths.extend(walk(v, child_path))
        else:
            paths.append(child_path + str(v))
    return paths

The walk function I posted at https://gist.github.com/nvie/f304caf3b4f1ca4c3884#gistcomment-1597937 can be used as a helper for your problem:
def walk(obj, parent_first=True):
    """Yield ``(path, value)`` for *obj* and every object nested inside it.

    ``path`` is a tuple of dict keys / sequence positions leading from the
    root to ``value``; the root itself has the empty path ``()``.

    Args:
        obj: Any object; dicts, lists and tuples are descended into.
        parent_first: If true, a container is yielded before its children
            (top down); otherwise after them (bottom up).
    """
    # Top down?
    if parent_first:
        yield (), obj

    # For nested objects, the key is the path component.
    if isinstance(obj, dict):
        children = obj.items()
    # For nested lists, the position is the path component.
    elif isinstance(obj, (list, tuple)):
        children = enumerate(obj)
    # Scalar values have no children.
    else:
        children = []

    # Recurse into children
    for key, value in children:
        for child_path, child in walk(value, parent_first):
            yield (key,) + child_path, child

    # Bottom up?
    if not parent_first:
        yield (), obj
Your problem can be approached using something like this:
for path, value in walk(obj):
    if isinstance(value, str):  # leaf node
        path_with_value = path + (value,)
        # Path components can be ints (list positions), so stringify them
        # before joining.
        print("/".join(map(str, path_with_value)))

A compact solution with a list comprehension:
def f(v):
    """Return the path suffixes contributed by value *v* as a list.

    Dicts are flattened recursively, lists are returned as-is and any other
    value becomes a single-element list.
    """
    if isinstance(v, dict):
        return dict_to_list(v)
    if isinstance(v, list):
        return v
    return [v]


def dict_to_list(d):
    """Flatten nested dict *d* into a list of ``'key/.../leaf'`` strings."""
    return ['{}/{}'.format(k, i) for k, v in d.items() for i in f(v)]
# Flatten the sample dictionary, sort the paths in place, then print one
# path per line.
lst = dict_to_list(dirDict)
lst.sort()
print('\n'.join(lst))

Related

How to combine all the values for the same key of JSON data using Python

I want to combine all the values for the same keys of JSON data using Python. Any help would be really appreciated.
Please find below the input data:
{'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTCOY': 'A'}}}
{'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTPFX': 'CN'}}}
{'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTNUM': '50003159'}}}
The output format which I want:
{
"MESSAGE_DATA": {
"BGEN_CENQO_XTRA_KEY": {
"BGEN_CENQO_CLNTCOY": "A",
"BGEN_CENQO_CLNTPFX": "CN",
"BGEN_CENQO_CLNTNUM": "50003159"
}
}
}
Slightly changed code from this answer:
from functools import reduce
# Three partial records that share the same outer keys and differ only in
# the innermost field.
lst = [
    {'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTCOY': 'A'}}},
    {'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTPFX': 'CN'}}},
    {'MESSAGE_DATA': {'BGEN_CENQO_XTRA_KEY': {'BGEN_CENQO_CLNTNUM': '50003159'}}},
]
def merge(a, b, path=None):
    """Recursively merge dict *b* into dict *a* in place and return *a*.

    Args:
        a: Destination dict; it is mutated.
        b: Source dict whose entries are copied into *a*.
        path: Keys traversed so far, used only for the conflict message.

    Returns:
        The same object as *a*, now containing the entries of *b*.

    Raises:
        ValueError: If both dicts hold different non-dict values for the
            same key (``ValueError`` is still caught by ``except Exception``).
    """
    if path is None:
        path = []
    for key, b_value in b.items():
        if key not in a:
            a[key] = b_value
            continue
        a_value = a[key]
        if isinstance(a_value, dict) and isinstance(b_value, dict):
            merge(a_value, b_value, path + [str(key)])
        elif a_value != b_value:
            raise ValueError('Conflict at %s' % '.'.join(path + [str(key)]))
        # Equal leaf values need no action.
    return a
# reduce() uses lst[0] as the accumulator and merge() updates it in place,
# so d is the same object as lst[0] after this call.
d = reduce(merge, lst)
Structure of dictionary d will be:
{'MESSAGE_DATA': {
'BGEN_CENQO_XTRA_KEY': {
'BGEN_CENQO_CLNTCOY': 'A',
'BGEN_CENQO_CLNTNUM': '50003159',
'BGEN_CENQO_CLNTPFX': 'CN'
}
}}

Recursive function return None

I have a function that runs over an API output and should return the path to specific key.
Here is the function:
# Asker's original version, kept exactly as posted. The question reports
# that it returns None for the example input shown below.
def find_path(obj, val, path=''):
if isinstance(obj, dict):
for k, v in obj.items():
if k == val:
return f'{path}[{k!r}]'
return find_path(v, val, path + f'[{k!r}]')
elif isinstance(obj, list):
for i, v in enumerate(obj):
if v == val:
return f'{path}[{i!r}]'
return find_path(v, val, path + f'[{i!r}]')
API output Example:
# Trimmed sample of the API response used in the question.
ex = {
    'Resources': [
        {
            'uschemas': {
                'emailSelfUpdateAllowed': True,
                'emailVerificationDays': 30,
                'approvers': {
                    'manager': False,
                    'secondLevelManager': False,
                    'owner': False,
                    'workGroup': 'workgroup',
                },
            },
        },
    ],
}
When I run the function I get None:
bla = find_path(ex, 'approvers')
print(bla)
>>> None
I expect to get:
['Resources'][0]['uschemas']['approvers']
I can only get the expected output when I am using the function with print instead of return.
Can someone help me to understand why? and how can I make it work with returns and not prints because I need to use its output.
Thank you.
You need to return a value after the for loop is done; that value is what the recursive call produced. If the function reaches its end without an explicit return, it returns None.
def find_path(obj, val, path=''):
    """Return the subscript path to the first occurrence of *val* in *obj*.

    A dict key equal to *val*, or a list element equal to *val*, counts as a
    match. The search is depth-first in iteration order.

    Args:
        obj: Nested structure of dicts and lists (other values are leaves).
        val: Dict key (or list element) to look for.
        path: Subscript string accumulated so far, e.g. ``"['a'][0]"``.

    Returns:
        A string such as ``"['Resources'][0]['uschemas']['approvers']"``,
        or ``None`` when nothing matches.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k == val:
                return f'{path}[{k!r}]'
            found = find_path(v, val, f'{path}[{k!r}]')
            # Stop at the first hit; continuing the loop would let a later
            # sibling overwrite the result with None.
            if found is not None:
                return found
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            if v == val:
                return f'{path}[{i!r}]'
            found = find_path(v, val, f'{path}[{i!r}]')
            if found is not None:
                return found
    return None
bla = find_path(ex, 'approvers')
print(bla) # ['Resources'][0]['uschemas']['approvers']

How can I get the specified key value in a nested dictionary in a most effective way?

There is a nested dictionary like:
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxxx.jpg",
...
"child_attachments":{
"picture":"xxxxx.jpg",
...
}
}
...
}
The problem is at every level of the dictionary, the key picture may exist, how can I get the picture's value in a most effective way?
Here's my trial, but failed:
# Asker's attempt, kept exactly as posted; the question describes it as
# failing.
def get_picture_url(data):
for key, value in data.items():
if key == "picture":
return data[key]
else:
if isinstance(value, dict):
return get_picture_url(value)
get_picture_url(data_dict)
This should work for the general case of an arbitrarily nested dictionary with JSON-like structure:
def get_picture(data):
    """Collect the values of every ``'picture'`` key in a JSON-like structure.

    Args:
        data: Arbitrarily nested combination of dicts, lists and scalars.

    Returns:
        A list with the value of each ``'picture'`` key, in traversal order.
        The value stored under a ``'picture'`` key is not searched further.
    """
    # you can remove this case if the
    # input doesn't contain lists
    if isinstance(data, list):
        ans = []
        for e in data:
            ans += get_picture(e)
        return ans
    if not isinstance(data, dict):
        return []
    ans = []
    for k, v in data.items():
        if k == 'picture':
            ans.append(v)
        else:
            ans += get_picture(v)
    return ans
It'll traverse all levels of the data structure, looking for keys named 'picture' and accumulating all of their values in a single output list. If you're sure that there are no lists in the input, we can simplify the solution a bit:
def get_picture(data):
    """Collect the values of every ``'picture'`` key in nested dicts.

    Simplified variant: only dicts are descended into, so pictures inside
    lists are not found. Non-dict input yields an empty list.
    """
    ans = []
    if isinstance(data, dict):
        for k, v in data.items():
            if k == 'picture':
                ans.append(v)
            else:
                ans += get_picture(v)
    return ans
Either way, it works as expected for your sample input:
# Sample input: a "picture" key appears at each of the three nesting levels.
data_dict = {
    "picture": "xxx.jpg",
    "link_data": {
        "picture": "xxxx.jpg",
        "child_attachments": {
            "picture": "xxxxx.jpg",
        },
    },
}
get_picture(data_dict)
=> ['xxx.jpg', 'xxxx.jpg', 'xxxxx.jpg']
You are not checking the returned value of the recursive call to get_picture_url.
This should give you the top most picture in your dict:
def get_picture_url(data, picture_key="picture"):
    """Return the first non-``None`` value stored under *picture_key*.

    The current level is checked before any nested dict, so the top-most
    occurrence wins.

    Args:
        data: Nested dict to search; any non-dict value yields ``None``.
        picture_key: Key whose value should be returned.

    Returns:
        The value found, or ``None`` if the key is absent everywhere.
    """
    if not isinstance(data, dict):
        return None
    picture_url = data.get(picture_key)
    if picture_url is not None:
        return picture_url
    for value in data.values():
        # Forward picture_key; otherwise nested levels would always search
        # for the default "picture" key.
        picture_url = get_picture_url(value, picture_key)
        if picture_url is not None:
            return picture_url
    return None

How to return parents in path, and locate key in JSON

I'm trying to figure out how to find a key called ['text'] and then get the parent keys from the nest. It's the third key that's different, all the other tags are the same.
html_data = data['data']['document_data']['dataItem-ihmty5rw']['text']
I'm using this function to get the keys:
def printKeysValues(d):
    """Print ``key : value`` for every non-dict value in nested dict *d*.

    Nested dicts are descended into; their own keys are not printed.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            printKeysValues(v)
        else:
            print("{0} : {1}".format(k, v))
And this function to find the indent:
def pretty(d, indent=0):
    """Print nested dict *d* as a tab-indented tree.

    Each key is printed on its own line at depth *indent*; a non-dict value
    is printed one level deeper than its key.
    """
    for key, value in d.items():
        print('\t' * indent + str(key))
        if isinstance(value, dict):
            pretty(value, indent + 1)
        else:
            print('\t' * (indent + 1) + str(value))
I'm still not entirely sure if this is what you wanted, but if you just want the text per item dictionary, I think this should work:
# a dictionary of item string to dictionary containing a "text" key
item_map = data['data']['document_data']
# Use a separate loop variable so the dict being iterated is not rebound
# while the loop is running.
for item_string, item in item_map.items():
    print(item_string)  # the item string (e.g. 'dataItem-ihmty5rw')
    print(item['text'])  # this is the text associated with the item string

urlencode a multidimensional dictionary in python

How can I get a URL-encoded version of a multidimensional dictionary in Python? Unfortunately, urllib.urlencode() only works in a single dimension. I would need a version capable of recursively encoding the dictionary.
For example, if I have the following dictionary:
{'a': 'b', 'c': {'d': 'e'}}
I want to obtain the following string:
a=b&c[d]=e
OK people. I implemented it myself:
import urllib
def recursive_urlencode(d):
    """URL-encode a multidimensional dictionary.

    Nested keys are rendered PHP-style (``outer[inner]=value``); every key
    and value is percent-encoded, the brackets themselves are not.

    >>> data = {'a': 'b&c', 'd': {'e': {'f&g': 'h*i'}}, 'j': 'k'}
    >>> recursive_urlencode(data)
    'a=b%26c&d[e][f%26g]=h%2Ai&j=k'
    """
    # Local import: on Python 3 a bare ``import urllib`` does not guarantee
    # that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import quote

    def recursion(d, base=None):
        # ``None`` instead of a mutable ``[]`` default argument.
        base = [] if base is None else base
        pairs = []
        for key, value in d.items():
            new_base = base + [key]
            if hasattr(value, 'values'):
                pairs += recursion(value, new_base)
            else:
                # str() first so non-string keys/values can be quoted too.
                first, *rest = (quote(str(part)) for part in new_base)
                name = first + ''.join('[%s]' % part for part in rest)
                pairs.append('%s=%s' % (name, quote(str(value))))
        return pairs

    return '&'.join(recursion(d))


if __name__ == "__main__":
    import doctest
    doctest.testmod()
Still, I'd be interested to know if there's a better way to do this. I can't believe Python's standard library doesn't implement this.
Something like this?
import urllib.parse

a = {'a': 'b', 'c': {'d': 'e'}}
# NOTE: only the first entry of each nested dict is encoded, and only one
# level deep.
url = urllib.parse.urlencode([
    ('%s[%s]' % (k, next(iter(v))), next(iter(v.values())))
    if isinstance(v, dict) else (k, v)
    for k, v in a.items()
])
# url == 'a=b&c%5Bd%5D=e'
Based on the code of #malaney, I think that the code below emulates the PHP function http_build_query() quite well.
#!/usr/bin/env python3
import urllib.parse
def http_build_query(data):
    """URL-encode nested dicts/lists/tuples in the style of PHP's
    ``http_build_query()``.

    Nested keys become ``outer[inner]`` and sequence positions become
    ``[index]``; the flattened pairs are then passed through
    ``urllib.parse.urlencode`` (so brackets are percent-encoded).

    Args:
        data: Dict, list, tuple or scalar to encode. A top-level sequence
            renders its positions as ``[0]``, ``[1]``, ...

    Returns:
        The URL-encoded query string.
    """
    parents = []  # key/index chain of the value currently being visited
    pairs = {}    # flattened key -> stringified leaf value

    def renderKey(parents):
        outStr = ''
        for depth, x in enumerate(parents):
            # Integer components are always bracketed, even at depth 0.
            s = "[%s]" if depth > 0 or isinstance(x, int) else "%s"
            outStr += s % str(x)
        return outStr

    def r_urlencode(data):
        if isinstance(data, (list, tuple)):
            for i, item in enumerate(data):
                parents.append(i)
                r_urlencode(item)
                parents.pop()
        elif isinstance(data, dict):
            for key, value in data.items():
                parents.append(key)
                r_urlencode(value)
                parents.pop()
        else:
            pairs[renderKey(parents)] = str(data)
        return pairs

    return urllib.parse.urlencode(r_urlencode(data))


if __name__ == '__main__':
    payload = {
        'action': 'add',
        'controller': 'invoice',
        'code': 'debtor',
        'InvoiceLines': [
            {'PriceExcl': 150, 'Description': 'Setupfee'},
            {'PriceExcl': 49.99, 'Description': 'Subscription'},
        ],
        'date': '2016-08-01',
        'key': 'Yikes&ampersand',
    }
    print(http_build_query(payload))
    payload2 = [
        'item1',
        'item2',
    ]
    print(http_build_query(payload2))
I think the code below may be what you want
import urllib.parse
def url_encoder(params):
    """URL-encode a dict whose values may be nested dicts, lists or tuples.

    Nested dict keys and sequence positions are appended to the parent key
    as ``parent[child]`` before the flattened mapping is handed to
    ``urllib.parse.urlencode``.

    Args:
        params: Top-level dict of parameters. Any other top-level type
            produces an empty string.

    Returns:
        The URL-encoded query string.
    """
    g_encode_params = {}

    def _encode_params(value, p_key):
        # Containers contribute one "p_key[child]" entry per element;
        # anything else is a leaf stored under the accumulated key.
        if isinstance(value, dict):
            children = value.items()
        elif isinstance(value, (list, tuple)):
            children = enumerate(value)
        else:
            g_encode_params[p_key] = value
            return
        for child_key, child_value in children:
            _encode_params(child_value, '{}[{}]'.format(p_key, child_key))

    if isinstance(params, dict):
        for key in params:
            _encode_params(params[key], key)

    return urllib.parse.urlencode(g_encode_params)


if __name__ == '__main__':
    params = {'name': 'interface_name', 'interfaces': [{'interface': 'inter1'}, {'interface': 'inter2'}]}
    print(url_encoder(params))
the output is
interfaces%5B1%5D%5Binterface%5D=inter2&name=interface_name&interfaces%5B0%5D%5Binterface%5D=inter1
which, when decoded, looks like
interfaces[1][interface]=inter2&name=interface_name&interfaces[0][interface]=inter1
PS: you may want to use OrderedDict in place of dict above
The above solution only works for arrays with depth < 2. The code below will properly urlencode a multidimensional array of any depth.
#!/usr/bin/env python
import sys
import urllib
def recursive_urlencode(data):
    """URL-encode a dict nested to any depth.

    Nested keys are flattened to ``a[b][c]`` and the resulting pairs are
    percent-encoded exactly once by ``urllib.parse.urlencode``.

    Args:
        data: Dict whose values are leaves or further dict-like objects
            (anything with a ``values`` attribute).

    Returns:
        The URL-encoded query string.
    """
    # Local import: a bare ``import urllib`` does not guarantee that the
    # ``urllib.parse`` submodule is loaded on Python 3.
    from urllib.parse import urlencode

    def r_urlencode(data, parents, pairs):
        for key, value in data.items():
            if hasattr(value, 'values'):
                r_urlencode(value, parents + [key], pairs)
            else:
                pairs[renderKey(parents + [key])] = renderVal(value)
        return pairs

    return urlencode(r_urlencode(data, [], {}))


def renderKey(parents):
    """Render a chain of keys as ``first[second][third]``."""
    outStr = ''
    for depth, x in enumerate(parents):
        template = "[%s]" if depth > 0 else "%s"
        outStr += template % renderVal(x)
    return outStr


def renderVal(val):
    """Return *val* as text.

    No percent-encoding happens here: ``urlencode`` in
    ``recursive_urlencode`` escapes keys and values, and quoting here as
    well would double-encode them (``&`` -> ``%26`` -> ``%2526``).
    """
    return str(val)


def main():
    # Sample input so the script is runnable on its own.
    payload = {'a': 'b&c', 'd': {'e': {'f': 'g'}}}
    print(recursive_urlencode(payload))


if __name__ == '__main__':
    sys.exit(main())
The function get_encoded_url_params() takes a dict as argument and returns url encoded form of the dict.
def get_encoded_url_params(d, *, encode=False):
    """Flatten a nested dictionary into a URL query string.

    Example::

        d = {"addr": {"country": "US", "line": ["a", "b"]},
             "routing_number": "011100915", "token": "asdf"}

        get_encoded_url_params(d)
        # 'addr[country]=US&addr[line][0]=a&addr[line][1]=b&routing_number=011100915&token=asdf'

        get_encoded_url_params(d, encode=True)
        # 'addr%5Bcountry%5D=US&addr%5Bline%5D%5B0%5D=a&addr%5Bline%5D%5B1%5D=b&routing_number=011100915&token=asdf'

    Args:
        d: Dict whose values are leaves, nested dicts or lists.
        encode: When true, pass every key and value through
            ``quote_plus()``; the default leaves them unescaped.

    Returns:
        ``key=value`` pairs joined with ``&``; nested keys are written as
        ``outer[inner]`` and list positions as ``outer[index]``.
    """
    from urllib.parse import quote_plus

    def get_pairs(value, base):
        if isinstance(value, dict):
            return get_dict_pairs(value, base)
        if isinstance(value, list):
            return get_list_pairs(value, base)
        text = str(value)
        if encode:
            return [quote_plus(base) + '=' + quote_plus(text)]
        return [base + '=' + text]

    def get_list_pairs(li, base):
        pairs = []
        for idx, value in enumerate(li):
            new_base = base + '[' + str(idx) + ']'
            pairs += get_pairs(value, new_base)
        return pairs

    def get_dict_pairs(d, base=''):
        pairs = []
        for key, value in d.items():
            key = str(key)  # allow non-string keys such as ints
            new_base = key if base == '' else base + '[' + key + ']'
            pairs += get_pairs(value, new_base)
        return pairs

    return '&'.join(get_dict_pairs(d))
what about json.dumps and json.loads?
import json

d = {'a': 'b', 'c': {'d': 'e'}}
s = json.dumps(d)  # s: '{"a": "b", "c": {"d": "e"}}'
json.loads(s)  # -> d
what about this simplified version:
def _clean(value):
    """Return *value* converted to text and percent-encoded."""
    from urllib.parse import quote
    return quote(str(value))


# One "k=v" pair per plain value and one "k[ik]=iv" pair per entry of a
# nested dict (one level only); the outer comprehension flattens the groups.
'&'.join([pair
          for group in [["%s[%s]=%s" % (k, ik, _clean(iv)) for ik, iv in v.items()]
                        if isinstance(v, dict) else ["%s=%s" % (k, _clean(v))]
                        for k, v in data.items()]
          for pair in group])
I agree it is not readable; flattening the list could probably be done better with itertools.chain than with another list comprehension.
This only goes one level deep. Yours can go N levels deep if you add some logic to emit the right number of "[%s]" parts for each level, but I guess that is not really necessary.
If you want to convert a Python dict, list or nested structure to a PHP-array-style urlencoded string:
In Python, the data you most often need to urlencode is a dict, list, tuple, or a nested combination of them, for example:
# Usage examples; the comment under each call shows the output the answer
# gives for it. recursive_urlencode here presumably refers to the helper
# from the linked to_php_post_arr project - TODO confirm.
a = [1, 2]
print(recursive_urlencode(a))
# 0=1&1=2
a2 = (1, '2')
print(recursive_urlencode(a2))
# 0=1&1=2
b = {'a': 11, 'b': 'foo'}
print(recursive_urlencode(b))
# a=11&b=foo
c = {'a': 11, 'b': [1, 2]}
print(recursive_urlencode(c))
# a=11&b[0]=1&b[1]=2
d = [1, {'a': 11, 'b': 22}]
print(recursive_urlencode(d))
# 0=1&1[a]=11&1[b]=22
e = {'a': 11, 'b': [1, {'c': 123}, [3, 'foo']]}
print(recursive_urlencode(e))
# a=11&b[0]=1&b[1][c]=123&b[2][0]=3&b[2][1]=foo
https://github.com/Viky-zhang/to_php_post_arr
P.S. some code from: https://stackoverflow.com/a/4014164/2752670

Categories