I have a use case where I'll be getting complex dictionaries as input, with an inconsistent hierarchy.
one use case would be like below :
pro : 1
rel : 1.2
del : demo
cb :{
a : b
}
cd : {
en : {
b : a
}
}
cc : {
a : b
}
I used something like this : -
# Flatten a nested mapping into dotted section names, load the sections into a
# RawConfigParser and write them to 'example.ini'.
def jsonToDict(data):
d = data
# Maps section name -> {option: value}; unknown sections start out as {}.
res = defaultdict(dict)
# Recursive helper. `root` is the dotted section name built so far
# ('' for the top level of the input).
def dict2ini(d, root):
for k, v in d.items():
if isinstance(v, dict):
# Nested dict: extend the dotted section name with this key.
_key = '%s.%s' % (root, k) if root else k
if v:
dict2ini(v, _key)
else:
# An empty dict is recorded as a section with no options.
res[_key] = {}
elif isinstance(v, (str,int, float)):
# NOTE(review): this rebinds res[root] to a brand-new one-item dict for
# every scalar, so options stored earlier for the same section are replaced.
res[root] = {k:v}
dict2ini(d, '')
config = configparser.RawConfigParser()
# Sections are added in sorted-name order.
for key in sorted(res.keys()):
config.add_section(key)
for subKey, value in res[key].items():
config.set(key, subKey, value)
with open('example.ini', 'w') as configfile:
config.write(configfile)
But the above doesn't process all the values present in my dict, only the first line in each section. I went through [ConfigParser][1], but I am unable to find a solution for my use case. Can someone suggest a workaround? Please also note that the above data is not fixed; it will change according to our needs.
EXAMPLE INI :
pro = 1
rel = 1.2
del = demo
[cb]
a=b
[cd.en]
b=a
## suppose if multiple data is present in cd then
[cd]
b=a
[cd.en]
b=a
## end
[cc]
a=b
First, take a close look at your code. In dict2ini you iterate over a list of items in d:
for k, v in d.items():
And if v is a scalar value, you add it to the res dictionary...but you always use the same key:
elif isinstance(v, (str, int, float)):
res[root] = {k: v}
So for each item in the dictionary, you're going to override the previous value of res[root]. With a few minor changes, I think you'll get closer to what you want:
def dict2ini(d, root):
    """Copy the scalars of *d* into the section named *root*, recursing into dicts.

    Relies on a ``res`` mapping (section name -> options dict) from the
    enclosing scope. Nested dicts become sections whose name is the dotted
    path of keys leading to them.
    """
    # Fetch the section once so every scalar of this level lands in the
    # same dict instead of replacing it.
    section = res[root]
    for k, v in d.items():
        if isinstance(v, dict):
            _key = '%s.%s' % (root, k) if root else k
            if v:
                dict2ini(v, _key)
            else:
                # An empty dict is an empty *section*; storing it in
                # `section` would turn it into an option value of the parent.
                res[_key] = {}
        elif isinstance(v, (str, int, float)):
            section[k] = v


dict2ini(d, '')
This gives me as output:
[DEFAULT]
pro = 1
del = demo
rel = 1.2
[]
[cb]
a = b
[cc]
a = b
[cd]
[cd.en]
b = a
You obviously have a few additional things to fix there, but hopefully that sets you in the right direction.
Related
I am trying to develop the most efficient/comprehensive function with this aim:
Sorting every nested dictionary or list inside a dictionary or list.
Note: I used collections.OrderedDict because I wanted it to be useful for Python versions before 3.7 as well, which do not preserve order in dictionaries.
Based on the recursive function from this thread, which sorts only nested dictionaries, I'm trying to build a corresponding recursive function that sorts only nested lists, and then to combine them using if statements that check whether the object to be sorted is a dictionary or a list.
This is what I have developed:
from collections import OrderedDict
# Build an OrderedDict from `d` with its keys inserted in sorted order.
# dict values are passed to recursively_order_dict and list values to
# recursively_order_list before being stored.
def recursively_order_dict(d):
ordered_dict = OrderedDict()
for key in sorted(d.keys()):
val = d[key]
if isinstance(val, dict):
val = recursively_order_dict(val)
if isinstance(val, list):
val = recursively_order_list(val)
ordered_dict[key] = val
return ordered_dict
# Build a new list from the sorted elements of `l`; list elements are passed
# to recursively_order_list and dict elements to recursively_order_dict.
def recursively_order_list(l):
ordered_list = []
# sorted() is called without a key function, so the elements are compared
# with each other directly.
for element in sorted(l):
if isinstance(element, list):
element = recursively_order_list(element)
if isinstance(element, dict):
element = recursively_order_dict(element)
ordered_list.append(element)
return ordered_list
# Entry point: dispatch to the dict or list sorter depending on the type of
# `iterable1`; other types are reported with print().
def order_all_dicts_and_lists_in_iterable(iterable1):
if isinstance(iterable1, dict):
ordered_iterable = recursively_order_dict(iterable1)
# NOTE(review): this is a second `if`, not an `elif`, so the `else` below
# appears to pair with the list check only - confirm against the original
# indentation.
if isinstance(iterable1, list):
ordered_iterable = recursively_order_list(iterable1)
else:
print("%s\n is nor a list nor a dictionary.\nIts type is %s." % (iterable1, type(iterable1)) )
return
return ordered_iterable
It works fine on many examples, but it fails when processing the dictionary dict_2:
dict_2 = {
"key9":"value9",
"key5":"value5",
"key3":{
"key3_1":"value3_1",
"key3_5":"value3_5",
"key3_2":[[],"value3_2_1",[] ],
},
"key2":"value2",
"key8":{
"key8_1":"value8_1",
"key8_5":{
"key8_5_4":["value8_5_b", "value8_5_a", "value8_5_c"],
"key8_5_2":[{},{"key8_5_2_4_2":"value8_5_2_4_2", "key8_5_2_4_1":"value8_5_2_4_1", "key8_5_2_4_5":"value8_5_2_4_5"}, "value8_5_2_1",{}],
},
"key8_2":"value8_2",
},
"key1":"value1",
}
sorted_dict_2 = order_all_dicts_and_lists_in_iterable(dict_2)
and throws this error:
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-12-9cbf4414127d> in <module>
----> 1 order_all_dicts_and_lists_in_iterable(dict_2)
<ipython-input-9-352b10801248> in order_all_dicts_and_lists_in_iterable(iterable1)
26
27 if isinstance(iterable1, dict):
---> 28 ordered_iterable = recursively_order_dict(iterable1)
29 if isinstance(iterable1, list):
30 ordered_iterable = order_all_dicts_and_lists_in_iterable(ordered_iterable)
<ipython-input-9-352b10801248> in recursively_order_dict(d)
6 val = d[key]
7 if isinstance(val, dict):
----> 8 val = recursively_order_dict(val)
9 if isinstance(val, list):
10 val = recursively_order_list(val)
<ipython-input-9-352b10801248> in recursively_order_dict(d)
8 val = recursively_order_dict(val)
9 if isinstance(val, list):
---> 10 val = recursively_order_list(val)
11 ordered_dict[key] = val
12 return ordered_dict
<ipython-input-9-352b10801248> in recursively_order_list(l)
14 def recursively_order_list(l):
15 ordered_list = []
---> 16 for element in sorted(l):
17 if isinstance(element, list):
18 element = recursively_order_list(element)
TypeError: '<' not supported between instances of 'str' and 'list'
So it looks like Python cannot sort iterable made of strings/numbers and lists/dictionaries, because it does not know what to take from lists/dictionaries as a term of comparison.
How could I change my function in order to get lists/dictionaries just being put at the end/start of the sorted iterable, when compared to strings/numbers ?
In few words, how should I change my function to have it turn the above dict_2 into this (hand-edited) sorted_dict_2?
sorted_dict_2 = {
"key1":"value1",
"key2":"value2",
"key3":{
"key3_1":"value3_1",
"key3_2":[ [],[],"value3_2_1" ],
"key3_5":"value3_5",
},
"key5":"value5",
"key8":{
"key8_1":"value8_1",
"key8_2":"value8_2",
"key8_5":{
"key8_5_2":[
{},
{},
"value8_5_2_1",
{
"key8_5_2_4_1":"value8_5_2_4_1",
"key8_5_2_4_2":"value8_5_2_4_2",
"key8_5_2_4_5":"value8_5_2_4_5"
},
],
"key8_5_4":["value8_5_a", "value8_5_b", "value8_5_c"],
},
},
"key9":"value9",
}
So, basically, you need a key function that makes all containers compare less than (or greater than) anything else. A handy value for this is float('-inf') (or float('inf')). However, since we don't know whether the thing we are sorting contains numbers or strings, we have to transform everything into a tuple and manually map each string to its ordinal values: map(ord, x)
The following is an example for when you want containers to move to the front (hence negative infinity):
from collections import OrderedDict
def recursively_order_dict(d):
    """Return an ``OrderedDict`` copy of *d* with its keys in sorted order.

    Nested dicts and lists found among the values are ordered recursively.
    """
    ordered_dict = OrderedDict()
    for key in sorted(d.keys()):
        val = d[key]
        if isinstance(val, dict):
            val = recursively_order_dict(val)
        elif isinstance(val, list):
            val = recursively_order_list(val)
        ordered_dict[key] = val
    return ordered_dict


def _move_containers_to_end(x):
    """Sort key that lets containers be ordered alongside strings and numbers.

    Every key is a tuple so that all elements are mutually comparable.
    Despite the function's name, the ``-inf`` used here places lists and
    dicts at the *front* of the result; swap in ``float('inf')`` to push
    them to the end instead.
    """
    if isinstance(x, (list, dict)):
        # to put at the end, use inf, at the start, -inf
        return (float('-inf'),)
    elif isinstance(x, str):
        return tuple(map(ord, x))
    else:  # assuming we only can get numbers at this point
        return (x,)


def recursively_order_list(l):
    """Return a sorted copy of *l* with nested lists and dicts ordered too."""
    ordered_list = []
    for element in sorted(l, key=_move_containers_to_end):
        if isinstance(element, list):
            element = recursively_order_list(element)
        elif isinstance(element, dict):
            element = recursively_order_dict(element)
        ordered_list.append(element)
    return ordered_list


def order_all_dicts_and_lists_in_iterable(iterable1):
    """Recursively order a dict or list.

    Returns the ordered copy, or ``None`` (after printing a message) when
    *iterable1* is neither a dict nor a list.
    """
    if isinstance(iterable1, dict):
        return recursively_order_dict(iterable1)
    if isinstance(iterable1, list):
        return recursively_order_list(iterable1)
    print("%s\n is nor a list nor a dictionary.\nIts type is %s." % (iterable1, type(iterable1)) )
    # Nothing was ordered, so there is no result to hand back.
    return None
The result of the above is:
OrderedDict([('key1', 'value1'),
('key2', 'value2'),
('key3',
OrderedDict([('key3_1', 'value3_1'),
('key3_2', [[], [], 'value3_2_1']),
('key3_5', 'value3_5')])),
('key5', 'value5'),
('key8',
OrderedDict([('key8_1', 'value8_1'),
('key8_2', 'value8_2'),
('key8_5',
OrderedDict([('key8_5_2',
[OrderedDict(),
OrderedDict([('key8_5_2_4_1',
'value8_5_2_4_1'),
('key8_5_2_4_2',
'value8_5_2_4_2'),
('key8_5_2_4_5',
'value8_5_2_4_5')]),
OrderedDict(),
'value8_5_2_1']),
('key8_5_4',
['value8_5_a',
'value8_5_b',
'value8_5_c'])]))])),
('key9', 'value9')])
Note, you may want to do something like:
import sys

# Built-in dicts only guarantee insertion order from Python 3.7 onwards, so
# older interpreters need collections.OrderedDict to keep the sorted order.
if sys.version_info < (3, 7):
    from collections import OrderedDict as OrderedMapping
else:
    OrderedMapping = dict
Then use:
ordered_dict = OrderedMapping()
in recursively_order_dict
There is a nested dictionery like :
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxxx.jpg",
...
"child_attachments":{
"picture":"xxxxx.jpg",
...
}
}
...
}
The problem is at every level of the dictionary, the key picture may exist, how can I get the picture's value in a most effective way?
Here's my trial, but failed:
# Look through the items of `data` for the key "picture", descending into
# dict values.
def get_picture_url(data):
for key, value in data.items():
if key == "picture":
return data[key]
else:
if isinstance(value, dict):
# NOTE(review): the result of the recursive call is returned as-is, so the
# remaining items are presumably never examined once a dict value is
# reached - confirm against the original indentation.
return get_picture_url(value)
get_picture_url(data_dict)
This should work for the general case of an arbitrarily nested dictionary with JSON-like structure:
def get_picture(data):
    """Collect every value stored under a 'picture' key in *data*.

    *data* may be an arbitrarily nested combination of dicts and lists.
    Values of any other type contribute nothing. The value of a 'picture'
    key is appended as-is and is not searched further.
    """
    # you can remove this case if the
    # input doesn't contain lists
    if isinstance(data, list):
        ans = []
        for e in data:
            ans += get_picture(e)
        return ans
    elif not isinstance(data, dict):
        return []
    else:
        ans = []
        for k, v in data.items():
            if k == 'picture':
                ans.append(v)
            else:
                ans += get_picture(v)
        return ans
It'll traverse all levels of the data structure, looking for keys named 'picture' and accumulating all of their values in a single output list. If you're sure that there are no lists in the input, we can simplify the solution a bit:
def get_picture(data):
    """Collect every value stored under a 'picture' key in nested dicts.

    Only dicts are traversed; any other value (including a list)
    contributes nothing to the result.
    """
    ans = []
    if isinstance(data, dict):
        for k, v in data.items():
            if k == 'picture':
                ans.append(v)
            else:
                ans += get_picture(v)
    return ans
Either way, it works as expected for your sample input:
data_dict = {
"picture":"xxx.jpg",
"link_data":{
"picture":"xxxx.jpg",
"child_attachments":{
"picture":"xxxxx.jpg"
}
}
}
get_picture(data_dict)
=> ['xxx.jpg', 'xxxx.jpg', 'xxxxx.jpg']
You are not checking the returned value of the recursive call to get_picture_url.
This should give you the top most picture in your dict:
def get_picture_url(data, picture_key="picture"):
    """Return the first non-None value stored under *picture_key*.

    The key is looked up on *data* itself first; if it is missing (or maps
    to None) the dict's values are searched in iteration order. Returns
    None when *data* is not a dict or no value is found.
    """
    if not isinstance(data, dict):
        return None
    picture_url = data.get(picture_key)
    if picture_url is not None:
        return picture_url
    for value in data.values():
        # Forward picture_key, otherwise nested levels would fall back to
        # searching for the default "picture" key.
        picture_url = get_picture_url(value, picture_key)
        if picture_url is not None:
            return picture_url
    return None
Given some arbitrary dictionary
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
}
}
}
I've written a small routine to flatten it in the process of writing an answer to another question.
def recursive_flatten(mydict):
    """Flatten nested dicts into one dict whose keys are dotted paths.

    Keys are joined with '.', so they are expected to be strings. There is
    no protection against self-referencing dicts: a dict that contains
    itself makes the recursion run until the stack is exhausted.
    """
    d = {}
    for k, v in mydict.items():
        if isinstance(v, dict):
            for k2, v2 in recursive_flatten(v).items():
                d[k + '.' + k2] = v2
        else:
            d[k] = v
    return d
It works, giving me what I want:
new_dict = recursive_flatten(mydict)
print(new_dict)
{'first.second.third.fourth': 'the end'}
And should work for just about any arbitrarily structured dictionary. Unfortunately, it does not:
mydict['new_key'] = mydict
Now recursive_flatten(mydict) will run until I run out of stack space. I'm trying to figure out how to gracefully handle self-references (basically, ignore or remove them). To complicate matters, self-references may occur for any sub-dictionary... not just the top level. How would I handle self-references elegantly? I can think of a mutable default argument, but there should be a better way... right?
Pointers appreciated, thanks for reading. I welcome any other suggestions/improvements to recursive_flatten if you have them.
One way you can do it using set and id. Note this solution also uses generators which means we can start using our flattened dict before the entire result is computed
def recursive_flatten(mydict):
    """Lazily yield ``(dotted_path, value)`` pairs for every leaf in *mydict*.

    Dicts are remembered by ``id`` so that a dict reachable from itself is
    visited only once; a dict reachable through several paths is therefore
    emitted under the first path only. Keys are joined with '.', so they
    are expected to be strings.
    """
    def loop(seen, path, value):
        # base case: anything that is not a dict is a leaf. Leaves are never
        # recorded in `seen`: equal scalars are often the very same object,
        # and tracking them would silently drop repeated values.
        if not isinstance(value, dict):
            yield (".".join(path), value)
            return
        # if we've seen this value, skip it
        if id(value) in seen:
            return
        # if we haven't seen this value, now we have
        seen.add(id(value))
        for (k, v) in value.items():
            yield from loop(seen, path + [k], v)

    # init the loop
    yield from loop(set(), [], mydict)
Program demo
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
}
}
}
for (k,v) in recursive_flatten (mydict):
print (k, v)
# first.second.third.fourth the end
mydict['new_key'] = mydict
for (k,v) in recursive_flatten (mydict):
print (k, v)
# first.second.third.fourth the end
We can make a slight modification if you would like to see output for self-referential values
# if we've seen this value, skip it
if (id(value) in seen):
# this is the new line
yield (".".join(path), "*self-reference* %d" % id(value))
return
Now the output of the program will be
first.second.third.fourth the end
first.second.third.fourth the end
new_key *self-reference* 139700111853032
I'm not sure what your definition of "graceful" is, but this can be done with some bookkeeping of what has been seen before in a set of object ids:
class RecursiveFlatten:
    """Callable that flattens nested dicts into a dict keyed by dotted paths.

    Dicts are tracked by ``id`` in ``self.seen``; a dict that has already
    been visited (for example a self-reference) is skipped.
    """

    def __init__(self):
        self.seen = set()

    def __call__(self, mydict):
        """Flatten *mydict* and return the resulting dict."""
        # Start each top-level call from a clean slate; otherwise ids left
        # over from a previous call would make a reused instance skip
        # dicts it has not visited in this run.
        self.seen = {id(mydict)}
        return self._flatten(mydict)

    def _flatten(self, mydict):
        """Recursive worker; shares ``self.seen`` across the whole traversal."""
        d = {}
        for k, v in mydict.items():
            if isinstance(v, dict):
                if id(v) not in self.seen:
                    self.seen.add(id(v))
                    for k2, v2 in self._flatten(v).items():
                        d[k + '.' + k2] = v2
            else:
                d[k] = v
        return d


def recursive_flatten(mydict):
    """Flatten *mydict* with a fresh ``RecursiveFlatten`` instance."""
    return RecursiveFlatten()(mydict)
Testing it out gives me what I expect
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
},
'second2': {
'third2': 'the end2'
}
}
}
mydict['first']['second']['new_key'] = mydict
mydict['new_key'] = mydict
print(recursive_flatten(mydict))
Out:
{'first.second2.third2': 'the end2', 'first.second.third.fourth': 'the end'}
I have a dictionary like this:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
Imagine this like a folder structure. And I would like to get similar as os.walk would do with a folder structure. Something like this:
["DIR1/DIR11/DIR111/Maki111",
"DIR1/DIR11/DIR112/Maki112",
"DIR1/DIR12/Maki12",
"DIR1/DIR13/DIR131/Maki131"]
So it is basically all the path for the dictionary values. I tried it many ways with recursive functions but I got lost.
Here is my latest trial:
# Attempt at turning a nested dict into "/"-separated path strings.
# Written for Python 2: the print statements below are debug tracing.
def walk(input_dict, path_string = "", result = ""):
for key, value in input_dict.items():
if isinstance(value, dict):
# Extend the running path with this key before descending.
path_string += "/" + key
print "==== DICT ====", "\nkey: ", key, "\nvalue: ", value, "\n\t\tpath_string: ", path_string
result = walk(value, path_string)
print "\t\t\t\tresulting: ", result
elif isinstance(value, str):
print "==== NOT DICT ===="
path_string += "/" + value
print "\t\tpath_string: ", path_string, "\nvalue: ", value
# NOTE(review): returning here presumably ends the loop at the first string
# value, so sibling entries are never visited - confirm against the
# original indentation.
return path_string
else:
path_string = "/" + key
# NOTE(review): this appends `result` to itself; path_string is not used.
result += "\n" + result
return result
Using Python 3:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
def recurse(d, prefix=None, sep='/'):
    """Yield one *sep*-joined path string per leaf value of the nested dict *d*.

    Each path consists of the keys leading to the leaf followed by the leaf
    value itself, so keys and leaf values are expected to be strings.
    *prefix* is the list of keys already traversed (None for the top level).
    """
    if prefix is None:
        prefix = []
    for key, value in d.items():
        if isinstance(value, dict):
            # Forward sep so a custom separator applies at every depth.
            yield from recurse(value, prefix + [key], sep)
        else:
            yield sep.join(prefix + [key, value])
print(list(recurse(dirDict)))
Output:
['DIR1/DIR13/DIR131/Maki131', 'DIR1/DIR11/DIR111/Maki111', 'DIR1/DIR11/DIR112/Maki112', 'DIR1/DIR12/Maki12']
try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3


def walk(d, path):
    """Return the list of full paths for every leaf in the nested dict *d*.

    *path* is the prefix accumulated so far (pass '' at the top level).
    Each key adds a 'key/' component and a string leaf is appended as the
    last component. An empty dict contributes its own prefix as a single
    entry.
    """
    if not d:
        # Return a one-element list, not the bare string: the caller
        # extends its result with this value, and extending with a str
        # would add it one character at a time.
        return [path]
    paths = []
    for k, v in d.items():
        child_path = path + k + '/'
        if isinstance(v, _STRING_TYPES):
            paths.append(child_path + v)
        else:
            paths.extend(walk(v, child_path))
    return paths
The walk function I posted at https://gist.github.com/nvie/f304caf3b4f1ca4c3884#gistcomment-1597937 can be used as a helper for your problem:
def walk(obj, parent_first=True):
    """Yield ``(path, value)`` pairs for *obj* and everything nested inside it.

    *path* is a tuple of dict keys and list/tuple positions leading from
    *obj* to *value*; *obj* itself is reported with the empty path.
    With ``parent_first=True`` a container is yielded before its children,
    otherwise after them.
    """
    # Top down?
    if parent_first:
        yield (), obj

    # For nested objects, the key is the path component.
    if isinstance(obj, dict):
        children = obj.items()

    # For nested lists, the position is the path component.
    elif isinstance(obj, (list, tuple)):
        children = enumerate(obj)

    # Scalar values have no children.
    else:
        children = []

    # Recurse into children
    for key, value in children:
        for child_path, child in walk(value, parent_first):
            yield (key,) + child_path, child

    # Bottom up?
    if not parent_first:
        yield (), obj
Your problem can be approached using something like this:
for path, value in walk(obj):
if isinstance(value, str): # leaf node
path_with_value = path + (value,)
print("/".join(path_with_value))
A compact solution with a list comprehension:
def f(v):
    """Return the list of path tails contributed by the value *v*.

    A dict is expanded recursively, a list is used as-is, and any other
    value becomes a single-element list.
    """
    if isinstance(v, dict):
        return dict_to_list(v)
    elif isinstance(v, list):
        return v
    else:
        return [v]


def dict_to_list(d):
    """Return one 'key/.../leaf' string for every leaf of the nested dict *d*."""
    return ['{}/{}'.format(k, i) for k, v in d.items() for i in f(v)]
lst = dict_to_list(dirDict)
lst.sort()
print('\n'.join(lst))
I use the json library to decode json text
import json
I have a Party class defined like this:
class Party(object):
    """Plain record for a party; every field defaults to an empty string."""

    # Class-level defaults; instances get their own values once attributes
    # are assigned on them.
    id = ""
    code = ""
    create_date = ""
    citizenship = ""
an object_hook method:
def as_party(d):
    """Build a ``Party`` whose instance attributes are the items of the dict *d*."""
    p = Party()
    p.__dict__.update(d)
    return p
I can use this method in order to get a Party object from a json text:
def parse_as(s, typo_class):
    """Decode the JSON text *s*, passing every decoded object to *typo_class*.

    *typo_class* is used as the ``object_hook`` of ``json.loads``: it
    receives each decoded JSON object as a dict and its return value
    replaces that dict in the result.
    """
    return json.loads(str(s), object_hook=typo_class)
When I call the parse_as method on a json text containing encoded Party class, i get an object of type Party.
json_text = {'id': 2, 'code': '2', 'create_date': null, 'citizenship': null}
party1 = parse_as(json_text, as_party)
I can call its attributes like this:
print party1.code
My problem is to make the parse_as method able to parse a json text containing a list of Party objects like this one:
json_text = [{'id': 2, 'code': '2', 'create_date': null, 'citizenship': null}, {'id': 5, 'code': '3', 'create_date': null, 'citizenship': null}, {'id': 6, 'code': '8', 'create_date': null, 'citizenship': null}]
Please help me and thanks in advance!
object hook needs to be changed
def as_party(d):
    """Convert a dict into a ``Party``, or a list of dicts into a list of them.

    Raises ``Exception`` for any other input type.
    """
    if isinstance(d, dict):
        p = Party()
        p.__dict__.update(d)
    elif isinstance(d, list):
        out = []
        for i in d:
            n = Party()
            n.__dict__.update(i)
            out.append(n)
        p = out
    else:
        # Wrap d in a 1-tuple so that a tuple argument is formatted as a
        # single value instead of being unpacked by the % operator.
        raise Exception('got non-dict value %s' % (d,))
    return p
By the way, I wouldn't use __dict__; I would rather use setattr with some prefix,
because if a key is a Python keyword, the resulting attribute cannot be accessed with normal dot syntax.
>>> a = Foo()
>>> a.__dict__['except'] = 1
>>> a.except
File "<stdin>", line 1
a.except
^
SyntaxError: invalid syntax
As a different approach, using setattr and avoiding collisions with Python keywords, you can use:
def as_party(d):
    """Convert a dict into a ``Party``, or a list of dicts into a list of them.

    Every key is stored as an attribute named ``v_<key>`` so that keys
    which are Python keywords remain reachable with dot syntax.
    Raises ``Exception`` for any other input type.
    """
    def build(mapping):
        # One Party per mapping; shared by the dict and the list branch.
        party = Party()
        # .items() works on both Python 2 and Python 3.
        for k, v in mapping.items():
            setattr(party, 'v_%s' % k, v)  # you can assign any prefix you want
        return party

    if isinstance(d, dict):
        return build(d)
    if isinstance(d, list):
        return [build(i) for i in d]
    # Wrap d in a 1-tuple so that a tuple argument is formatted as a single
    # value instead of being unpacked by the % operator.
    raise Exception('got non-dict value %s' % (d,))
but then you would have to access those values with prefix that we incorporated like so:
print party.v_for, party.v_except # etc..
generally, you could work out the code that it does not allow python keywords:
import keyword
def as_party(d):
    """Convert a dict into a ``Party``, or a list of dicts into a list of them.

    Keys are stored as attributes under their own name. A key that is a
    Python keyword is rejected with ``Exception``, as is any input that is
    neither a dict nor a list.
    """
    def build(mapping):
        # One Party per mapping; shared by the dict and the list branch.
        party = Party()
        # .items() works on both Python 2 and Python 3.
        for k, v in mapping.items():
            if keyword.iskeyword(k):
                raise Exception('Cannot accept %s as a property name. That is resrved python keyword' % k)
            setattr(party, k, v)
        return party

    if isinstance(d, dict):
        return build(d)
    if isinstance(d, list):
        return [build(i) for i in d]
    # Wrap d in a 1-tuple so that a tuple argument is formatted as a single
    # value instead of being unpacked by the % operator.
    raise Exception('got non-dict value %s' % (d,))