Handle self-references when flattening dictionary - python

Given some arbitrary dictionary
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
}
}
}
I've written a small routine to flatten it in the process of writing an answer to another question.
def recursive_flatten(mydict):
d = {}
for k, v in mydict.items():
if isinstance(v, dict):
for k2, v2 in recursive_flatten(v).items():
d[k + '.' + k2] = v2
else:
d[k] = v
return d
It works, giving me what I want:
new_dict = recursive_flatten(mydict)
print(new_dict)
{'first.second.third.fourth': 'the end'}
And should work for just about any arbitrarily structured dictionary. Unfortunately, it does not:
mydict['new_key'] = mydict
Now recursive_flatten(mydict) will run until I run out of stack space. I'm trying to figure out how to gracefully handle self-references (basically, ignore or remove them). To complicate matters, self-references may occur for any sub-dictionary... not just the top level. How would I handle self-references elegantly? I can think of a mutable default argument, but there should be a better way... right?
Pointers appreciated, thanks for reading. I welcome any other suggestions/improvements to recursive_flatten if you have them.

One way you can do it using set and id. Note this solution also uses generators which means we can start using our flattened dict before the entire result is computed
def recursive_flatten (mydict):
def loop (seen, path, value):
# if we've seen this value, skip it
if id(value) in seen:
return
# if we haven't seen this value, now we have
else:
seen.add(id(value))
# if this value is a dict...
if isinstance (value, dict):
for (k, v) in value.items ():
yield from loop(seen, path + [k], v)
# base case
else:
yield (".".join(path), value)
# init the loop
yield from loop (set(), [], mydict)
Program demo
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
}
}
}
for (k,v) in recursive_flatten (mydict):
print (k, v)
# first.second.third.fourth the end
mydict['new_key'] = mydict
for (k,v) in recursive_flatten (mydict):
print (k, v)
# first.second.third.fourth the end
We can make a slight modification if you would like to see output for self-referential values
# if we've seen this value, skip it
if (id(value) in seen):
# this is the new line
yield (".".join(path), "*self-reference* %d" % id(value))
return
Now the output of the program will be
first.second.third.fourth the end
first.second.third.fourth the end
new_key *self-reference* 139700111853032

I'm not sure what your definition of "graceful" is, but this can be done with some bookkeeping of what has been seen before in a set of object ids:
class RecursiveFlatten:
def __init__(self):
self.seen = set()
def __call__(self, mydict):
self.seen.add(id(mydict))
d = {}
for k, v in mydict.items():
if isinstance(v, dict):
if id(v) not in self.seen:
self.seen.add(id(v))
for k2, v2 in self(v).items():
d[k + '.' + k2] = v2
else:
d[k] = v
return d
def recursive_flatten(mydict):
return RecursiveFlatten()(mydict)
Testing it out gives me what I expect
mydict = {
'first': {
'second': {
'third': {
'fourth': 'the end'
}
},
'second2': {
'third2': 'the end2'
}
}
}
mydict['first']['second']['new_key'] = mydict
mydict['new_key'] = mydict
print(recursive_flatten(mydict))
Out:
{'first.second2.third2': 'the end2', 'first.second.third.fourth': 'the end'}

Related

python flatten dict with leaving the same key name after flatten

I have the following dict:
{
"a": "b",
"c": {'d':'e', 'g':'f'}
}
and I want to flatten the dict, but in this way:
{
"a": "b",
'd':'e',
'g':'f'
}
You can assume there is no duplicate key.
I read about flatten_dict library but it looks there is no support in this.
How can I do this?
Thank you!
For one level of nesting, a nice loop does the trick:
result = {}
for key, value in my_dict.items():
if isinstance(value, dict):
result.update(value) # add subdict directly into the dict
else:
result[key] = value # non-subdict elements are just copied
If you have more nesting, the if/else should be executed recursively:
def flatten(my_dict):
result = {}
for key, value in my_dict.items():
if isinstance(value, dict):
result.update(flatten(value))
else:
result[key] = value
return result
You can use a modified version of this answer:
from collections.abc import MutableMapping
def flatten(d):
items = []
for k, v in d.items():
if isinstance(v, MutableMapping):
items.extend(flatten(v).items())
else:
items.append((k, v))
return dict(items)
>>> x = {
... "a": "b",
... "c": {'d': 'e', 'g': 'f'}
... }
>>> flatten(x)
{'a': 'b', 'd': 'e', 'g': 'f'}
This will work for all forms of any dictionary

Recursively locate nested dictionary containing a target key and value

There are many questions about this problem, but in my case they are not working. I'm trying to find a nested dictionary given a target key and value pair. My recursive function returned none (after fix, max depth recursive error).
def recursive_lookup(k, sv, d):
if k in d: return d[k]
for v in d.values():
if isinstance(v, dict):
a = recursive_lookup(k, sv, v)
if a == sv:
if a is not None:
return d
return None
def run():
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
returned_value = recursive_lookup("FocusPosition", "+000000002097.2550", maly)
print(returned_value)
run()
If I change return d to recursive_lookup(k, sv, d) it is also not working.
It should return the maly dictionary, but it returned None.
How can I fix that problem?
This is the right idea, but a matched result isn't being passed up the call stack correctly. You can also simplify logic by checking key and value on the same call frame--this should also eliminate a bug where the target key-value are on the top level of the dict (there's no previous frame to fall back on to check the value).
def recursive_lookup(target_key, target_val, dictionary):
if target_key in dictionary and dictionary[target_key] == target_val:
return dictionary
for value in dictionary.values():
if isinstance(value, dict):
if result := recursive_lookup(target_key, target_val, value):
return result
if __name__ == "__main__":
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
print(recursive_lookup("FocusPosition", "+000000002097.2550", maly))
Here's a more-easily verifiable version that uses a simple dictionary and doesn't use the 3.8 assignment expression:
def recursive_lookup(target_key, target_val, dictionary):
if target_key in dictionary and dictionary[target_key] == target_val:
return dictionary
for value in dictionary.values():
if isinstance(value, dict):
result = recursive_lookup(target_key, target_val, value)
if result: return result
if __name__ == "__main__":
dictionary = {
"a": "foo",
"b": {
"c": "bar",
"d": "baz",
"e": {
"f": "quux",
"g": "garply"
}
}
}
print(recursive_lookup("c", "bar", dictionary)) # => {'c': 'bar', 'd': 'baz', 'e': {'f': 'quux', 'g': 'garply'}}
print(recursive_lookup("g", "garply", dictionary)) # => {'f': 'quux', 'g': 'garply'}
This sample code performs a recursive search in a hierarchy of dictionaries. So I guess it may correspond to what you are looking for:
def rec_search(key, dic):
if key in dic:
return dic[key]
for d in dic.values():
if isinstance(d, dict):
val = rec_search(key, d)
if val is not None: return val
return None
maly = {1:'a',
2:'b',
3:{4:'d',
5:'e',
6:{7:'g',
8:'h'}
},
9:{10:'i',
11:'j',
12:{13:'l',
14:'m'}
}
}
print(rec_search(2,maly)) # --> 'b'
print(rec_search(7,maly)) # --> 'g'
print(rec_search(10,maly)) # --> 'i'
print(rec_search(15,maly)) # --> None
EDIT: Corrected code after Sylwester's comment
i think the problem is when you call recursive_search() for the second time
it just keeps looking for sv in the same dictionary which is maly and doesn't search deeper inside the rest that's why it returns None

How to convert a inconsistent dictionary to .ini dynamically using python?

I have a use case where I ll be getting complex dictionaries as inputs in a inconsistent hirarchy.
one use case would be like below :
pro : 1
rel : 1.2
del : demo
cb :{
a : b
}
cd : {
en : {
b : a
}
}
cc : {
a : b
}
I used something like this : -
def jsonToDict(data):
d = data
res = defaultdict(dict)
def dict2ini(d, root):
for k, v in d.items():
if isinstance(v, dict):
_key = '%s.%s' % (root, k) if root else k
if v:
dict2ini(v, _key)
else:
res[_key] = {}
elif isinstance(v, (str,int, float)):
res[root] = {k:v}
dict2ini(d, '')
config = configparser.RawConfigParser()
for key in sorted(res.keys()):
config.add_section(key)
for subKey, value in res[key].items():
config.set(key, subKey, value)
with open('example.ini', 'w') as configfile:
config.write(configfile)
but the above doesn't process all the values present in my dict but only the first line in each section. I went through [ConfigParser][1]. But I am unable to find a solution to my use case can someone suggest me some workaround this also please note the above data is not fixed it will be changing according to our needs.
EXAMPLE INI :
pro = 1
rel = 1.2
del = demo
[cb]
a=b
[cd.en]
b=a
## suppose if multiple data is present in cd then
[cd]
b=a
[cd.en]
b=a
## end
[cc]
a=b
First, take a close look at your code. In dict2ini you iterate over a list of items in d:
for k, v in d.items():
And if v is a scalar value, you add it to the res dictionary...but you always use the same key:
elif isinstance(v, (str, int, float)):
res[root] = {k: v}
So for each item in the dictionary, you're going to override the previous value of res[root]. With a few minor changes, I think you'll get closer to what you want:
def dict2ini(d, root):
section = res[root]
for k, v in d.items():
if isinstance(v, dict):
_key = '%s.%s' % (root, k) if root else k
if v:
dict2ini(v, _key)
else:
section[_key] = {}
elif isinstance(v, (str,int, float)):
section[k] = v
dict2ini(d, '')
This gives me as output:
[DEFAULT]
pro = 1
del = demo
rel = 1.2
[]
[cb]
a = b
[cc]
a = b
[cd]
[cd.en]
b = a
You obviously have a few additional things to fix there, but hopefully that sets you in the right direction.

How to get the level of values in a specific key in nested Dictionary

Assuming that I have the following dictionary:
a={'brother': {'name': 'paskal', 'surname': 'leonis'},
'family': {'parents': {'father': 'telis',
'mother': 'xrisanthi',
'name': 'dimitris'}},
'name': 'alekos'}
And the desired output is to get a list:
[['paskal',1],['dimitris',2],['alekos',0]]
So what I want is to get the values of all the keys that have the name with the level that they key was found(starting with zero)
Until now I have succesufully got the values but the levels that I found are incorrect. I am using the following code:
from six import iteritems
def findKey_printValue(key, document,c=0):
if isinstance(document, dict):
for k, v in iteritems(document):
if k == key:
yield [v,c]
elif isinstance(v, dict):
c+=1
for result in findKey_printValue(key, v,c):
yield result
In [125]:list(findKey_printValue('name',a))
Out[125]:[['dimitris', 2], ['paskal', 2], ['alekos', 2]]
Any help?
You need to make sure the c variable only increases as you go down a level. When you run back up the stack, c should not have changed.
Modify this :
c+=1
for result in findKey_printValue(key, v,c):
yield result
to this:
for result in findKey_printValue(key, v,c+1):
yield result
Don't update the value of c in the current function itself, just do it in the call to the recursion:
elif isinstance(v, dict):
for result in findKey_printValue(key, v, c+1):
yield result
Do not pass c, but the increment of the value (else all your results have a reference to c):
from six import iteritems
def findKey_printValue(key, document,c=0):
if isinstance(document, dict):
for k, v in iteritems(document):
if k == key:
yield (v,c)
elif isinstance(v, dict):
for result in findKey_printValue(key, v, c + 1):
yield result
a={'brother': {'name': 'paskal', 'surname': 'leonis'},
'family': {'parents': {'father': 'telis',
'mother': 'xrisanthi',
'name': 'dimitris'}},
'name': 'alekos'}
holder = {}
def get(d, count):
if isinstance(d, dict):
for key, val in d.items():
if isinstance(val, dict):
get(val, count+1)
elif key=='name':
holder[val]=count
get(a, 0)

How to parse a list of class objects from json text in python?

I use the json library to decode json text
import json
I have a Party class defined like this:
class Party(object):
id = ""
code = ""
create_date = ""
citizenship = ""
an object_hook method:
def as_party(d):
p = Party()
p.__dict__.update(d)
return p
I can use this method in order to get a Party object from a json text:
def parse_as(s, typo_class):
return json.loads(str(s), object_hook=typo_class)
When I call the parse_as method on a json text containing encoded Party class, i get an object of type Party.
json_text = {'id': 2, 'code': '2', 'create_date': null, 'citizenship': null}
party1 = parse_as(json_text, as_party)
I can call its attributes like this:
print party1.code
My problem is to make the parse_as method able to parse a json text containing a list of Party objects like this one:
json_text = [{'id': 2, 'code': '2', 'create_date': null, 'citizenship': null}, {'id': 5, 'code': '3', 'create_date': null, 'citizenship': null}, {'id': 6, 'code': '8', 'create_date': null, 'citizenship': null}]
Please help me and thanks in advance!
object hook needs to be changed
def as_party(d):
if isinstance(d, dict):
p = Party()
p.__dict__.update(d)
elif isinstance(d, list):
out = []
for i in d:
n = Party()
n.__dict__.update(i)
out.append(n)
p = out
else:
raise Exception('got non-dict value %s' % d)
return p
btw, i wouldnt use __dict__, would rather use setattr with some prefix...
because if key value is a python keyword, that wont be good.
>>> a = Foo()
>>> a.__dict__['except'] = 1
>>> a.except
File "<stdin>", line 1
a.except
^
SyntaxError: invalid syntax
as for different approach using setattr and avoiding colision of python keywords you can use:
def as_party(d):
if isinstance(d, dict):
p = Party()
for k,v in d.iteritems():
setattr(p, 'v_%s' % k, v) # you can assign any prefix you want
elif isinstance(d, list):
out = []
for i in d:
n = Party()
for k,v in i.iteritems():
setattr(n, 'v_%s' % k, v)
out.append(n)
p = out
else:
raise Exception('got non-dict value %s' % d)
return p
but then you would have to access those values with prefix that we incorporated like so:
print party.v_for, party.v_except # etc..
generally, you could work out the code that it does not allow python keywords:
import keyword
def as_party(d):
if isinstance(d, dict):
p = Party()
for k,v in d.iteritems():
if keyword.iskeyword(k):
raise Exception('Cannot accept %s as a property name. That is resrved python keyword' % k)
setattr(p, k, v) # you can assign any prefix you want
elif isinstance(d, list):
out = []
for i in d:
n = Party()
for k,v in i.iteritems():
if keyword.iskeyword(k):
raise Exception('Cannot accept %s as a property name. That is resrved python keyword' % k)
setattr(n, k, v)
out.append(n)
p = out
else:
raise Exception('got non-dict value %s' % d)
return p

Categories