iteration through nested dictionaries - python

I'm trying to implement simple tree class which is inherited from dictionary.
Here my code:
class tree(dict):
def __init__(self, hsymbol="/"):
self.hsymbol = hsymbol
def __setitem__(self, key, value):
if key[0] == self.hsymbol : key = key[1:]
parts = key.split(self.hsymbol, 1)
if len(parts) == 2:
if parts[0] not in self: self[parts[0]] = tree(hsymbol = self.hsymbol)
self[parts[0]].__setitem__(parts[1], value)
else:
super(tree, self).__setitem__(key, value)
def __getitem__(self, key):
if key[0] == self.hsymbol : key = key[1:]
parts = key.split(self.hsymbol, 1)
if len(parts) == 2:
if parts[0] not in self: raise KeyError(parts[0])
return self[parts[0]][parts[1]]
else:
if key not in self: raise KeyError(parts[0])
return super(tree, self).__getitem__(key)
def __contains__(self,key):
if key[0] == self.hsymbol : key = key[1:]
parts = key.split(self.hsymbol, 1)
if len(parts) == 2:
if not super(tree, self).__contains__(parts[0]): return False
return parts[1] in self[parts[0]]
else:
if not super(tree, self).__contains__(key): return False
return True
def __delitem__(self, key):
if key[0] == self.hsymbol : key = key[1:]
parts = key.split(self.hsymbol, 1)
if len(parts) == 2:
if parts[0] not in self: raise KeyError(parts[0])
self[parts[0]].__delitem__(parts[1])
else:
if key not in list(self): raise KeyError(parts[0])
super(tree,self).__delitem__(key)
def keys(self,parent=""):
#if parent is None: parent = self.hsymbol
names = []
for name in super(tree, self).keys():
if isinstance(self[name], tree):
names += self[name].keys(parent=parent+self.hsymbol+name)
else:
names.append(parent+self.hsymbol+name)
return names
So everything works quite well, although I'm not sure about keys function realization:
>>> t=tree()
>>> t['/user/Johnson/incoming'] = 2200
>>> t['/user/Johnson/family'] = 4
>>> t['/user/Johnson/play'] = False
>>> t['/user/Smith/incoming'] = 12000
>>> t['/user/Smith/family'] = 1
>>> t['/user/Smith/play'] = True
>>> t['user/Smith/incoming']
12000
>>> print t
{'user': {'Smith': {'play': True, 'incoming': 12000, 'family': 1}, 'Johnson': {'play': False, 'incoming': 2200, 'family': 4}}}
>>> print t.keys()
['/user/Smith/play', '/user/Smith/incoming', '/user/Smith/family', '/user/Johnson/play', '/user/Johnson/incoming', '/user/Johnson/family']
>>> t
{'user': {'Smith': {'play': True, 'incoming': 12000, 'family': 1}, 'Johnson': {'play': False, 'incoming': 2200, 'family': 4}}}
...but not an iteration through it:
>>> for k in t:
... print k
...
user
>>>
How can I get something like this?
/user/Smith/play
/user/Smith/incoming
/user/Smith/family
/user/Johnson/play
/user/Johnson/incoming
/user/Johnson/family
Pretty sure that it must be __iter__ and next attributes of tree class, but I haven't figured out how to write it yet.
I've searched over Stack Overflow with no luck:
python recursive iteration nested dictionaries
python class inherited from dictionary iteration through nested dictionaries
python iteration through nested dictionaries

Yes, you need __iter__ (an iterator will have a next() automatically).
Following your existing logic:
def __iter__(self, parent=""):
for name in super(tree, self).keys():
if isinstance(self[name], tree):
for item in self[name].__iter__(parent=parent+self.hsymbol+name):
yield item
else:
yield parent+self.hsymbol+name
Unlike your current keys() implementation, this only walks the tree on an as-needed basis: If a client only needs the first two keys, it only calls next() twice, and so the iterator only proceeds past two yields.
(I might suggest implementing keys as simply return list(iter(self)) -- that way you have the lazy approach available for those that want to avoid the inefficiency of unnecessarily walking a full tree, and the non-lazy approach otherwise).

Related

Recursively locate nested dictionary containing a target key and value

There are many questions about this problem, but in my case they are not working. I'm trying to find a nested dictionary given a target key and value pair. My recursive function returned none (after fix, max depth recursive error).
def recursive_lookup(k, sv, d):
if k in d: return d[k]
for v in d.values():
if isinstance(v, dict):
a = recursive_lookup(k, sv, v)
if a == sv:
if a is not None:
return d
return None
def run():
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
returned_value = recursive_lookup("FocusPosition", "+000000002097.2550", maly)
print(returned_value)
run()
If I change return d to recursive_lookup(k, sv, d) it is also not working.
It should return the maly dictionary, but it returned None.
How can I fix that problem?
This is the right idea, but a matched result isn't being passed up the call stack correctly. You can also simplify logic by checking key and value on the same call frame--this should also eliminate a bug where the target key-value are on the top level of the dict (there's no previous frame to fall back on to check the value).
def recursive_lookup(target_key, target_val, dictionary):
if target_key in dictionary and dictionary[target_key] == target_val:
return dictionary
for value in dictionary.values():
if isinstance(value, dict):
if result := recursive_lookup(target_key, target_val, value):
return result
if __name__ == "__main__":
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
print(recursive_lookup("FocusPosition", "+000000002097.2550", maly))
Here's a more-easily verifiable version that uses a simple dictionary and doesn't use the 3.8 assignment expression:
def recursive_lookup(target_key, target_val, dictionary):
if target_key in dictionary and dictionary[target_key] == target_val:
return dictionary
for value in dictionary.values():
if isinstance(value, dict):
result = recursive_lookup(target_key, target_val, value)
if result: return result
if __name__ == "__main__":
dictionary = {
"a": "foo",
"b": {
"c": "bar",
"d": "baz",
"e": {
"f": "quux",
"g": "garply"
}
}
}
print(recursive_lookup("c", "bar", dictionary)) # => {'c': 'bar', 'd': 'baz', 'e': {'f': 'quux', 'g': 'garply'}}
print(recursive_lookup("g", "garply", dictionary)) # => {'f': 'quux', 'g': 'garply'}
This sample code performs a recursive search in a hierarchy of dictionaries. So I guess it may correspond to what you are looking for:
def rec_search(key, dic):
if key in dic:
return dic[key]
for d in dic.values():
if isinstance(d, dict):
val = rec_search(key, d)
if val is not None: return val
return None
maly = {1:'a',
2:'b',
3:{4:'d',
5:'e',
6:{7:'g',
8:'h'}
},
9:{10:'i',
11:'j',
12:{13:'l',
14:'m'}
}
}
print(rec_search(2,maly)) # --> 'b'
print(rec_search(7,maly)) # --> 'g'
print(rec_search(10,maly)) # --> 'i'
print(rec_search(15,maly)) # --> None
EDIT: Corrected code after Sylwester's comment
i think the problem is when you call recursive_search() for the second time
it just keeps looking for sv in the same dictionary which is maly and doesn't search deeper inside the rest that's why it returns None

How can I read a dictionary with a list?

How can I read a list inside a dictionary and try to change string numbers to digits? For example:
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
I use this to change dictionary number strings to integers:
{k:int(v) if v.isdigit() else v for k,v in obj.items()}
But it doesn't work, so I was trying something like this:
for objs in obj:
if objs.isdigit():
k:int(v)
else:
for k,v in objs.items():
print k
But this fails as well.
this seems like a good problem for recursion
obj = {'azul':'4','rojo':[{'rojo_a':'1','rojo_b':'2'}],'amarillo':'xxx','naranja':[{'naranja_1':'1','naranja_2':'2'}]}
def fix_ints(obj):
if isinstance(obj,basestring):
try:
return int(obj)
except ValueError:
print "I cant Make %r an int"%obj
return obj
elif isinstance(obj,(list,tuple)):
return [fix_ints(item) for item in obj]
elif isinstance(obj,dict):
return dict((key,fix_ints(value)) for key,value in obj.items())
else:
print "I have no idea what to do with %r"%obj
new_obj = fix_ints(obj)
print new_obj
note that python does not support tail recursion so if this data structure goes very deep (greater than 1k levels of nesting) then recursion may not be appropriate ...
of coarse you can also do silly string tricks with it
import json,re
new_obj = json.loads(re.sub("\"(\d+)\"","\\1",json.dumps(obj)))
(although really you should do it like i do in my first exzample ... this second method is really just for fun)
String to number:
def int_it(obj):
if obj.isdigit():
obj = int(obj)
return obj
Dict to number (regardless of the number of nested dicts or lists):
class Convert(object):
def __init__(self, obj):
self.obj = obj
if isinstance(obj, dict):
self.handle_dict(obj)
def handle_dict(self, obj):
for key, value in obj.items():
if isinstance(value, str) and value.isdigit():
self.obj[key] = int_it(value)
elif isinstance(obj[key], list):
ins = HandleList(obj[key])
self.obj[key] = ins.obj
elif isinstance(obj[key], dict):
ins = Convert(obj.items())
self.obj[key] = ins.obj
return obj
List to numbers, regardless of the number of nested lists or dicts.
class HandleList(object):
def __init__(self, obj):
self.obj = obj
self.handle_list(obj)
def handle_list(self, obj):
for index, item in enumerate(obj):
if isinstance(item, list):
obj.index(index, [HandleList(val).obj for val in item])
elif isinstance(item, str):
obj.index(index, int_it(item))
elif isinstance(item, dict):
Convert(item)
return obj
output = Convert(values)
print(output.obj)
Returns:
{
'amarillo': 'xxx',
'naranja': [{'naranja_1': 1, 'naranja_2': 2}],
'rojo': [{'rojo_b': 2, 'rojo_a': 1}],
'azul': 4
}
Given the input:
values = {
'azul':'4',
'rojo': [
{'rojo_a':'1',
'rojo_b':'2'
}
],
'amarillo':'xxx',
'naranja': [
{'naranja_1':'1',
'naranja_2':'2'
}
]
}

How to filter by keys through a nested dictionary in a pythonic way

Try to filter a nested dictionary. My solution is clunky, was hoping to see if there is a better method something using comprehensions. Only interested in the dictionary and lists for this example.
_dict_key_filter() will filter the keys of a nested dictionary or a list of nested dictionaries. Anything not in the obj_filter will be ignored on all nested levels.
obj : can be a dictionary or a list of dictionaries.
obj_filter: has to be a list of filter values
def _dict_key_filter(self, obj, obj_filter):
if isinstance(obj, dict):
retdict = {}
for key, value in obj.iteritems():
if key in obj_filter:
retdict[key] = copy.deepcopy(value)
elif isinstance(value, (dict, list)):
child = self._dict_key_filter(value, obj_filter)
if child:
retdict[key] = child
return retdict if retdict else None
elif isinstance(obj, list):
retlist = []
for value in list:
child = self._dict_key_filter(value, obj_filter)
if child:
retlist.append(child)
return retlist if retlist else None
else:
return None
Example#
dict1 = {'test1': {'test2':[1,2]}, 'test3': [{'test6': 2},
{'test8': {'test9': 23}}], 'test4':{'test5': 5}}
filter = ['test5' , 'test9']
return = _dict_key_filter(dict1, filter)
return value would be {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 5}}
It's a really old question. I came across a similar problem recently.
It maybe obvious, but you are dealing with a tree in which each node has an arbitray number of children. You want to cut the subtrees that do not contain some items as nodes (not leaves). To achieve this, you are using a custom DFS: the main function returns either a subtree or None. If the value is None then you "cut" the branch.
First of all, the function dict_key_filter returns a (non empty) dict, a (non empty) list or None if no filter key was not found in the branch.
To reduce complexity, you could return a sequence in every case: an empty sequence if no filter key was found, and a non empty sequence if you are still searching or you found the leaf of the tree. Your code would look like:
def dict_key_filter(obj, obj_filter):
if isinstance(obj, dict):
retdict = {}
...
return retdict # empty or not
elif isinstance(obj, list):
retlist = []
...
return retlist # empty or not
else:
return [] # obvioulsy empty
This was the easy part. Now we have to fill the dots.
The list case
Let's begin with the list case, since it is the easier to refactor:
retlist = []
for value in obj:
child = dict_key_filter0(value, obj_filter)
if child:
retlist.append(child)
We can translate this into a simple list comprehension:
retlist = [dict_key_filter(value, obj_filter) for value in obj if dict_key_filter(value, obj_filter)]
The drawback is that dict_key_filter is evaluated twice. We can avoid this with a little trick (see https://stackoverflow.com/a/15812866):
retlist = [subtree for subtree in (dict_key_filter(value, obj_filter) for value in obj) if subtree]
The inner expression (dict_key_filter(value, obj_filter) for value in obj) is a generator that calls dict_key_filter once per value. But we can even do better if we build a closure of dict_key_filter:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
...
retlist = list(filter(len, map(inner_dict_key_filter, obj)))
Now we are in the functional world: map applies inner_dict_key_filter to every element of the list and then the subtrees are filtered to exclude empty subtrees (len(subtree) is true iff subtree is not empty). Now, the code looks like:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
if isinstance(obj, dict):
retdict = {}
...
return retdict
elif isinstance(obj, list):
return list(filter(len, map(inner_dict_key_filter, obj)))
else:
return []
If you are familiar with functional programming, the list case is readable (not quite as readable as it would be in Haskell, but still readable).
The dict case
I do not forget the dictionary-comprehension tag in your question. The first idea is to create a function to return either a whole copy of the branch or the result of the rest of the DFS.
def build_subtree(key, value):
if key in obj_filter:
return copy.deepcopy(value) # keep the branch
elif isinstance(value, (dict, list)):
return inner_dict_key_filter(value) # continue to search
return [] # just an orphan value here
As in the list case, we do not refuse empty subtrees for now:
retdict = {}
for key, value in obj.items():
retdict[key] = build_subtree(key, value)
We have now a perfect case for dict comprehension:
retdict = {key: build_subtree(key, value) for key, value in obj.items() if build_subtree(key, value)}
Again, we use the little trick to avoid to compute a value twice:
retdict = {key:subtree for key, subtree in ((key, build_subtree(key, value)) for key, value in obj.items()) if subtree}
But we have a little problem here: the code above is not exaclty equivalent to the original code. What if the value is 0? In the original version, we have retdict[key] = copy.deepcopy(0) but in the new version we have nothing. The 0 value is evaluated as false and filtered. And then the dict may become empty and we cut the branch wrongfully. We need another test to be sure we want to remove a value: if it's an empty list or dict, then remove it, else keep it:
def to_keep(subtree): return not (isinstance(subtree, (dict, list)) or len(subtree) == 0)
That is:
def to_keep(subtree): return not isinstance(subtree, (dict, list)) or subtree
If you remember a bit of logic (https://en.wikipedia.org/wiki/Truth_table#Logical_implication) you can interpret this as: if subtree is a dict or a list, then it must not be empty.
Let's put the pieces together:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
def to_keep(subtree): return not isinstance(subtree, (dict, list)) or subtree
def build_subtree(key, value):
if key in obj_filter:
return copy.deepcopy(value) # keep the branch
elif isinstance(value, (dict, list)):
return inner_dict_key_filter(value) # continue to search
return [] # just an orphan value here
if isinstance(obj, dict):
key_subtree_pairs = ((key, build_subtree(key, value)) for key, value in obj.items())
return {key:subtree for key, subtree in key_subtree_pairs if to_keep(subtree)}
elif isinstance(obj, list):
return list(filter(to_keep, map(inner_dict_key_filter, obj)))
return []
I don't know if this is more pythonic, but it seems clearer to me.
dict1 = {
'test1': { 'test2':[1,2] },
'test3': [
{'test6': 2},
{
'test8': { 'test9': 23 }
}
],
'test4':{'test5': 0}
}
obj_filter = ['test5' , 'test9']
print (dict_key_filter(dict1, obj_filter))
# {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 0}}

Get key count from OrderedDict where key is a tuple

I've a dictionary such as this:
my_dict=collections.OrderedDict([((123, 1), 'qwe'), ((232, 1), 'asd'), ((234, 2), 'zxc'), ((6745, 2), 'aaa'), ((456, 3), 'bbb')])
The combination of the tuple is always unique and I would like to maintain the order of insertion, and hence OrderedDict. I've a well over ~10K items in the dict. How can I efficiently maintain a counter that gives the count of the second element in the tuple? Basically, I need to know the count whenever I would like to add/delete an item in the key. Right now I just iterate through my_dict and get the counter everytime but it seems to be very expensive to do that.
In the above example I want the output to be:
1:2 # As in 1 occurs 2 times
2:2
3:1
Right now I do the following:
from collections import OrderedDict, Counter
my_dict = OrderedDict()
my_dict[(123,1)] = 'qwe'
my_dict[(232,1)] = 'asd'
my_dict[(234,2)] = 'zxc'
my_dict[(6745,2)] = 'aaa'
my_dict[(456,3)] = 'bbb'
cnt = []
for item in my_dict.keys():
cnt.append(item[1])
print Counter(cnt)
I'm not sure if this is the best way but is there a way to override the the = operator and pop function, such that it adds or subtracts a count every time I do that operation?
Getting a Counter to work nicely with an OrderedDict is probably going to require some subclassing. Here's something that might work (I've only implemented __setitem__ and __getitem__, but if you'd like a more robust implementation, let me know):
import collections
class CountedOrderedDict(collections.OrderedDict):
def __init__(self, *args, **kwargs):
self.counter = collections.Counter()
super(CountedOrderedDict, self).__init__(*args, **kwargs)
def __delitem__(self, key):
super(CountedOrderedDict, self).__delitem__(key)
self.counter[key[1]] -= 1
def __setitem__(self, key, value):
if key not in self:
self.counter[key[1]] += 1
super(CountedOrderedDict, self).__setitem__(key, value)
Example usage:
>>> my_dict = CountedOrderedDict({(123,1): 'sda', (232,1) : 'bfd', (234,2) : 'csd', (6745,2) : 'ds', (456,3) : 'rd'})
>>> my_dict.counter
Counter({'1': 2, '2': 2, '3': 1})
>>> del my_dict[(123,1)]
>>> my_dict.counter
Counter({'2': 2, '1': 1, '3': 1})
>>> my_dict[(150,1)] = "asdf"
>>> my_dict.counter
Counter({'1': 2, '2': 2, '3': 1})
Here's a more general CountedOrderedDict implementation that takes a key function as a parameter.
import collections
class CountedOrderedDict(collections.OrderedDict):
def __init__(self, key=lambda k: k, *args, **kwargs):
self.counter = collections.Counter()
self.key_transform = key
super(CountedOrderedDict, self).__init__(*args, **kwargs)
def __delitem__(self, key):
super(CountedOrderedDict, self).__delitem__(key)
self.counter[self.key_transform(key)] -= 1
def __setitem__(self, key, value):
if key not in self:
self.counter[self.key_transform(key)] += 1
super(CountedOrderedDict, self).__setitem__(key, value)
For your needs, you'd instantiate it like so:
my_dict = CountedOrderedDict(key=lambda k: k[1])

Python dict like surjective multiple key → value container

I'm currently in the need for a Python container class with similar functionality like the builtin dict type. Basically what I need is a dictionary, where an arbitrary number of keys beside a primary key, which map to the very same value. However when iterating over it, it should iterate only over the (primary_key, value) pairs and only the primary key if the list of keys is requested.
If this has already been implemented I'd rather not reinvent the wheel. So is there already a module providing such a container? If not, I'm going to implement it myself.
Here is a quick implementation:
class MultipleKeyDict(dict):
__slots__ = ["_primary_keys"]
def __init__(self, arg=None, **kwargs):
self._primary_keys = {}
self.update(arg, **kwargs)
def __setitem__(self, key, value):
super(MultipleKeyDict, self).__setitem__(key, value)
self._primary_keys.setdefault(value, key)
def __delitem__(self, key):
value = self[key]
super(MultipleKeyDict, self).__delitem__(key)
if self._primary_keys[value] == key:
del self._primary_keys[value]
for k, v in super(MultipleKeyDict, self).iteritems():
if v == value:
self._primary_keys[value] = k
break
def __iter__(self):
return self.iterkeys()
def update(self, arg=None, **kwargs):
if arg is not None:
if isinstance(arg, collections.Mapping):
for k in arg:
self[k] = arg[k]
else:
for k, v in arg:
self[k] = v
for k in kwargs:
self[k] = kwargs[k]
def clear(self):
super(MultipleKeyDict, self).clear()
self._primary_keys.clear()
def iteritems(self):
for v, k in self._primary_keys.iteritems():
yield k, v
def items(self):
return list(self.iteritems())
def itervalues(self):
return self._primary_keys.iterkeys()
def values(self):
return self._primary_keys.keys()
def iterkeys(self):
return self._primary_keys.itervalues()
def keys(self):
return self._primary_keys.values()
The only messy bit is that it has to search the whole dict in case a primary key gets deleted.
I omitted copy(), pop(), popitem() and setdefault(). If you need them, you'll have to implement them yourself.
The simplest and easiest solution would be to use two dictionaries, one of which maps secondary keys to a primary key. If for some reason you need a reverse mapping, that could be included in the primary dictionary.
sec = {'one': 'blue', 'two': 'red', 'three': 'blue', # seconary keys
'blue': 'blue', 'red': 'red'} # include identity mapping for primaries
dict = {'blue': ('doll', '$9.43', ('one', 'three')),
'red': ('truck', '$14.99', ('two',)) }
record = dict[sec['two']]
print('Toy=', record[0], 'Price=', record[1])
There is now a multiple key dictionary python package.
https://pypi.python.org/pypi/multi_key_dict/1.0.2
From the link:
from multi_key_dict import multi_key_dict
k = multi_key_dict()
k[1000, 'kilo', 'k'] = 'kilo (x1000)'
print k[1000] # will print 'kilo (x1000)'
print k['k'] # will also print 'kilo (x1000)'
# the same way objects can be updated, deleted:
# and if an object is updated using one key, the new value will
# be accessible using any other key, e.g. for example above:
k['kilo'] = 'kilo'
print k[1000] # will now print 'kilo' as value was updated

Categories