How to count the number of occurrences of a nested dictionary key? - python

I have a nested dictionary:
d = {'key': 1, 'lock':{'key': 3, 'lock': {'key': 7, 'lock': None}}}
and I am hoping to simply get the number of times 'key' occurs. So in this example, the output would be:
3
because 'key' appears 3 times. I know this is pretty straight forward but I'm a bit foggy on how to do this with dictionaries. I would prefer to not use any libraries if possible.
Thanks for the help!

You can use a function that recursively counts the given key:
def count(d, k):
c = int(k in d)
for v in d.values():
if isinstance(v, dict):
c += count(v, k)
return c
or to write the above in a more concise way:
def count(d, k):
return (k in d) + sum(count(v, k) for v in d.values() if isinstance(v, dict))
so that count(d, 'key') returns: 3

Related

Convert a nested dictionary into list of tuples

I have a dictionary -
d={'revenues':
{
'201907':
{'aaa.csv':'fdwe34x2'},
'201906':{'ddd.csv':'e4c5q'}
},
'complaints':
{'2014':
{'sfdwa.csv','c2c2jh'}
}
}
I want to convert it into list of tuples -
[
('revenues','201907','aaa.csv','fdwe34x2'),
('revenues','201906','ddd.csv','e4c5q'),
('complaints','2014','sfdwa.csv','c2c2jh')
]
I tried using list comprehensions, but did not help -
l = [(k,[(p,q) for p,q in v.items()]) for k,v in d.items()]
print(l)
[('revenues', [('201907', {'aaa.csv': 'fdwe34x2'}), ('201906', {'ddd.csv': 'e4c5q'})]),
('complaints', [('2014', {'c2c2jh', 'sfdwa.csv'})])]
Any suggestions?
If you're not sure how many levels this list may have, it seems that what you need is recursion:
def unnest(d, keys=[]):
result = []
for k, v in d.items():
if isinstance(v, dict):
result.extend(unnest(v, keys + [k]))
else:
result.append(tuple(keys + [k, v]))
return result
Just a friendly reminder: before Python 3.6, dict order is not maintained.
[('complaints', '2014', 'sfdwa.csv', 'c2c2jh'),
('revenues', '201906', 'ddd.csv', 'e4c5q'),
('revenues', '201907', 'aaa.csv', 'fdwe34x2')]
You can loop through the levels of your dictionary:
[(x, y, z) for x in d for y in d[x] for z in d[x][y]]
You can do it using a list comprehension, but it would be quite complex, and not easy to maintain if the structure changes.
Unless you especially need good performance, I would suggest using a generic recursive function:
def unnest(d, keys=[]):
result = []
if isinstance(d, dict):
for k, v in d.items():
result.extend(unnest(v, keys + [k]))
elif isinstance(d, list):
result.append(tuple(keys + d))
elif isinstance(d, set) or isinstance(d, tuple):
result.append(tuple(keys + list(d)))
else:
result.append(tuple(keys + [d]))
return result
As a bonus, I've also supported lists and tuples during the recursion, in addition to the set on the provided example.

Pythonic way to handle function overloading

I'm trying to write a function to invert a dictionary but I'm having troubles finding the proper way to do it without rewriting code, using different methods and avoiding if/else at each iteration. What's the most pythonic way to do it?
def invert_dict(dic, type=None):
if type == 'list':
return _invert_dict_list(dic)
return _invert_dict(dic)
# if there's only one value per key
def _invert_dict(dic):
inverted = defaultdict()
for k,v in dic.items():
for item in v:
inverted[item]=k
return dict(inverted)
# if there are multiple values for the same key
def _invert_dict_list(dic):
inverted = defaultdict(list)
for k,v in dic.items():
for item in v:
inverted[item].append(k)
return dict(inverted)
I won't comment on the actual impementation, but for the type based branching there is functools.singledispatch:
import functools
#functools.singledispatch
def inv_item(value, key, dest):
< fallback implementation >
# special case based on type
#inv_item.register(list)
#inv_item.register(tuple)
def inv_sequence(value, key, dest):
< handle sequence values >
...
def invert_dict(In):
Out = {}
for k, v in In.items():
inv_item(v, k, Out)
return Out
You can use itertools.groupby and dictionary comprehension:
import itertools
d1 = {'val1':[4, 5, 2, 4], 13:'v2', 'val2':'v2', 'age':17}
new_d = [(a, list(b)) for a, b in itertools.groupby(sorted([(b, a) for a, b in d1.items()], key=lambda x:x[0]), key=lambda x:x[0])]
final_d = {tuple(a) if isinstance(a, list) else a:[i[-1] for i in b][0] if len([i[-1] for i in b]) == 1 else [i[-1] for i in b] for a, b in new_d}
Output:
{(4, 5, 2, 4): 'val1', 17: 'age', 'v2': ['val2', 13]}

What do these terms mean in regards to a "reverse" dictionary? [duplicate]

Given a dictionary like so:
my_map = {'a': 1, 'b': 2}
How can one invert this map to get:
inv_map = {1: 'a', 2: 'b'}
Python 3+:
inv_map = {v: k for k, v in my_map.items()}
Python 2:
inv_map = {v: k for k, v in my_map.iteritems()}
Assuming that the values in the dict are unique:
Python 3:
dict((v, k) for k, v in my_map.items())
Python 2:
dict((v, k) for k, v in my_map.iteritems())
If the values in my_map aren't unique:
Python 3:
inv_map = {}
for k, v in my_map.items():
inv_map[v] = inv_map.get(v, []) + [k]
Python 2:
inv_map = {}
for k, v in my_map.iteritems():
inv_map[v] = inv_map.get(v, []) + [k]
To do this while preserving the type of your mapping (assuming that it is a dict or a dict subclass):
def inverse_mapping(f):
return f.__class__(map(reversed, f.items()))
Try this:
inv_map = dict(zip(my_map.values(), my_map.keys()))
(Note that the Python docs on dictionary views explicitly guarantee that .keys() and .values() have their elements in the same order, which allows the approach above to work.)
Alternatively:
inv_map = dict((my_map[k], k) for k in my_map)
or using python 3.0's dict comprehensions
inv_map = {my_map[k] : k for k in my_map}
Another, more functional, way:
my_map = { 'a': 1, 'b':2 }
dict(map(reversed, my_map.items()))
We can also reverse a dictionary with duplicate keys using defaultdict:
from collections import Counter, defaultdict
def invert_dict(d):
d_inv = defaultdict(list)
for k, v in d.items():
d_inv[v].append(k)
return d_inv
text = 'aaa bbb ccc ddd aaa bbb ccc aaa'
c = Counter(text.split()) # Counter({'aaa': 3, 'bbb': 2, 'ccc': 2, 'ddd': 1})
dict(invert_dict(c)) # {1: ['ddd'], 2: ['bbb', 'ccc'], 3: ['aaa']}
See here:
This technique is simpler and faster than an equivalent technique using dict.setdefault().
This expands upon the answer by Robert, applying to when the values in the dict aren't unique.
class ReversibleDict(dict):
# Ref: https://stackoverflow.com/a/13057382/
def reversed(self):
"""
Return a reversed dict, with common values in the original dict
grouped into a list in the returned dict.
Example:
>>> d = ReversibleDict({'a': 3, 'c': 2, 'b': 2, 'e': 3, 'd': 1, 'f': 2})
>>> d.reversed()
{1: ['d'], 2: ['c', 'b', 'f'], 3: ['a', 'e']}
"""
revdict = {}
for k, v in self.items():
revdict.setdefault(v, []).append(k)
return revdict
The implementation is limited in that you cannot use reversed twice and get the original back. It is not symmetric as such. It is tested with Python 2.6. Here is a use case of how I am using to print the resultant dict.
If you'd rather use a set than a list, and there could exist unordered applications for which this makes sense, instead of setdefault(v, []).append(k), use setdefault(v, set()).add(k).
Combination of list and dictionary comprehension. Can handle duplicate keys
{v:[i for i in d.keys() if d[i] == v ] for k,v in d.items()}
A case where the dictionary values is a set. Like:
some_dict = {"1":{"a","b","c"},
"2":{"d","e","f"},
"3":{"g","h","i"}}
The inverse would like:
some_dict = {vi: k for k, v in some_dict.items() for vi in v}
The output is like this:
{'c': '1',
'b': '1',
'a': '1',
'f': '2',
'd': '2',
'e': '2',
'g': '3',
'h': '3',
'i': '3'}
For instance, you have the following dictionary:
my_dict = {'a': 'fire', 'b': 'ice', 'c': 'fire', 'd': 'water'}
And you wanna get it in such an inverted form:
inverted_dict = {'fire': ['a', 'c'], 'ice': ['b'], 'water': ['d']}
First Solution. For inverting key-value pairs in your dictionary use a for-loop approach:
# Use this code to invert dictionaries that have non-unique values
inverted_dict = dict()
for key, value in my_dict.items():
inverted_dict.setdefault(value, list()).append(key)
Second Solution. Use a dictionary comprehension approach for inversion:
# Use this code to invert dictionaries that have unique values
inverted_dict = {value: key for key, value in my_dict.items()}
Third Solution. Use reverting the inversion approach (relies on the second solution):
# Use this code to invert dictionaries that have lists of values
my_dict = {value: key for key in inverted_dict for value in my_map[key]}
Lot of answers but didn't find anything clean in case we are talking about a dictionary with non-unique values.
A solution would be:
from collections import defaultdict
inv_map = defaultdict(list)
for k, v in my_map.items():
inv_map[v].append(k)
Example:
If initial dict my_map = {'c': 1, 'd': 5, 'a': 5, 'b': 10}
then, running the code above will give:
{5: ['a', 'd'], 1: ['c'], 10: ['b']}
I found that this version is more than 10% faster than the accepted version of a dictionary with 10000 keys.
d = {i: str(i) for i in range(10000)}
new_d = dict(zip(d.values(), d.keys()))
In addition to the other functions suggested above, if you like lambdas:
invert = lambda mydict: {v:k for k, v in mydict.items()}
Or, you could do it this way too:
invert = lambda mydict: dict( zip(mydict.values(), mydict.keys()) )
I think the best way to do this is to define a class. Here is an implementation of a "symmetric dictionary":
class SymDict:
def __init__(self):
self.aToB = {}
self.bToA = {}
def assocAB(self, a, b):
# Stores and returns a tuple (a,b) of overwritten bindings
currB = None
if a in self.aToB: currB = self.bToA[a]
currA = None
if b in self.bToA: currA = self.aToB[b]
self.aToB[a] = b
self.bToA[b] = a
return (currA, currB)
def lookupA(self, a):
if a in self.aToB:
return self.aToB[a]
return None
def lookupB(self, b):
if b in self.bToA:
return self.bToA[b]
return None
Deletion and iteration methods are easy enough to implement if they're needed.
This implementation is way more efficient than inverting an entire dictionary (which seems to be the most popular solution on this page). Not to mention, you can add or remove values from your SymDict as much as you want, and your inverse-dictionary will always stay valid -- this isn't true if you simply reverse the entire dictionary once.
If the values aren't unique, and you're a little hardcore:
inv_map = dict(
(v, [k for (k, xx) in filter(lambda (key, value): value == v, my_map.items())])
for v in set(my_map.values())
)
Especially for a large dict, note that this solution is far less efficient than the answer Python reverse / invert a mapping because it loops over items() multiple times.
This handles non-unique values and retains much of the look of the unique case.
inv_map = {v:[k for k in my_map if my_map[k] == v] for v in my_map.itervalues()}
For Python 3.x, replace itervalues with values.
I am aware that this question already has many good answers, but I wanted to share this very neat solution that also takes care of duplicate values:
def dict_reverser(d):
seen = set()
return {v: k for k, v in d.items() if v not in seen or seen.add(v)}
This relies on the fact that set.add always returns None in Python.
Here is another way to do it.
my_map = {'a': 1, 'b': 2}
inv_map= {}
for key in my_map.keys() :
val = my_map[key]
inv_map[val] = key
dict([(value, key) for key, value in d.items()])
Function is symmetric for values of type list; Tuples are coverted to lists when performing reverse_dict(reverse_dict(dictionary))
def reverse_dict(dictionary):
reverse_dict = {}
for key, value in dictionary.iteritems():
if not isinstance(value, (list, tuple)):
value = [value]
for val in value:
reverse_dict[val] = reverse_dict.get(val, [])
reverse_dict[val].append(key)
for key, value in reverse_dict.iteritems():
if len(value) == 1:
reverse_dict[key] = value[0]
return reverse_dict
Since dictionaries require one unique key within the dictionary unlike values, we have to append the reversed values into a list of sort to be included within the new specific keys.
def r_maping(dictionary):
List_z=[]
Map= {}
for z, x in dictionary.iteritems(): #iterate through the keys and values
Map.setdefault(x,List_z).append(z) #Setdefault is the same as dict[key]=default."The method returns the key value available in the dictionary and if given key is not available then it will return provided default value. Afterward, we will append into the default list our new values for the specific key.
return Map
Fast functional solution for non-bijective maps (values not unique):
from itertools import imap, groupby
def fst(s):
return s[0]
def snd(s):
return s[1]
def inverseDict(d):
"""
input d: a -> b
output : b -> set(a)
"""
return {
v : set(imap(fst, kv_iter))
for (v, kv_iter) in groupby(
sorted(d.iteritems(),
key=snd),
key=snd
)
}
In theory this should be faster than adding to the set (or appending to the list) one by one like in the imperative solution.
Unfortunately the values have to be sortable, the sorting is required by groupby.
Try this for python 2.7/3.x
inv_map={};
for i in my_map:
inv_map[my_map[i]]=i
print inv_map
def invertDictionary(d):
myDict = {}
for i in d:
value = d.get(i)
myDict.setdefault(value,[]).append(i)
return myDict
print invertDictionary({'a':1, 'b':2, 'c':3 , 'd' : 1})
This will provide output as : {1: ['a', 'd'], 2: ['b'], 3: ['c']}
A lambda solution for current python 3.x versions:
d1 = dict(alice='apples', bob='bananas')
d2 = dict(map(lambda key: (d1[key], key), d1.keys()))
print(d2)
Result:
{'apples': 'alice', 'bananas': 'bob'}
This solution does not check for duplicates.
Some remarks:
The lambda construct can access d1 from the outer scope, so we only
pass in the current key. It returns a tuple.
The dict() constructor accepts a list of tuples. It
also accepts the result of a map, so we can skip the conversion to a
list.
This solution has no explicit for loop. It also avoids using a list comprehension for those who are bad at math ;-)
Taking up the highly voted answer starting If the values in my_map aren't unique:, I had a problem where not only the values were not unique, but in addition, they were a list, with each item in the list consisting again of a list of three elements: a string value, a number, and another number.
Example:
mymap['key1'] gives you:
[('xyz', 1, 2),
('abc', 5, 4)]
I wanted to switch only the string value with the key, keeping the two number elements at the same place. You simply need another nested for loop then:
inv_map = {}
for k, v in my_map.items():
for x in v:
# with x[1:3] same as x[1], x[2]:
inv_map[x[0]] = inv_map.get(x[0], []) + [k, x[1:3]]
Example:
inv_map['abc'] now gives you:
[('key1', 1, 2),
('key1', 5, 4)]
This works even if you have non-unique values in the original dictionary.
def dict_invert(d):
'''
d: dict
Returns an inverted dictionary
'''
# Your code here
inv_d = {}
for k, v in d.items():
if v not in inv_d.keys():
inv_d[v] = [k]
else:
inv_d[v].append(k)
inv_d[v].sort()
print(f"{inv_d[v]} are the values")
return inv_d
I would do it that way in python 2.
inv_map = {my_map[x] : x for x in my_map}
Not something completely different, just a bit rewritten recipe from Cookbook. It's futhermore optimized by retaining setdefault method, instead of each time getting it through the instance:
def inverse(mapping):
'''
A function to inverse mapping, collecting keys with simillar values
in list. Careful to retain original type and to be fast.
>> d = dict(a=1, b=2, c=1, d=3, e=2, f=1, g=5, h=2)
>> inverse(d)
{1: ['f', 'c', 'a'], 2: ['h', 'b', 'e'], 3: ['d'], 5: ['g']}
'''
res = {}
setdef = res.setdefault
for key, value in mapping.items():
setdef(value, []).append(key)
return res if mapping.__class__==dict else mapping.__class__(res)
Designed to be run under CPython 3.x, for 2.x replace mapping.items() with mapping.iteritems()
On my machine runs a bit faster, than other examples here

Python: get key with the least value from a dictionary BUT multiple minimum values

I'm trying to do the same as
Get the key corresponding to the minimum value within a dictionary, where we want to get the key corresponding to the minimum value in a dictionary.
The best way appears to be:
min(d, key=d.get)
BUT I want to apply this on a dictionary with multiple minimum values:
d = {'a' : 1, 'b' : 2, 'c' : 1}
Note that the answer from the above would be:
>>> min(d, key=d.get)
'a'
However, I need both the two keys that have a minimum value, namely a and c.
What would be the best approach?
(Ultimately I want to pick one of the two at random, but I don't think this is relevant).
One simple option is to first determine the minimum value, and then select all keys mapping to that minimum:
min_value = min(d.itervalues())
min_keys = [k for k in d if d[k] == min_value]
For Python 3 use d.values() instead of d.itervalues().
This needs two passes through the dictionary, but should be one of the fastest options to do this anyway.
Using reservoir sampling, you can implement a single pass approach that selects one of the items at random:
it = d.iteritems()
min_key, min_value = next(it)
num_mins = 1
for k, v in it:
if v < min_value:
num_mins = 1
min_key, min_value = k, v
elif v == min_value:
num_mins += 1
if random.randrange(num_mins) == 0:
min_key = k
After writing down this code, I think this option is of rather theoretical interest… :)
EDITED: Now using setdefault as suggested :)
I don't know if that helps you but you could build a reverse dictionary with the values as key and the keys (in a list as values).
d = {'a' : 1, 'b' : 2, 'c' : 1}
d2 = {}
for k, v in d.iteritems():
d2.setdefault(v, []).append(k)
print d2[min(d2)]
It will print this:
['a', 'c']
However, I think the other solutions are more compact and probably more elegant...
min_keys = [k for k in d if all(d[m] >= d[k] for m in d)]
or, slightly optimized
min_keys = [k for k, x in d.items() if not any(y < x for y in d.values())]
It's not as efficient as other solutions, but demonstrates the beauty of python (well, to me at least).
def get_rand_min(d):
min_val = min(d.values())
min_keys = filter(lambda k: d[k] == min_val, d)
return random.choice(min_keys)
You can use heapq.nsmallest to get the N smallest members of the dict, then filter out all that are not equal to the lowest one. That's provided you know the maximal number of smallest members you can have, let's assume it's N here. something like:
from heapq import nsmallest
from operator import itemgetter
#get the N smallest members
smallestN = nsmallest(N, myDict.iteritems(), itemgetter(1)))
#leave in only the ones with a score equal to the smallest one
smallest = [x for x in smallestN if x[1] == smallestN[0][1]]
minValue,minKey = min((v,k) for k,v in d.items())
Due to your semantics you need to go through the entire dictionary at least once. This will retrieve exactly 1 minimum element.
If you want all the minimum items in O(log(N)) query time, you can insert your elements into a priority queue as you generate them (if you can). The priority queue must have O(1) insertion time and O(log(N)) extract-min time. (This will be as bad as sorting if all your elements have the same value, but otherwise may work quite well.)
One pass solution would be:
>>> result = [100000, []]
>>> for key, val in d.items():
... if val < result[0]:
... result[1] = [key]; result[0]=val;
... elif val == result[0]:
... result[1].append(key)
...
>>> result
[1, ['a', 'c']]
Here's another way to do it in one pass:
d = {'foo': 2, 'a' : 1, 'b' : 2, 'c' : 1, 'z': 99, 'x': 1}
current_min = d[d.keys()[0]]
min_keys = []
for k, v in d.iteritems():
if v < current_min:
current_min = v
min_keys = [k]
elif v == current_min:
min_keys.append(k)
print min_keys
['a', 'x', 'c']
This works:
d = {'a' :1, 'b' : 2, 'c' : 1}
min_value = min(d.values())
result = [x[0] for x in d.items() if x[1] == k]
Hmpf. After fixing up the code to work, I ended up with #Sven Marnach's answer, so, disregard this ;)

Create a dictionary of non-contradicting items from a list of dictionaries

This question is inspired by this question. I'd like to get a dictionary from a list of dictionaries that should contain all key/value pairs from all dictionaries that are either only contained once, or where all dictionaries agree on the associated value. Example (taken from the aforementioned posting):
dicts = [dict(a=3, b=89, d=2), dict(a=3, b=89, c=99), dict(a=3, b=42, c=33)]
print dict_itersection(dicts)
should yield
{'a': 3, 'd': 2}
My current implementation looks like this:
import collections
def dict_intersection(dicts):
c=collections.defaultdict(set)
for d in dicts:
for a, b in d.iteritems():
c[a].add(b)
return {a: next(iter(b)) for a, b in c.iteritems() if len(b) == 1}
So my question: Can this be done more elegantly?
Sidequestion: can next(iter(b)) be done better without modification of the underlying dictionary (i.e. not b.pop())?
dicts = [dict(a=3, b=89, d=2), dict(a=3, b=89, c=99), dict(a=3, b=42, c=33)]
data = {}
for d in dicts:
for k, v in d.iteritems():
data.setdefault(k, set()).add(v)
out = dict((k, v.pop()) for k, v in data.iteritems() if len(v) == 1)
# out == {'a': 3, 'd': 2}
… or a one-liner:
import itertools as it
dict((k, v.pop()[1]) for k,v in ((k, set(v)) for k, v in it.groupby(sorted(it.chain(*(d.iteritems() for d in dicts))), key=lambda x: x[0])) if len(v) == 1)
Yours is pretty close to as elegant as I can think of. The only change I would make is to replaced the nested for loop with a itertools.chain()'ed iterator, like this:
import collections
def dict_intersection(dicts):
c=collections.defaultdict(set)
for k,v in itertools.chain(*[d.iteritems() for d in dicts]):
c[k].add(v)
return {a: next(iter(b)) for a, b in c.iteritems() if len(b) == 1}
Edit(1): The below code answers a slightly different question - how to get any entry which appears with the same key and value in at least two of the input dictionaries.
My answer from the comments in the other question:
dict(
[k for k,count in
collections.Counter(itertools.chain(*[d.iteritems() for d in dicts])).iteritems()
if count > 1]
)
This is nominally a "one-liner" but I've spread it over multiple lines to (hopefully) make it a bit clearer.
The way it works is (starting from the inside and working out):
Use itertools.chain() to get an iterator over the elements of all the dictionaries.
Use collections.Counter() to count how many times each key, value pair appears in the dictionaries.
Use a list comprehension to filter the Counter for those key, value pairs occurring at least twice.
Convert the list back into a dict.
All solutions so far assume that all dictionary values are hashable. Since the code won't get slower and only little more complex without this assumption, I'd drop it. Here's a version that works for all values that support !=:
def dict_intersection(dicts):
result = {}
conflicting = set()
for d in dicts:
for k, v in d.iteritems():
if k not in conflicting and result.setdefault(k, v) != v:
del result[k]
conflicting.add(k)
return result
The set conflicting will only contain dictionary keys, which will always be hashable.
To get the intersection:
dict(reduce(lambda x, y: x & y, map(set, map(lambda x: x.iteritems(), dicts))))
Of course, this drops unique values, so we need to get the complement:
dict(reduce(lambda x, y: x - y, map(set, map(lambda x: x.iteritems(), dicts))))
Combining the resulting dictionaries gives us the result set:
def dict_intersection(d):
x = dict(reduce(lambda x, y: x & y, map(set, map(lambda x: x.iteritems(), dicts))))
y = dict(reduce(lambda x, y: x - y, map(set, map(lambda x: x.iteritems(), dicts))))
return dict(x.items() + y.items())
If my set fu was stronger I could get it down to a one liner, but not today it seems.

Categories