Getting duplicates from nested dictionary - python

I'm fairly new to python and have the following problem. I have a nested dictionary in the form of
dict = {'a': {'1','2'}, 'b':{'5','1'}, 'c':{'3','2'}}
and would like to find all the keys that have the same values. The output should look similar to this.
1 : [a,b]
2 : [a,c]
..
Many thanks in Advance for any help!

# Invert the mapping: for every element, collect the keys whose set contains it.
data = {'a': {'1', '2'}, 'b': {'5', '1'}, 'c': {'3', '2'}}
output = {}
for key, values in data.items():
    for v in values:
        # setdefault creates the list the first time v is seen, then we append.
        output.setdefault(v, []).append(key)
print(output)
And the output will be
{'2': ['a', 'c'], '1': ['a', 'b'], '5': ['b'], '3': ['c']}

before we go to the solution, lemme tell you something. What you've got there is not a nested dictionary but rather sets within the dictionary.
Some python terminologies to clear that up:
List: [ 1 , 2 ]
Lists (what many other languages call arrays) are enclosed in square brackets & separated by commas.
Dictionary: { "a":1 , "b":2 }
Dictionaries are enclosed in curly braces & separate "key":value pairs with comma. Here, "a" & "b" are keys & 1 & 2 would be their respective values.
Set: { 1 , 2 }
Sets are enclosed in curly braces & separated by commas.
dict = {'a': {'1','2'}, 'b':{'5','1'}, 'c':{'3','2'}}
Here, {'1', '2'} is a set in a dictionary with key 'a'. Thus, what you've got is actually set in a dictionary & not a nested dictionary.
Solution
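Moving on to the solution, sets are iterable, so you can loop over their elements one by one just like a list; they are simply unordered, so don't rely on the order in which the elements come out. Converting a set to a list before iterating is optional.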
# Initialize the dictionary to be processed
data = {'a': {'1', '2'}, 'b': {'5', '1'}, 'c': {'3', '2'}}
# Create dictionary to store solution:
# each element becomes a key & maps to the list of keys whose set contains it.
# Eg., sol = { "1" : [ "a" , "b" ] }
# This shows that the value 1 is present in the sets contained in keys a & b.
sol = {}
# Record all elements & list every set containing those elements
for key, elems in data.items():
    for elem in elems:  # a set can be iterated directly
        if elem in sol:  # elem already recorded: add this key to its list
            sol[elem].append(key)
        else:  # first time elem is seen: start a new list
            sol[elem] = [key]
# At this time, sol would be (key order may vary, since sets are unordered)
# {
#     "1" : [ "a" , "b" ] ,
#     "2" : [ "a" , "c" ] ,
#     "3" : [ "c" ] ,
#     "5" : [ "b" ]
# }
# Since you want only the elements present in more than one set, keep just those.
# A new dict is built because deleting entries while iterating over a dict
# raises "RuntimeError: dictionary changed size during iteration".
sol = {elem: keys for elem, keys in sol.items() if len(keys) >= 2}
# Now, you have your required output in sol
print(sol)
# Prints (key order may vary):
# {
#     "1" : [ "a" , "b" ] ,
#     "2" : [ "a" , "c" ]
# }
I hope that helps you...

You can use a defaultdict to build the output easily (and sort it if you want the keys in sorted order):
from collections import defaultdict
d = {'a': {'1', '2'}, 'b': {'5', '1'}, 'c': {'3', '2'}}
# Group keys by element: out[element] is the list of keys whose set contains it.
out = defaultdict(list)
for key, values in d.items():
    for value in values:
        out[value].append(key)
# for a sorted output (dicts are ordered since Python 3.7):
sorted_out = {k: out[k] for k in sorted(out)}
print(sorted_out)
#{'1': ['a', 'b'], '2': ['a', 'c'], '3': ['c'], '5': ['b']}

you can reverse the key-value in dict, create a value-key dict, if you only want duplicated values(find all the keys that have the same values), you can filter it:
from collections import defaultdict
def get_duplicates(dict1):
    """Return the values of ``dict1`` that were seen more than once.

    ``dict1`` maps each key to an iterable of hashable values. The result maps
    every value seen more than once to the list of keys it was seen under,
    in the iteration order of ``dict1``.
    """
    dict2 = defaultdict(list)
    for k, v in dict1.items():
        for c in v:
            dict2[c].append(k)
    # To get every value (not only the duplicated ones), return dict(dict2).
    return {value: keys for value, keys in dict2.items() if len(keys) > 1}
output:
{'1': ['a', 'b'], '2': ['a', 'c']}

This can be easily done using defaultdict from collections,
>>> d = {'a': {'1','2'}, 'b':{'5','1'}, 'c':{'3','2'}}
>>> from collections import defaultdict
>>> dd = defaultdict(list)
>>> for key,vals in d.items():
... for val in vals:
... dd[val].append(key)
...
>>> dict(dd)
{'1': ['a', 'b'], '3': ['c'], '2': ['a', 'c'], '5': ['b']}

This can be easily achieved with two inner for loops:
data = {'a': {'1', '2'}, 'b': {'5', '1'}, 'c': {'3', '2'}}
out = {}
for key in data:  # outer loop: every key of the dictionary
    for value in data[key]:  # inner loop: every element of that key's set
        if value not in out:
            out[value] = [key]
        else:
            out[value].append(key)
print(out)  # {'1': ['a', 'b'], '3': ['c'], '2': ['a', 'c'], '5': ['b']}

Related

How to efficiently remove elements from dicts that have certain value patterns?

For example, in dict1 the keys 1, 2 and '4' all have the same value 'a', but the keys '3' and '5' have different values, 'b' and 'd'. What I want is:
If N keys have the same value and N >=3, then I want to remove all other elements from the dict and only keep those N key values, which means 'b' & 'd' have to be removed from the dict.
The following code works, but it seems very verbose. Is there a better way to do this?
from collections import defaultdict
dict1 = {1:'a', 2:'a', '3':'b', '4': 'a', '5':'d'}
l1 = [1, 2, 3, 4, 5]
# Group the keys of dict1 by their value.
dict2 = defaultdict(list)
for k, v in dict1.items():
    dict2[v].append(k)
to_be_removed = []
is_to_be_removed = False
for k, values in dict2.items():
    majority = len(values)
    if majority>=3:
        # At least one value is shared by 3 or more keys.
        is_to_be_removed = True
    else:
        # Keys whose value is shared by fewer than 3 keys are removal candidates.
        to_be_removed.extend(values)
# Only prune when some value actually reached the threshold.
if is_to_be_removed:
    for d in to_be_removed:
        del dict1[d]
print(f'New dict: {dict1}')
You can use collections.Counter to get the frequency of every value, then use a dictionary comprehension to retain only the keys that have the desired corresponding value:
from collections import Counter
dict1 = {1: 'a', 2: 'a', '3': 'b', '4': 'a', '5': 'd'}
# Tally how often each value occurs across the dictionary.
ctr = Counter(dict1.values())
# Retain only the entries whose value occurs at least three times.
result = {k: v for k, v in dict1.items() if ctr[v] >= 3}
print(result)
This outputs:
{1: 'a', 2: 'a', '4': 'a'}

Recursive function for converting a nested list to a nested dictionary

I have a list of lists and I want to get a dictionary of dictionaries:
import json
# NOTE: the names `list` and `dict` shadow the built-ins for the rest of the script.
list = [
    ['1', '2', '3'],
    ['a', 'b'],
    ['I', 'II'],
    ['A', 'B', 'C'],
    ['A', 'B', 'D']
]
dict = {}
for val in list:
    count = len(val)
    # First level: make sure the top-level key exists.
    if val[0] not in dict:
        dict[val[0]] = {}
    if count == 3:
        # Three elements: second level is a dict, third element is the leaf.
        if val[1] not in dict[val[0]]:
            dict[val[0]][val[1]] = {}
        if val[2] not in dict[val[0]][val[1]]:
            dict[val[0]][val[1]][val[2]] = ''
    else:
        # Otherwise the second element is the leaf.
        if val[1] not in dict[val[0]]:
            dict[val[0]][val[1]] = ''
print (json.dumps(dict, sort_keys=True, indent=4))
output:
{
"1": {
"2": {
"3": ""
}
},
"A": {
"B": {
"C": "",
"D": ""
}
},
"I": {
"II": ""
},
"a": {
"b": ""
}
}
So it works with 2 or 3 elements in lists, but if I have more (random) elements of lists, I have to have kind of recursive function, that I can't think of.
There is no real need for a recursive function here (unless it's a requirement). You also don't need to care for the size or amount of lists. Simply iterate through each list while keeping an updated reference for the inner dicts as you go.
You can also use setdefault to avoid the checks if a key exists already.
d = {}
for sub in l:  # presumably l is the list of lists to convert
    inner = d  # start every path at the root dict
    for elem in sub[:-1]:
        # Descend one level, creating the inner dict the first time it is needed.
        inner = inner.setdefault(elem, {})
    inner[sub[-1]] = ""  # the last element of each list is the leaf
If for some reason you really want this as a recursive function, then the following is an equivalent version. It starts off with a base dict, and with each call creates an inner dict and the next call goes down one level of the dict and passes the rest of the list. The base-case is when the list has one element so the string is used instead of a dict. Again, for simplicity, setdefault is used:
def create_dict(l, d):
    """Insert the path described by the non-empty list ``l`` into dict ``d`` in place.

    Every element but the last becomes a nested dict key; the last element is
    stored with the empty string as its value.
    """
    if len(l) == 1:
        # Base case: a single element is the leaf.
        d[l[0]] = ""
    else:
        # Go down one level (creating it if missing) and handle the rest of the list.
        d = d.setdefault(l[0], {})
        create_dict(l[1:], d)
d = {}
for sub in l:  # presumably l is the list of lists to convert
    create_dict(sub, d)
Try to avoid using built-in names for variables. Both list and dict are built-in types; once you rebind those names, the built-in constructors are no longer available in your program.
I did this rudimentary logic recursion function. Which I tested for your sample input to work well.
def subdict(dic, val):
    """Insert the path described by the non-empty list ``val`` into ``dic`` in place.

    The last element of ``val`` becomes a key with value ``""``; every earlier
    element becomes a nested dict key.
    """
    if len(val) == 1:
        # End of the path: store the leaf.
        dic[val[0]] = ""
    else:
        # Create this level if it is missing, then recurse one level deeper.
        if val[0] not in dic:
            dic[val[0]] = {}
        subdict(dic[val[0]], val[1:])
full execution:
lists = [
    ['1', '2', '3'],
    ['a', 'b'],
    ['I', 'II'],
    ['A', 'B', 'C'],
    ['A', 'B', 'D']
]
rootdict = {}


def subdict(dic, val):
    """Insert the path described by the non-empty list ``val`` into ``dic`` in place.

    The last element of ``val`` becomes a key with value ``""``; every earlier
    element becomes a nested dict key.
    """
    if len(val) == 1:
        # End of the path: store the leaf.
        dic[val[0]] = ""
    else:
        # Create this level if it is missing, then recurse one level deeper.
        if val[0] not in dic:
            dic[val[0]] = {}
        subdict(dic[val[0]], val[1:])


# Feed every list into the same root dict so shared prefixes are merged.
for li in lists:
    subdict(rootdict, li)
print(rootdict)
Output:
{'1': {'2': {'3': ''}}, 'a': {'b': ''}, 'I': {'II': ''}, 'A': {'B': {'C': '', 'D': ''}}}
Explanation:
subdict function :
Checks if we have reached the end, then it happily terminates this leaf of recursion by adding the final entry of key with value ''
if we aren't at the end leaf, it checks if the key[0] is not present in this level of the dict and if so it adds that key. Then finally recursively proceeds for the next iteration, further deep down.
I know this is boring logic, but it works :)
As Tomerikoo said, you don't have to use a recursion, but if you want to solve it using recursion, you should define a recursion function, with your base case - last element in a list.
Then iterate your input (list of lists), and pass the current working list to the recursive function.
UPDATE: thanks to a bug Tomerikoo found, I had to fix my answer. When you use .update() method of a dict, it doesn't do "deepcopy" of the values. So you have to implement it yourself, with another recursion :-)
You need to implement a merge_dicts function and call it before updating the result. The function below takes two dicts and merges them, using deepcopy so that the first dict is not modified. I'll try to simplify the steps:
Iterate the second dict items, if the value is a dict then -
Check if the key already exists in the result, if not create an empty dict, and call the merge function again
If the value is not a dict - simply add it to the result dict.
import json
from copy import deepcopy
final_dict = {}
my_list = [
['1', '2', '3'],
['a', 'b'],
['I', 'II'],
['A', 'B', 'C'],
['A', 'B', 'D']
]
def nested_dict(a_list):
    """Turn a non-empty list into a chain of nested dicts.

    ``['a', 'b', 'c']`` becomes ``{'a': {'b': {'c': ''}}}``. The base case
    (a single element) also prints the remaining list.
    """
    if len(a_list) == 1:
        print("base case: {}".format(a_list))
        return {a_list[0]: ""}
    # Wrap the structure built from the tail under the first element.
    return {a_list[0]: nested_dict(a_list[1:])}
def merge_dicts(d1, d2):
    """Return a new dict holding ``d1`` recursively merged with ``d2``.

    ``d1`` is deep-copied, so neither argument is modified. Where both sides
    hold a dict under the same key the two are merged recursively; otherwise
    the value from ``d2`` wins.
    """
    res = deepcopy(d1)
    for k, v in d2.items():
        if isinstance(v, dict):
            existing = res.get(k)
            # An existing non-dict value (e.g. the '' leaf of a shorter path)
            # cannot be merged into, so start from an empty dict instead.
            res[k] = merge_dicts(existing if isinstance(existing, dict) else {}, v)
        else:
            res[k] = v
    return res
# Build one nested dict per sub-list and fold it into the accumulated result.
for sub in my_list:
    my_dict = nested_dict(sub)
    final_dict = merge_dicts(final_dict, my_dict)
print("final dict: {}".format((json.dumps(final_dict, sort_keys=True, indent=4))))

How to get a list which is a value of a dictionary by a value from the list?

I have the following dictionary :
d = {'1' : [1, 2, 3, 4], '2' : [10, 20, 30, 40]}
How do I get the corresponding key I'm searching by a value from one of the lists?
Let's say I want key '1' if I'm looking for value 3 or key '2' if I'm looking for value 10.
You can reverse the dictionary into this structure to do that kind of lookup:
reverse_d = {
1: '1',
2: '1',
3: '1',
4: '1',
10: '2',
…
}
which can be built by looping over each value of each key:
reverse_d = {}
for key, values in d.items():
    for value in values:
        # A value that appears under several keys keeps the last key seen.
        reverse_d[value] = key
or more concisely as a dict comprehension:
reverse_d = {value: key for key, values in d.items() for value in values}
Lookups are straightforward now!
k = reverse_d[30]
# k = '2'
This only offers better performance than searching through the whole original dictionary if you do multiple lookups, though.
You can use a generator expression with a filtering condition, like this
def get_key(d, search_value):
    """Return the first key of ``d`` whose value collection contains ``search_value``.

    Returns ``None`` when no value collection contains it.
    """
    # The None default keeps next() from raising StopIteration on a miss.
    return next((key for key, values in d.items() if search_value in values), None)
>>> get_key(d, 10)
'2'
>>> get_key(d, 2)
'1'
If none of the keys contain the value being searched for, None will be returned.
>>> get_key(d, 22)
None
This is my first time to answer question. How about this method?
def get_key(d, search_value):
    """Return every key of ``d`` whose value collection contains ``search_value``.

    Keys come back in the iteration order of ``d``; the list is empty when
    nothing matches.
    """
    return [key for key, values in d.items() if search_value in values]
>>> D = {'a':[2,2,3,4,5],'b':[5,6,7,8,9]}
>>> getkey.get_key(D,2)
['a']
>>> getkey.get_key(D,9)
['b']
>>> getkey.get_key(D,5)
['a', 'b']

Duplicate values in a Python dictionary

I have a dictionary in the following format:
{ 'a' : [1], 'b' : [1,2,3], 'c' : [1,1,2], 'd' : [2,3,4] }
and I want to create a list of the keys which have a '1' in their values.
So my result list should look like:
['a','b','c','c']
I cannot understand how to work with duplicate values.
Any ideas how can I get such a list?
You can use list comprehensions
>>> d = { 'a' : [1], 'b' : [1,2,3], 'c' : [1,1,2], 'd' : [2,3,4] }
>>> [key for key, values in d.items() for element in values if element==1]
['c', 'c', 'b', 'a']
Here we have two nested for loops in our list comprehension. The first iterate over each key, values pairs in the dictionary and the second loop iterate over each element in the "value" list and return the key each time that element equal to 1. The result list is unordered because dict are unordered which means there are no guarantees about the order of the items.
Here is one way:
>>> x = { 'a' : [1], 'b' : [1,2,3], 'c' : [1,1,2], 'd' : [2,3,4] }
>>> list(itertools.chain.from_iterable([k]*v.count(1) for k, v in x.iteritems() if 1 in v))
['a', 'c', 'c', 'b']
If using Python 3, use items instead of iteritems.
This uses two loops, k,v in d.items() which gets each (key,value) pair from the dictionary, and n in v which loops through each value in v:
d = { 'a' : [1], 'b' : [1,2,3], 'c' : [1,1,2], 'd' : [2,3,4] }
l = []
for k,v in d.items():
    for n in v:
        # Record the key once for every 1 found in its list.
        if n == 1:
            l.append(k)
l.sort()
If you want a one-liner:
l = sorted(k for k,v in d.items() for n in v if n == 1)
The sort must be made on the dictionary to get the expected result. This should work:
# Visit the keys in sorted order and repeat each key once per 1 in its list.
result = []
for i in sorted(d):
    result += [i for x in d[i] if x == 1]
print(result)
will output:
['a', 'b', 'c', 'c']
easy way: (Python 3)
d = { 'a' : [1], 'b' : [1,2,3], 'c' : [1,1,2], 'd' : [2,3,4] }
n = 1
result = []
for key, value in d.items():
    # list.count(n) returns an int, so repeat the key that many times.
    result.extend([key] * value.count(n))
If you want the list sorted, then:
result.sort()

Sorting alphanumeric keys of a dictionary in python

I have a python dictionary whose keys have the following pattern
<some x number of digits/alphabets> <some y number of alphabets><some z number of digits>
I want to sort the dictionary based on this keys.
For e.g
01IB0610, 01IB062, 01IB064
should be 01IB062, 01IB064 01IB0610
Complete example is something like this:
{ '01IB0610' : {'a' : [] , 'b': [] }, '01IB062' : {'a' : [] , 'b': [] } , '01IB064' : {'a' : [] , 'b': [] }
Final Output should be:{ '01IB062' : {'a' : [] , 'b': [] }, '01IB064' : {'a' : [] , 'b': [] } , '01IB0610' : {'a' : [] , 'b': [] }
import re
def key_func(s):
    """Split ``s`` into digit and non-digit runs for use as a natural-sort key.

    Digit runs are converted to ``int`` so they compare numerically
    (``'062'`` -> 62 sorts before ``'0610'`` -> 610); other runs stay strings.
    """
    return [int(x) if x.isdigit() else x for x in re.findall(r'\D+|\d+', s)]
sorted_keys = sorted(d, key=key_func)
Example:
>>> d = {'01IB0610': 'foo', '01IB062': 'bar', '01IB0604': 'baz'}
>>> sorted(d, key=key_func)
['01IB062', '01IB0604', '01IB0610']
I'm not sure I totally get the sorting criteria, but you can use an OrderedDict to have a dict that keeps particular order.
from collections import OrderedDict
import re
d = {'01IB0610': 1, '01IB062': 2, '01IB064': 3}
def criteria(x):
    """Sort key for a ``(key, value)`` item: key length first, then its digits.

    All digit characters of the key are concatenated and read as one ``int``.
    """
    number = int(re.sub('[^0-9]', '', x[0]))
    length = len(x[0])
    return length, number
d = OrderedDict( sorted( d.items(), key = criteria ) )
d.keys()
>> ['01IB062', '01IB064', '01IB0610']
This creates an OrderedDict where the order of the elements in the original dict is based on a hierarchical sort of the keys of those elements. The first criteria is the length of the key, ie 01IB0610 comes after 01IB064 because its longer. The second criteria is based on the digits in the key, ie 01062 is before 01064.

Categories