Group items by their ancestors - python

Assume we have an array:
l = ["1", "1.1", "1.1.1", "2", "3"]
What is an efficient way to build a dict structure like this:
d = {"1": [{"1.1": [{"1.1.1": []}]}], "2": [], "3": []}

I suggest doing it in three steps.
Transform l into [['1'], ['1', '1'], ['1', '1', '1'], ['2'], ['3']] using str.split().
Write a function make_groups that groups by first element.
Write a recursive function make_tree that applies make_groups once, then applies make_tree recursively to each group.
If this is for learning purposes, I encourage you to try following the three points above, without looking at the code below.
# Break every dotted label into its list of path components.
l0 = ["1", "1.1", "1.1.1", "2", "3"]
l = [label.split('.') for label in l0]  # [['1'], ['1', '1'], ['1', '1', '1'], ['2'], ['3']]
def make_groups(l, depth=0):
    """Bucket split labels by their dotted prefix of length ``depth + 1``.

    Args:
        l: Iterable of labels already split into components, e.g.
            ``['1', '1', '1']`` for ``"1.1.1"``.
        depth: Zero-based level whose prefix is used as the grouping key.

    Returns:
        Dict mapping each prefix (components re-joined with ``'.'``) to the
        list of labels sharing that prefix, in input order.
    """
    d = {}
    for x in l:
        # A label with no component at this depth has no prefix here: skip it.
        if len(x) > depth:
            d.setdefault('.'.join(x[:depth+1]), []).append(x)
    return d
# print(make_groups(l))
# {'1': [['1'], ['1', '1'], ['1', '1', '1']], '2': [['2']], '3': [['3']]}
def make_tree(l, depth=0):
    """Recursively nest split labels into a dict-of-dicts tree.

    Args:
        l: Iterable of labels already split into components.
        depth: Level at which to start grouping.

    Returns:
        Dict mapping each dotted prefix at ``depth`` to the subtree built
        from the labels under that prefix; a leaf maps to ``{}``.
    """
    # Each group is regrouped one level deeper. make_groups drops labels
    # that have no component at the requested depth, so groups shrink to
    # nothing and the recursion ends.
    return {
        k: make_tree(v, depth+1)
        for k, v in make_groups(l, depth).items()
    }
# print(make_tree(l))
# {'1': {'1.1': {'1.1.1': {}}}, '2': {}, '3': {}}

This is actually pretty simple. Note that it requires a well-formed list; that is, you can't jump from 1 to 1.1.1, for example.
First build a dictionary for each value as key and empty list as value. And then go through each key and if it has a "." then go to the key which is to left of the "." and append this dictionary to the list for that parent key.
And finally remove all the keys which have a "."
# One entry per label, each starting with an empty list of children.
d = {e: [] for e in l}
for k, v in d.items():
    if "." in k:
        # The parent label is everything before the last dot. A missing
        # parent raises KeyError, hence the well-formed-input requirement.
        p = k[:k.rindex(".")]
        d[p].append({k: v})
# Child entries are now reachable through their parents; keep only the roots.
d = {k: v for k, v in d.items() if "." not in k}
print(d)
{'1': [{'1.1': [{'1.1.1': []}]}], '2': [], '3': []}

Related

How to delete duplicates in dictionary with list values

I need to delete the elements that are duplicated in a dictionary like this:
{
1: ['1', '2', '3'],
2: ['4', '3', '6', '7'],
3: ['8', '1', '9']
}
as to make the final result like this
{
1: ['1', '2', '3'],
2: ['4', '6', '7'],
3: ['8', '9']
}
Please help me how to do that, I have no idea
Assuming d the input, you can use a set to keep track of the seen values. Here using a dictionary comprehension and "cheating" a bit to add the values:
seen = set()
# set.add returns None, so `not seen.add(item)` is always true; it is only
# evaluated (recording the item) when the item was not seen before.
out = {
    key: [item for item in values if item not in seen and not seen.add(item)]
    for key, values in d.items()
}
Output:
{1: ['1', '2', '3'],
2: ['4', '6', '7'],
3: ['8', '9']}
Same with a classical loop:
out = {}
seen = set()  # every value emitted so far, across all keys
for k, v in d.items():
    l = []
    for x in v:
        # Keep only the first occurrence of each value.
        if x not in seen:
            seen.add(x)
            l.append(x)
    out[k] = l
Rehashing the same old seen-set solution is boring :-P. Let's have some fun with Counters:
from collections import Counter

d = {
    1: ['1', '2', '3'],
    2: ['4', '3', '6', '7'],
    3: ['8', '1', '9']
}

seen = Counter()
for a in d.values():
    # dict.fromkeys dedupes the list itself while keeping its order.
    uniq = Counter(dict.fromkeys(a, 1))
    # Counter subtraction drops entries whose count falls to zero, i.e.
    # values already seen; the survivors are written back in place.
    a[:] = uniq - seen
    seen |= uniq
print(d)
Each list is first deduplicated on its own by using a dict. Then turned into a Counter so we can conveniently subtract the previously seen values. Write the new ones into the list and add them to the seen ones.
Try it online!
You could do the same with set union and difference operators. As sets are unordered the final list would need to be sorted. Again assuming d is the original dictionary.
s = set()  # values seen in earlier lists
for k in d:
    z = d[k]
    # Sets are unordered, so sort to get a deterministic list.
    d[k] = sorted(set(z).difference(s))
    s |= set(z)

List comprehension - attempting to create dictionary from array of key-value pair arrays

While trying to create a dict using list comprehension (without overwriting generated list keys)
x = {}
entries = [[1,'1a'], [2,'2a'], [3,'3a'], ['1', '1b'], ['2', '2b'], ['3', '3b']]
# The comprehension is run only for its side effect on x; list.append returns
# None, so the resulting list holds nothing but None values.
discardable = [x.setdefault(entry[0], []).append(entry[1]) for entry in entries]
Error: name 'x' is not defined
I was expecting x to be populated as:
{1: ['1a', '1b'], 2: ['2a', '2b'], 3: ['3a', '3b']}
How to explain this / make this work?
Is there any other way to achieve this?
Some of the keys are str and some are int, so this will produce
{'1': ['1a', '1b'], 2: ['2a'], 3: ['3a'], '2': ['2b'], '3': ['3b']}
You need to cast entry[0] to int
x = {}
entries = [[1,'1a'], [2,'2a'], [3,'3a'], ['1', '1b'], ['2', '2b'], ['3', '3b']]
# A plain loop instead of a throwaway list comprehension: only the mutation
# of x matters. Casting the key to int merges 1 with '1', 2 with '2', etc.
for entry in entries:
    x.setdefault(int(entry[0]), []).append(entry[1])
print(x) will give
{1: ['1a', '1b'], 2: ['2a', '2b'], 3: ['3a', '3b']}
You can try:
from collections import defaultdict

entries = [['1','1a'], [2,'2a'], [3,'3a'], ['1', '1b'], ['2', '2b'], ['3', '3b']]
x = defaultdict(list)
# A plain loop instead of a throwaway list comprehension: only the appends
# matter. Keys are used as-is, so int and str keys stay separate.
for a, b in entries:
    x[a].append(b)
x = dict(x)
Output
{'1': ['1a', '1b'], 2: ['2a'], 3: ['3a'], '2': ['2b'], '3': ['3b']}
What you want is to merge the value of (1, '1'), (2, '2'), (3, '3'). The code you provided works, but it doesn't handle the type between int and str. All you need to do is cast str into int so the value would be merged.
Try this:
x = {}
entries = [[1,'1a'], [2,'2a'], [3,'3a'], ['1', '1b'], ['2', '2b'], ['3', '3b']]
# Keys are cast to int so that 1 and '1' share one bucket; the list itself
# only collects the None returned by each append.
discardable = [x.setdefault(int(key), []).append(value) for key, value in entries]
How to explain this / make this work?
A comprehension creates one output for each input, and they're not intended for mutation
Is there any other way to achieve this?
Use a normal procedural loop. Or use a multidict but the standard library doesn't provide one.
You could perform functional transformations until you get the proper "shape" (using sorted, itertools.groupby and some more mapping) but I don't think that'd be worth it, a bog-standard for loop would be much more readable:
for k, v in entries:
    # Cast the entry's key (not the dict x) to int so 1 and '1' merge.
    x.setdefault(int(k), []).append(v)
versus:
# groupby only merges adjacent items, so the int-normalized pairs are
# sorted first; each group then contributes the second element of its pairs.
x = {
    key: [pair[1] for pair in group]
    for key, group in itertools.groupby(
        sorted((int(k), v) for k, v in entries),
        lambda it: it[0]
    )
}
the comprehension is less readable and less efficient
You can use a dict comprehension. they work much like list comprehensions
entries = [[1,'1a'], [2,'2a'], [3,'3a'], ['1', '1b'], ['2', '2b'], ['3', '3b']]
# Map the first element of each pair to its second; int and str keys are
# distinct, so all six pairs survive.
discardable = {k: v for k, v in entries}
print (discardable)
# Result: {1: '1a', 2: '2a', 3: '3a', '1': '1b', '2': '2b', '3': '3b'}

Python Dictionary using Lists as Values, find other Keys with same values

Say I have the following dictionary.
>> sample_dict = {"1": ['a','b','c'], "2": ['d','e','f'], "3": ['g','h','a']}
I would like to find a way to look at the values of each of the keys and report whether any of the value lists share a duplicate element.
For example it would output:
>> [["1","3"] , ['a']]
I've looked at a few of the posts here and tried to use and/or change them to accomplish this, however none of what I have found has worked as intended. They would work if it was as follows:
>> sample_dict = {"1": ['a','b','c'], "2": ['d','e','f'], "3": ['a','b','c']}
but not if only a single value within the list was the same.
You could use another dictionary to map the values to the lists of corresponding keys. Then just select the values that map to more than one key, e.g.:
from collections import defaultdict

sample_dict = {'1': ['a','b','c'], '2': ['d','e','f'], '3': ['g','h','a']}
d = defaultdict(list) # automatically initialize every value to a list()
# Invert the mapping: each list element -> the keys whose lists contain it.
for k, v in sample_dict.items():
    for x in v:
        d[x].append(k)
# An element listed under more than one key is a duplicate across lists.
for k, v in d.items():
    if len(v) > 1:
        print([v, k])
Output:
[['1', '3'], 'a']
If the list elements are hashable, you can use .setdefault to build an inverse mapping like so:
>>> sample_dict = {"1": ['a','b','c'], "2": ['d','e','f'], "3": ['g','h','a']}
>>> aux = {}
>>> for k, v in sample_dict.items():
... for i in v:
... aux.setdefault(i, []).append(k)
...
>>> [[v, k] for k, v in aux.items() if len(v) > 1]
[[['1', '3'], 'a']]
Dictionaries map from keys to values, not from values to keys. But you can write a function for one-off calculations. This will incur O(n) time complexity and is not recommended for larger dictionaries:
def find_keys(d, val):
    """Return the keys of ``d`` whose value container contains ``val``.

    Args:
        d: Mapping from keys to containers (e.g. lists) of values.
        val: Value to look for inside each container.

    Returns:
        List of matching keys in the dict's iteration order; empty if
        nothing matches. Every entry is scanned on every call.
    """
    return [k for k, v in d.items() if val in v]
res = find_keys(sample_dict, 'a') # ['1', '3']
If you do this often, I recommend you "invert" your dictionary via collections.defaultdict:
from collections import defaultdict

dd = defaultdict(list)
# Invert the mapping once: each list element -> the keys that contain it.
for k, v in sample_dict.items():
    for w in v:
        dd[w].append(k)
print(dd)
defaultdict(<class 'list'>, {'a': ['1', '3'], 'b': ['1'], 'c': ['1'], 'd': ['2'],
'e': ['2'], 'f': ['2'], 'g': ['3'], 'h': ['3']})
This costs O(n) for the inversion, as well as additional memory, but now allows you to access the keys associated with an input value in O(1) time, e.g. dd['a'] will return ['1', '3'].
You can use defaultdict from collections module to do this
for example,
from collections import defaultdict

sample_dict = {"1": ['a','b','c'], "2": ['d','e','f'], "3": ['g','h','a']}
d = defaultdict(list)
# Map every list element to the keys whose lists contain it.
for keys, vals in sample_dict.items():
    for v in vals:
        d[v].append(keys)
print(d)
d will be a dict whose keys are the individual list elements and whose values are the lists of original keys in which each element appears; the repeated elements are those mapped to more than one key.
The output of above code is defaultdict(list,{'a': ['1', '3'],'b': ['1'],'c': ['1'],'d': ['2'],'e': ['2'],'f': ['2'],'g': ['3'],'h': ['3']})
​
Although it IS possible to get the output in the form you asked for, it is not generally recommended: we are trying to find which character is repeated in which lists, and that feels like a job for a dictionary.

Concatenating string to keys and values in a dict of list

I have a dict which contains lists as its values. Let's say
mydict = {
'1': ['4', '3', '8'],
'2': ['3', '7', '5']
}
What I want to do is add a character to each key and a different character to each element of the list. Let's say mydict should become
mydict = {
'1a': ['4b', '3b', '8b'],
'2a': ['3b', '7b', '5b']
}
I was able to change the keys by doing the following:
def m(mydict):
    """Return a copy of ``mydict`` with ``'a'`` appended to every key.

    Values that expose a ``keys`` attribute are treated as nested mappings
    and converted recursively. Any other value, lists included, is passed
    through unchanged.
    """
    return {
        k + 'a': m(v) if hasattr(v, 'keys') else v
        for k, v in mydict.items()
    }
But wasn't able to do it with the elements of the lists in the same function. Is there a concise way to do so?
A list comprehension on the dict values will do:
# Suffix every key with 'a' and every list element with 'b', then print it.
print({key + 'a': [item + 'b' for item in items] for key, items in mydict.items()})
This outputs:
{'1a': ['4b', '3b', '8b'], '2a': ['3b', '7b', '5b']}
You can do so by nesting a list comprehension inside a dict comprehension, as follows:
mydict = {
    '1': ['4', '3', '8'],
    '2': ['3', '7', '5'],
}
# The outer dict comprehension renames the keys; the inner list
# comprehension rewrites each list element.
letterdict = {k + 'a': [item + 'b' for item in items] for k, items in mydict.items()}
letterdict
> {'1a': ['4b', '3b', '8b'], '2a': ['3b', '7b', '5b']}

Selecting K largest values in a dictionary of dictionary in python

I have a dictionary of dictionary as below:
ls = [{'0': {'1': '1','2': '0.5','3': '1'},'1': {'0': '0.2','2': '1','3': '0.8'},}]
I would like to select k-largest values with their keys for each key of dictionary (ls). I have written below commands. It just gives me the k-largest keys without their values.
Python Code:
import heapq

k=2
for dic in ls:
    for key in dic:
        # Iterating the inner dict yields its keys; they are ranked by
        # their associated values, so only the keys are printed.
        print(heapq.nlargest(k, dic[key], key=dic[key].get))
Output
['2', '3']
['3', '1']
I need to have value of each selected key.
First of all, I just wanted to check why you have
ls = [{'0': {'1': '1','2': '0.5','3': '1'},'1': {'0': '0.2','2': '1','3': '0.8'},}]
This is a list containing a dict, which doesn't match your description in the question.
Here is a solution that uses dict comprehensions that should give you what you want :)
def get_n_largest_vals(n, d):
    """Return, for each outer key, the ``n`` largest inner keys as strings.

    Iterating an inner dict yields its keys, so it is the inner *keys*
    that are ranked (compared as integers), not their values.

    Args:
        n: How many entries to keep per outer key.
        d: Dict of dicts whose inner keys are integer-like strings.

    Returns:
        Dict mapping each outer key to a list of up to ``n`` inner keys,
        largest first, converted back to ``str``.

    Raises:
        ValueError: If an inner key cannot be converted with ``int``.
    """
    # Pass the requested count n (not the builtin len) to nlargest.
    x = {key: heapq.nlargest(n, map(int, d[key])) for key in d}
    # Build real lists so the result displays and compares as lists.
    return {key: list(map(str, nums)) for key, nums in x.items()}
Here it is being used for your problem:
ls = [{'0': {'1': '1','2': '0.5','3': '1'},'1': {'0': '0.2','2': '1','3': '0.8'},}]
d = ls[0]
get_n_largest_vals(2, d)
>>> {'0': ['3', '2'], '1': ['3', '2']}
How about:
from operator import itemgetter

for d in ls:
    for key, d2 in d.items():
        # Rank the (key, value) pairs by value and keep the k largest.
        print(dict(heapq.nlargest(k, d2.items(), key=itemgetter(1))))
Notice that your values are still strings, so they'd be ordered lexicographically, which is not what you want, because '2' > '12'. And the dictionary is not ordered!

Categories