Remove elements from dict in for loop - python

How can I delete all elements in a dictionary after a modified one?
If the second one is changed, then we delete everything that is behind it or, if the first one is changed, then we delete everything after it and so on.
d = {'first': 'one', 'second': 'two', 'third': 'three'}
k = 'second'
for key in d:
if k in key:
d[key] = 'new value'
# delete third

Instead of deleting elements, just create a new dictionary.
Since dictionaries are not ordered, you should sort the items in some way.
d = {'first': 'one', 'second': 'two', 'third': 'three'}
k = 'second'
new_d = {}
for key, value in sorted(d.items()):
if key == k:
new_d[key] = 'new value'
break
new_d[key] = value

You can use the following code. it makes the dict items into a list and creates a new dict from the remaining items:
d = {'first': 'one', 'second': 'two', 'third': 'three'}
k = 'second'
d[k] = 'NewValue'
d = dict(list(d.items())[:list(d.keys()).index(k)+1])
print(d)
Out:
{'first': 'one', 'second': 'NewValue'}
Out of curiosity I timed it against #Daniel's answer those are my timeit code and result:
import timeit, functools
def test_1(d,k):
d = dict(list(d.items())[:list(d.keys()).index(k)+1])
d[k] = 'new vlaue'
return d
def test_2(d, k):
new_d = {}
for key, value in sorted(d.items()):
if key == k:
new_d[key] = 'new value'
break
new_d[key] = value
return new_d
if __name__ == '__main__':
keys = [x for x in range(100000)]
values = [x for x in range(100000)]
d = dict(zip(keys, values))
k = 9999
a = timeit.timeit(functools.partial(test_1, d, k), number = 1000)
b = timeit.timeit(functools.partial(test_2, d, k), number = 1000)
print(a, b)
Output:
5.107241655999815 6.745305094000059
If you change the sorted(...) into list(...) in Daniels answer it is the other way around:
5.046288972999946 4.211456709999993
It is a constant offset, probably due to creating the list out of the dict twice instead of once. So #Daniels answer is both faster and less memory expensive

you can delete rest of dict by keys. like below:
d = {'first': 'one', 'second': 'two', 'third': 'three'}
k = 'first' #for modify
keys = ['second', 'third'] #rest of dict to delete
list(map(d.pop, keys))
it will return {'first':'modified_value'}

You should not be using normal dictionary if you're trying to keep them in an order, at least if you're below py3.7 of course.
Please use if you're below 3.7.
We are creating a new dictionary in the below code and removing the old one, you can even put this inside a function and use it.
from collections import OrderedDict
d =OrderedDict([('first','one'), ('second', 'two'), ('third', 'three')])
k = 'second'
output_dict = OrderedDict()
for key,value in d.items():
if k in key:
output_dict[key] = value
break
else:
output_dict[key] = value
del d
print(output_dict)

Related

Python - Return the dictionary key comparing dictionary values with list

I was trying to compare a list with dictionary values to see if they match so I could return the key, but I couldn't find a solution to match them:
>>> my_dict = {'A': ['apple', 'america'], B: ['bee', 'bar'], C: ['car','cake','cat'], D: ['dad']}
>>> my_list = ['apple', 'dad']
In this case, the result would be:
['A', 'D']
What would be the best approach? I tried a nested for-loop, but I was wondering if there is a more direct answer.
my_second_dict = {}
for key in my_dict.keys():
for values in my_dict[key]:
I am not really sure how to do it. I'd appreciate your help.
Thanks!
You have a list of values and you want the keys of the dictionary items that have those values in their values (which are lists of values). This is assuming there are no duplicates in the lists in the dictionary (which seems reasonable, since it appears to group them by initial).
This is a solution:
my_dict = {'A': ['apple', 'america'], 'B': ['bee', 'bar'], 'C': ['car','cake','cat'], 'D': ['dad']}
my_list = ['apple', 'dad']
my_result = [k for v in my_list for k, vs in my_dict.items() if v in vs]
print(my_result)
Result:
['A', 'D']
Do you mean something like this?
my_dict = {'A': ['apple', 'america'], B: ['bee', 'bar'], C: ['car','cake','cat'], D: ['dad']}
my_list = ['apple', 'dad']
results = []
for key in my_dict.keys():
for value in my_list:
if value in my_dict[key]:
results.append(key)
break
print(results)
Loop through the keys in the dictionary, and check if any of the values in the list are in the dictionary.
You do not need to use the break statement if you use set intersection or list comprehension.
my_dict = {'A': ['apple', 'america'], B: ['bee', 'bar'], C: ['car','cake','cat'], D: ['dad']}
my_list = ['apple', 'dad']
results = []
for key in my_dict.keys():
if set(my_list).intersection(set(dict[key])):
results.append(key)
print(results)
my_dict = {'A': ['apple', 'america'], B: ['bee', 'bar'], C: ['car','cake','cat'], D: ['dad']}
my_list = ['apple', 'dad']
results = []
for key in my_dict.keys():
if any([val in my_dict[key] for val in my_list]):
results.append(key)
print(results)
I think if you finish your loop, you can end up with something like this:
output = []
for i in my_list:
for k, v in my_dict.items():
if i in v:
output.append(k)
or a one-liner
output = [k for i in my_list for k, v in my_dict.items() if i in v]

Python: Create a dictionary where keys have multiple values

The problem that I have is hard to explain, easy to understand:
I have a list of tuples:
L=[('a','111'),('b','222'),('a','333'),('b','444')]
from this list I want to createa dictionary where the keys are the first elements of the tuples ('a' and 'b') and the values associated are in a list:
expected output:
{'a':['111','333'],'b':['222','444']}
How can I solve this problem?
d={}
for x in range (len(L)):
d[L[x][0]]=[L[x][1]]
return d
but as you can easy understand, the output won't be complete since the list will show just the last value associated to that key in L
You can use setdefault() to set the key in the dict the first time. Then append your value:
L=[('a','111'),('b','222'),('a','333'),('b','444')]
d = {}
for key, value in L:
d.setdefault(key, []).append(value)
print(d)
# {'a': ['111', '333'], 'b': ['222', '444']}
You have to append L[x][1] to an existing list, not replace whatever was there with a new singleton list.
d={}
for x in range (len(L)):
if L[x][0] not in d:
d[L[x][0]] = []
d[L[x][0]].append(L[x][1])
return d
A defaultdict makes this easier:
from collections import defaultdict
d = defaultdict(list)
for x in range(len(L)):
d[L[x][0]].append(L[x][1])
return d
A more idiomatic style of writing this would be to iterate directly over the list and unpack the key and value immediately:
d = defaultdict(list)
for key, value in L:
d[key].append(value)
You can try this:
L = [('a','111'),('b','222'),('a','333'),('b','444')]
my_dict = {}
for item in L:
if item[0] not in my_dict:
my_dict[item[0]] = []
my_dict[item[0]].append(item[1])
print(my_dict)
Output:
python your_script.py
{'a': ['111', '333'], 'b': ['222', '444']}
As pointed by #chepner, you can use defaultdict to.
Basically, with defaultdict you'll not need to check if there is no key yet in your dict.
So it would be:
L = [('a','111'),('b','222'),('a','333'),('b','444')]
my_dict = defaultdict(list)
for item in L:
my_dict[item[0]].append(item[1])
print(my_dict)
And the output:
defaultdict(<class 'list'>, {'a': ['111', '333'], 'b': ['222', '444']})
And if you want to get a dict from the defaultdict, you can simply create a new dict from it:
print(dict(my_dict))
And the output will be:
{'a': ['111', '333'], 'b': ['222', '444']}

In python, combine the values of 2 dictionaries

I'm new to Python, and I have two questions regarding dictionary in python.
I have dict1 (where the values is already a list), then I have dict2with the same keys as dict1, I want to add the new value to the list of values in dict1. How can I write it? Can you show it with an example?
dict1 = {'uid': ['u1'], 'sid': ['s1'], 'os': ['os1']}
dict2 = {'uid': ['u2'], 'sid': ['s2'], 'os': ['os2']}
Expected output:
dict1 = {'uid': ['u1', 'u2'], 'sid': ['s1', 's2'], 'os': ['os1', 'os2']}
I will be process a lot of lines in a text file, with each line creating a new dictionary. Here is the idea of my code:
count = 0
for line in f:
if count == 0:
dict1 = parse_qs(line)
count = count+1
else:
dict2 = parse_qs(line)
#combine dict1 with dict 2, and assign the new dict to dict1
Is there a better way that uses less memory or runs faster (still using dictionary)?
Thank you in advance for your help!
Since you're apparently not allowed to rewrite you parser, you can do:
for k in dict1:
dict1[k].extend(dict2.get(k, []))
You can drop the .get and use direct subscription if the keys from both dicts are always matching.
for k in dict1:
dict1[k].extend(dict2[k])
Otherwise, you can create one defaultdict(list) and let your parser append values to that.
You can do like this,
for key in dict1.keys():
if dict2.has_key(key):
dict1[key].extend(dict2[key])
if you are using python3 you can use key in dict2 instead of dict2.has_key(key).
Result
{'os': ['os1', 'os2'], 'sid': ['s1', 's2'], 'uid': ['u1', 'u2']}
If you don't want the two dictionaries anymore then you can do this:
def merge_dict(dict1, dict2):
for k in dict1:
try:
dict1[k].extend(dict2[k])
except KeyError:
pass
return dict1
Else if you want to preserve them both for future use, try this
def merge_dict(dict1, dict2):
new_dict = {}
keys = dict1.keys() + dict2.keys()
for k in keys:
try:
new_dict[k] = dict1[k]
except KeyError:
new_dict[k] = dict2[k]
continue
try:
new_dict[k]+= dict2[k]
except KeyError:
pass
return dict1
this may be helpful to you.
from itertools import chain
from collections import defaultdict
# ------ for dict1 -------#
u1 = [1,2,3]
s1 = ['a','b','c']
x1 = [10,100,1000]
# ------ for dict2 -------#
u2 = [4,5,6]
s2 = ['d','e','f']
x2 = ['aa','bb','cc']
dict1 = {1:u1,2:s1,3:x1}
dict2 = {1:u2,2:s2,3:x2}
dict3 = defaultdict(list)
for a, b in chain(dict1.items(), dict2.items()):
dict3[a].append(b)
#for a, b in dict3.items():
# print(a, b)
print dict3

python create dict using list of strings with length of strings as values

I'm sure this can be done, but I have thus far been unsuccessful:
I have a list of strings. I want to create a dictionary with the length of said strings (which can be expressed as a range) as the key and the string itself as the value.
example:
Here's something like the list I have: ['foo','bar','help','this','guy']
I'd like to end up with a dictionary like this:
{3:['foo','bar','guy], 4:['this','help']}
Using defaultdict so you don't have to check whether or not to create the list for a new key:
from collections import defaultdict
x = ['foo','bar','help','this','guy']
len_dict = defaultdict(list)
for word in x:
len_dict[len(word)].append(word)
len_dict
#
# Out[5]: defaultdict(list, {3: ['foo', 'bar', 'guy'], 4: ['help', 'this']})
You can use a dictionary as a container with setdefault:
lst = ['foo','bar','help','this','guy']
result = {}
for w in lst:
result.setdefault(len(w), []).append(w)
result
# {3: ['foo', 'bar', 'guy'], 4: ['help', 'this']}
You can do it like that:
d={}
lst=['foo','bar','help','this','guy']
for i in lst:
if len(i) in d:
d[len(i)].append(i)
else:
d[len(i)]=[i]
This solution is pythonic, elegant and fast: (by the Famous Raymond Hettinger in one of his many conferences).
dict.setdefault is the dictionary method that initialises a key-value if the key is not found in dict as well as performing dict.get for provided key.
l = ['foo','bar','help','this','guy']
d = {}
for e in l:
key = len(e)
d.setdefault(key, []).append(name)
print(d)
Output:
{3: ['foo', 'bar', 'guy'], 4: ['help', 'this']}
This solution is the modern way of the solution above:
defaultdict from collection is a subclass of dict that automatically initialises value to any given key that is not in the defaultdict.
from collections import defaultdict
l = ['foo','bar','help','this','guy']
d = defaultdict(list)
for e in l:
key = len(e)
d[key].append(e)
print(d)
Output:
defaultdict(<class 'list'>, {3: ['foo', 'bar', 'guy'], 4: ['help', 'this']})
Similar to what have been said, but using the get method of dict class:
the_list=['foo','bar','help','this','guy']
d = {}
for word in the_list:
key = len(word)
d[key] = d.get(key, []) + [word]
print(d)
# {3: ['foo', 'bar', 'guy'], 4: ['help', 'this']}
Another approach:
from collections import defaultdict
given_list=['foo','bar','help','this','guy']
len_words=[len(i) for i in given_list]
d=defaultdict(list)
for i,j in list(zip(len_words,given_list)):
d[i].append(j)

Group similar dict entries as a tuple of keys

I would like to group similar entries of a dataset.
ds = {1: 'foo',
2: 'bar',
3: 'foo',
4: 'bar',
5: 'foo'}
>>>tupelize_dict(ds)
{
(1,3,5): 'foo',
(2,4): 'bar'
}
I wrote this function, but I am sure there is something way simpler, isn't?
def tupelize_dict(data):
from itertools import chain, combinations
while True:
rounds = []
for x in combinations(data.keys(), 2):
rounds.append((x, data[x[0]], data[x[1]]))
end = True
for k, a, b in rounds:
if a == b:
k_chain = [x if isinstance(x, (tuple, list)) else [x] for x in k]
data[tuple(sorted(chain.from_iterable(k_chain)))] = a
[data.pop(r) for r in k]
end = False
break
if end:
break
return data
EDIT
I am interested in the general case where the content of the dataset can be any type of object that allows ds[i] == ds[j]:
ds = {1: {'a': {'b':'c'}},
2: 'bar',
3: {'a': {'b':'c'}},
4: 'bar',
5: {'a': {'b':'c'}}}
something like this should do the trick:
>>> from collections import defaultdict
>>> ds = {1: 'foo',
... 2: 'bar',
... 3: 'foo',
... 4: 'bar',
... 5: 'foo'}
>>>
>>> d = defaultdict(list)
>>> for k, v in ds.items():
... d[v].append(k)
...
>>> res = {tuple(v): k for k, v in d.items()}
>>> res
{(1, 3, 5): 'foo', (2, 4): 'bar'}
as well as you could do something like this.
def tupelize_dict(ds):
cache = {}
for key, value in ds.items():
cache.setdefault(value, []).append(key)
return {tuple(v): k for k, v in cache.items()}
ds = {1: 'foo',
2: 'bar',
3: 'foo',
4: 'bar',
5: 'foo'}
print(tupelize_dict(ds))
Following the answer of acushner, it is possible to make it work if I can compute a hash of the content of dataset's elements.
import pickle
from collections import defaultdict
def tupelize_dict(ds):
t = {}
d = defaultdict(list)
for k, v in ds.items():
h = dumps(ds)
t[h] = v
d[h].append(k)
return {tuple(v): t[k] for k, v in d.items()}
This solution is MUCH faster than my original proposition.
To test it I made a set of big random nested dictionary and run cProfile on both implementations:
original: 204.9 seconds
new: 6.4 seconds
EDIT:
I realized the dumps does not work with some dictionaries because the keys order can internally vary for obscure reasons (see this question)
A workaround would be to order all the dicts:
import copy
import collections
def faithfulrepr(od):
od = od.deepcopy(od)
if isinstance(od, collections.Mapping):
res = collections.OrderedDict()
for k, v in sorted(od.items()):
res[k] = faithfulrepr(v)
return repr(res)
if isinstance(od, list):
for i, v in enumerate(od):
od[i] = faithfulrepr(v)
return repr(od)
return repr(od)
def tupelize_dict(ds):
taxonomy = {}
binder = collections.defaultdict(list)
for key, value in ds.items():
signature = faithfulrepr(value)
taxonomy[signature] = value
binder[signature].append(key)
def tu(keys):
return tuple(sorted(keys)) if len(keys) > 1 else keys[0]
return {tu(keys): taxonomy[s] for s, keys in binder.items()}

Categories