finds key but not KEY.lower() in dictionary Python - python

So I need to write this program where I create a class and an object of that class is a dictionary with categories as keys, and words that are included in such categories are the values (Example: {'name' : {'patrick', 'jane'}, 'discipline' : {'geography',...}, ...}).
At some point in the program (in that class) I have to create a method which takes a the name of a category as an argument. I then have to pick a random word out of that category. In the dictionary all keys(categories) need to be lowercase but when I give a category to choose a word from that shouldn't matter.
Here is my code first (part of it):
import random
class MadLibs:
def __init__(self, woordenschat = {}):
self.woordenschat = woordenschat
def suggereren(self, categorie):
assert categorie.lower() in self.woordenschat, 'onbekende categorie'
randwoord = random.choice(list(self.woordenschat[categorie.lower()]))
if categorie.isupper():
return randwoord.upper()
elif categorie.islower():
return randwoord
else:
return randwoord.capitalize()
so say I got a category 'name' as key in my dictionary with a sequence of words, when I then use the method suggereren and give as argument 'name' it works, but when I give 'NAME' then self.woordenschat[category.lower()] returns an empty list (see the line where I initialize randwoord )
Would somebody be able to tell me why this happens?
UPDATE:
this is how you add the words in the dictionary, categorie is where you give the category, and woorden is where you give new words that belong to that category
def leren(self, categorie, woorden):
if isinstance(woorden, (tuple, list, set)):
woorden = set(woorden)
else:
woorden = {woorden}
if categorie in self.woordenschat:
self.woordenschat[categorie.lower()].add(woord.lower() for woord in woorden)
else:
self.woordenschat[categorie.lower()] = (woord.lower() for woord in woorden)
return None
UPDATE:
seems like the way I added the words in leren was the problem an error something like: object 'generator' does not have ... 'add'
here's my new code:
def leren(self, categorie, woorden):
if isinstance(woorden, (tuple, list, set)):
woorden = set(woorden)
else:
woorden = {woorden}
set_to_add = {woord.lower() for woord in woorden}
if categorie in self.woordenschat:
self.woordenschat[categorie.lower()].union(set_to_add)
else:
self.woordenschat[categorie.lower()] = (set_to_add)
return None
now the only problem left is that my object doesn't really get updated when I add new words to an existing category I'll try to find it first but if I don't I'll just ask a new question.
update: nevermind found it, twas a stupid mistake

actually , in the requests sources code , there have a solution about the caselessdict object maybe satisfied you need.
import collections
class CaseInsensitiveDict(collections.MutableMapping):
def __init__(self, data=None, **kwargs):
self._store = dict()
if data is None:
data = {}
self.update(data, **kwargs)
def __setitem__(self, key, value):
# Use the lowercased key for lookups, but store the actual
# key alongside the value.
self._store[key.lower()] = (key, value)
def __getitem__(self, key):
return self._store[key.lower()][1]
def __delitem__(self, key):
del self._store[key.lower()]
def __iter__(self):
return (casedkey for casedkey, mappedvalue in self._store.values())
def __len__(self):
return len(self._store)
def lower_items(self):
"""Like iteritems(), but with all lowercase keys."""
return (
(lowerkey, keyval[1])
for (lowerkey, keyval)
in self._store.items()
)
def __eq__(self, other):
if isinstance(other, collections.Mapping):
other = CaseInsensitiveDict(other)
else:
return NotImplemented
# Compare insensitively
return dict(self.lower_items()) == dict(other.lower_items())
# Copy is required
def copy(self):
return CaseInsensitiveDict(self._store.values())
def __repr__(self):
return str(dict(self.items()))
#property
def keys(self):
return [i for i in self]
#property
def values(self):
return [self[i] for i in self]

Related

How to find Dictionary Key(s) from Value in a large nested dictionary of variable depth?

Say that I have a large dictionary full of nested values such as this:
large_dic ={
...
"key":{"sub-key1" :{"sub-key2": "Test"}},
"0key":{"0sub-key1": "0Test"},
"1key":{"1sub-key1":{"1sub-key2":{"1sub-key3":"1Test"}}}
...
}
What I would like to do is to be able to get for example from the final value:
"1Test"
the key(s) to access it, such as in this case:
large_dic["1key"]["1sub-key1"]["1sub-key2"]["1sub-key3"]
Thanks for the support.
Edit to add more infos: The dictionary trees I'm talking about are linear(YAML files converted into a python dictionary structure), there is never more than one key, the ending leaf values may not be unique.
Since OP is looking for hierarchical keys instead
I made this class :
class PointingSlice:
def __init__(self, obj, *slices) -> None:
self.obj = obj
self.slices = slices
def __str__(self):
return f"{str(self.obj)}{''.join(map(self._repr_slice, self.slices))}"
def _repr_slice(self, sliced: slice):
sqbrackets = "[{}]"
if not isinstance(sliced, slice):
return sqbrackets.format(repr(sliced))
items = [sliced.start, sliced.stop, sliced.step]
fn = lambda x: str() if x is None else str(x)
return sqbrackets.format(":".join(map(fn, items)))
def resolve(self):
obj = self.obj
for sliced in self.slices:
obj = obj.__getitem__(sliced)
return obj
and this function for instantiation :
def find_longest(mapping, key):
keys = [key]
value = mapping[key]
while isinstance(value, dict):
((k, value),) = value.items()
keys.append(k)
return PointingSlice(mapping, *keys)
Example use:
print(find_longest(large_dic, "1key"))
# output:
# {'key': {'sub-key1': {'sub-key2': 'Test'}}, '0key': {'0sub-key1': '0Test'}, '1key': {'1sub-key1': {'1sub-key2': {'1sub-key3': '1Test'}}}}['1key']['1sub-key1']['1sub-key2']['1sub-key3']
# do note that it is the same thing as large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(find_longest(large_dic, "1key").resolve()) # 1Test
So I made some changes and now it supports additional repr options matching your exact use case :
class PointingSlice:
def __init__(self, obj, *slices, object_name=None) -> None:
self.obj = obj
self.slices = slices
self.object_name = object_name
def __str__(self):
return f"{self.object_name or str(self.obj)}{''.join(map(self._repr_slice, self.slices))}"
def _repr_slice(self, sliced: slice):
sqbrackets = "[{}]"
if not isinstance(sliced, slice):
return sqbrackets.format(repr(sliced))
items = [sliced.start, sliced.stop, sliced.step]
fn = lambda x: str() if x is None else str(x)
return sqbrackets.format(":".join(map(fn, items)))
def resolve(self):
obj = self.obj
for sliced in self.slices:
obj = obj.__getitem__(sliced)
return obj
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "1Test"}}},
}
def find_longest(mapping, key):
keys = [key]
value = mapping[key]
while isinstance(value, dict):
((k, value),) = value.items()
keys.append(k)
return PointingSlice(mapping, *keys)
f = find_longest(large_dic, "1key")
f.object_name = "large_dic" # for representational purposes, it works without this
print(f) # large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(f.resolve()) # 1Test
There are numerous ways to achieve this. You might want to look up "prefix tree traversal" (or "trie traversal").
A simple recursive solution with poor memory efficiency could look like this:
def find_trie_leaf_path(trie: dict, leaf_value, trie_path: list[str] = []):
for key, value in trie.items():
if isinstance(value, dict):
yield from find_trie_leaf_path(value, leaf_value, trie_path + [key])
elif value == leaf_value:
yield trie_path + [key]
large_dic = {
"key": {"sub-key1": {"sub-key2": "Test"}},
"0key": {"0sub-key1": "0Test"},
"1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "Test"}}},
}
first_match = next(find_trie_leaf_path(large_dic, "Test"))
all_matches = list(find_trie_leaf_path(large_dic, "Test"))
This should work even if your trie is very wide. If it is very high, I'd rather use an iterative algorithm.
I want to point out, though, that prefix trees are usually used the other way round. If you find yourself needing this search a lot, you should consider a different data structure.
Yes, it's totally possible. Here's the function to get the deeply nested value:
def get_final_value(mapping, key):
value = mapping[key]
while isinstance(value, dict):
(value,) = value.values()
return value
Example use:
>>> get_final_value(large_dic, "key")
'Test'
>>> get_final_value(large_dic, "0key")
'0Test'
>>> get_final_value(large_dic, "1key")
'1Test'
>>>
Can the parent keys be deduced from your final value in any way or is the tree structure rather random? If latter is the case then you'll probably just end up searching your tree until you find your value, what path search algorithm you choose for that again depends on the tree structure you have. As already asked in the comments, does each node only have one other node or is it binary or can it have many child nodes?

Calling functions when a list changes in Python

I simply wanted to know if you could call a function whenever a list is changed (item added to it).
If anything it would look like this:
def listchanged():
print("The list has been changed!")
list = ["apples","bananas"]
listevent(list,listchanged)
And in the shell you would append an item to the list and it would print "The list has been changed!"
Thanks!
You can subclass list to print when changing the list.
Here's an example:
class EventList(list):
def __setitem__(self, key, value):
super(EventList, self).__setitem__(key, value)
print("The list has been changed!")
def __delitem__(self, value):
super(EventList, self).__delitem__(value)
print("The list has been changed!")
def __add__(self, value):
super(EventList, self).__add__(value)
print("The list has been changed!")
def __iadd__(self, value):
super(EventList, self).__iadd__(value)
print("The list has been changed!")
def append(self, value):
super(EventList, self).append(value)
print("The list has been changed!")
def remove(self, value):
super(EventList, self).remove(value)
l = EventList(["apples","bananas"])
l.append('test')
Prints:
The list has been changed!
OR
Just compare lists:
old_list = ["apples","bananas"]
new_list = ["apples","bananas"]
newer_list = ["apples","bananas", "oranges"]
old_list == new_list #true
old_list == newer_list #false
This would work with no subclass as == compares if the lists have the same elements in the same indicies not if they are exactly the same by id or hash (in lamens). Just save the old one in a variable, then when you want to check just use this. (note: this does not provide a way for the print to be automatically called. It's just another way)
def listchanged():
print("The list has been changed!")
lt = ["apples","bananas"]
def listevent(lt):
prev_len=len(lt)
lt.append("grapes")
new_len=len(lt)
if(prev_len!=new_len):
listchanged()
listevent(lt)
I needed a container that can fire an event when a new item arrives, I've chosen deque and subclassed it this way:
In:
from collections import deque
class ArrivalDeque(deque):
def append(self, value):
super().append(value)
return value
def appendleft(self, value):
super().appendleft(value)
return value
def extend(self, value):
super().extend(value)
return value
def extendleft(self, value):
super().extendleft(value)
return value
pack = [66346.8, 45646.8, 89050.4, 67050.1]
deq = ArrivalDeque([43253.8, 325325.2, 25425.2])
deq.extend(pack)
Out:
[66346.8, 45646.8, 89050.4, 67050.1]
Returns can be modified obviously, to call another function, or to tell length of iterable passed to extend() , for example.
P.S.: take note that sometimes doing this: deq.extend(reversed(pack)) might save the day.

Find object from list that has attribute equal to some value and also get the next object after

I am able to find a object that its attribute equals to some value. But I would like to also get the object after that from a list (and also if the found object is the last in list, the next object after that should be the first object). Something like:
from pprint import pprint
class User(object):
def __init__(self, name):
self.name = name
users = []
users.append(User("Peter"))
users.append(User("James"))
users.append(User("John"))
# find object that has attribute name equal to James
pprint(vars([user for user in users if user.name == "James"][0]))
And the output from pprint line prints:
{'name': 'James'}
That is correct.
I would like to ask you how to get the next object after "James" and also if I would search for "John" the next object after "John" should be returned "Peter". Suggestions?
I also tried with itertools, but I cannot get the next element if the found element is last:
from itertools import enumerate
_i = next(i for i, user in enumerate(users) if (user.name == "John"))
print users[_i + 1] #this is not working
I could add if condition to change the counter before operation [_i+1] but I would like to know if there is more smoother solution to this?
To handle the last element, you can use modulo: index % len(users).
Here is one way:
def find_after_name(users, name):
for i, user in enumerate(users):
if user.name == name:
return users[(i+1) % len(users)]
Another option would be to zip the list with a shifted copy of the list. deque.rotate() is useful for such shifting:
from collections import deque
def find_after_name(users, name):
users2 = deque(users)
users2.rotate(-1)
for user1, user2 in zip(users1, users2):
if user1.name == name:
return user2
Since you've chosen to use OOP, why not implement a UserList class inherited from built-in list class?
class User(object):
def __init__(self, name):
self.name = name
def __str__(self):
return repr(self)
def __repr__(self):
return "User('{0}')".format(self.name)
class UserList(list):
def find(self, name):
for k, user in enumerate(self):
if user.name == name:
return k, user
def next_to(self, name):
"""get user next to the one of name (e.g. 'James')"""
index, user = self.find(name)
next_to_index = self.get_next_to_index(index)
return self[next_to_index]
def get_next_to_index(self, index):
next_to_index = index + 1
if next_to_index == len(self):
# meaning index is already the last element, need to reset index
next_to_index = 0
return next_to_index
users = UserList()
users.append(User("Peter"))
users.append(User("James"))
users.append(User("John"))
print users.find('James')
print users.next_to('James')
print users.next_to('John')
Output:
(1, User('James'))
User('John')
User('Peter')
For kicks, I wanted to see if there was an itertools solution:
from itertools import dropwhile
def find_after_name(users, name):
for i, _ in dropwhile(lambda eu: eu[1].name != name, enumerate(users)):
return users[(i+1) % len(users)]
Note: There really should be a list.index(value, key=None) method where key is like the key argument to list.sort(). Then you could do something like this:
index = users.index("John", key=lambda u: u.name)
Solution extending list class:
class extended_list(list):
def index(self, value, key=None):
if key is None:
return super().index(self, value)
try:
return next(i for i,o in enumerate(self) if value == key(o))
except StopIteration:
raise ValueError("{} is not in list".format(repr(value)))
def find_after_name(users, name):
i = extended_list(users).index(name, key=lambda u: u.name)
return users[(i+1) % len(users)]

Format canonical URL structure correctly with URL Processors

EDIT: Could you read my question more carefully? This question is specific and not duplicated as has been said. Obviously I read this question before post.
In this demo code from the docs, the fragment after the lang parameter will be static. So, in English /en/about, and for example, in Portuguese /pt/about. Well, the correct, should be /pt/sobre.
Any idea about the correct way to make that with URL Processors?
from flask import Flask, g
app = Flask(__name__)
#app.url_defaults
def add_language_code(endpoint, values):
if 'lang_code' in values or not g.lang_code:
return
if app.url_map.is_endpoint_expecting(endpoint, 'lang_code'):
values['lang_code'] = g.lang_code
#app.url_value_preprocessor
def pull_lang_code(endpoint, values):
g.lang_code = values.pop('lang_code', None)
#app.route('/<lang_code>/')
def index():
...
#app.route('/<lang_code>/about')
def about():
Ok, you already have language prefix. So you need have several translations for next part.
The easiest way it use several routes or converters:
#app.route('/<lang_code>/about')
#app.route('/<lang_code>/sobre')
def about():
pass
or
#app.route('/<lang_code>/<any(about,sobre):about>')
def about(about):
pass
But it hard to support and add new languages.
Second way is change route method to translate special words or add special translate processor converter, last more interesting and implicit:
from werkzeug.routing import AnyConverter
languages = ['en', 'pt']
def translate_url(word, language):
if language == 'pt' and word == 'about':
return 'sobre'
return word
class TranslateConverter(AnyConverter):
def __init__(self, map, item):
AnyConverter.__init__(self, map, *[translate_url(item, language)
for language in languages])
app.url_map.converters['tr'] = TranslateConverter
#app.route('/<lang_code>/<tr(about):about>')
def about(about):
pass
But this example have next issue:
/en/about
/en/sorbe
/pt/about
/pt/sorbe
is valid urls, but you can also try use own Rule class (Flask.url_rule_class) where in match method you can process this cases:
from werkzeug.routing import AnyConverter, Rule
class TranslateConverter(AnyConverter):
def __init__(self, map, item):
self.language_pairs = {language: translate_url(item, language)
for language in languages}
AnyConverter.__init__(self, map, *tuple(self.language_pairs.values()))
class TranslateCorrelationRule(Rule):
def match(self, path):
result = Rule.match(self, path)
if result is None:
return result
lang_code = result.get('lang_code')
if lang_code is None:
return result
for name, value in self._converters.items():
if not isinstance(value, TranslateConverter):
continue
if value.language_pairs[lang_code] != result[name]:
return
return result
app.url_map.converters['tr'] = TranslateConverter
app.url_rule_class = TranslateCorrelationRule
If you will simplify url_for usage for this examples you can use next sample:
#app.url_value_preprocessor
def pull_lang_code(endpoint, values):
if not values:
return
g.lang_code = values.pop('lang_code', None)
for key, value in values.items():
if key.startswith('_'):
values.pop(key)
class TranslateCorrelationRule(Rule):
def _update_translate_values(self, values):
lang_code = values.get('lang_code', getattr(g, 'lang_code', None))
if lang_code is None:
return values
values = values.copy()
for argument in self.arguments:
if argument in values:
continue
converter = self._converters[argument]
if not isinstance(converter, TranslateConverter):
continue
values[argument] = converter.language_pairs[lang_code]
return values
def suitable_for(self, values, method=None):
return Rule.suitable_for(self, self._update_translate_values(values),
method)
def build(self, values, append_unknown=True):
return Rule.build(self, self._update_translate_values(values),
append_unknown)

python cache dictionary - counting number of hits

I'm implementing a caching service in python. I'm using a simple dictionary so far. What I'd like to do is to count number of hits (number of times when a stored value was retrieved by the key). Python builtin dict has no such possibility (as far as I know). I searched through 'python dictionary count' and found Counter (also on stackoverflow), but this doesn't satisfy my requirements I guess. I don't need to count what already exists. I need to increment something that come from the outside. And I think that storing another dictionary with hits counting only is not the best data structure I can get :)
Do you have any ideas how to do it efficiently?
For an alternative method, if you're using Python 3 (or are willing to add this module to your Python 2 project, which has a slightly different interface), I strongly recommend the lru_cache decorator.
See the docs here. For example, this code :
from functools import lru_cache
#lru_cache(maxsize=32)
def meth(a, b):
print("Taking some time", a, b)
return a + b
print(meth(2, 3))
print(meth(2, 4))
print(meth(2, 3))
...will output :
Taking some time 2 3
5
Taking some time 2 4
6
5 <--- Notice that this function result is cached
As per the documentation, you can get the number of hits and misses with meth.cache_info(), and clear the cache with meth.cache_clear().
You can subclass a built-in dict class:
class CustomDict(dict):
def __init__(self, *args, **kwargs):
self.hits = {}
super(CustomDict, self).__init__(*args, **kwargs)
def __getitem__(self, key):
if key not in self.hits:
self.hits[key] = 0
self.hits[key] += 1
return super(CustomDict, self).__getitem__(key)
usage:
>>> d = CustomDict()
>>> d["test"] = "test"
>>> d["test"]
'test'
>>> d["test"]
'test'
>>> d.hits["test"]
2
Having another dictionary to store the hit counts is probably not a bad option, but you could also do something like:
class CacheService(object):
def __init__(self):
self.data = {}
def __setitem__(self, key, item):
self.data[key] = [item, 0]
def __getitem__(self, key):
value = self.data[key]
value[1] += 1
return value[0]
def getcount(self, key):
return self.data[key][1]
You can use it something like this:
>>> cs = CacheService()
>>> cs[1] = 'one'
>>> cs[2] = 'two'
>>> print cs.getcount(1)
0
>>> cs[1]
'one'
>>> print cs.getcount(1)
1
It will be much easier to just overload the built-in dict data type. This will solve your problem.
def CountDict(dict):
count = {}
def __getitem__(self, key):
CountDict.count[key] = CountDict.count.get(key, 0) + 1
return super(CountDict, self).__getitem__(self, key)
def __setitem__(self, key, value):
return super(CountDict, self).__setitem__(self, key, value)
def get_count(self, key):
return CountDict.count.get(key, 0)
This will give you lot more flexibility. Like you can have two counts one for number of reads and another for number of writes, if you wish without much of a complexity. To learn more about super, see here.
Edited to meet OP's need of keeping a count for reading a key. The output can be obtained by calling get_count method.
>>>my_dict = CountDict()
>>>my_dict["a"] = 1
>>>my_dict["a"]
>>>1
>>>my_dict["a"]
>>>1
>>>my_dict.get_count("a")
>>>2
You could try this approach.
class AccessCounter(object):
'''A class that contains a value and implements an access counter.
The counter increments each time the value is changed.'''
def __init__(self, val):
super(AccessCounter, self).__setattr__('counter', 0)
super(AccessCounter, self).__setattr__('value', val)
def __setattr__(self, name, value):
if name == 'value':
super(AccessCounter, self).__setattr__('counter', self.counter + 1)
# Make this unconditional.
# If you want to prevent other attributes to be set, raise AttributeError(name)
super(AccessCounter, self).__setattr__(name, value)
def __delattr__(self, name):
if name == 'value':
super(AccessCounter, self).__setattr__('counter', self.counter + 1)
super(AccessCounter, self).__delattr__(name)

Categories