Generic arguments in recursive functions: terrible habit? - python

I catch myself doing this a lot. The example is simple, but, in practice, there are a lot of complex assignments to update data structures and conditions under which the second recursion is not called.
I'm working with mesh data. Points, Edges, and Faces are stored in separate dictionaries and "pointers" (dict keys) are heavily used.
import itertools


class Demo(object):
    """Minimal mesh-like store: ``b`` holds values, ``a`` maps keys to b-keys."""

    def __init__(self):
        self.a = {}
        self.b = {}
        # Single shared counter so keys are unique across both dicts.
        self.keygen = itertools.count()

    def add_to_b(self, val):
        """Store *val* under a fresh key in ``b`` and return that key."""
        fresh = next(self.keygen)
        self.b[fresh] = val
        return fresh

    def recur_method(self, arg, argisval=True):
        """Record *arg* under a fresh a-key.

        On the first pass *arg* is a raw value: it is interned into ``b`` and
        the method recurses once with the resulting b-key.
        """
        a_key = next(self.keygen)
        if argisval is True:
            # arg is a value: intern it, then recurse with its key.
            b_key = self.add_to_b(arg)
            self.a[a_key] = b_key
            self.recur_method(b_key, argisval=False)
        else:
            # arg is already a key into b.
            self.a[a_key] = arg


demo = Demo()
demo.recur_method(2.2)
Is there a better way? short of cutting up all of my assignment code into seven different methods? Should I be worried about this anyway?

Try
def recur_method(self, key=None, val=None):
    """Require at least one of *key*/*val*.

    BUG FIX: the original raised `exception(...)`, an undefined name that
    would itself crash with NameError; raise a real ValueError instead.
    """
    if key is None and val is None:
        raise ValueError("You fail it")
If None is a valid input, then use a guard value:
# Private sentinel: lets callers pass None as a legitimate value.
sentinel = object()


def recur_method(self, key=sentinel, val=sentinel):
    """Require at least one of *key*/*val*, with None allowed as a real value.

    BUG FIX: the original raised `exception(...)`, an undefined name; raise a
    real ValueError instead.
    """
    if key is sentinel and val is sentinel:
        raise ValueError("You fail it")

Related

How to find Dictionary Key(s) from Value in a large nested dictionary of variable depth?

Say that I have a large dictionary full of nested values such as this:
large_dic ={
...
"key":{"sub-key1" :{"sub-key2": "Test"}},
"0key":{"0sub-key1": "0Test"},
"1key":{"1sub-key1":{"1sub-key2":{"1sub-key3":"1Test"}}}
...
}
What I would like to do is to be able to get for example from the final value:
"1Test"
the key(s) to access it, such as in this case:
large_dic["1key"]["1sub-key1"]["1sub-key2"]["1sub-key3"]
Thanks for the support.
Edit to add more info: The dictionary trees I'm talking about are linear (YAML files converted into a Python dictionary structure): each node has exactly one key, and the ending leaf values may not be unique.
Since OP is looking for hierarchical keys instead
I made this class :
class PointingSlice:
    """Remembers an object plus a chain of subscripts, and can render or apply them."""

    def __init__(self, obj, *slices) -> None:
        self.obj = obj
        self.slices = slices

    def __str__(self):
        rendered = "".join(self._repr_slice(s) for s in self.slices)
        return f"{str(self.obj)}{rendered}"

    def _repr_slice(self, sliced: slice):
        # Plain keys render via repr(); real slice objects render as start:stop:step.
        if not isinstance(sliced, slice):
            return "[{}]".format(repr(sliced))
        parts = ("" if part is None else str(part)
                 for part in (sliced.start, sliced.stop, sliced.step))
        return "[{}]".format(":".join(parts))

    def resolve(self):
        """Apply every stored subscript in order and return the final value."""
        current = self.obj
        for sliced in self.slices:
            current = current[sliced]
        return current
and this function for instantiation :
def find_longest(mapping, key):
    """Follow single-entry sub-dicts from *key* down to the leaf, collecting keys.

    Returns a PointingSlice over the full key path.
    """
    chain = [key]
    node = mapping[key]
    while isinstance(node, dict):
        # Exactly one entry per level (linear, YAML-derived tree).
        ((child, node),) = node.items()
        chain.append(child)
    return PointingSlice(mapping, *chain)
Example use:
print(find_longest(large_dic, "1key"))
# output:
# {'key': {'sub-key1': {'sub-key2': 'Test'}}, '0key': {'0sub-key1': '0Test'}, '1key': {'1sub-key1': {'1sub-key2': {'1sub-key3': '1Test'}}}}['1key']['1sub-key1']['1sub-key2']['1sub-key3']
# do note that it is the same thing as large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(find_longest(large_dic, "1key").resolve()) # 1Test
So I made some changes and now it supports additional repr options matching your exact use case :
class PointingSlice:
    """Object + subscript chain, with an optional display name for the object."""

    def __init__(self, obj, *slices, object_name=None) -> None:
        self.obj = obj
        self.slices = slices
        # Cosmetic only: shown instead of repr(obj) when rendering.
        self.object_name = object_name

    def __str__(self):
        rendered = "".join(self._repr_slice(s) for s in self.slices)
        return f"{self.object_name or str(self.obj)}{rendered}"

    def _repr_slice(self, sliced: slice):
        # Plain keys render via repr(); real slice objects render as start:stop:step.
        if not isinstance(sliced, slice):
            return "[{}]".format(repr(sliced))
        parts = ("" if part is None else str(part)
                 for part in (sliced.start, sliced.stop, sliced.step))
        return "[{}]".format(":".join(parts))

    def resolve(self):
        """Apply every stored subscript in order and return the final value."""
        current = self.obj
        for sliced in self.slices:
            current = current[sliced]
        return current
large_dic = {
    "key": {"sub-key1": {"sub-key2": "Test"}},
    "0key": {"0sub-key1": "0Test"},
    "1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "1Test"}}},
}


def find_longest(mapping, key):
    """Walk the linear tree under *key*; return a PointingSlice over the key path."""
    path = [key]
    node = mapping[key]
    while isinstance(node, dict):
        # Each level holds exactly one entry (YAML-derived linear tree).
        ((step, node),) = node.items()
        path.append(step)
    return PointingSlice(mapping, *path)


f = find_longest(large_dic, "1key")
# For representational purposes only; it works without this.
f.object_name = "large_dic"
print(f)  # large_dic['1key']['1sub-key1']['1sub-key2']['1sub-key3']
print(f.resolve())  # 1Test
There are numerous ways to achieve this. You might want to look up "prefix tree traversal" (or "trie traversal").
A simple recursive solution with poor memory efficiency could look like this:
def find_trie_leaf_path(trie: dict, leaf_value, trie_path: "list[str] | None" = None):
    """Yield the key path (list of keys) to every leaf equal to *leaf_value*.

    Depth-first over nested dicts; each match is yielded as a fresh list.

    BUG FIX: the original used a mutable default argument (`trie_path=[]`).
    It happened never to mutate it in place, but the shared-default trap is
    removed by using the None-sentinel idiom.
    """
    if trie_path is None:
        trie_path = []
    for key, value in trie.items():
        if isinstance(value, dict):
            yield from find_trie_leaf_path(value, leaf_value, trie_path + [key])
        elif value == leaf_value:
            yield trie_path + [key]


large_dic = {
    "key": {"sub-key1": {"sub-key2": "Test"}},
    "0key": {"0sub-key1": "0Test"},
    "1key": {"1sub-key1": {"1sub-key2": {"1sub-key3": "Test"}}},
}

first_match = next(find_trie_leaf_path(large_dic, "Test"))
all_matches = list(find_trie_leaf_path(large_dic, "Test"))
This should work even if your trie is very wide. If it is very high, I'd rather use an iterative algorithm.
I want to point out, though, that prefix trees are usually used the other way round. If you find yourself needing this search a lot, you should consider a different data structure.
Yes, it's totally possible. Here's the function to get the deeply nested value:
def get_final_value(mapping, key):
    """Descend through single-valued nested dicts under *key*; return the leaf."""
    node = mapping[key]
    while isinstance(node, dict):
        # Tuple-unpacking enforces that each level has exactly one value.
        (node,) = node.values()
    return node
Example use:
>>> get_final_value(large_dic, "key")
'Test'
>>> get_final_value(large_dic, "0key")
'0Test'
>>> get_final_value(large_dic, "1key")
'1Test'
>>>
Can the parent keys be deduced from your final value in any way or is the tree structure rather random? If latter is the case then you'll probably just end up searching your tree until you find your value, what path search algorithm you choose for that again depends on the tree structure you have. As already asked in the comments, does each node only have one other node or is it binary or can it have many child nodes?

Python check if one or more values is None and knows which ones are

I have a Python app with a Firebase-database backend.
When I retrieve the data from my database, I want to check if those values
are available (if not, that means the database is somehow corrupted, as mandatory fields are missing)
My current implementation is the following:
# Fragment of an init-from-db method: fetch four mandatory fields, then bail
# out with one combined check (which loses the information of WHICH field is
# missing — the problem stated in the question).
self.foo = myDbRef.get('foo')
self.bar = myDbRef.get('bar')
self.bip = myDbRef.get('bip')
self.plop = myDbRef.get('plop')
# .get() returns None for absent keys, so any None means a corrupted record.
if self.foo is None or self.bar is None or self.bip is None or self.plop is None:
self.isValid = False
return ErrorCode.CORRUPTED_DATABASE
This works fine, is compact, but have a major issue: I will get the information that the database is corrupted,
but not what field is missing (could be just one of them, or more, or all !)
The idiomatic approach should be
# Fragment: the verbose per-field variant — repeats the same three lines for
# every mandatory field so the error message can name the missing one.
if self.foo is None:
self.isValid = False
return ErrorCode.CORRUPTED_DATABASE, "FOO IS MISSING" # could be a string, an enum value, whatever, I have the information
if self.bar is None:
self.isValid = False
return ErrorCode.CORRUPTED_DATABASE, "BAR IS MISSING"
if self.bip is None:
self.isValid = False
return ErrorCode.CORRUPTED_DATABASE, "BIP IS MISSING"
But this is not pretty, not factorized (All my 'init from db' functions use the same pattern... I don't want to multiply my
number of lines by a factor of 10 for such a case).
This is not a '100% Python' question, but I hope the language has something for me to handle this like a boss (it's Python: it usually does!)
You could extract the checks into a generator and leave the flag and return statements outside.
# Fragment: collect the names of all None fields with a generator, then fail
# once with the full list (reports every missing field, not just the first).
def invalid_fields():
if self.foo is None: yield "FOO"
if self.bar is None: yield "BAR"
if self.bip is None: yield "BIP"
invalid = list(invalid_fields())
if invalid:
self.isValid = False
return ErrorCode.CORRUPTED_DATABASE, "MISSING {}".format(", ".join(invalid))
This has the advantage of telling you about all the missing fields if there are more than one.
I made a class to contain some of your functionality that I can't access. I also made ErrorCode a string as a hack, since that's not defined in my tools and I'm not sure how you want the None names returned with/beside the ErrorCode.
Build a dict of names and values, check that the dict contains no None values, and if it does, return which keys:
# Fake DB record; 'foo' is deliberately None to demonstrate the error path.
myDbRef = {'foo': None,
           'bar': 1,
           'bip': 2,
           'plop': 3}


class Foo():
    """Demo: pulls four fields from the fake DB dict above and validates them."""

    def __init__(self):
        self.foo = myDbRef.get('foo')
        self.bar = myDbRef.get('bar')
        self.bip = myDbRef.get('bip')
        self.plop = myDbRef.get('plop')

    def check(self):
        """Return (error-code, missing-key-list) if any field is None, else None."""
        fetched = {name: myDbRef.get(name) for name in ('foo', 'bar', 'bip', 'plop')}
        missing = {k: v for k, v in fetched.items() if v is None}
        if missing:
            self.isValid = False
            return ("ErrorCode.CORRUPTED_DATABASE", list(missing.keys()))


f = Foo()
print(f.check())
Result: ('ErrorCode.CORRUPTED_DATABASE', ['foo'])
Use a function and a loop:
def checknone(**things_with_names):
    """Return (ErrorCode.CORRUPTED_DATABASE, "<NAME> IS MISSING") for the
    first keyword whose value is None; return True when all are present."""
    missing = next((name for name, thing in things_with_names.items()
                    if thing is None), None)
    if missing is not None:
        return ErrorCode.CORRUPTED_DATABASE, missing + " IS MISSING"
    return True
And use as such:
# Fragment: call site inside an init-from-db method; checknone returns True
# when everything is present, otherwise the (error-code, message) tuple.
result = checknone(foo=self.foo, bar=self.bar, bip=self.bip, plop=self.plop)
if result is not True:
self.isValid = False
return result
For maximum gains, put it as a method of a class that you will Mixin into all your classes that use this. That way it can also set isValid.
You can dynamically create and search your instance attributes like so:
class Foo():
    """Init-from-db demo: fetch each attribute dynamically, recording missing ones.

    BUG FIXES vs the original answer:
    - `return (ErrorCode..., ...)` from __init__ raises TypeError at
      instantiation (``__init__`` must return None); the result is now stored
      on the instance as ``self.error`` instead.
    - ``if not self.__dict__[attrb]`` would misreport legitimate falsy values
      (0, "", False) as missing; use ``is None``.
    """

    def __init__(self):
        # Fields that must be present in the DB record.
        self.attrbs = ['foo', 'bar', 'bip', 'plop']
        self.errors = []
        for attrb in self.attrbs:
            # Dynamically create self.foo, self.bar, ... from the DB.
            value = myDbRef.get(attrb)
            setattr(self, attrb, value)
            if value is None:
                self.errors.append(attrb.upper())
        if self.errors:
            self.is_valid = False
            self.error = (ErrorCode.CORRUPTED_DATABASE,
                          "MISSING {errs}".format(errs='/'.join(self.errors)))
        else:
            self.is_valid = True
            self.error = None

python cache dictionary - counting number of hits

I'm implementing a caching service in python. I'm using a simple dictionary so far. What I'd like to do is to count number of hits (number of times when a stored value was retrieved by the key). Python builtin dict has no such possibility (as far as I know). I searched through 'python dictionary count' and found Counter (also on stackoverflow), but this doesn't satisfy my requirements I guess. I don't need to count what already exists. I need to increment something that come from the outside. And I think that storing another dictionary with hits counting only is not the best data structure I can get :)
Do you have any ideas how to do it efficiently?
For an alternative method, if you're using Python 3 (or are willing to add this module to your Python 2 project, which has a slightly different interface), I strongly recommend the lru_cache decorator.
See the docs here. For example, this code :
from functools import lru_cache


# BUG FIX: the decorator line was `#lru_cache(maxsize=32)` — the markdown
# renderer ate the `@`, turning it into a comment, so no caching happened.
@lru_cache(maxsize=32)
def meth(a, b):
    """Add *a* and *b*; the print fires only when the result is actually computed."""
    print("Taking some time", a, b)
    return a + b


print(meth(2, 3))
print(meth(2, 4))
print(meth(2, 3))  # cached: no "Taking some time" line this time
...will output :
Taking some time 2 3
5
Taking some time 2 4
6
5 <--- Notice that this function result is cached
As per the documentation, you can get the number of hits and misses with meth.cache_info(), and clear the cache with meth.cache_clear().
You can subclass a built-in dict class:
class CustomDict(dict):
    """dict that counts, per key, how many times __getitem__ was invoked."""

    def __init__(self, *args, **kwargs):
        # Per-instance hit table, populated lazily on first access of each key.
        self.hits = {}
        super(CustomDict, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        # Count the access first (so lookups that go on to raise KeyError are
        # counted too, exactly as in the original).
        self.hits[key] = self.hits.get(key, 0) + 1
        return super(CustomDict, self).__getitem__(key)
usage:
>>> d = CustomDict()
>>> d["test"] = "test"
>>> d["test"]
'test'
>>> d["test"]
'test'
>>> d.hits["test"]
2
Having another dictionary to store the hit counts is probably not a bad option, but you could also do something like:
class CacheService(object):
    """Tiny cache that stores each value alongside its read count."""

    def __init__(self):
        self.data = {}

    def __setitem__(self, key, item):
        # Every write resets the counter: entry layout is [value, hit_count].
        self.data[key] = [item, 0]

    def __getitem__(self, key):
        entry = self.data[key]
        entry[1] += 1
        return entry[0]

    def getcount(self, key):
        """Number of reads of *key* since it was last written."""
        return self.data[key][1]
You can use it something like this:
>>> cs = CacheService()
>>> cs[1] = 'one'
>>> cs[2] = 'two'
>>> print cs.getcount(1)
0
>>> cs[1]
'one'
>>> print cs.getcount(1)
1
It will be much easier to just overload the built-in dict data type. This will solve your problem.
class CountDict(dict):
    """dict subclass that counts reads per key.

    BUG FIXES vs the original answer:
    - ``def CountDict(dict):`` declared a function, not a class;
    - ``super(...).__getitem__(self, key)`` passed ``self`` twice (TypeError);
    - the count table was class-level shared state; it is now per-instance.
    """

    def __init__(self, *args, **kwargs):
        super(CountDict, self).__init__(*args, **kwargs)
        self.count = {}

    def __getitem__(self, key):
        self.count[key] = self.count.get(key, 0) + 1
        return super(CountDict, self).__getitem__(key)

    def __setitem__(self, key, value):
        return super(CountDict, self).__setitem__(key, value)

    def get_count(self, key):
        """How many times *key* has been read via indexing (0 if never)."""
        return self.count.get(key, 0)
This will give you lot more flexibility. Like you can have two counts one for number of reads and another for number of writes, if you wish without much of a complexity. To learn more about super, see here.
Edited to meet OP's need of keeping a count for reading a key. The output can be obtained by calling get_count method.
>>>my_dict = CountDict()
>>>my_dict["a"] = 1
>>>my_dict["a"]
>>>1
>>>my_dict["a"]
>>>1
>>>my_dict.get_count("a")
>>>2
You could try this approach.
class AccessCounter(object):
    """Holds a value; ``counter`` records every change or deletion of ``value``."""

    def __init__(self, val):
        # Write through object.__setattr__ so initialization itself is not counted.
        object.__setattr__(self, 'counter', 0)
        object.__setattr__(self, 'value', val)

    def __setattr__(self, name, value):
        # Only mutations of `value` bump the counter; other attributes are set
        # unconditionally. (To forbid them, raise AttributeError(name) instead.)
        if name == 'value':
            object.__setattr__(self, 'counter', self.counter + 1)
        object.__setattr__(self, name, value)

    def __delattr__(self, name):
        if name == 'value':
            object.__setattr__(self, 'counter', self.counter + 1)
        object.__delattr__(self, name)

How to create a Tree-based Map with keys

For my intro to computer science class we have a tree based map problem. I'm getting really confused on how to make the tree in the fashion they are asking it.
What I have so far:
class EmptyMap():
    """Leaf marker: an empty tree-map node."""
    __slots__ = ()


class NonEmptyMap():
    """Interior node: left subtree, key/value pair, right subtree."""
    __slots__ = ('left', 'key', 'value', 'right')


def mkEmptyMap():
    """Construct an empty map."""
    return EmptyMap()


def mkNonEmptyMap(map1, key, value, map2):
    """Construct a node holding *key*/*value* with subtrees *map1* and *map2*."""
    node = NonEmptyMap()
    node.left = map1
    node.key = key
    node.value = value
    node.right = map2
    return node
def mapInsert(key, value, map1):
    """Insert (key, value) into BST *map1*, returning the resulting map.

    FIX: the original stub had an empty `if` body (SyntaxError); completed
    with the standard recursive binary-search-tree insert.
    """
    if isinstance(map1, EmptyMap):
        return mkNonEmptyMap(mkEmptyMap(), key, value, mkEmptyMap())
    if key == map1.key:
        map1.value = value
    elif key < map1.key:
        map1.left = mapInsert(key, value, map1.left)
    else:
        map1.right = mapInsert(key, value, map1.right)
    return map1
I'm getting stuck on the mapInsert function, which is supposed to be recursive. Our tutoring lab doesn't have any tutors in it now, so any help is appreciated.
Link to homework file http://www.cs.rit.edu/~vcss241/Homeworks/08/TreeMap-stu.pdf
Thanks!
I have never written or seen Python, but try this:
def mapInsert(key, value, map1):
    """Recursively insert (key, value) into *map1*, returning the updated map.

    BUG FIXES vs the original (self-admittedly non-Python) answer:
    - `else if` is not Python; use `elif`;
    - `return map1.left = mapInsert(...)` is invalid (assignment is a
      statement, not an expression);
    - the equal-key branch never returned; every path now returns the map.
    """
    if isinstance(map1, EmptyMap):
        return mkNonEmptyMap(mkEmptyMap(), key, value, mkEmptyMap())
    if map1.key == key:
        map1.value = value
    elif map1.key > key:
        map1.left = mapInsert(key, value, map1.left)
    else:
        map1.right = mapInsert(key, value, map1.right)
    return map1

How to add __iter__ to dynamic type?

Source
def flags(*opts):
    """Build a bit-flag namespace from (KEY, description) pairs.

    Each KEY becomes an attribute valued 1, 2, 4, ... so flags can be OR-ed
    together and stored as a single int.

    FIX (the question being asked): the original computed `choices` but never
    attached it, so the result was not iterable.  An ``__iter__`` is installed
    over a *list* (re-iterable, unlike the original one-shot ``iter``) and an
    instance is returned so ``for k, v in Abilities:`` works directly.
    """
    keys = [t[0] for t in opts]
    words = [t[1] for t in opts]
    nums = [2**i for i in range(len(opts))]
    attrs = dict(zip(keys, nums))
    choices = list(zip(nums, words))  # list, not iter(): survives repeated loops
    attrs['__iter__'] = lambda self: iter(choices)
    return type('Flags', (), attrs)()


Abilities = flags(
    ('FLY', 'Can fly'),
    ('FIREBALL', 'Can shoot fireballs'),
    ('INVISIBLE', 'Can turn invisible'),
)
Question
How can I add an __iter__ method to Abilities so that I can iterate over choices?
Why?
This way I can use things like
hero.abilities = Abilities.FLY | Abilities.FIREBALL
if hero.abilities & Abilities.FIREBALL:
for k, v in Abilities:
print k, v
in my code without having to use any magic numbers or strings, and I can also save the set of flags to the DB as a single int, or display the list in a readable format.
Other improvements are welcome.
There's no need to use a dynamic type here; I'd restructure this as a simple class, for example:
class flags(object):
    """Instance-based flag namespace: attribute access yields bit values,
    iteration yields (num, word) pairs."""

    def __init__(self, *opts):
        names = [t[0] for t in opts]
        words = [t[1] for t in opts]
        nums = [2**i for i in range(len(opts))]
        self.attrs = dict(zip(names, nums))
        # NOTE: plain zip(), exactly as the original — on Python 3 this is a
        # one-shot iterator, so the instance can only be iterated once.
        self.choices = zip(nums, words)

    def __getattr__(self, a):
        return self.attrs[a]

    def __iter__(self):
        return iter(self.choices)
# Example usage of the flags class defined above.
Abilities = flags(
('FLY', 'Can fly'),
('FIREBALL', 'Can shoot fireballs'),
('INVISIBLE', 'Can turn invisible'),
)
# NOTE(review): the prints below use Python 2 statement syntax; under
# Python 3 they need parentheses.
print Abilities.FLY
for k, v in Abilities:
print k, v
Why are you doing it the hard way? If you want a dict with __getattr__ overriding why not start with one:
class Flags(dict):
    """dict whose keys are also readable as attributes (flags.FOO == flags['FOO'])."""

    def __init__(self, *args):
        # *args is a tuple of (key, value) pairs; dict.__init__ accepts it directly.
        dict.__init__(self, args)

    def __getattr__(self, name):
        # NOTE: a missing name raises KeyError (not AttributeError), as in the original.
        return self[name]
...
This also has the Advantage of Least Surprise, since dict.__iter__() generates keys and dict.iteritems() yields tuples.
You need two key changes -- the last lines of flags should be:
choices = iter(zip(nums,words))
attrs['__iter__'] = lambda self: choices
return type('Flags', (), dict(attrs))()
Note that I've added a line setting __iter__, and a trailing () in the return to instantiate the type (to loop on a type you'd have to use a custom metaclass -- way overkill, no need).
The last line of flags should actually be:
return type('Flags', (), attrs)()
as there's no reason to make a copy of attrs, which is already a dict (but that's an innocuous redundancy, not a killer mistake;-).
It would be a more Pythonic solution if you implement your own __getattr__ method for accessing dynamic fields instead of dealing with metaclasses through type.
Edit: It's not clear to me what do you mean by choices, but here is an example:
class Abilities(object):
    """Wraps (TITLE, id, help) triples: attribute lookup by title returns the
    numeric id; iteration yields (id, help) pairs.

    BUG FIX: the original ``__iter__`` used ``(id, help for ... )`` which is a
    SyntaxError — the yielded tuple must be parenthesized.  Local names no
    longer shadow the ``id``/``help`` builtins.
    """

    def __init__(self, abilities):
        self.abilities = abilities

    def __getattr__(self, name):
        matches = [entry for entry in self.abilities if entry[0] == name]
        if len(matches) != 1:
            raise AttributeError('attribute {0} not found'.format(name))
        # entry is (title, id, help); expose the numeric id.
        return matches[0][1]

    def __iter__(self):
        return ((num, text) for title, num, text in self.abilities)
# Example spec: (TITLE, id, help-text) triples.
SPEC = [
('FLY', 10, 'Can fly'),
('FIREBALL', 13, 'Can shoot fireballs'),
('INVISIBLE', 14, 'Can turn invisible'),
]
# NOTE(review): typo — `abitilies` is assigned but `abilities` is referenced
# below (NameError at runtime); `hero` is undefined in this snippet and the
# final print uses Python 2 syntax.
abitilies = Abilities(SPEC)
hero.abilities = abilities.FLY | abilities.FIREBALL
for k, v in abilities:
print k, v
Based on your guys' suggestions, I came up with this:
Source
class enumerable(object):
    """Base for enum/flags: maps option names to the numbers produced by *func*."""

    def __init__(self, func, *opts):
        nums = func(len(opts))
        self.attrs = dict(zip([t[0] for t in opts], nums))
        # NOTE: plain zip(), exactly as the original — a one-shot iterator on
        # Python 3, so iteration (and len()) behave accordingly there.
        self.opts = zip(nums, [t[1] for t in opts])

    def __getattr__(self, a):
        return self.attrs[a]

    def __len__(self):
        return len(self.opts)

    def __iter__(self):
        return iter(self.opts)

    def __deepcopy__(self, memo):
        # Instances are constants after creation, so sharing self is safe.
        return self
class enum(enumerable):
    """Sequentially numbered options: 0, 1, 2, ..."""

    def __init__(self, *opts):
        # range() supplies consecutive numbers for the option names.
        super(enum, self).__init__(range, *opts)
class flags(enumerable):
    """Bit-flag options: 1, 2, 4, ... so values can be OR-ed together."""

    def __init__(self, *opts):
        # One power of two per option index.
        super(flags, self).__init__(lambda count: [1 << i for i in range(count)], *opts)
### tests:
Abilities = enum(
('FLY', 'Can fly'),
('FIREBALL', 'Can shoot fireballs'),
('INVISIBLE', 'Can turn invisible'),
('FROST_NOVA', 'Can call down an ice storm'),
('BLINK', 'Can teleport short distances'),
)
# NOTE(review): Python 2 print statements below; also len(Abilities) relies on
# self.opts supporting len(), which holds on Python 2 (zip returns a list)
# but not on Python 3 (zip is an iterator).
print 'Fireball = %d' % Abilities.FIREBALL
print 'Number of options = %d' % len(Abilities)
for k, v in Abilities:
print '%d: %s' % (k, v)
Output
Fireball = 1
Number of options = 5
0: Can fly
1: Can shoot fireballs
2: Can turn invisible
3: Can call down an ice storm
4: Can teleport short distances
For whatever reason, my particular application needs __deepcopy__ to be implemented. Since these classes are for building "constants", none of their attributes should ever be changed after creation; thus I hope it's safe just to return self.

Categories