De-uglify memcache boilerplate in Python - python

I'm just getting started with Python. I'm making heavy use of caching in my app and my code is increasingly littered with this same pattern, which is the standard caching pattern I've seen used all over the shop. Are there some sexy syntactic tricks in Python that can DRY out some of this boilerplate?
(btw, this is not actual code)
# Determine if we are allowed to use cache
cacheable = settings.cache.lifetime is not None
# Generate unique cache key
cache_key = 'something_unique_{some_arg}'.format(some_arg=*args[0])
# Return cached version if allowed and available
if cacheable:
cached = memcache.get(cache_key)
if cached:
return cached
# Generate output
output = do_something_fooey(args[0])
# Cache output if allowed
if cacheable:
memcache.set(cache_key, output, settings.cache.lifetime)
return output
I'm going to have a stab at this too, probably writing a caching wrapper function and passing the output generation to it as a "delegate" (dunno if that's Python lingo), but it'd be great to get some advice from Python experts.

You want a decorator:
def cached(func):
    """Cache the results of *func* in memcache.

    The cache is consulted only while ``settings.cache.lifetime`` is not
    None.  Keys combine the function's module, its name and its positional
    arguments.  A result of None is never stored, because the lookup treats
    None as "not cached".
    """
    import functools  # imported here because the snippet has no import block

    @functools.wraps(func)  # keep func's name/docstring on the wrapper
    def _cached(*args):
        # Determine if we are allowed to use cache
        cacheable = settings.cache.lifetime is not None
        # Generate unique cache key from all positional arguments; for a
        # single argument this is the same key as formatting args[0].
        cache_key = '{0}-{1}-{2}'.format(
            func.__module__,
            func.__name__,
            '-'.join('{0}'.format(arg) for arg in args),
        )
        # Return cached version if allowed and available
        if cacheable:
            result = memcache.get(cache_key)
            if result is not None:
                return result
        # Generate output (forward every argument, not just the first)
        result = func(*args)
        # Cache output if allowed
        if cacheable and result is not None:
            memcache.set(cache_key, result, settings.cache.lifetime)
        return result
    return _cached
@cached
def do_something_fooey(*args):
    # Placeholder body from the example: `something` stands for the
    # expensive result that should be cached.
    return something
You may want to use functools.wraps (http://docs.python.org/2/library/functools.html#functools.wraps) for a well-behaved decorator.

I've found a couple of alternate pre-rolled solutions:
https://github.com/jayferd/python-cache
and
https://gist.github.com/abahgat/1395810
In the end I created the below, which is a fleshed-out version of @bruno's example. The nice thing about this one is that you can pass an extra_key to the decorator, which forms part of the caching key and can be either a string or a delegate function. (lifetime can also be a delegate function or an integer.) This allows you to add stuff at runtime, such as caching uniquely by user id.
def cached(lifetime=settings.cache.default_lifetime, extra_key=None):
    """Build a memcache-backed caching decorator.

    lifetime: the value handed to ``memcache.set`` as expiry, or a callable
        that receives the wrapped call's positional arguments and returns
        it.  When it resolves to None the call bypasses the cache.
    extra_key: additional cache-key component; either a plain value or a
        zero-argument callable evaluated on every call.

    Results equal to None are never stored, because a None from
    ``memcache.get`` is treated as a miss.
    """
    def _cached(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Resolve lifetime if it's a function
            resolved_lifetime = lifetime(*args) if callable(lifetime) else lifetime
            if resolved_lifetime is None:
                # Caching disabled for this call
                return func(*args, **kwargs)
            # Hash function args; kwargs are sorted so keyword order does
            # not change the key (sorted() works on Python 2 and 3)
            hashable_args = (args, tuple(sorted(kwargs.items())))
            args_key = hashlib.md5(pickle.dumps(hashable_args)).hexdigest()
            # Generate unique cache key
            cache_key = '{0}-{1}-{2}-{3}'.format(
                func.__module__,
                func.__name__,
                args_key,
                extra_key() if callable(extra_key) else extra_key
            )
            # Return cached version if available
            result = memcache.get(cache_key)
            if result is not None:
                return result
            # Generate output
            result = func(*args, **kwargs)
            # Cache output unless it is None
            if result is not None:
                memcache.set(cache_key, result, resolved_lifetime)
            return result
        return wrapper
    return _cached

Related

How can I override global variables just for the scope of callees of a function in Python?

I'm writing a decorator which needs to pass data to other utility functions; something like:
STORE = []


def utility(message):
    """Append *message* to whatever list the module-level STORE names now."""
    STORE.append(message)


def decorator(func):
    """Wrap *func* so messages passed to utility() during the call are
    collected in a fresh list and printed once the call returns."""
    def decorator_wrap(*args, **kwargs):
        global STORE
        saved_store = STORE
        captured = STORE = []
        try:
            result = func(*args, **kwargs)
        finally:
            # Always put the previous store back, even if func raised,
            # so a failure cannot leave the temporary list installed.
            STORE = saved_store
        for line in captured:
            print(line)
        return result
    return decorator_wrap
@decorator
def foo(x):
    # ...
    utility(x)
    # ...
But that's kind of yuck, and not thread safe. Is there a way to override utility()'s view of STORE for the duration of decorator_wrap()? Or some other way to signal to utility() that there's an alternate STORE it should use?
Alternatively, I could present a different utility() to foo() and all its callees; but that seems like exactly the same problem.
From this answer I find that I can implement it this way:
import inspect
STORE = []


def utility(message):
    """Append *message* to the nearest active store.

    Walks up the call stack looking for a frame whose locals contain
    ``LOCAL_STORE`` (as created by ``decorator``) and falls back to the
    module-level ``STORE`` when no such frame exists.
    """
    store = STORE
    frame = inspect.currentframe()
    try:
        while frame:
            if 'LOCAL_STORE' in frame.f_locals:
                store = frame.f_locals['LOCAL_STORE']
                break
            frame = frame.f_back
    finally:
        # Drop the frame reference so it cannot keep a reference cycle alive
        del frame
    store.append(message)


def decorator(func):
    """Wrap *func* so messages sent to utility() during the call are
    collected locally and printed when the call returns."""
    def decorator_wrap(*args, **kwargs):
        LOCAL_STORE = []  # located by utility() via frame inspection
        result = func(*args, **kwargs)
        for line in LOCAL_STORE:
            print(line)
        return result
    return decorator_wrap
Buuuut while reading the documentation I see f_globals is present in every stack frame. I think the more efficient method would be to inject my local into my callee's f_globals. This would be similar to setting an environment variable before executing another command, but I don't know if it's legal.

Using decorator yields NameError on a defined function

Why does this :
def fn(proc, *args, **kwargs):
    # Decorator attempt exactly as posted in the question.  Only the block
    # structure is restored; the logic is left untouched on purpose,
    # because the question is about why this version fails.
    cache = proc.cache = {}

    def cached_execution(cache, *args, **kwargs):
        if proc in cache:
            if args in cache[proc]:
                return cache[proc][args]
        res = proc(args)
        cache[proc] = {args: res}
        return res

    return cached_execution(cache, proc, *args, **kwargs)
@fn
def cached_fibo(n):
    if n == 1 or n == 0:
        return n
    else:
        return cached_fibo(n-1) + cached_fibo(n-2)


print(cached_fibo(100))
throw an exception like this:
NameError: global name 'cached_fibo' is not defined
What fundamental concept am I missing?
(Conceptually, **kwargs is there for decoration only; it is not used when retrieving the cached result, but don't worry about that.)
A decorator should return a function, not the result of calling a function.
But this leads us to the next mistake: when you're passing cache and proc to cached_execution function they land in *args which in turn gets passed to proc. This doesn't make sense. Just let cache and proc be captured within the inner method:
def fn(proc, *args, **kwargs):
    """Return a memoizing wrapper around *proc*.

    Results are keyed by the tuple of positional arguments and kept in
    ``proc.cache[proc]``.  Keyword arguments are accepted but are neither
    part of the key nor forwarded to *proc*.
    """
    cache = proc.cache = {}

    def cached_execution(*args, **kwargs):
        # One dict per proc that accumulates entries; assigning a fresh
        # {args: res} on every call would discard all earlier results.
        results = cache.setdefault(proc, {})
        if args in results:
            return results[args]
        res = proc(*args)
        results[args] = res
        return res
    return cached_execution
Another problem: you were not unpacking args. You should call proc(*args) instead of proc(args) (already fixed above).
The wrapper seems a little malformed. Here is an updated version:
def fn(proc):
    """Return a memoizing wrapper around the one-argument function *proc*.

    Results are keyed by the tuple of positional arguments and kept in
    ``proc.cache[proc]``.  Only the first positional argument is forwarded
    to *proc*.
    """
    cache = proc.cache = {}

    def cached_execution(*args, **kwargs):
        # One dict per proc that accumulates entries; assigning a fresh
        # {args: res} on every call would discard all earlier results.
        results = cache.setdefault(proc, {})
        if args in results:
            return results[args]
        res = proc(args[0])
        results[args] = res
        return res
    return cached_execution
You were trying to run the wrapper function inside the wrapper instead of returning it to be run as the function, causing issues.
The next issue is that *args collects the positional arguments into a tuple, so proc(args) passes that whole tuple as a single argument; since you only want the first one, the call needs to become proc(args[0]).

Using function value as kwargs.get() default

I have a factory function for a model with several foreign keys in my unit tests. I would like for that factory function to be variadic, allowing the user to specify the objects to use as foreign keys as keyword arguments, but calling the relevant factory function to spawn a new one for any that are left out.
I originally wrote something like:
def model_factory(i, **kwargs):
    """Create a new Model for testing"""
    # As posted in the question: each fkN_factory(i) call is evaluated
    # before kwargs.get() runs, i.e. even when the keyword was supplied.
    test_model_data = {
        'fk1': kwargs.get('fk1', fk1_factory(i)),
        'fk2': kwargs.get('fk2', fk2_factory(i)),
        'fk3': kwargs.get('fk3', fk3_factory(i)),
    }
    return Model.objects.create(**test_model_data)
but this calls the fkN_factory() methods even if the keyword is present, causing a lot of side effects that are interfering with my tests. My question is whether or not there is a simpler way to do what I intended here without resulting in lots of needless function calls, rather than what I have now, which is more like:
def model_factory(i, **kwargs):
    """Create a new Model for testing"""
    test_model_data = {
        'fk1': kwargs.get('fk1', None),
        'fk2': kwargs.get('fk2', None),
    }
    # Build a related object only when the caller did not supply one
    # (an explicit None is treated the same as a missing keyword).
    if test_model_data['fk1'] is None:
        test_model_data['fk1'] = fk1_factory(i)
    if test_model_data['fk2'] is None:
        test_model_data['fk2'] = fk2_factory(i)
    return Model.objects.create(**test_model_data)
You want to factor out that repeated code in some way. The simplest is:
def get_value(mapping, key, default_func, *args):
    """Return ``mapping[key]``, or ``default_func(*args)`` if the key is missing.

    *default_func* is called only on a KeyError, so its side effects are
    avoided whenever the key is present.
    """
    try:
        return mapping[key]
    except KeyError:
        return default_func(*args)
# ...
# Each factory is passed uncalled; get_value() invokes it only when the
# corresponding key is absent from kwargs.
test_model_data = {
'fk1': get_value(kwargs, 'fk1', fk1_factory, i),
'fk2': get_value(kwargs, 'fk2', fk2_factory, i),
# etc.
}
Almost as simple as your original non-working version.
You could take this even farther:
def map_data(mapping, key_factory_map, *args):
    """Build a dict with one entry per key of *key_factory_map*.

    Each value comes from get_value(): the entry in *mapping* when present,
    otherwise the result of calling that key's factory with ``*args``.
    """
    return {key: get_value(mapping, key, factory, *args)
            for key, factory in key_factory_map.items()}
# …
# The dict maps each key to its factory (uncalled); map_data() resolves
# every key through get_value(), passing i to any factory it has to call.
test_model_data = map_data(kwargs, {
'fk1': fk1_factory,
'fk2': fk2_factory,
# …
}, i)
But I'm not sure that's actually better. (If you have an obvious place to define that key-to-factory mapping out-of-line, it probably is; if not, probably not.)

How to get through decorator to get the underlying function arguments information?

I write get_function_arg_data(func) as below code to get the function func's arguments information:
def get_function_arg_data(func):
    """Return ``(argument names, default values)`` for *func*.

    The defaults element is a tuple, or None when *func* has no defaults.
    """
    import inspect
    # inspect.getargspec was removed in Python 3.11; getfullargspec offers
    # the same .args/.defaults fields.  Fall back for Python 2.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    func_data = getspec(func)
    args_name = func_data.args  # func argument list
    args_default = func_data.defaults  # func argument default data list
    return args_name, args_default
def showduration(user_function):
    '''Show time duration decorator: print how long each call took.'''
    import time
    # time.clock() was removed in Python 3.8; use perf_counter when present
    timer = getattr(time, 'perf_counter', None) or time.clock

    def wrapped_f(*args, **kwargs):
        t1 = timer()
        result = user_function(*args, **kwargs)
        print("%s()_Time: %0.5f" % (user_function.__name__, timer() - t1))
        return result
    return wrapped_f
def foo(para1, para2=5, para3=7):
    """Return para1 + para2 + para3 (recomputed in a loop to take up time)."""
    for i in range(1000):
        s = para1+para2+para3
    return s
@showduration
def bar(para1, para2, para3):
    """Return para1 + para2 + para3 (recomputed in a loop to take up time)."""
    for i in range(1000):
        s=para1+para2+para3
    return s
# Single-argument print(...) behaves the same on Python 2 and 3
print(get_function_arg_data(foo))
bar(1,2,3)
print(get_function_arg_data(bar))
>>>
(['para1', 'para2', 'para3'], (5, 7))
bar()_Time: 0.00012
([], None)
>>>
get_function_arg_data() works for foo but not for bar, because bar is decorated with @showduration. My question is: how can I penetrate the decorator to get the underlying function's information (argument list and default values)?
Thanks for your tips.
I don't think there is, or at least I don't know of, any general way to "penetrate" a decorated function and get at the underlying function's information, because Python's concept of function decoration is so general -- in fact, generally speaking, there's nothing that requires or guarantees that the original function will be called at all (although that's usually the case).
Therefore, a more practical question would be: How could I write my own decorators which would allow me to inspect the underlying function's argument information?
One easy way, previously suggested, would be to use Michele Simionato's decorator module (and write decorators compatible with it).
A less robust, but extremely simple way of doing this would be to do what is shown below based on the code in your question:
def get_function_arg_data(func):
    """Return ``(argument names, default values)`` for *func*.

    If *func* carries an ``_original_f`` attribute (set by the decorator),
    the function stored there is inspected instead.
    """
    import inspect
    func = getattr(func, '_original_f', func)  # use saved original if decorated
    # inspect.getargspec was removed in Python 3.11; getfullargspec offers
    # the same .args/.defaults fields.  Fall back for Python 2.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    func_data = getspec(func)
    args_name = func_data.args  # func argument list
    args_default = func_data.defaults  # func argument default data list
    return args_name, args_default
def showduration(user_function):
    '''Show time duration decorator: print how long each call took.

    The undecorated function is saved on the wrapper as ``_original_f``.
    '''
    import time
    # time.clock() was removed in Python 3.8; use perf_counter when present
    timer = getattr(time, 'perf_counter', None) or time.clock

    def wrapped_f(*args, **kwargs):
        t1 = timer()
        result = user_function(*args, **kwargs)
        print("%s()_Time: %0.5f" % (user_function.__name__, timer() - t1))
        return result
    wrapped_f._original_f = user_function  # save original function
    return wrapped_f
def foo(para1, para2=5, para3=7):
    """Return para1 + para2 + para3 (recomputed in a loop to take up time)."""
    for i in range(1000):
        s = para1+para2+para3
    return s
@showduration
def bar(para1, para2, para3):
    """Return para1 + para2 + para3 (recomputed in a loop to take up time)."""
    for i in range(1000):
        s=para1+para2+para3
    return s
# %-formatting into one string keeps the output identical on Python 2 and 3
print('get_function_arg_data(foo): %s' % (get_function_arg_data(foo),))
print('get_function_arg_data(bar): %s' % (get_function_arg_data(bar),))
All the modification involves is saving the original function in an attribute named _original_f, which is added to the wrapped function returned by the decorator. The get_function_arg_data() function then simply checks for this attribute and returns information based on its value rather than on the decorated function passed to it.
While this approach doesn't work with just any decorated function, only ones which have had the special attribute added to them, it is compatible with both Python 2 & 3.
Output produced by the code shown:
get_function_arg_data(foo): (['para1', 'para2', 'para3'], (5, 7))
get_function_arg_data(bar): (['para1', 'para2', 'para3'], None)
Assuming you've installed Michele Simionato's decorator module, you can make your showduration decorator work with it by making some minor modifications to it and to the nested wrapped_f() function defined in it, so the latter fits the signature that the module's decorator.decorator() function expects:
import decorator
def showduration(user_function):
    '''Show time duration decorator, built with decorator.decorator().'''
    import time
    # time.clock() was removed in Python 3.8; use perf_counter when present
    timer = getattr(time, 'perf_counter', None) or time.clock

    # The inner function takes the function being called as its first parameter
    def wrapped_f(user_function, *args, **kwargs):
        t1 = timer()
        result = user_function(*args, **kwargs)
        print("%s()_Time: %0.5f" % (user_function.__name__, timer() - t1))
        return result
    return decorator.decorator(wrapped_f, user_function)
However, the module really shines because it will let you reduce boilerplate stuff like the above down to just:
import decorator
@decorator.decorator
def showduration(user_function, *args, **kwargs):
    '''Show time duration decorator: print how long the call took.'''
    import time
    # time.clock() was removed in Python 3.8; use perf_counter when present
    timer = getattr(time, 'perf_counter', None) or time.clock
    t1 = timer()
    result = user_function(*args, **kwargs)
    print("%s()_Time: %0.5f" % (user_function.__name__, timer() - t1))
    return result
With either set of the above changes, your sample code would output:
(['para1', 'para2', 'para3'], (5, 7))
bar()_Time: 0.00026
(['para1', 'para2', 'para3'], None)

Python file cache

I'm creating some objects from files (validators from templates xsd files, to draw together other xsd files, as it happens), and I'd like to recreate the objects when the file on disk changes.
I could create something like:
def getobj(fname, cache = {}):
    """Return the object built from *fname*, rebuilding it if the file changed.

    ``cache`` maps a file name to ``(obj, time loaded)``.  The mutable
    default is used on purpose here: it is the store shared between calls.
    """
    # NOTE(review): `currenttime` is not defined in this snippet --
    # presumably the timestamp at load time; confirm before use.
    try:
        obj, lastloaded = cache[fname]
        if lastloaded < last_time_written(fname):
            # File was written after it was loaded: same stuff as in
            # the except clause
            obj = create_from_file(fname)
            cache[fname] = (obj, currenttime)
    except KeyError:
        # First request for this file
        obj = create_from_file(fname)
        cache[fname] = (obj, currenttime)
    return obj
However, I would prefer to use someone else's tested code if it exists. Is there an existing library that does something like this?
Update: I'm using python 2.7.1.
Your code (including the cache logic) looks fine.
Consider moving the cache variable outside the function definition. That will make it possible to add other functions to clear or inspect the cache.
If you want to look at code that does something similar, look at the source for the filecmp module: http://hg.python.org/cpython/file/2.7/Lib/filecmp.py The interesting part is how the stat module is used to determine whether a file has changed. Here is the signature function:
def _sig(st):
    """Return the (file type, size, modification time) triple of stat result *st*."""
    return (stat.S_IFMT(st.st_mode),
            st.st_size,
            st.st_mtime)
Three thoughts.
Use try... except... else for a neater control flow.
File modification times are notoriously unstable -- in particular, they don't necessarily correspond to the most recent time the file was modified!
Python 3 contains a caching decorator: functools.lru_cache. Here's the source.
def lru_cache(maxsize=100):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """
    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    def decorating_function(user_function,
                tuple=tuple, sorted=sorted, len=len, KeyError=KeyError):

        hits = misses = 0
        kwd_mark = (object(),)          # separates positional and keyword args
        lock = Lock()                   # needed because ordereddicts aren't threadsafe

        if maxsize is None:
            cache = dict()              # simple cache without ordering or size limit

            @wraps(user_function)
            def wrapper(*args, **kwds):
                nonlocal hits, misses
                key = args
                if kwds:
                    key += kwd_mark + tuple(sorted(kwds.items()))
                try:
                    result = cache[key]
                    hits += 1
                except KeyError:
                    result = user_function(*args, **kwds)
                    cache[key] = result
                    misses += 1
                return result
        else:
            cache = OrderedDict()       # ordered least recent to most recent
            cache_popitem = cache.popitem
            cache_renew = cache.move_to_end

            @wraps(user_function)
            def wrapper(*args, **kwds):
                nonlocal hits, misses
                key = args
                if kwds:
                    key += kwd_mark + tuple(sorted(kwds.items()))
                try:
                    with lock:
                        result = cache[key]
                        cache_renew(key)        # record recent use of this key
                        hits += 1
                except KeyError:
                    result = user_function(*args, **kwds)
                    with lock:
                        cache[key] = result     # record recent use of this key
                        misses += 1
                        if len(cache) > maxsize:
                            cache_popitem(0)    # purge least recently used cache entry
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(hits, misses, maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            nonlocal hits, misses
            with lock:
                cache.clear()
                hits = misses = 0

        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper

    return decorating_function
Unless there is a specific reason to use it as argument I would use cache as a global object

Categories