I have the following decorator:
def memo(f):
    """Decorator that caches the return value for each call to f(args).
    Then when called again with same args, we can just look it up."""
    cache = {}
    def _f(*args):
        try:
            return cache[args]
        except KeyError:
            cache[args] = result = f(*args)
            return result
        except TypeError:
            # some element of args can't be a dict key
            return f(*args)
    return _f
I need to write some tests to check that it works correctly. How can I test such a decorator?
So far I have only managed to write a performance test that checks whether it speeds up functions.
def test_memo(self):
    def fib(n):
        if n == 0:
            return 0
        elif n == 1:
            return 1
        else:
            return fib(n - 1) + fib(n - 2)

    @memo
    def cached_fib(n):
        if n == 0:
            return 0
        elif n == 1:
            return 1
        else:
            return cached_fib(n - 1) + cached_fib(n - 2)

    t0 = time.clock()
    fib(20)
    t = time.clock() - t0
    t1 = time.clock()
    cached_fib(20)
    t2 = time.clock() - t1
    self.assertGreater(t, t2)
Maybe it would also be wise to test whether it actually stores values in the cache, but I do not know how to achieve that in Python. Any ideas?
The decorated function should be called only once for the same argument. Check that:
def test_memo__function_should_be_called_once_for_same_arg(self):
    @memo
    def f(arg):
        f.call_count += 1
        return arg

    f.call_count = 0
    self.assertEqual(f(1), 1)
    self.assertEqual(f(1), 1)
    self.assertEqual(f.call_count, 1)
    self.assertEqual(f(2), 2)
    self.assertEqual(f(2), 2)
    self.assertEqual(f(2), 2)
    self.assertEqual(f.call_count, 2)
BTW, in the cached_fib function it should call cached_fib, not fib, to take advantage of the memoization.
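If you also want to assert that results actually land in the cache, one option is to expose the dict on the wrapper (for example by adding _f.cache = cache just before return _f, as the Norvig version quoted further down does) and then check it directly. A sketch under that assumption:

def test_memo__should_store_result_in_cache(self):
    @memo
    def square(x):
        return x * x

    self.assertEqual(square(3), 9)
    # assumes the decorator was extended with `_f.cache = cache`
    self.assertIn((3,), square.cache)
    self.assertEqual(square.cache[(3,)], 9)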
I've got the code below, and it is giving me the following error: bubble_sort() missing 1 required positional argument: 'a_list'. I am passing a list to the function, so I do not know where the error is coming from. I am learning Python right now, so understanding what I am doing wrong and why is important to me.
import functools
import time


def sort_timer(func):
    """
    Timer function that counts how many seconds it takes the decorated function to run, in this case the sort functions
    described above. Returns the number of seconds using the time module.
    :param func:
    :return total_time:
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        """
        Calculates the total time a function runs.
        :return:
        """
        total_time = None
        start_time = time.perf_counter()
        func(*args, **kwargs)
        end_time = time.perf_counter()
        local_total_time = end_time - start_time
        return total_time
    return wrapper()


@sort_timer
def bubble_sort(a_list):
    """
    Sorts a_list in ascending order
    """
    for pass_num in range(len(a_list) - 1):
        for index in range(len(a_list) - 1 - pass_num):
            if a_list[index] > a_list[index + 1]:
                temp = a_list[index]
                a_list[index] = a_list[index + 1]
                a_list[index + 1] = temp


def main():
    random_list = [5, 1, 10, 15, 3, 6, 45, 21, 90, 76, 44, 33, 41, 27, 81]
    print(bubble_sort(random_list))


if __name__ == "__main__":
    main()
There are two problems with your code:
You are calling the wrapper function when you return it from sort_timer, but instead you should return the function without calling it.
return wrapper
You are printing the return value of the bubble_sort function, but that function doesn't return a value, so it just ends up printing None. Instead, you should print random_list after running bubble_sort on it.
bubble_sort(random_list)
print(random_list)
Here is your code with those fixes applied:
import functools
import time


def sort_timer(func):
    """
    Timer function that counts how many seconds it takes the decorated function to run, in this case the sort functions
    described above. Returns the number of seconds using the time module.
    :param func:
    :return total_time:
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        """
        Calculates the total time a function runs.
        :return:
        """
        total_time = None
        start_time = time.perf_counter()
        func(*args, **kwargs)
        end_time = time.perf_counter()
        local_total_time = end_time - start_time
        return total_time
    return wrapper


@sort_timer
def bubble_sort(a_list):
    """
    Sorts a_list in ascending order
    """
    for pass_num in range(len(a_list) - 1):
        for index in range(len(a_list) - 1 - pass_num):
            if a_list[index] > a_list[index + 1]:
                temp = a_list[index]
                a_list[index] = a_list[index + 1]
                a_list[index + 1] = temp


def main():
    random_list = [5, 1, 10, 15, 3, 6, 45, 21, 90, 76, 44, 33, 41, 27, 81]
    bubble_sort(random_list)
    print(random_list)


if __name__ == "__main__":
    main()
Running it produces the following output:
[1, 3, 5, 6, 10, 15, 21, 27, 33, 41, 44, 45, 76, 81, 90]
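As a side note, the wrapper above still returns None: local_total_time is computed but total_time is never updated, so the decorated call can't report its duration. If you want the decorator to actually return the elapsed seconds, as its docstring suggests, a minimal sketch of the wrapper body would be:

def wrapper(*args, **kwargs):
    start_time = time.perf_counter()
    func(*args, **kwargs)                     # the sorted result itself is discarded here
    return time.perf_counter() - start_time   # elapsed seconds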
def one(*args):
    if len(args) == 0:
        return 1
    return args


def plus(*args):
    first_number = args[1]
    second_number = args[0]
    sum = first_number + second_number
    return sum


def two(*args):
    if len(args) == 0:
        return 2
    return args


print(one(plus(two())))
The exercise is to do calculations using functions. A call has three parts: the first number, the operation, and the second number, and the goal is to apply the operation (the second part) to the two numbers (the first and third parts). For example, one(plus(two())) should return 3.
Here is the part I don't know how to do: in one(plus(two())), the plus() function is called with a single argument, two(). I want to add a second argument, the initial number (the first number), so that plus() is called with two arguments, like plus(two(), 1).
Since plus(two()) arrives as args[0] inside one(), how can I append 1 to it so that the call effectively becomes plus(two(), 1), making first_number = args[1] valid inside plus()?
The code below implements a similar syntax to the example, but with a slightly different argument structure. I think the logic is clearer this way.
The point is that I define plus as a partial function, so that it "absorbs" two to form a function that works like "add_two".
def plus(number):
    def _plus(x):
        return x + number
    return _plus


def one(op=None):
    if op is None:
        return 1
    return op(1)


def two(op=None):
    if op is None:
        return 2
    return op(2)


one(plus(two()))
# 3
One idea is to have plus return a function that will be called afterward by one or two:
def one(*args):
    if len(args) == 0:
        return 1
    elif len(args) == 1:
        fn = args[0]
        return fn(1)
    else:
        raise ValueError("Invalid number of arguments")


def two(*args):
    if len(args) == 0:
        return 2
    elif len(args) == 1:
        fn = args[0]
        return fn(2)
    else:
        raise ValueError("Invalid number of arguments")


def plus(second_number):
    def adder(first_number):
        return first_number + second_number
    return adder
Now:
>>> one(plus(two()))
3
>>> one(plus(one()))
2
>>> two(plus(one()))
3
>>> two(plus(two()))
4
And to simplify and DRY it:
def make_number(n):
    def number_fn(fn=None):
        if fn:
            return fn(n)
        else:
            return n
    return number_fn


def make_operator(operator):
    def operator_wrapper(second_number):
        def partial(first_number):
            return operator(first_number, second_number)
        return partial
    return operator_wrapper


one = make_number(1)
two = make_number(2)
plus = make_operator(lambda a, b: a + b)
minus = make_operator(lambda a, b: a - b)
>>> two(minus(one()))
1
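A couple more spot checks with the generated helpers (assuming the definitions above):
>>> one(minus(two()))
-1
>>> one(plus(one()))
2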
Just out of curiosity, I've written three tests in Python and timed them using timeit:
import timeit

# simple range based on generator
def my_range(start, stop):
    i = start
    while (i < stop):
        yield i
        i += 1

# test regular range
def test_range():
    x = range(1, 100000)
    sum = 0
    for i in x:
        sum += i

# test xrange
def test_xrange():
    x = xrange(1, 100000)
    sum = 0
    for i in x:
        sum += i

# test my range
def test_my_range():
    x = my_range(1, 100000)
    sum = 0
    for i in x:
        sum += i

print timeit.timeit("test_range()", setup="from __main__ import test_range", number=100)
print timeit.timeit("test_xrange()", setup="from __main__ import test_xrange", number=100)
print timeit.timeit("test_my_range()", setup="from __main__ import test_my_range", number=100)
And I've got these benchmarks:
regular range based test - 0.616795163262
xrange based test - 0.537716731096
my_range (generator) based test - 1.27872886337
My generator-based range was about 2x slower, even slower than range(), which builds a full list. Why?
Are xrange() / range() implemented using C directly?
Are they implemented without condition check?
Thanks!
I feel that the simple answer is that xrange() is a builtin and is written in C.
I added another case to your test (see below): A pure-Python reference implementation of xrange() based on the CPython source.
import timeit
from collections import Sequence, Iterator
from math import ceil

# simple range based on generator
def my_range(start, stop):
    i = start
    while (i < stop):
        yield i
        i += 1

# test regular range
def test_range():
    x = range(1, 100000)
    sum = 0
    for i in x:
        sum += i

# test xrange
def test_xrange():
    x = xrange(1, 100000)
    sum = 0
    for i in x:
        sum += i

# test my range
def test_my_range():
    x = my_range(1, 100000)
    sum = 0
    for i in x:
        sum += i

class pure_python_xrange(Sequence):
    """Pure-Python implementation of an ``xrange`` (aka ``range``
    in Python 3) object. See `the CPython documentation
    <http://docs.python.org/py3k/library/functions.html#range>`_
    for details.
    """

    def __init__(self, *args):
        if len(args) == 1:
            start, stop, step = 0, args[0], 1
        elif len(args) == 2:
            start, stop, step = args[0], args[1], 1
        elif len(args) == 3:
            start, stop, step = args
        else:
            raise TypeError('pure_python_xrange() requires 1-3 int arguments')

        try:
            start, stop, step = int(start), int(stop), int(step)
        except ValueError:
            raise TypeError('an integer is required')

        if step == 0:
            raise ValueError('pure_python_xrange() arg 3 must not be zero')
        elif step < 0:
            stop = min(stop, start)
        else:
            stop = max(stop, start)

        self._start = start
        self._stop = stop
        self._step = step
        self._len = (stop - start) // step + bool((stop - start) % step)

    def __repr__(self):
        if self._start == 0 and self._step == 1:
            return 'pure_python_xrange(%d)' % self._stop
        elif self._step == 1:
            return 'pure_python_xrange(%d, %d)' % (self._start, self._stop)
        return 'pure_python_xrange(%d, %d, %d)' % (self._start, self._stop, self._step)

    def __eq__(self, other):
        return isinstance(other, pure_python_xrange) and \
               self._start == other._start and \
               self._stop == other._stop and \
               self._step == other._step

    def __len__(self):
        return self._len

    def index(self, value):
        """Return the 0-based position of integer `value` in
        the sequence this xrange represents."""
        diff = value - self._start
        quotient, remainder = divmod(diff, self._step)
        if remainder == 0 and 0 <= quotient < self._len:
            return abs(quotient)
        raise ValueError('%r is not in range' % value)

    def count(self, value):
        """Return the number of occurrences of integer `value`
        in the sequence this xrange represents."""
        # a value can occur exactly zero or one times
        return int(value in self)

    def __contains__(self, value):
        """Return ``True`` if the integer `value` occurs in
        the sequence this xrange represents."""
        try:
            self.index(value)
            return True
        except ValueError:
            return False

    def __reversed__(self):
        """Return an xrange which represents a sequence whose
        contents are the same as the sequence this xrange
        represents, but in the opposite order."""
        sign = self._step / abs(self._step)
        last = self._start + ((self._len - 1) * self._step)
        return pure_python_xrange(last, self._start - sign, -1 * self._step)

    def __getitem__(self, index):
        """Return the element at position ``index`` in the sequence
        this xrange represents, or raise :class:`IndexError` if the
        position is out of range."""
        if isinstance(index, slice):
            return self.__getitem_slice(index)
        if index < 0:
            # negative indexes access from the end
            index = self._len + index
        if index < 0 or index >= self._len:
            raise IndexError('xrange object index out of range')
        return self._start + index * self._step

    def __getitem_slice(self, slce):
        """Return an xrange which represents the requested slce
        of the sequence represented by this xrange.
        """
        start, stop, step = slce.start, slce.stop, slce.step
        if step == 0:
            raise ValueError('slice step cannot be 0')

        start = start or self._start
        stop = stop or self._stop
        if start < 0:
            start = max(0, start + self._len)
        if stop < 0:
            stop = max(start, stop + self._len)

        if step is None or step > 0:
            return pure_python_xrange(start, stop, step or 1)
        else:
            rv = reversed(self)
            rv._step = step
            return rv

    def __iter__(self):
        """Return an iterator which enumerates the elements of the
        sequence this xrange represents."""
        return xrangeiterator(self)

class xrangeiterator(Iterator):
    """An iterator for an :class:`xrange`.
    """

    def __init__(self, xrangeobj):
        self._xrange = xrangeobj
        # Initialize the "last outputted value" to the value
        # just before the first value; this simplifies next()
        self._last = self._xrange._start - self._xrange._step
        self._count = 0

    def __iter__(self):
        """An iterator is already an iterator, so return ``self``.
        """
        return self

    def next(self):
        """Return the next element in the sequence represented
        by the xrange we are iterating, or raise StopIteration
        if we have passed the end of the sequence."""
        self._last += self._xrange._step
        self._count += 1
        if self._count > self._xrange._len:
            raise StopIteration()
        return self._last

# test pure-Python xrange
def test_pure_python_xrange():
    x = pure_python_xrange(1, 100000)
    sum = 0
    for i in x:
        sum += i

print timeit.timeit("test_range()", setup="from __main__ import test_range", number=100)
print timeit.timeit("test_xrange()", setup="from __main__ import test_xrange", number=100)
print timeit.timeit("test_my_range()", setup="from __main__ import test_my_range", number=100)
print timeit.timeit("test_pure_python_xrange()", setup="from __main__ import test_pure_python_xrange", number=100)
The results?
$ python so.py
0.426695823669
0.371111869812
0.964643001556
6.06390094757
This is simply the difference between interpreted Python code and C. Additionally, as @byels mentioned above, xrange() is limited to short integers, which likely has a positive effect.
This is an interesting test. Looking at the Python 2 docs on xrange, one guess that comes to mind is that xrange is allowed to take advantage of type restrictions (it only uses "short" integers).
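For what it's worth, on Python 3 (where xrange is gone and range itself is the lazy, C-implemented object) the same comparison only needs two cases. A rough, self-contained sketch of that benchmark, summing via the builtin for brevity:

import timeit

setup = """
def my_range(start, stop):
    i = start
    while i < stop:
        yield i
        i += 1
"""

# on Python 3, range plays the role of Python 2's xrange
print(timeit.timeit("sum(range(1, 100000))", number=100))
print(timeit.timeit("sum(my_range(1, 100000))", setup=setup, number=100))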
I have a list which I want to sort by multiple keys, like:
L = [ ... ]
L.sort(key = lambda x: ( f(x), g(x) ))
This works fine. However, this results in unnecessary calls to g, which I would like to avoid (since it is potentially slow). In other words, I want to partially and lazily evaluate the key.
For example, if f is unique over L (i.e. len(L) == len(set(map(f,L)))) no calls to g should be made.
What would be the most elegant/pythonic way to do this?
One way I can think of is to define a custom cmp function (L.sort(cmp=partial_cmp)), but IMO this is less elegant and more complicated than using the key parameter.
Another way would be to define a key-wrapper class which takes a generator expression to generate the different parts of the key, and override the comparison operators to compare one-by-one. However, I'm feeling there must be a simpler way...
EDIT: I'm interested in a solution for the general problem of sorting by multiple functions, not only two as in my example above.
You can try using itertools.groupby:
from itertools import groupby

result = []
for groupKey, group in groupby(sorted(L, key=f), key=f):
    sublist = [y for y in group]
    if len(sublist) > 1:
        result += sorted(sublist, key=g)
    else:
        result += sublist
Another possibility, even less elegant, but in place:
L.sort(key=f)
start = None
end = None
for i, x in enumerate(L):
    if start == None:
        start = i
    elif f(x) == f(L[start]):
        end = i
    elif end == None:
        start = i
    else:
        L[start:end+1] = sorted(L[start:end+1], key=g)
        start = None
if start != None and end != None:
    L[start:end+1] = sorted(L[start:end+1], key=g)
First version generalized to any number of functions:
def sortBy(l, keyChain):
    if not keyChain:
        return l
    result = []
    f = keyChain[0]
    for groupKey, group in groupby(sorted(l, key=f), key=f):
        sublist = [y for y in group]
        if len(sublist) > 1:
            result += sortBy(sublist, keyChain[1:])
        else:
            result += sublist
    return result
The second version generalized to any number of functions (not fully in place though):
def subSort(l, start, end, keyChain):
    part = l[start:end+1]
    sortBy(part, keyChain[1:])
    l[start:end+1] = part


def sortBy(l, keyChain):
    if not keyChain:
        return
    f = keyChain[0]
    l.sort(key=f)
    start = None
    end = None
    for i, x in enumerate(l):
        if start == None:
            start = i
        elif f(x) == f(l[start]):
            end = i
        elif end == None:
            start = i
        else:
            subSort(l, start, end, keyChain)
            start = i
            end = None
    if start != None and end != None:
        subSort(l, start, end, keyChain)
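Either generalized version is then driven with a list of key functions. A usage sketch (f and g here are stand-in keys for illustration, not the asker's actual functions; remember the first version also needs the groupby import):

f = lambda x: x // 10   # primary key, deliberately produces ties
g = lambda x: -x        # secondary key, only consulted to break ties

data = [12, 7, 15, 3, 18, 11]
result = sortBy(list(data), [f, g])   # first (groupby) version: returns a new list
sortBy(data, [f, g])                  # second version: sorts data in place, returns None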
Given a function, you could create a LazyComparer class like this:
def lazy_func(func):
    class LazyComparer(object):
        def __init__(self, x):
            self.x = x

        def __lt__(self, other):
            return func(self.x) < func(other.x)

        def __eq__(self, other):
            return func(self.x) == func(other.x)
    return lambda x: LazyComparer(x)
To make a lazy key function out of multiple functions, you could create a utility function:
def make_lazy(*funcs):
    def wrapper(x):
        return [lazy_func(f)(x) for f in funcs]
    return wrapper
And together they could be used like this:
def countcalls(f):
    "Decorator that makes the function count calls to it."
    def _f(*args, **kwargs):
        _f._count += 1
        return f(*args, **kwargs)
    _f._count = 0
    return _f


@countcalls
def g(x): return x


@countcalls
def f1(x): return 0


@countcalls
def f2(x): return x


def report_calls(*funcs):
    print(' | '.join(['{} calls to {}'.format(f._count, f.func_name)
                      for f in funcs]))


L = range(10)[::-1]
L.sort(key=make_lazy(f1, g))
report_calls(f1, g)

g._count = 0
L.sort(key=make_lazy(f2, g))
report_calls(f2, g)
which yields
18 calls to f1 | 36 calls to g
36 calls to f2 | 0 calls to g
The @countcalls decorator above is used to confirm that when f1 returns a lot
of ties, g is called to break the ties, but when f2 returns distinct values,
g does not get called.
NPE's solution adds memoization within the Key class. With the solution above,
you could add memoization outside (independent of) the LazyComparer class:
def memo(f):
    # Author: Peter Norvig
    """Decorator that caches the return value for each call to f(args).
    Then when called again with same args, we can just look it up."""
    cache = {}
    def _f(*args):
        try:
            return cache[args]
        except KeyError:
            cache[args] = result = f(*args)
            return result
        except TypeError:
            # some element of args can't be a dict key
            return f(*args)
    _f.cache = cache
    return _f
L.sort(key=make_lazy(memo(f1), memo(g)))
report_calls(f1, g)
which results in fewer calls to g:
10 calls to f1 | 10 calls to g
You could use a key object that would lazily evaluate and cache g(x):
class Key(object):
    def __init__(self, obj):
        self.obj = obj
        self.f = f(obj)

    @property
    def g(self):
        if not hasattr(self, "_g"):
            self._g = g(self.obj)
        return self._g

    def __cmp__(self, rhs):
        return cmp(self.f, rhs.f) or cmp(self.g, rhs.g)
Here is an example of use:
def f(x):
    f.count += 1
    return x // 2
f.count = 0


def g(x):
    g.count += 1
    return x
g.count = 0


L = [1, 10, 2, 33, 45, 90, 3, 6, 1000, 1]
print sorted(L, key=Key)
print f.count, g.count
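If you need the same idea on Python 3, where __cmp__ and the cmp builtin no longer exist, a rough adaptation (assuming the same f, g and L as above) is to implement __lt__ instead:

class Key(object):
    def __init__(self, obj):
        self.obj = obj
        self.f = f(obj)

    @property
    def g(self):                     # still computed lazily, at most once per object
        if not hasattr(self, "_g"):
            self._g = g(self.obj)
        return self._g

    def __lt__(self, rhs):           # sort()/sorted() only need __lt__
        if self.f != rhs.f:
            return self.f < rhs.f
        return self.g < rhs.g

print(sorted(L, key=Key))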
I've got a simple memoizer decorator:
from functools import wraps

def funcmemo(f):
    memo = {}
    @wraps(f)
    def wrapper(*args):
        if args in memo:
            return memo[args]
        else:
            temp = f(*args)
            print "memoizing: ", args, temp
            memo[args] = temp
            return temp
    return wrapper
Now, when I use it via the "@" token,
@funcmemo
def fib(n):
    print "fib called with:", n
    if n < 2: return n
    return fib(n-2) + fib(n-1)
res = fib(3)
print "result:", res
it works correctly, as seen in the printed output:
fib called with: 3
fib called with: 1
memoizing: (1,) 1
fib called with: 2
fib called with: 0
memoizing: (0,) 0
memoizing: (2,) 1
memoizing: (3,) 2
result: 2
However, when I do this:
def fib(n):
    print "fib called with:", n
    if n < 2: return n
    return fib(n-2) + fib(n-1)

memfib = funcmemo(fib)
res = memfib(3)
print "result:", res
Apparently an undecorated fib gets called, with only the final return value "reaching" the cache (obviously resulting in huge slowdown):
fib called with: 3
fib called with: 1
fib called with: 2
fib called with: 0
fib called with: 1
memoizing: (3,) 2
result: 2
Curiously, this one works fine:
def fib(n):
    print "fib called with:", n
    if n < 2: return n
    return fib(n-2) + fib(n-1)

fib = funcmemo(fib)
res = fib(3)
print "result:", res
Also, the very same thing happens with a class-based version:
class Classmemo(object):
    def __init__(self, f):
        self.f = f
        self.mem = {}

    def __call__(self, *args):
        if args in self.mem:
            return self.mem[args]
        else:
            tmp = self.f(*args)
            print "memoizing: ", args, tmp
            self.mem[args] = tmp
            return tmp
The problem also occurs when using an "anonymous" decorated function, like
res = Classmemo(fib)(3)
I'd be glad to be enlightened about the reasons behind this.
There is nothing curious about this. When you do
memfib = funcmemo(fib)
you're not changing the function that the name fib points to in any way; you're creating a new function and pointing the name memfib at it.
So when memfib gets called, it calls the function pointed to by the name fib -- which recursively calls itself, not memfib -- so no memoization occurs.
In your second example, you do
fib = funcmemo(fib)
so it calls itself recursively and memoization happens at all levels.
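A stripped-down illustration of that name lookup (hypothetical names, just to show that the global name is resolved when the call happens, not when the function is defined):

def shout():
    return "original"

def caller():
    return shout()        # the name "shout" is looked up here, at call time

shout = lambda: "replaced"
print(caller())           # -> replaced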
If you don't want to overwrite the name fib, as the decorator version or your second example does, you could alter fib to take the function to recurse on as an argument:
def fib(n, fibfunc):
    print "fib called with:", n
    if n < 2: return n
    return fibfunc(n-2, fibfunc) + fibfunc(n-1, fibfunc)

memofib = funcmemo(fib)
res = fib(3, memofib)
You could also use a fixed point combinator to avoid passing fibfunc every time:
def Y(f):
    def Yf(*args):
        return f(Yf)(*args)
    return f(Yf)


@Y
def fib(f):
    def inner_fib(n):
        print "fib called with:", n
        if n < 2: return n
        return f(n-2) + f(n-1)
    return inner_fib
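The decorated fib is then called like the plain function; note that as written this only removes the need to pass fibfunc around, it does not memoize by itself:

res = fib(3)
# res == 2, with "fib called with: ..." printed for every recursive call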
If your question is simply why this happens: the recursive call fib() looks up whatever the name fib is bound to at call time. To route the recursion through the decorated version you have to rebind the name fib itself to the wrapper; neither memfib = funcmemo(fib) nor the class-based version does that.