I have been making a program involving complex number calculations and three of the functions I am using are these:
import turtle
import cmath
import numpy as np
from numba import jit

@jit
def quadratics(arange=[0,10], brange=[0,100], crange=[0,100], step=2):
    l = []
    for a in range(arange[0], arange[1]+1, step):
        for b in range(brange[0], brange[1]+1, step):
            for c in range(crange[0], crange[1]+1, step):
                if a != 0:
                    l.append((-b+cmath.sqrt(b**2-4*a*c))/(2*a))
                    l.append((-b-cmath.sqrt(b**2-4*a*c))/(2*a))
    return l

def mindistance(point, roots):
    return min(np.array([(point.real-i.real)**2+(point.imag-i.imag)**2 for i in roots]))

@jit
def drawing_matrix(imsz=500, xrange=[-5,5], yrange=[-5,5], poly=2, acc=0.01):
    l = np.zeros((imsz, imsz))
    roots = quadratics()
    for x in range(0, imsz):
        for y in range(0, imsz):
            c = complex((x/imsz)*(xrange[1]-xrange[0])+xrange[0], (y/imsz)*(yrange[1]-yrange[0])+yrange[0])
            if mindistance(c, roots) <= acc:
                l[x, y] = 1
    return l
Now, I have been using Numba to speed things up with the @jit decorator and it's fine apart from mindistance(). If I put the @jit decorator on that function (which would be really useful, since it is called thousands of times during a program run) it produces the most almighty of error messages ending with:
numba.errors.LoweringError: Failed at object (object mode backend)
make_function(code=<code object <listcomp> at 0x000001F460FAB540, file "C:\Users\Isky\Documents\IT\Programs\Mathematics\AlgebraicNumbers.py", line 19>, name=$const0.7, defaults=None, closure=$0.5)
File "AlgebraicNumbers.py", line 19
[1] During: lowering "$0.8 = make_function(code=<code object <listcomp> at 0x000001F460FAB540, file "C:\Users\Isky\Documents\IT\Programs\Mathematics\AlgebraicNumbers.py", line 19>, name=$const0.7, defaults=None, closure=$0.5)" at C:\Users\Isky\Documents\IT\Programs\Mathematics\AlgebraicNumbers.py (19)
which is line 19 (as in def mindistance()). Can you tell me why Numba doesn't like this function?
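The traceback points at the list comprehension inside mindistance: the make_function / <listcomp> object it names is the closure Numba builds for the comprehension, which the Numba version producing this error could not compile, so the function falls back to object mode and then fails at lowering. As a hedged sketch (not your original code), here is an njit-friendly rewrite that avoids the comprehension, assuming roots is first converted to a 1-D complex NumPy array, e.g. roots = np.array(quadratics(), dtype=np.complex128):

import numpy as np
from numba import njit

@njit
def mindistance(point, roots):
    # roots: 1-D complex128 array of candidate roots
    best = np.inf
    for r in roots:
        d = (point.real - r.real) ** 2 + (point.imag - r.imag) ** 2
        if d < best:
            best = d
    return best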
Related
I have a program in Python and I use numba to compile the code to native code so it runs faster.
I want to accelerate the run even further and implement a cache for function results: if the function is called twice with the same parameters, the first time the calculation will run and return the result, and the second time the function will return the result from the cache.
I tried to implement this with a dict, where the keys are tuples containing the function parameters and the values are the function return values.
However, numba doesn't support dictionaries and the support for global variables is limited, so my solution didn't work.
I can't use a numpy.ndarray and use the indices as the parameters, since some of my parameters are floats.
The problem is that both the function with cached results and the calling function are compiled with numba (if the calling function were a regular Python function, I could cache using just Python and not numba).
How can I implement this result cache with numba?
============================================
The following code gives an error, saying the Memoize class is not recognized
from __future__ import annotations

from numba import njit


class Memoize:
    def __init__(self, f):
        self.f = f
        self.memo = {}

    def __call__(self, *args):
        if args not in self.memo:
            self.memo[args] = self.f(*args)
        # Warning: You may wish to do a deepcopy here if returning objects
        return self.memo[args]


@Memoize
@njit
def bla(a: int, b: float):
    for i in range(1_000_000_000):
        a *= b
    return a


@njit
def caller(x: int):
    s = 0
    for j in range(x):
        s += bla(j % 5, (j + 1) % 5)
    return s


if __name__ == "__main__":
    print(caller(30))
The error:
Untyped global name 'bla': Cannot determine Numba type of <class '__main__.Memoize'>

File "try_numba2.py", line 30:
def caller(x: int):
    <source elided>
    for j in range(x):
        s += bla(j % 5, (j + 1) % 5)
        ^
Changing the order of the decorators for bla gives the following error:
TypeError: The decorated object is not a function (got type <class '__main__.Memoize'>).
I have just discovered numba, and learnt that optimal performance requires adding @njit to most functions, such that numba rarely exits LLVM mode.
I still have a few expensive/lookup functions that could benefit from memoization, but so far none of my attempts have found a workable solution that compiles without error.
Using common decorator functions before @njit results in numba not being able to do type inference.
Using decorators after @njit fails to compile the decorator.
Numba doesn't like the use of global variables, even when using numba.typed.Dict.
Numba doesn't like using closures to store mutable state.
Removing @njit also causes type errors when called from other @njit functions.
What is the correct way to add memoization to functions when working inside numba?
import functools
import time

import fastcache
import numba
import numpy as np
import toolz
from numba import njit
from functools import lru_cache
from fastcache import clru_cache
from toolz import memoize


# @fastcache.clru_cache(None)  # BUG: Untyped global name 'expensive': cannot determine Numba type of <class 'fastcache.clru_cache'>
# @functools.lru_cache(None)   # BUG: Untyped global name 'expensive': cannot determine Numba type of <class 'functools._lru_cache_wrapper'>
# @toolz.memoize               # BUG: Untyped global name 'expensive': cannot determine Numba type of <class 'function'>
@njit
# @fastcache.clru_cache(None)  # BUG: AttributeError: 'fastcache.clru_cache' object has no attribute '__defaults__'
# @functools.lru_cache(None)   # BUG: AttributeError: 'functools._lru_cache_wrapper' object has no attribute '__defaults__'
# @toolz.memoize               # BUG: CALL_FUNCTION_EX with **kwargs not supported
def expensive():
    bitmasks = np.array([ 1 << n for n in range(0, 64) ], dtype=np.uint64)
    return bitmasks


# @fastcache.clru_cache(None)  # BUG: Untyped global name 'expensive_nojit': cannot determine Numba type of <class 'fastcache.clru_cache'>
# @functools.lru_cache(None)   # BUG: Untyped global name 'expensive_nojit': cannot determine Numba type of <class 'fastcache.clru_cache'>
# @toolz.memoize               # BUG: Untyped global name 'expensive_nojit': cannot determine Numba type of <class 'function'>
def expensive_nojit():
    bitmasks = np.array([ 1 << n for n in range(0, 64) ], dtype=np.uint64)
    return bitmasks


# BUG: Failed in nopython mode pipeline (step: analyzing bytecode)
#      Use of unsupported opcode (STORE_GLOBAL) found
_expensive_cache = None
@njit
def expensive_global():
    global _expensive_cache
    if _expensive_cache is None:
        bitmasks = np.array([ 1 << n for n in range(0, 64) ], dtype=np.uint64)
        _expensive_cache = bitmasks
    return _expensive_cache


# BUG: The use of a DictType[unicode_type,array(int64, 1d, A)] type, assigned to variable 'cache' in globals,
#      is not supported as globals are considered compile-time constants and there is no known way to compile
#      a DictType[unicode_type,array(int64, 1d, A)] type as a constant.
cache = numba.typed.Dict.empty(
    key_type   = numba.types.string,
    value_type = numba.uint64[:]
)
@njit
def expensive_cache():
    global cache
    if "expensive" not in cache:
        bitmasks = np.array([ 1 << n for n in range(0, 64) ], dtype=np.uint64)
        cache["expensive"] = bitmasks
    return cache["expensive"]
# BUG: Cannot capture the non-constant value associated with variable 'cache' in a function that will escape.
@njit()
def _expensive_wrapped():
    cache = []
    def wrapper():
        if len(cache) == 0:
            bitmasks = np.array([ 1 << n for n in range(0, 64) ], dtype=np.uint64)
            cache.append(bitmasks)
        return cache[0]
    return wrapper

expensive_wrapped = _expensive_wrapped()
@njit
def loop(count):
    for n in range(count):
        expensive()
        # expensive_nojit()
        # expensive_cache()
        # expensive_global()
        # expensive_wrapped()


def main():
    time_start = time.perf_counter()
    count = 10000
    loop(count)
    time_taken = time.perf_counter() - time_start
    print(f'{count} loops in {time_taken:.4f}s')


loop(1)  # precache numba
main()

# Pure Python: 10000 loops in 0.2895s
# Numba @njit: 10000 loops in 0.0026s
You already mentioned that your real code is more complex, but looking at your minimal example, I would recommend the following pattern:
@njit
def loop(count):
    expensive_result = expensive()
    for i in range(count):
        do_something(count, expensive_result)
Instead of using a cache, you could pre-compute the expensive result outside of the loop and provide it to the loop body. Instead of using globals, I would recommend passing every argument explicitly (always, but especially when using the numba jit).
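As a self-contained sketch of that pattern (do_something here is a made-up stand-in for the real per-iteration work, and the bitmask array mirrors the example above):

import numpy as np
from numba import njit

@njit
def expensive():
    # stand-in for the costly setup work; computed only once
    bitmasks = np.empty(64, dtype=np.uint64)
    for n in range(64):
        bitmasks[n] = np.uint64(1) << np.uint64(n)
    return bitmasks

@njit
def do_something(i, bitmasks):
    # hypothetical per-iteration work that reuses the precomputed array
    return bitmasks[i % bitmasks.size]

@njit
def loop(count):
    expensive_result = expensive()   # hoisted out of the loop, no cache needed
    total = np.uint64(0)
    for i in range(count):
        total += do_something(i, expensive_result)
    return total

print(loop(10000))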
The Numba documentation specifies that compiled functions can be inlined into and called from other compiled functions. This does not seem to be true when compiling ahead of time.
For example, here are two functions that compute the inner dot product between two vector arrays: one does the actual product, the other makes the inlined call within a loop:
# Module test.py
import numpy as np
from numba import njit, float64

@njit(float64(float64[:], float64[:]))
def product(a, b):
    prod = 0
    for i in range(a.size):
        prod += a[i] * b[i]
    return prod

@njit(float64[:](float64[:,:], float64[:,:]))
def n_inner1d(a, b):
    prod = np.empty(a.shape[0])
    for i in range(a.shape[0]):
        prod[i] = product(a[i], b[i])
    return prod
As is, I can do import test and use test.n_inner1d perfectly fine. Now let's make some modifications so this can be compiled to a .pyd:
# Module test.py
import numpy as np
from numba import float64
from numba.pycc import CC

cc = CC('test')
cc.verbose = True

@cc.export('product', 'float64(float64[:], float64[:])')
def product(a, b):
    prod = 0
    for i in range(a.size):
        prod += a[i] * b[i]
    return prod

@cc.export('n_inner1d', 'float64[:](float64[:,:], float64[:,:])')
def n_inner1d(a, b):
    prod = np.empty(a.shape[0])
    for i in range(a.shape[0]):
        prod[i] = product(a[i], b[i])
    return prod

if __name__ == "__main__":
    cc.compile()
When trying to compile, I get the following error:
# python test.py
Failed at nopython (nopython frontend)
Untyped global name 'product': cannot determine Numba type of <type 'function'>
File "test.py", line 20
QUESTION
For a module compiled ahead of time, is it possible for functions defined within to call one another and be used inline?
I reached out to the numba devs and they kindly answered that adding the @njit decorator after @cc.export makes the function call type resolution work.
So for example:
@cc.export('product', 'float64(float64[:], float64[:])')
@njit
def product(a, b):
    prod = 0
    for i in range(a.size):
        prod += a[i] * b[i]
    return prod
This will make the product function available to the others. The caveat is that it is entirely possible in some cases that the inlined function ends up with a different type signature to the one declared AOT.
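Putting both pieces together, the full AOT module from above would then look roughly like this (same module name and signatures, with @njit added under each export):

# Module test.py
import numpy as np
from numba import njit, float64
from numba.pycc import CC

cc = CC('test')
cc.verbose = True

@cc.export('product', 'float64(float64[:], float64[:])')
@njit
def product(a, b):
    prod = 0
    for i in range(a.size):
        prod += a[i] * b[i]
    return prod

@cc.export('n_inner1d', 'float64[:](float64[:,:], float64[:,:])')
@njit
def n_inner1d(a, b):
    prod = np.empty(a.shape[0])
    for i in range(a.shape[0]):
        prod[i] = product(a[i], b[i])  # resolves because product is also jit-compiled
    return prod

if __name__ == "__main__":
    cc.compile()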
What is the best way to have better, dynamic control over the decorators - choosing from numba.cuda.jit, numba.jit and none (pure Python)? [Please note that a project can have tens or hundreds of functions, so this should be easy to apply to all the functions.]
Here is an example from the numba website.
import numba as nb
import numpy as np

# global control of this --> @nb.jit or @nb.cuda.jit or none
# some functions with @nb.jit or @cuda.jit with kwargs like (nopython=True, **other_kwargs)
def sum2d(arr):
    M, N = arr.shape
    result = 0.0
    for i in range(M):
        for j in range(N):
            result += arr[i, j]
    return result

a = np.arange(81).reshape(9, 9)
sum2d(a)
You may want something more sophisticated, but a relatively simple solution is redefining jit based on settings. For example
def _noop_jit(f=None, *args, **kwargs):
    """returns function unmodified, discarding decorator args"""
    if f is None:
        return lambda x: x
    return f

# some config flag
if settings.PURE_PYTHON_MODE:
    jit = _noop_jit
else:  # etc
    from numba import jit

@jit(nopython=True)
def f(a):
    return a + 1
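To also cover the numba.cuda.jit choice from the question, a hedged sketch (the BACKEND flag is a made-up setting name; note that CUDA kernels have a different calling convention and cannot return values, so the decorated bodies usually need to differ) could pick the decorator once at import time:

import numba

BACKEND = "cpu"  # hypothetical setting: "cuda", "cpu", or "python"

def _noop_jit(f=None, *args, **kwargs):
    """Return the function unmodified, discarding decorator arguments."""
    if f is None:
        return lambda func: func
    return f

if BACKEND == "cuda":
    from numba import cuda
    jit = cuda.jit      # kernels are launched as f[blocks, threads](...)
elif BACKEND == "cpu":
    jit = numba.jit
else:
    jit = _noop_jit     # pure Python, handy for debugging

@jit                    # bare decorator form is accepted by all three choices
def add_one(a):
    return a + 1

With the "cpu" or "python" settings, add_one(1) can be called directly; with "cuda" the function would have to be written as a kernel and launched with a grid configuration.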
I am trying to do some timing comparisons using numba.
What I don't understand in the following mwe.py is why I get such different timings:
from __future__ import print_function

import numpy as np
from numba import autojit
import time


def timethis(method):
    '''decorator for timing function calls'''
    def timed(*args, **kwargs):
        ts = time.time()
        result = method(*args, **kwargs)
        te = time.time()
        print('{!r} {:f} s'.format(method.__name__, te - ts))
        return result
    return timed
def pairwise_pure(x):
    '''sample function, compute pairwise distance, see: jakevdp.github.io/blog/2013/06/15/numba-vs-cython-take-2/'''
    M, N = x.shape
    D = np.empty((M, M), dtype=np.float)
    for i in range(M):
        for j in range(M):
            d = 0.
            for k in range(N):
                tmp = x[i, k] - x[j, k]
                d += tmp * tmp
            D[i, j] = np.sqrt(d)
    return D
# first version
@timethis
@autojit
def pairwise_numba(args):
    return pairwise_pure(args)


# second version
@timethis
def pairwise_numba_alt(args):
    return autojit(pairwise_pure)(args)


x = np.random.random((1e3, 10))
pairwise_numba(x)
pairwise_numba_alt(x)
Evaluating python3 mwe.py gives this output:
'pairwise_numba' 5.971631 s
'pairwise_numba_alt' 0.191500 s
In the first version, I decorate the method with timethis to measure the timings and with autojit to speed up the code, whereas in the second one I decorate the function with timethis and call autojit(...) afterwards.
Does someone have an explanation ?
Actually, the documentation explicitly states that, for optimization, each function called "inside" a decorated function should be decorated as well, or it isn't optimized.
For many functions, such as numpy functions, that isn't necessary since they are already highly optimized, but for native Python functions it is.
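Following that advice, a hedged sketch of the faster arrangement (using @njit here instead of the long-deprecated autojit) decorates the inner function itself, so the call inside the jitted wrapper can be resolved and compiled:

import numpy as np
from numba import njit

@njit
def pairwise_pure(x):
    # same pairwise-distance body as above, now compiled in nopython mode
    M, N = x.shape
    D = np.empty((M, M), dtype=np.float64)
    for i in range(M):
        for j in range(M):
            d = 0.
            for k in range(N):
                tmp = x[i, k] - x[j, k]
                d += tmp * tmp
            D[i, j] = np.sqrt(d)
    return D

@njit
def pairwise_numba(x):
    # the inner call is resolved by numba because pairwise_pure is jitted too
    return pairwise_pure(x)

x = np.random.random((1000, 10))
pairwise_numba(x)  # the first call includes compilation time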