I'm trying to pass a jitclass instance to a function decorated with guvectorize, but I don't understand what the signature should be. For instance, in the example below, what should the signature be for the C parameter? I tried typeof(myClass()), but it doesn't work.
import numpy as np
from numba import guvectorize, float64, typeof
from numba.experimental import jitclass

spec = [('a', float64[:])]

@jitclass(spec)
class myClass():
    def __init__(self):
        self.a = np.ones(4,)

@guvectorize([(typeof(myClass()),  # the njit class instance
               float64[:],         # dummy for size of b
               float64[:],         # the result
               )], '(),(n)->(n)')
def compute(C, c, b):
    b = C.a + C.a

C = myClass()
print(C)
b = np.zeros(4)
c = np.zeros(4)
compute(C, c, b)
print(b)
Error:
NotImplementedError: instance.jitclass.myClass#22dfcdfb280<a:array(float64, 1d, A)> cannot be represented as a Numpy dtype
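For reference, here is a minimal sketch of a possible workaround (my own, not a confirmed answer to the question): since every guvectorize argument must be representable as a NumPy dtype, passing the jitclass's array attribute (C.a) instead of the instance avoids the error. The names C_a and the changed layout '(n),(n)->(n)' are my own assumptions.

import numpy as np
from numba import guvectorize, float64

@guvectorize([(float64[:],   # the array held by the class (C.a)
               float64[:],   # dummy for size of b
               float64[:],   # the result
               )], '(n),(n)->(n)')
def compute(a, c, b):
    b[:] = a + a            # write into the output buffer in place

C_a = np.ones(4)
c = np.zeros(4)
print(compute(C_a, c))      # [2. 2. 2. 2.]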
Related
I would like to use scipy's LowLevelCallable utility to improve the performance of my call to generic_filter with my own defined function, which takes in two arrays as input parameters. In this working example they show how a regular call to generic_filter could look.
The LowLevelCallable callback already has a fixed set of input arguments:
int callback(double *buffer, npy_intp filter_size,
double *return_value, void *user_data)
so the only way to pass this second array is via the user_data pointer. However, in order to create a carray object I need both the pointer to the array as well as its length. How can I modify my function wrapper to pass two objects to my function?
from numba import cfunc, carray, jit
from numba.types import intc, CPointer, float64, intp, voidptr
from scipy import LowLevelCallable, ndimage
import numpy as np

image = np.random.random((128, 128))

footprint = np.array([[0, 1, 0],
                      [1, 1, 1],
                      [0, 1, 0]], dtype=bool)

def jit_filter_function(filter_function):
    jitted_function = jit(filter_function, nopython=True)

    @cfunc(intc(CPointer(float64), intp, CPointer(float64), voidptr))
    def wrapped(values_ptr, len_values, result, data):
        values = carray(values_ptr, (len_values,), dtype=float64)
        more = carray(...)  # what do I put here?
        result[0] = jitted_function(values, more)
        return 1

    return LowLevelCallable(wrapped.ctypes)

@jit_filter_function
def fmin(values: np.ndarray, more: np.ndarray):
    result = np.inf
    for v in values:
        if v < result:
            result = v
    return result

ndimage.generic_filter(image, fmin, footprint=footprint, extra_arguments=(np.array([1, 2, 3]),))
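One possible direction (a sketch of my own, not a verified answer): if the extra array is already known when the wrapper is built, Numba can bake it into the compiled cfunc as a read-only closure constant, which sidesteps user_data entirely. The extra argument to jit_filter_function below is my own variation on the question's factory, not the original API.

from numba import cfunc, carray, jit
from numba.types import intc, CPointer, float64, intp, voidptr
from scipy import LowLevelCallable, ndimage
import numpy as np

def jit_filter_function(filter_function, extra):
    jitted_function = jit(filter_function, nopython=True)

    @cfunc(intc(CPointer(float64), intp, CPointer(float64), voidptr))
    def wrapped(values_ptr, len_values, result, data):
        values = carray(values_ptr, (len_values,), dtype=float64)
        # `extra` is a closure variable; Numba freezes it as a read-only constant
        result[0] = jitted_function(values, extra)
        return 1

    return LowLevelCallable(wrapped.ctypes)

def fmin(values, more):
    result = np.inf
    for v in values:
        if v < result:
            result = v
    return result

image = np.random.random((128, 128))
footprint = np.array([[0, 1, 0],
                      [1, 1, 1],
                      [0, 1, 0]], dtype=bool)

callback = jit_filter_function(fmin, np.array([1.0, 2.0, 3.0]))
filtered = ndimage.generic_filter(image, callback, footprint=footprint)

Note that extra_arguments cannot be combined with a LowLevelCallable, which is why the array is captured at wrapper-build time in this sketch.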
I am using ray.put(large_2d_array) to store a large 2D boolean NumPy array. In the worker process I take a column from this shared array, pass it to a Cython function, and create a flat buffer view via cdef cnp.npy_bool* view = &sliced_array[0]. With this view, I can modify the underlying buffer (setting some indices to True).
Since plasma store objects are immutable, what are the unknown problems I might face because of doing this?
I would be glad if someone can throw some light on this.
Code template:
import numpy as np

# ray init code
......

large_2d_array = np.zeros((6000000000, 205), dtype=bool, order='F')
shared_array = ray.put(large_2d_array)

# Call worker via ray remote here and pass the shared_array
......
......

@ray.remote(num_cpus=1)
def worker(large_2d_array, col_idx):
    array_slice = large_2d_array[:, col_idx]
    cython_function(array_slice)

# In file: cython_func.pyx
cimport numpy as cnp

def cython_function(cnp.ndarray[cnp.npy_bool, ndim=1, mode='c'] sliced_array):
    cdef cnp.npy_bool* view = &sliced_array[0]
    cdef int i
    for i in range(100):
        view[i] = True
    return
P.S.: No two workers gain access to the same slice at the same time. Each slice (a column in this case) is accessed only once and modified/written to only once.
Attempted actors method
This still does not work; I am not sure what I am doing wrong.
import ray
import numpy as np

ray.init(num_cpus=4)

@ray.remote
class test:
    def __init__(self, shape):
        self.shape = shape
        self.np_array = np.zeros(shape, dtype=bool)

    def get_col_slice(self, col_idx):
        return self.np_array[:, col_idx]

    def get_array(self):
        return self.np_array

    def write_to_slice(self, col_idx, nrows):
        self.np_array[:, col_idx] = [1] * nrows

@ray.remote
def write_to_alternate_slice(actor_handle, col_idx, nrows):
    actor_handle.write_to_slice.remote(col_idx, nrows)

shape = (10, 20)
test_actor = test.remote(shape)
for i in range(10, 2):
    write_to_alternate_slice.remote(test_actor, i, 10)
print(ray.get(test_actor.get_array.remote()))
I think in this case the sliced array will be copied to your Python heap memory, and modifying the array won't be visible to other workers that access the same object reference.
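A minimal sketch (my own check, assuming a local Ray instance; not from the question) illustrating why in-place writes on object-store arrays are problematic: arrays fetched from the object store come back read-only, so NumPy-level writes fail and an explicit copy is needed before modifying anything.

import numpy as np
import ray

ray.init(num_cpus=1)

ref = ray.put(np.zeros((4, 3), dtype=bool, order='F'))
arr = ray.get(ref)

print(arr.flags.writeable)   # False: zero-copy view backed by the object store
col = arr[:, 0]              # slicing keeps the read-only flag
try:
    col[0] = True
except ValueError as err:
    print(err)               # assignment destination is read-only

writable = col.copy()        # an explicit copy is required before writing
writable[0] = True

Writing through a raw pointer in Cython bypasses this flag, which is exactly why it is unsafe: the check that would normally stop you is skipped.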
I wrote a function to test numba.guvectorize. This function takes the product of two NumPy arrays and computes the sum along the second axis (axis=1), as follows:
from numba import guvectorize, float64
import numpy as np
@guvectorize([(float64[:], float64[:], float64)], '(n),(n)->()')
def g(x, y, res):
    res = np.sum(x * y)
However, the above guvectorize function returns wrong results as shown below:
>>> a = np.random.randn(3,4)
>>> b = np.random.randn(3,4)
>>> np.sum(a * b, axis=1)
array([-0.83053829, -0.15221319, -2.27825015])
>>> g(a, b)
array([4.67406747e-310, 0.00000000e+000, 1.58101007e-322])
What might be causing this problem?
The function g() receives an uninitialized output array through the res parameter. Rebinding res to a new value inside the function doesn't modify the original array passed to the function.
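As a small side illustration (plain NumPy, separate from the Numba fix below; the helper names rebind and fill are my own), rebinding a parameter name inside a function leaves the caller's array untouched, while slice assignment writes into it:

import numpy as np

def rebind(out):
    out = np.array([1.0, 2.0])     # rebinds the local name only

def fill(out):
    out[:] = np.array([1.0, 2.0])  # writes into the caller's buffer

buf = np.zeros(2)
rebind(buf)
print(buf)   # [0. 0.]
fill(buf)
print(buf)   # [1. 2.]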
You need to replace the contents of res (and declare it as an array):
@guvectorize([(float64[:], float64[:], float64[:])], '(n),(n)->()')
def g(x, y, res):
    res[:] = np.sum(x * y)
The function operates on 1D vectors and returns a scalar (thus the signature (n),(n)->()) and guvectorize does the job of dealing with 2D inputs and returning a 1D output.
>>> a = np.random.randn(3,4)
>>> b = np.random.randn(3,4)
>>> np.sum(a * b, axis=1)
array([-3.1756397 , 5.72632531, 0.45359806])
>>> g(a, b)
array([-3.1756397 , 5.72632531, 0.45359806])
But the original Numpy function np.sum is already vectorized and compiled, so there is little speed gain in using guvectorize in this specific case.
Your a and b arrays are 2-dimensional, while your guvectorized function has a signature that accepts 1D arrays and returns a 0D scalar. You have to modify it to accept 2D arrays and return a 1D array.
In one case you call np.sum with axis=1 and in the other case without it; you have to do the same thing in both cases.
Also, instead of res = ... use res[...] = .... Even if this were not the problem for guvectorize, it is a general pitfall in NumPy code: you have to assign values into the array instead of rebinding the variable reference.
In my case I also added the cache=True parameter to the guvectorize decorator; it only speeds things up by caching and re-using the compiled code instead of re-compiling it on every run.
The full modified and corrected code is below:
from numba import guvectorize, float64
import numpy as np

@guvectorize([(float64[:, :], float64[:, :], float64[:])], '(n, m),(n, m)->(n)', cache=True)
def g(x, y, res):
    res[...] = np.sum(x * y, axis=1)

# Test
np.random.seed(0)
a = np.random.randn(3, 4)
b = np.random.randn(3, 4)
print(np.sum(a * b, axis=1))
print(g(a, b))
Output:
[ 2.57335386 3.41749149 -0.42290296]
[ 2.57335386 3.41749149 -0.42290296]
I am trying to compute the gradient of a function using autograd, but it fails and shows the error below:
from autograd import grad
import numpy as np

def f(a):
    return a[0]*np.sin(2*np.pi*a[1]) + a[2]*np.sin(2*np.pi*a[3])

a = [1.0, 1.0, 1.0, 1.0]
gr = grad(f, 0)
print(gr(a))
File "C:\Users\user\Desktop\auto.py", line 23, in f
return a[0]*np.sin(2*np.pi*a[1]) + a[2]*np.sin(2*np.pi*a[3])
TypeError: loop of ufunc does not support argument 0 of type ArrayBox which has no callable sin method
I had the same issue. I think this is a problem with autograd: you have to import numpy from autograd. Add at the beginning: import autograd.numpy as np
from autograd import grad
import autograd.numpy as np   # <----------- the added import

def f(a):
    return a[0]*np.sin(2*np.pi*a[1]) + a[2]*np.sin(2*np.pi*a[3])

a = [1.0, 1.0, 1.0, 1.0]
gr = grad(f, 0)
print(gr(a))
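As a quick sanity check (my own addition, not part of the answer), the result can be compared against the analytic gradient, which at a = [1, 1, 1, 1] is [sin(2π), 2π·cos(2π), sin(2π), 2π·cos(2π)] ≈ [0, 6.283, 0, 6.283]:

import autograd.numpy as np
from autograd import grad

def f(a):
    return a[0]*np.sin(2*np.pi*a[1]) + a[2]*np.sin(2*np.pi*a[3])

analytic = [np.sin(2*np.pi), 2*np.pi*np.cos(2*np.pi),
            np.sin(2*np.pi), 2*np.pi*np.cos(2*np.pi)]
print(np.allclose(grad(f)([1.0, 1.0, 1.0, 1.0]), analytic))  # expected: True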
Usually when I mock, I have the following type of setup
# my_script.py
import numpy as np

def my_func(x):
    out = np.power(x, 2)
    return out
then to test the numpy power call in my_script:
# test_myscript.py
import numpy as np
import unittest
import mock

from my_script import my_func

class TestMyScript(unittest.TestCase):

    @mock.patch("my_script.np")
    def test_my_func(self, mock_os):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        my_func(a)
        mock_os.power.assert_called_with(a, 2)

if __name__ == '__main__':
    unittest.main()
This works fine.
But now the situation changes: say I pass the numpy module as an argument into my_func. I don't know how to mock numpy in this case.
How would I mock numpy in the function below in the same way as it was mocked in test_myscript above?
Note that numpy will not be imported in my_script.py but will instead be imported in a separate script that runs functions from my_script.py.
# my_script.py
# numpy NOT imported in this script!

def my_func(x, numpy):
    out = numpy.power(x, 2)
    return out
EDIT:
Based on @Daniel Roseman's comment, I am including some more code to be explicit about how the functions are called.
# main_script.py
import numpy as np
from my_script import my_func

def main():
    a = np.array([1, 2, 3])
    my_func(a, np)  # numpy is passed into `my_func`
Then to test it, I am trying the following:
# test_myscript.py
import numpy as np
import unittest
import mock

from my_script import my_func

class TestMyScript(unittest.TestCase):

    @mock.patch("main_script.np")  # patch main_script since numpy is imported there
    def test_my_func(self, mock_os):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        my_func(a)
        mock_os.power.assert_called_with(a, 2)

if __name__ == '__main__':
    unittest.main()
But this fails with
Ran 1 test in 0.154s

FAILED (failures=1)
AssertionError: Expected 'power' to have been called.
I found that using the unittest.mock.Mock object here worked best.
So if we have:
# my_script.py
def my_func(x, numpy):
    out = numpy.power(x, 2)
    return out
Then to test it we have:
# test_myscript.py
import numpy as np
import unittest
from unittest.mock import Mock

from my_script import my_func

numpy_mock = Mock()

class TestMyScript(unittest.TestCase):

    def test_my_func(self):
        """Test that numpy.power was called"""
        a = np.array([1, 2, 3])
        _ = my_func(a, numpy_mock)  # pass the mocked object here
        numpy_mock.power.assert_called_once_with(a, 2)

if __name__ == '__main__':
    unittest.main()
This passes the test.
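As a follow-up (my own sketch, not part of the original answer), the scenario from the question's edit can also be tested by patching main_script.np and exercising main(), so the patched module is the object that actually reaches my_func. The test class name and file name below are hypothetical.

# test_mainscript.py (hypothetical file name)
import unittest
from unittest import mock

import main_script

class TestMainScript(unittest.TestCase):

    @mock.patch("main_script.np")
    def test_main_calls_power(self, mock_np):
        main_script.main()
        # main() builds `a` with the patched module, so the argument seen by
        # numpy.power is mock_np.array.return_value
        mock_np.power.assert_called_once_with(mock_np.array.return_value, 2)

if __name__ == '__main__':
    unittest.main()

This works because the patch target is the module where numpy is actually looked up (main_script), and the mocked module is then handed to my_func as its numpy parameter.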