I want to create a boolean numpy array in Cython with the same shape as another numpy array, but it raises an error message:
CosmoTest.pyx
import numpy as np
cimport numpy as np
cimport cython
from libcpp cimport bool
x=np.array([[-0.3,1.2],[2.5,0.82],[0.61,-0.7]])
mask= np.ones_like(x,dtype=bool)
error:
mask= np.ones_like(x,dtype=bool)
^
------------------------------------------------------------
CosmoTest.pyx:318:39: 'bool' is not a constant, variable or function identifier
How should it be defined in cython?
Update:
cpdef np.ndarray arc( np.ndarray x):
    # Piecewise evaluation of a function of x: one formula for x < 0.999,
    # another for x > 1.001, and a linear expression in the band between.
    # NOTE: this version does not compile. `mask` is declared with `cdef`
    # three times in the same function, and the compiler output reports
    # "local variable 'mask' referenced before assignment".
    cdef np.ndarray[double, ndim=1, mode='c'] out = np.zeros_like(x)
    cdef np.ndarray[np.uint8_t,cast=True, ndim=1] mask = (x < 0.999).view(dtype=np.uint8)
    if mask.any():
        out[mask] = 0.5*np.log((1.+((1.-x[mask])/(x[mask]+1.))**0.5)/(1.-((1.-x[mask])/(x[mask]+1.))**0.5))/(1-x[mask]**2)**0.5
    # second declaration of `mask` with cdef
    cdef np.ndarray[np.uint8_t,cast=True, ndim=1] mask = (x > 1.001).view(dtype=np.uint8)
    if mask.any():
        out[mask] = np.arctan(((x[mask]-1.)/(x[mask]+1.))**0.5)/(x[mask]**2 - 1)**0.5
    # third declaration of `mask` with cdef
    cdef np.ndarray[np.uint8_t,cast=True , ndim=1] mask = ((x >= 0.999) & (x <= 1.001)).view(dtype=np.uint8)
    if mask.any():
        out[mask] = 5./6. - x[mask]/3.
    return out
Error Message:
Error compiling Cython file:
------------------------------------------------------------
...
if mask.any():
out[mask] = 0.5*np.log((1.+((1.-x[mask])/(x[mask]+1.))**0.5)/(1.-((1.-x[mask])/(x[mask]+1.))**0.5))/(1-x[mask]**2)**0.5
cdef np.ndarray[np.uint8_t,cast=True, ndim=1] mask = (x > 1.001).view(dtype=np.uint8)
if mask.any():
out[mask] = np.arctan(((x[mask]-1.)/(x[mask]+1.))**0.5)/(x[mask]**2 - 1)**0.5
^
------------------------------------------------------------
CosmoTest.pyx:9:55: local variable 'mask' referenced before assignment
If you change (the last line of) your code to
# np.bool_ is NumPy's boolean scalar type; the bare alias np.bool was
# deprecated in NumPy 1.20 and removed in 1.24.
mask = np.ones_like(x, dtype=np.bool_)
it will work (this takes bool from numpy rather than trying to use the libcpp definition). However, statically typing boolean numpy arrays doesn't quite work at the moment (see "Passing a numpy pointer (dtype=np.bool) to C++").
The best way forward currently is to statically type them as
def f(np.ndarray[dtype=np.int8_t,ndim=1] x):
    """Return a boolean array of ones with the same shape as ``x``.

    ``x`` is a 1-D int8 array (typically a boolean array reinterpreted with
    ``.view(dtype=np.int8)``). The work is done on an int8 buffer, which
    Cython can type statically, and the result is reinterpreted as boolean
    without copying.
    """
    cdef np.ndarray[dtype=np.int8_t,ndim=1] y
    y = np.ones_like(x,dtype=np.int8)
    # np.bool_ rather than np.bool: the latter alias was removed in NumPy 1.24.
    return y.view(dtype=np.bool_) # returns as boolean array
Internally numpy uses an 8 bit integer to store a bool, and thus you can just use view to reinterpret the array without copying.
If you had a boolean array and wanted to call f you'd do
mask = np.array([True,False,True])
f(mask.view(dtype=np.int8))
You could always write a small wrapper function as your public interface to f to do that reinterpretation automatically.
It's more fiddly than it needs to be, but it is possible to work with.
Addition in response to comments
The article I linked to suggested using cast=True:
cdef np.ndarray[np.uint8_t,cast=True] mask = (x > 0.01)
This also works fine. Written in my approach that would be
cdef np.ndarray[np.uint8_t] mask = (x > 0.01).view(dtype=np.uint8)
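(i.e. no cast, but with a view). For element-by-element access there is no practical difference, so pick whichever you think looks nicer. One caveat: if you use the array itself as an index (e.g. out[mask]), NumPy treats a uint8 array as a list of integer positions rather than as a boolean mask, so index with mask.view(dtype=np.bool_) in that case.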
And edited to respond to additional issues
The working code is below (I've checked and it compiles - I haven't checked to make sure it runs). You were getting compiler errors because you'd defined the type of mask multiple times. You're only allowed to use cdef once per variable per function, but having defined the type you can assign to it as often as you like.
cpdef np.ndarray arc( np.ndarray x):
    """Evaluate the piecewise function of ``x`` element by element.

    Three regimes are used:
      * ``x < 0.999``            -> logarithmic branch
      * ``x > 1.001``            -> arctan branch
      * ``0.999 <= x <= 1.001``  -> linear expression ``5/6 - x/3``

    ``x`` is a 1-D array; a new float64 array of the same length is returned.
    """
    # Force float64 so the typed buffer matches even for integer input.
    cdef np.ndarray[double, ndim=1, mode='c'] out = np.zeros_like(x, dtype=np.float64)
    cdef np.ndarray[np.uint8_t, ndim=1] mask = (x < 0.999).view(dtype=np.uint8)
    if mask.any():
        # Index through a boolean view: a uint8 array used as an index is
        # treated by NumPy as integer positions (0/1), not as a mask.
        sel = mask.view(dtype=np.bool_)
        out[sel] = 0.5*np.log((1.+((1.-x[sel])/(x[sel]+1.))**0.5)/(1.-((1.-x[sel])/(x[sel]+1.))**0.5))/(1-x[sel]**2)**0.5
    mask = (x > 1.001).view(dtype=np.uint8) # REMOVED cdef!
    if mask.any():
        sel = mask.view(dtype=np.bool_)
        out[sel] = np.arctan(((x[sel]-1.)/(x[sel]+1.))**0.5)/(x[sel]**2 - 1)**0.5
    mask = ((x >= 0.999) & (x <= 1.001)).view(dtype=np.uint8) # REMOVED cdef!
    if mask.any():
        sel = mask.view(dtype=np.bool_)
        out[sel] = 5./6. - x[sel]/3.
    return out
(I've also removed cast=True from the definition. This isn't important. You can either use that, or use view(dtype=np.uint8). You can use both if you like, but it's more typing!)
Related
I am learning cython and I have modified the code in the tutorial to try to do numerical differentiation:
import numpy as np
cimport numpy as np
import cython
np.import_array()
def test3(a, int order=2, int axis=-1):
    """Numerically differentiate ``a`` along ``axis`` (unit grid spacing).

    Interior points use the central difference ``0.5*(a[i+1] - a[i-1])``.
    The two end points use three-point one-sided formulas.

    Parameters
    ----------
    a : array_like
        Input data. It is converted to float64 because the kernel reads the
        buffer through raw ``double*`` pointers.
    order : int
        Accepted for interface compatibility; not used by the implementation.
    axis : int
        Axis to differentiate along. Negative values count from the end.

    Returns
    -------
    numpy.ndarray
        float64 array with the same shape as ``a``.

    Raises
    ------
    ValueError
        If ``axis`` is out of range or there are fewer than 3 points along it.
    """
    cdef Py_ssize_t i
    cdef Py_ssize_t a_axis_stride, o_axis_stride, axis_length
    cdef double value
    cdef double *pt1
    cdef double *pt2
    cdef double *pt3
    cdef char *src
    cdef char *dst
    cdef np.flatiter ita, ito

    # Raw pointer reads below assume 8-byte doubles; coerce anything else.
    a = np.asarray(a, dtype=np.double)
    if axis < 0:
        axis = a.ndim + axis
    if axis < 0 or axis >= a.ndim:
        raise ValueError("axis is out of bounds for the input array")
    if a.shape[axis] < 3:
        # The end-point stencils read three consecutive samples.
        raise ValueError("need at least 3 points along the differentiation axis")

    out = np.empty(a.shape, np.double)
    ita = np.PyArray_IterAllButAxis(a, &axis)
    ito = np.PyArray_IterAllButAxis(out, &axis)
    # Strides are byte counts and can exceed the range of a C int.
    a_axis_stride = a.strides[axis]
    o_axis_stride = out.strides[axis]
    axis_length = out.shape[axis]

    while np.PyArray_ITER_NOTDONE(ita):
        src = <char*>np.PyArray_ITER_DATA(ita)
        dst = <char*>np.PyArray_ITER_DATA(ito)
        # first element
        pt1 = <double*>src
        pt2 = <double*>(src + 1*a_axis_stride)
        pt3 = <double*>(src + 2*a_axis_stride)
        value = -1.5*pt1[0] + 2*pt2[0] - 0.5*pt3[0]
        (<double*>dst)[0] = value
        # interior elements
        for i in range(axis_length-2):
            pt1 = <double*>(src + i*a_axis_stride)
            pt2 = <double*>(src + (i+2)*a_axis_stride)
            value = -0.5*pt1[0] + 0.5*pt2[0]
            (<double*>(dst + (i+1)*o_axis_stride))[0] = value
        # last element
        pt1 = <double*>(src + (axis_length-3)*a_axis_stride)
        pt2 = <double*>(src + (axis_length-2)*a_axis_stride)
        pt3 = <double*>(src + (axis_length-1)*a_axis_stride)
        value = 1.5*pt3[0] - 2*pt2[0] + 0.5*pt1[0]
        (<double*>(dst + (axis_length-1)*o_axis_stride))[0] = value
        np.PyArray_ITER_NEXT(ita)
        np.PyArray_ITER_NEXT(ito)
    return out
The code produces correct results, and it is indeed faster than my own numpy implementation without cython. The problem is the following:
I thought about only having one pt1 = (<double*>((<char*>np.PyArray_ITER_DATA(ita)) + i*a_axis_stride)) statement and then use pt1[0], pt1[-1], pt1[1] to access the array elements, but this only works if the specified axis is the last one. If I am differentiating a different axis (not the last one), then (<double*>((<char*>np.PyArray_ITER_DATA(ita)) + i*a_axis_stride)) points to the right one, but pt[-1] and pt[1] point to the elements right before and after pt[0], which is along the last axis. The current version works, but if I want to implement higher-order differentiation which requires more points to evaluate, then I will end up having many such lines, and I'm not sure if there are better/more efficient ways to do it using pt[1] or
something like pt[xxx] to access neighbouring points (along the specified axis).
Are there other ways to speed up this piece of code? I am looking for some minor details that I may have overlooked or subtle things that can have a big effect.
To my slight surprise I can't actually beat your version using Cython typed memoryviews - the numpy iterators look pretty quick. However I think I can manage a significant increase in readability to let you use the Python slicing syntax. The only restriction is that the input array must be C contiguous to allow it to be reshaped easily (I think Fortran contiguous might also work, but I haven't tested)
The basic trick is to flatten all the axes before and after selected axis so it is a known 3D shape, at which point you can use Cython memoryviews.
#cython.boundscheck(False)
def test4(a,order=2,axis=-1):
    """Numerically differentiate ``a`` along ``axis`` using typed memoryviews.

    All axes before and after ``axis`` are flattened so the data becomes a
    3-D block of shape ``(before, n, after)``, which can be walked with plain
    index syntax. Interior points use a central difference; the two end
    points use three-point one-sided formulas.

    Parameters
    ----------
    a : array_like
        Input data; converted to a C-contiguous float64 array (copied only
        if it is not one already).
    order
        Accepted for interface compatibility; not used.
    axis : int
        Axis to differentiate along. Negative values count from the end.

    Returns
    -------
    numpy.ndarray
        float64 array with the same shape as the (converted) input.

    Raises
    ------
    ValueError
        If ``axis`` is out of range or there are fewer than 3 points along it.
    """
    # The reshape below is only a cheap view for C-contiguous data.
    a = np.ascontiguousarray(a, dtype=np.double)
    # Normalise first: with axis=-1, a.shape[axis+1:] would be a.shape[0:],
    # i.e. the whole shape instead of the empty tail.
    if axis < 0:
        axis += a.ndim
    if axis < 0 or axis >= a.ndim:
        raise ValueError("axis is out of bounds for the input array")
    n_axis = a.shape[axis]
    if n_axis < 3:
        raise ValueError("need at least 3 points along the differentiation axis")
    # int(...) with an integer dtype: the product of an empty shape would
    # otherwise be the float 1.0, which reshape rejects.
    before = int(np.prod(a.shape[:axis], dtype=np.intp))
    after = int(np.prod(a.shape[(axis+1):], dtype=np.intp))
    cdef double[:,:,::1] a_new = a.reshape((before, n_axis, after)) # this should not involve copying memory - it's just a new view
    cdef double[:] a_slice
    cdef double[:,:,::1] out = np.empty((before, n_axis, after), dtype=np.double)
    cdef Py_ssize_t m,n,i
    cdef Py_ssize_t last = n_axis - 1
    for m in range(a_new.shape[0]):
        for n in range(a_new.shape[2]):
            a_slice = a_new[m,:,n]
            # first element
            out[m,0,n] = -1.5*a_slice[0] + 2*a_slice[1] - 0.5*a_slice[2]
            for i in range(a_slice.shape[0]-2):
                out[m,i+1,n] = -0.5*a_slice[i] + 0.5*a_slice[i+2]
            # last element (explicit indices, so no wraparound is needed)
            out[m,last,n] = 1.5*a_slice[last] - 2*a_slice[last-1] + 0.5*a_slice[last-2]
    return np.asarray(out).reshape(a.shape)
The speed is very slightly slower than your version I think.
In terms of improving your code, you could work out the stride in doubles instead of bytes (a_axis_stride_dbl = a_axis_stride // sizeof(double)) and then index as pt[i*a_axis_stride_dbl]. This only works when the byte stride is an exact multiple of sizeof(double), which is the case for ordinary aligned float64 arrays. It probably won't gain much speed but will be more readable. (This is what you ask about in point 1.)
I am currently working on improving the runtime of a simple Cython function that multiplies a numpy matrix A by a numpy vector x using BLAS (i.e. it computes what A.dot(x) does in plain numpy).
My current implementation matrix_multiply(A,x) does this without copying the data:
import cython
import numpy as np
cimport numpy as np
cimport scipy.linalg.cython_blas as blas
DTYPE = np.float64
ctypedef np.float64_t DTYPE_T
#cython.boundscheck(False)
#cython.wraparound(False)
#cython.nonecheck(False)
def matrix_multiply(np.ndarray[DTYPE_T, ndim=2, mode="fortran"] A, np.ndarray[DTYPE_T, ndim=1, mode="fortran"] x):
    """Return ``A.dot(x)`` computed with BLAS ``dgemv``, without copying.

    ``dgemv`` computes ``y = alpha * op(A) * x + beta * y``. It is called
    here with ``op(A) = A`` ("N"), ``alpha = 1`` and ``beta = 0``.
    See: http://www.nag.com/numeric/fl/nagdoc_fl22/xhtml/F06/f06paf.xml

    Parameters
    ----------
    A : Fortran-contiguous float64 matrix of shape (N, D)
    x : contiguous float64 vector of length D

    Returns
    -------
    numpy.ndarray
        float64 vector of length N.

    Raises
    ------
    ValueError
        If ``len(x)`` does not equal the number of columns of ``A``.
    """
    cdef int N = A.shape[0]
    cdef int D = A.shape[1]
    cdef int lda = N
    cdef int incx = 1 #increments of x
    cdef int incy = 1 #increments of y
    cdef double alpha = 1.0
    cdef double beta = 0.0
    # Zero-initialised so the result is well defined when BLAS is not called.
    cdef np.ndarray[DTYPE_T, ndim=1, mode = "fortran"] y = np.zeros(N, dtype = DTYPE)
    # BLAS trusts the dimensions it is given; a short x would be read past
    # its end.
    if x.shape[0] != D:
        raise ValueError("shapes are not aligned: A has %d columns but x has length %d" % (D, x.shape[0]))
    # An empty product is all zeros; taking &A[0,0] / &x[0] / &y[0] of an
    # empty array is invalid, so return before touching them.
    if N == 0 or D == 0:
        return y
    blas.dgemv("N", &N, &D, &alpha, &A[0,0], &lda, &x[0], &incx, &beta, &y[0], &incy)
    return y
I am wondering how I can change this so that it computes A[:, selected].dot(x) instead of A.dot(x), where selected is an ordered set of column indices.
I am open to any implementation, though I suppose that the easiest way would be to change the function header to matrix_multiply(A,x,selected) so it also expects selected as an input. I believe that the answer has to use memory views, but I am not sure.
Consider the following example:
cdef test_function():
    # Two typed memoryviews initialised from numpy arrays.
    cdef:
        double[:] p1 = np.array([3.2, 2.1])
        double[:] p2 = np.array([0.9, 6.])
    # This line is what the compiler rejects: it reports
    # "Invalid operand types for '-' (double[:]; double[:])".
    return p1-p2
If used, it returns the following error:
Error compiling Cython file:
------------------------------------------------------------
...
cdef test_function():
cdef:
double[:] p1 = np.array([3.2, 2.1])
double[:] p2 = np.array([0.9, 6.])
return p1-p2
^
------------------------------------------------------------
cython_cell_v3.pyx:354:13: Invalid operand types for '-' (double[:]; double[:])
If I am using numpy arrays to initialize the memory view, how can I go about using its functionality? Do I have to somehow do some dereferencing on the memoryviews?
This works:
cpdef test_function():
    """Return ``p1 - p2`` for two small vectors held in typed memoryviews.

    The subtraction is written as an explicit element loop over the
    memoryviews. The result is written into ``p1`` and handed back wrapped
    in a numpy array so callers get full ndarray behaviour.
    """
    cdef:
        double[:] p1 = np.array([3.2, 2.1])
        double[:] p2 = np.array([0.9, 6.])
        # Typed index so the loop compiles to a plain C loop.
        Py_ssize_t i
    # return p1-p2
    for i in range(p1.shape[0]):
        p1[i] -= p2[i]
    return np.asarray(p1)
# print() form works under both language levels for a single formatted value
# and is required by language_level=3.
print("Test _function", test_function())
I iterate on the arrays as though they were 'c' arrays. And without the final np.asarray, it will just display
>>> memview.test_function()
<MemoryView of 'ndarray' at 0xb60e772c>
See also the example in
http://docs.cython.org/src/userguide/memoryviews.html#comparison-to-the-old-buffer-support
I tried a different function:
cpdef test_function1(x):
    """Square ``x`` in place through a memoryview and return twice the result.

    The memoryview shares memory with ``x``, so ``x`` itself holds the
    squared values afterwards. The returned array is a new one.
    """
    cdef int idx
    cdef int length = x.shape[0]
    cdef double[:] view = x
    for idx in range(length):
        view[idx] = view[idx] * view[idx]
    squared = np.asarray(view)
    return squared*2
# Demonstrates that test_function1 modifies its argument in place.
x = np.arange(10.)
# print() calls instead of Python 2 print statements, which are a syntax
# error under language_level=3.
print("test_function1 return", test_function1(x))
print("x after test_function1", x)
As expected, after the function x is x**2. But what the function returns is 2*x**2.
I modify p1 directly, but end up modifying x as well. I think of p1 as a view of x, but one with reduced functionality. np.asarray(p1) gives it a numpy functionality, so I can perform an array * on it and return the result (without further modifying x).
If instead I'd finished the function with:
out = np.asarray(p1)
out *= 2
return out
I end up modifying the original x as well. out is a numpy view on x. out behaves like an array because it is one, not because of some distant link to x.
The problem
I'm trying to Cythonize two small functions that mostly deal with numpy ndarrays for some scientific purpose. These two small functions are called millions of times in a genetic algorithm and account for the majority of the time taken by the algorithm.
I made some progress on my own and both work nicely, but I get only a tiny speed improvement (10%). More importantly, cython --annotate shows that the majority of the code is still going through Python.
The code
First function:
The aim of this function is to get back slices of data and it is called millions of times in an inner nested loop. Depending on the bool in data[1][1], we either get the slice in the forward or reverse order.
#Ipython notebook magic for cython
%%cython --annotate
import numpy as np
from scipy import signal as scisignal
cimport cython
cimport numpy as np
def get_signal(data):
    """Return a 200-sample window from each of three arrays.

    data[0] contains the data structure containing the numpy arrays
    data[1][0] contains the position to slice
    data[1][1] contains the orientation to slice, forward = 0, reverse = 1

    For the forward orientation the windows are returned as stored. For the
    reverse orientation each window is reversed and the forward/reverse
    arrays swap roles.

    Returns a tuple ``(normals_forward, normals_reverse, normals_combined)``.
    """
    cdef int halfwinwidth = 100
    cdef int midpoint = data[1][0]
    cdef int strand = data[1][1]
    cdef int start = midpoint - halfwinwidth
    cdef int end = midpoint + halfwinwidth
    #the arrays we want to slice
    cdef np.ndarray r0 = data[0]['normals_forward']
    cdef np.ndarray r1 = data[0]['normals_reverse']
    cdef np.ndarray r2 = data[0]['normals_combined']
    if strand == 0:
        normals_forward = r0[start:end]
        normals_reverse = r1[start:end]
        normals_combined = r2[start:end]
    else:
        # Slice forward, then reverse. Writing r[end-1:start-1:-1] breaks
        # when start == 0: the stop becomes -1, which means "the last
        # element", and the slice comes back empty.
        normals_forward = r1[start:end][::-1]
        normals_reverse = r0[start:end][::-1]
        normals_combined = r2[start:end][::-1]
    #return the result as a tuple
    row = (normals_forward,
           normals_reverse,
           normals_combined)
    return row
Second function
This one gets a list of tuples of numpy arrays, and we want to add up the arrays element wise, then normalize them and get the integration of the intersection.
def calculate_signal(list signal):
    """Average a list of signal triples and integrate their intersection.

    Each item of ``signal`` is indexed at 0, 1 and 2 for the forward,
    reverse and combined arrays. The three are summed element-wise over all
    items and divided by the number of items. The element-wise minimum of
    the forward and reverse averages is then detrended, negative values are
    set to zero, and the remainder is summed.

    Returns a dict with the keys ``intersection``,
    ``profile_normals_forward``, ``profile_normals_reverse`` and
    ``profile_normals_combined``.
    """
    cdef int halfwinwidth = 100
    cdef int count
    cdef np.ndarray fwd_total = np.zeros(halfwinwidth * 2, dtype='f')
    cdef np.ndarray rev_total = np.zeros(halfwinwidth * 2, dtype='f')
    cdef np.ndarray comb_total = np.zeros(halfwinwidth * 2, dtype='f')

    # element-wise accumulation of every triple
    for triple in signal:
        fwd_total += triple[0]
        rev_total += triple[1]
        comb_total += triple[2]

    # turn the sums into means
    count = len(signal)
    fwd_total /= count
    rev_total /= count
    comb_total /= count

    overlap = scisignal.detrend(np.fmin(fwd_total, rev_total))
    overlap[overlap < 0] = 0

    results = {}
    results["intersection"] = np.sum(overlap)
    results["profile_normals_forward"] = fwd_total
    results["profile_normals_reverse"] = rev_total
    results["profile_normals_combined"] = comb_total
    return results
Any help is appreciated - I tried using memory views but for some reason the code got much, much slower.
After fixing the array cdef (as has been indicated, with the dtype specified), you should probably put the routine in a cdef function (which will only be callable by a def function in the same script).
In the declaration of the function, you'll need to provide the element type (and the number of dimensions if it's a numpy array):
cdef get_signal(numpy.ndarray[DTYPE_t, ndim=3] data):
I'm not sure using a dict is a good idea though. You could make use of numpy's column or row slices like data[:, 0].
I have made some Numpy C-extensions before with great help from this site, but as far as I can see the returned parameters are all fixed length.
Is there any way to have a Numpy C-extension return a variable length numpy array instead?
You may find it easier to write numpy extensions in Cython using the NumPy C-API, which simplifies the process because it lets you mix Python and C objects. In that case there is little difficulty in making a variable-length array: you can simply create an array with an arbitrary shape.
The Cython numpy tutorial is probably the best source on this topic.
For example, here is a function I recently wrote:
import numpy as np
cimport numpy as np
cimport cython
dtype = np.double
ctypedef double dtype_t
np.import_ufunc()
np.import_array()
def ewma(a, d, axis):
    """Exponentially weighted moving average of ``a`` along ``axis``.

    The weight given to each new sample is ``exp(-d)``. It is handed to the
    1-D kernel through a one-element ``void*`` argument array. The input is
    copied into a float array before processing.
    """
    cdef double decay[1]
    cdef void *extra[1]
    decay[0] = <double>np.exp(-d)
    extra[0] = &decay[0]
    values = np.array(a, dtype = float)
    return apply_along_axis(&ewma_func, values, np.double, np.double, False, &(extra[0]), <int>axis)
cdef void ewma_func(int n, void* aData,int astride, void* oData, int ostride, void** args):
    # Exponentially weighted moving average kernel for one 1-D run of data.
    #   n        number of elements to process
    #   aData    start of the input run, read as doubles
    #   astride  distance in bytes between consecutive input elements
    #   oData    start of the output run, written as doubles
    #   ostride  distance in bytes between consecutive output elements
    #   args     args[0] points to a double holding the weight
    cdef double avg = 0.0
    cdef double weight = (<double*>(args[0]))[0]
    cdef int i = 0
    for i in range(n):
        # new average = sample * weight + previous average * (1 - weight);
        # byte offsets are applied on char* before casting back to double*
        avg = (<double*>((<char*>aData) + i * astride))[0]*weight + avg * (1.0 - weight)
        (<double*>((<char*>oData) + i * ostride))[0] = avg
ctypedef void (*func_1d)(int, void*, int, void*, int, void **)
cdef apply_along_axis(func_1d function, a, adtype, odtype, reduce, void** args, int axis):
    # Generic driver that applies a Cython 1-D kernel along one axis of `a`.
    #   function  kernel called once per 1-D run along `axis`
    #   a         input array
    #   adtype    not used in this function
    #   odtype    dtype of the output array
    #   reduce    if true, the output has length 1 along `axis` and that
    #             axis is removed from the result before returning
    #   args      opaque argument array forwarded to the kernel
    #   axis      axis along which the kernel runs
    oshape = list(a.shape)
    if reduce :
        oshape[axis] = 1
    out = np.empty(oshape, odtype)
    # Iterators over every position except along `axis`; each step gives the
    # start of one 1-D run in the input and the matching run in the output.
    cdef np.flatiter ita, ito
    ita = np.PyArray_IterAllButAxis(a, &axis)
    ito = np.PyArray_IterAllButAxis(out, &axis)
    cdef int axis_length = a.shape[axis]
    cdef int a_axis_stride = a.strides[axis]
    cdef int o_axis_stride = out.strides[axis]
    if reduce:
        # every write of a reducing kernel lands on the same output element
        o_axis_stride = 0
    while np.PyArray_ITER_NOTDONE(ita):
        function(axis_length, np.PyArray_ITER_DATA (ita), a_axis_stride, np.PyArray_ITER_DATA (ito), o_axis_stride, args)
        np.PyArray_ITER_NEXT(ita)
        np.PyArray_ITER_NEXT(ito)
    if reduce:
        # drop the length-1 axis from the result
        oshape.pop(axis)
        out.shape = oshape
    return out
If this doesn't suit you, there is a function for making a new empty array with arbitrary shape (link).
I am interpreting your question to mean "I have a function that takes a NumPy array of length n, but it will return another array of length m different from n." If that is the case, you will need to malloc a new C array in the extension, e.g.
new_array = malloc(m * sizeof(int64)); // or whatever your data type is
then create a new NumPy array with that. This example assumes a 1D array:
int npy_intp dims[1];
dims[0] = m;
PyArrayObject *out = (PyArrayObject *)PyArray_SimpleNewFromData(1, // 1D array
dims, // dimensions
NPY_INT64, // type
new_array);
PyArray_ENABLEFLAGS(out, NPY_ARRAY_OWNDATA);
Then return the new array. The important part here is to set the NPY_ARRAY_OWNDATA flag so that the memory you allocated is freed when the Python object is garbage collected.