Python Tuple to Cython Struct - python

Scipy splprep (spline preparation) produces a tuple tckp
tckp : tuple (t,c,k) a tuple containing the vector of knots,
the B-spline coefficients, and the degree of the spline.
tckp = [array[double,double ,..,double],
[array[double,double ,..,double],
array[double,double ,..... ,double],
array[double,double ,..... ,double]], int]
How can I construct and fill an equivalent Cython structure so that I can use
splev (spline evaluation) within Cython?

As discussed in the comments, it depends on how you will pass tckp to other functions. One way to store this information and pass to other functions is using a struct.
In the example below you pass the tckp list using a struct to a cdef function that takes a void * as input, simulating a C function... this example function adds 1 to all the arrays assuming that int0 is the size of the arrays.
import numpy as np
cimport numpy as np
# C-level view of the tckp list: four pointers to double data plus a pointer
# to an int. The struct stores addresses only; it does not own any memory.
cdef struct type_tckp_struct:
    # Each arrayN is set in main() to the address of element 0 of a float64 array.
    double *array0
    double *array1
    double *array2
    double *array3
    # Address of an int; intern_func reads it as the number of elements to process.
    int *int0
def main():
    """Build a sample tckp list, expose it through a struct and call intern_func.

    The struct only stores addresses of the local arrays and of the local
    int, so it is valid only while this function is running.
    """
    cdef type_tckp_struct tckp_struct
    # mode="c" makes Cython reject arrays that are not C-contiguous. The raw
    # pointers taken below are walked with plain pointer arithmetic, which is
    # only correct for contiguous data.
    cdef np.ndarray[np.float64_t, ndim=1, mode="c"] barray0, barray1, barray2, barray3
    cdef int bint
    tckp = [np.arange(1,11).astype(np.float64),
            2*np.arange(1,11).astype(np.float64),
            3*np.arange(1,11).astype(np.float64),
            4*np.arange(1,11).astype(np.float64), 10]
    # Bind the Python objects to typed buffers so element addresses can be taken.
    barray0 = tckp[0]
    barray1 = tckp[1]
    barray2 = tckp[2]
    barray3 = tckp[3]
    bint = tckp[4]
    tckp_struct.array0 = &barray0[0]
    tckp_struct.array1 = &barray1[0]
    tckp_struct.array2 = &barray2[0]
    tckp_struct.array3 = &barray3[0]
    tckp_struct.int0 = &bint
    # The callee takes a void *, as a plain C callback would.
    intern_func(&tckp_struct)
cdef void intern_func(void *args):
    # Interpret the opaque pointer as a type_tckp_struct and add 1 to the
    # first int0[0] elements of each of its four arrays, in place.
    cdef type_tckp_struct *tckp = <type_tckp_struct *>args
    cdef int count = tckp.int0[0]
    cdef int idx
    for idx in range(count):
        tckp.array0[idx] += 1
        tckp.array1[idx] += 1
        tckp.array2[idx] += 1
        tckp.array3[idx] += 1

Related

How to copy a 2D array (matrix) from python with a C function (and do some computer heavy computation) which return a 2D array (matrix) in python?

I want to copy a 2D numpy array (matrix) in a C function and get it back in Python (and then do some calculation on it in C, taking advantage of C's speed). Therefore I need the C function matrix_copy to return a 2D array (or, I guess, a pointer to it). I tried the following code, but I get the following output (where one can see that the second dimension of the array is lost).
matrix_in.shape:
(300, 200)
matrix_out.shape:
(300,)
How could I change the code (I guess the matrix_copy.c adding some pointer magic) so I could obtain an exact copy of the matrix_in in matrix_out?
Here is the main.py script:
from ctypes import c_void_p, c_double, c_int, cdll
from numpy.ctypeslib import ndpointer
import numpy as np
import pdb
# Matrix dimensions: n rows, m columns.
n = 300
m = 200
matrix_in = np.random.randn(n, m)
lib = cdll.LoadLibrary("matrix_copy.so")
matrix_copy = lib.matrix_copy
# The declared result shape is one-dimensional (length n); m is not part of
# it, which matches the (300,) shape printed for matrix_out.
matrix_copy.restype = ndpointer(dtype=c_double,
                                shape=(n,))
# Pass the input buffer address and both dimensions to the C function.
matrix_out = matrix_copy(c_void_p(matrix_in.ctypes.data),
                         c_int(n),
                         c_int(m))
print("matrix_in.shape:")
print(matrix_in.shape)
print("matrix_out.shape:")
print(matrix_out.shape)
Here is the matrix_copy.c script:
#include <stdlib.h>
#include <stdio.h>
/* Allocate a block of n*m doubles, fill part of it from matrix_in and return
 * it. The block comes from malloc and is not freed here. */
double * matrix_copy(const double * matrix_in, int n, int m){
    double * matrix_out = (double *)malloc(sizeof(double) * (n*m));
    int index = 0; /* not used anywhere below */
    for(int i=0; i< n; i++){
        for(int j=0; j<m; j++){
            /* NOTE(review): i+j only ranges over 0..n+m-2, so most of the
             * n*m elements are never written; it is not the offset of
             * element (i, j) in a row-major block. */
            matrix_out[i+j] = matrix_in[i+j];
            //matrix_out[i][j] = matrix_in[i][j];
            // some heavy computations not yet implemented
        }
    }
    return matrix_out;
}
which I compile with the command
cc -fPIC -shared -o matrix_copy.so matrix_copy.c
And as a side note, why does the notation matrix_out[i][j] = matrix_in[i][j]; throw an error on compilation?
matrix_copy.c:10:26: error: subscripted value is not an array, pointer, or vector
matrix_out[i][j] = matrix_in[i][j];
~~~~~~~~~~~~~^~
matrix_copy.c:10:44: error: subscripted value is not an array, pointer, or vector
matrix_out[i][j] = matrix_in[i][j];
The second dimension is 'lost' because you explicitly omit it in the named shape argument of ndpointer. Change:
matrix_copy.restype = ndpointer(dtype=c_double, shape=(n,))
to
matrix_copy.restype = ndpointer(dtype=c_double, shape=(n,m), flags='C')
Where flags='C' additionally notes that the returned data is stored contiguously in row major order.
With regards to matrix_out[i][j] = matrix_in[i][j]; throwing an error, consider that matrix_in is of type const double *. matrix_in[i] would yield a value of type const double - how do you further index this value (i.e., with [j])?
If you want to emulate accessing a 2-dimensional array via a single pointer, you must calculate offsets manually. matrix_out[i+j] is not sufficient, as you must account for the span of each sub array:
matrix_out[i * m + j] = matrix_in[i * m + j];
Note that in C, size_t is the generally preferred type to use when dealing with memory sizes or array lengths.
matrix_copy.c, simplified:
#include <stdlib.h>
/*
 * Return a newly malloc'd copy of the n-by-m row-major matrix at matrix_in.
 *
 * Returns NULL if the element count n*m would overflow size_t or if the
 * allocation fails. The caller owns the returned block and must free() it.
 */
double *matrix_copy(const double *matrix_in, size_t n, size_t m)
{
    double *matrix_out;

    /* Reject sizes whose byte count does not fit in size_t; otherwise the
     * multiplication would wrap and malloc would return a too-small block. */
    if (m != 0 && n > (size_t)-1 / sizeof *matrix_out / m)
        return NULL;

    matrix_out = malloc(sizeof *matrix_out * n * m);
    if (matrix_out == NULL)
        return NULL;

    /* Element (i, j) of a row-major matrix lives at offset i * m + j. */
    for (size_t i = 0; i < n; i++)
        for (size_t j = 0; j < m; j++)
            matrix_out[i * m + j] = matrix_in[i * m + j];

    return matrix_out;
}
matrix.py, with more explicit typing:
from ctypes import c_void_p, c_double, c_size_t, cdll, POINTER
from numpy.ctypeslib import ndpointer
import numpy as np
c_double_p = POINTER(c_double)
# Matrix dimensions: n rows, m columns.
n = 300
m = 200
matrix_in = np.random.randn(n, m).astype(c_double)
lib = cdll.LoadLibrary("matrix_copy.so")
matrix_copy = lib.matrix_copy
# Declare the C signature so ctypes converts and checks the arguments.
matrix_copy.argtypes = c_double_p, c_size_t, c_size_t
# The result is exposed as an (n, m) C-ordered array of doubles.
matrix_copy.restype = ndpointer(
    dtype=c_double,
    shape=(n,m),
    flags='C')
# NOTE(review): the C function mallocs the returned buffer and nothing in
# this script frees it.
matrix_out = matrix_copy(
    matrix_in.ctypes.data_as(c_double_p),
    c_size_t(n),
    c_size_t(m))
print("matrix_in.shape:", matrix_in.shape)
print("matrix_out.shape:", matrix_out.shape)
# Report one overall verdict instead of an n-by-m matrix of booleans.
print("in == out", np.array_equal(matrix_in, matrix_out))
The incoming data is probably a single block of memory. You need to create the substructure.
In my C++ code I have to do the following on data (block) coming in via swig:
void divide2DDoubleArray(double * &block, double ** &subblockdividers, int noofsubblocks, int subblocksize){
    /* The starting address of a block of doubles is used to generate
     * pointers to subblocks.
     *
     * block: memory containing the original block of data
     * subblockdividers: (output) newly allocated array of subblock
     *                   addresses; set to a null pointer when
     *                   noofsubblocks is not positive
     * noofsubblocks: specify the number of subblocks produced
     * subblocksize: specify the size of the subblocks produced
     *
     * Design by contract: application should make sure the memory
     * in block is allocated and initialized properly. The array stored
     * in subblockdividers is allocated with new[] and must be released
     * by the caller with delete[]; the data in block is not copied.
     */
    // Without this guard a count of 0 would write past a zero-length array.
    if (noofsubblocks <= 0) {
        subblockdividers = 0;
        return;
    }
    // Build 2D matrix for cols
    subblockdividers = new double *[noofsubblocks];
    subblockdividers[0] = block;
    // Each subblock starts subblocksize doubles after the previous one.
    for (int i = 1; i < noofsubblocks; ++i) {
        subblockdividers[i] = subblockdividers[i-1] + subblocksize;
    }
}
Now the pointer returned in subblockdividers can be used the way you would like to.
Don't forget to release subblockdividers with delete[] when you're done, since it is allocated with new[]. (Note: adjustments might be needed to compile this as C code)

Optimize cython functions operating on python lists

I am currently migrating to Cython a set of functions that are currently implemented in C++ through scipy.weave (now deprecated).
These functions operate on timeseries points that are 2D-lists (eg. [[17100, 19.2], [17101, 20.7], [17102, 20.3], ...]) both in input and in output. A sample function is subtract that accepts two timeseries and calculates a new timeserie as subtraction of the two inputs going date-by-date.
The structure and the interfaces have to be maintained for backward compatibility, but my profiling trials show that the Cython port is about 30%-40% slower than the original scipy.weave implementation.
I have tried many ways to optimize (inner conversions to Numpy arrays and memoryviews, C pointers, ...), but the conversion time required lengthens the overall execution time. Even defining input and output as C++ vectors and relying on Cython's implicit conversions doesn't seem to be enough to match scipy.weave speed. I have also used the various hints on boundscheck, wraparound, division, ...
The biggest slow-downs seem to be in functions that use nested loops, and I've seen that a small gain can be obtained by predefining the list size (cdef list target = [[-1, float('nan')]]*size).
I am aware that Cython can't perform very well on Python structures, especially lists, but are there any other tricks or techniques with which a speedup can be obtained?
=== EDIT - ADD CODE EXAMPLE ===
A good example of the typology of functions is the following.
The function takes a 2-D list of dates/prices and a 2-D list of dates/decimal factors and searches matching dates between the two lists, calculating the output on the corresponding price/factor by multiplying or dividing (that is a third input parameter).
My best-performing cython code:
#cython.cdivision(True)
#cython.boundscheck(False)
#cython.wraparound(False)
# NOTE(review): as written, the three lines above are ordinary comments, so
# these directives are not applied to the function. They were presumably
# decorators (`@cython....`) in the original - confirm.
cpdef apply_conversion(list original_timeserie, list factor_timeserie, int divide_or_multiply=False):
    # Match each [date, price] entry of original_timeserie with the entry of
    # factor_timeserie that has the same date, and return a new list of
    # [date, converted_price] for the dates that matched.
    # divide_or_multiply: non-zero divides the price by the factor, zero multiplies.
    cdef:
        Py_ssize_t i, j = 0, size = len(original_timeserie), size2 = len(factor_timeserie)
        long original_date, factor_date
        double original_price, factor_price, conv_price
        list result = []
    for i in range(size):
        original_date = original_timeserie[i][0]
        # The scan of factor_timeserie resumes from the current j instead of
        # restarting at 0 (presumably both series are sorted by date - confirm).
        for j in range(j, size2):
            factor_date = factor_timeserie[j][0]
            if original_date == factor_date:
                original_price = original_timeserie[i][1]
                factor_price = factor_timeserie[j][1]
                if divide_or_multiply:
                    if factor_price != 0:
                        conv_price = original_price / factor_price
                    else:
                        # A zero factor yields infinity rather than a division error.
                        conv_price = float('inf')
                else:
                    conv_price = original_price * factor_price
                result.append([original_date, conv_price])
                break
    return result
The original scipy function:
// Inline scipy.weave body: for every [date, price] item of original_timeserie,
// find the item of factor_timeserie with the same date and store
// [date, converted price] at the same index of a new list.
int len = original_timeserie.length();
int len2 = factor_timeserie.length();
// The result list is pre-sized to len; slots are filled only for dates that match.
PyObject* py_serieconv = PyList_New(len);
PyObject* original_item = NULL;
PyObject* factor_item = NULL;
PyObject* date = NULL;
PyObject* value = NULL;
long original_date = 0;
long factor_date = 0;
double original_price = 0;
double factor_price = 0;
// j is shared by all outer iterations, so factor_timeserie is walked only once overall.
int j = 0;
for(int i=0;i<len;i++) {
    original_item = PyList_GetItem(original_timeserie, i);
    date = PyList_GetItem(original_item, 0);
    original_date = PyInt_AsLong(date);
    original_price = PyFloat_AsDouble( PyList_GetItem(original_item, 1) );
    factor_item = NULL;
    for(;j<len2;) {
        // j advances on every probe, including the one that matches.
        factor_item = PyList_GetItem(factor_timeserie, j++);
        factor_date = PyInt_AsLong(PyList_GetItem(factor_item, 0));
        if (factor_date == original_date) {
            factor_price = PyFloat_AsDouble(PyList_GetItem(factor_item, 1));
            // divide_or_multiply == 0 multiplies by the factor, otherwise by its reciprocal.
            value = PyFloat_FromDouble(original_price * (divide_or_multiply==0 ? factor_price : 1/factor_price));
            PyObject* py_new_item = PyList_New(2);
            // date was obtained from PyList_GetItem; take a reference before
            // handing it to PyList_SetItem.
            Py_XINCREF(date);
            PyList_SetItem(py_new_item, 0, date);
            PyList_SetItem(py_new_item, 1, value);
            PyList_SetItem(py_serieconv, i, py_new_item);
            break;
        }
    }
}
return_val = py_serieconv;
Py_XDECREF(py_serieconv);

Cython typed memory view of struct

I have this code
import random
from random import randint
from cython cimport boundscheck, wraparound
cdef char * flip(float p):
    # Return the C string 'h' when random.random() is below p, otherwise 't'.
    cdef:
        char* head = 'h'
        char* tail = 't'
    return head if random.random() < p else tail
# One result record per experiment: three head fractions.
cdef struct v_bag:
    float v1      # filled from head_count_coins[0]
    float v_rand  # filled from head_count_coins[l], with l drawn by randint(0, 999)
    float v_min   # filled from min(head_count_coins)
# NOTE(review): a cdef function named flip is already declared earlier in this
# snippet with a different signature (char * flip(float p)). This is the likely
# cause of the reported "Function signature does not match previous
# declaration" error - confirm.
cdef v_bag[:] flip(float num_flips,float num_coins_flipped,float num_experiments):
    # For each experiment, flip every coin num_flips times, record the fraction
    # of heads per coin, and store three of those fractions in a v_bag.
    cdef:
        float[:] head_count_coins
        v_bag[:] results
        int N,M,i,j,l
        v_bag vs
    # NOTE(review): num_experiments and num_coins_flipped are declared as float
    # but are read through .shape here; the memoryviews above are also used
    # without being assigned a buffer in this function.
    N=num_experiments.shape[0]; M=num_coins_flipped.shape[0]
    with boundscheck(False), wraparound(False):
        for i in range(N):
            for j in range(M):
                flips = [flip(0.5) for k in xrange(num_flips)]
                # Fraction of heads for coin j.
                count = float(flips.count('h'))/num_flips
                head_count_coins[j]=count
            l = randint(0, 999)
            results[i]= v_bag(v1=head_count_coins[0],
                              v_rand=head_count_coins[l],
                              v_min=float(min(head_count_coins)))
    return results
And I keep getting this error: ' Function signature does not match previous declaration'
I can't seem to figure out how to get cython to compile this with a typed memory view, any help or suggestions would be greatly appreciated, the issue seems to be the memory view of structs.

Cython - Optimize read binary file

I am trying to improve this cython code (which works). Please note that I don't want to use numpy.fromfile.. because I want to be able to parse not fixed binary structures.
from libc.stdio cimport *
import struct
cpdef inline cimport_td(char* f, double[:] dates, double[:] tpx, int[:] tvo):
    """Read fixed-size 'ddi' records from a binary file into three buffers.

    f     -- path of the file; backslashes are replaced by forward slashes.
    dates -- receives the first double of each record.
    tpx   -- receives the second double of each record.
    tvo   -- receives the int of each record.

    Returns the tuple (dates, tpx, tvo). struct.error is raised if the file
    ends with an incomplete record.
    """
    cdef int B, rec_size, bb = 0, dd = 0
    # Compile the record layout once instead of re-parsing 'ddi' per record.
    record = struct.Struct('ddi')
    rec_size = record.size
    # The with-block closes the file handle even if reading fails.
    with open(f.replace('\\','/'),'rb') as fh:
        f_b = fh.read()
    B = len(f_b)
    while bb < B:
        # unpack_from reads in place and avoids copying a slice per record.
        dates[dd], tpx[dd], tvo[dd] = record.unpack_from(f_b, bb)
        bb += rec_size
        dd += 1
    del f_b
    return dates, tpx, tvo
Is there anything better than open/read and struct unpack ?
Thank you.

Fast string array - Cython

Having following hypothetical code:
cdef extern from "string.h":
int strcmp(char* str1, char* str2)
def foo(list_str1, list_str2):
cdef unsigned int i, j
c_arr1 = ??
c_arr2 = ??
for i in xrange(len(list_str1)):
for j in xrange(len(list_str2)):
if not strcmp(c_arr1[i], c_arr2[j]):
do some funny stuff
is there some way how to convert the lists to c arrays?
I have read and tried Cython - converting list of strings to char ** but that only throws errors.
Try following code. to_cstring_array function in the following code is what you want.
from libc.stdlib cimport malloc, free
from libc.string cimport strcmp
from cpython.string cimport PyString_AsString
cdef char ** to_cstring_array(list_str) except NULL:
    # Build a malloc'd array of char * pointing at the string data of each
    # element of list_str. The strings themselves are not copied: every
    # pointer is valid only while the corresponding Python object is alive.
    # The caller must free() the returned array. Raises MemoryError if the
    # allocation fails.
    cdef Py_ssize_t count = len(list_str)
    cdef Py_ssize_t idx
    # Ask for at least one slot so that a NULL result always means failure,
    # even for an empty list.
    cdef char **ret = <char **>malloc((count if count > 0 else 1) * sizeof(char *))
    if ret == NULL:
        raise MemoryError()
    try:
        for idx in range(count):
            ret[idx] = PyString_AsString(list_str[idx])
    except:
        # Do not leak the array when an element cannot be converted.
        free(ret)
        raise
    return ret
def foo(list_str1, list_str2):
    # Print (i, j, string) for every pair with i != j whose strings compare
    # equal with strcmp, then run the demo call below.
    cdef unsigned int i, j
    cdef char **c_arr1 = to_cstring_array(list_str1)
    cdef char **c_arr2 = to_cstring_array(list_str2)
    try:
        for i in xrange(len(list_str1)):
            for j in xrange(len(list_str2)):
                if i != j and strcmp(c_arr1[i], c_arr2[j]) == 0:
                    print i, j, list_str1[i]
    finally:
        # Release the pointer arrays even if the loop body raises.
        free(c_arr1)
        free(c_arr2)
foo(['hello', 'python', 'world'], ['python', 'rules'])
If you're on Python 3, here's an update to @falsetru's answer (untested on Python 2).
cdef extern from "Python.h":
char* PyUnicode_AsUTF8(object unicode)
from libc.stdlib cimport malloc, free
from libc.string cimport strcmp
cdef char ** to_cstring_array(list_str) except NULL:
    # Build a malloc'd array of char * holding the UTF-8 pointer of each
    # element of list_str. The strings themselves are not copied. The caller
    # must free() the returned array. Raises MemoryError if the allocation fails.
    cdef Py_ssize_t count = len(list_str)
    cdef Py_ssize_t idx
    # Ask for at least one slot so that a NULL result always means failure,
    # even for an empty list.
    cdef char **ret = <char **>malloc((count if count > 0 else 1) * sizeof(char *))
    if ret == NULL:
        raise MemoryError()
    for idx in range(count):
        # NOTE(review): the result of PyUnicode_AsUTF8 is stored unchecked;
        # confirm how a failed conversion should be handled.
        ret[idx] = PyUnicode_AsUTF8(list_str[idx])
    return ret
def foo(list_str1, list_str2):
    # Print (i, j, string) for every pair with i != j whose strings compare
    # equal with strcmp, then run the demo call below.
    cdef unsigned int i, j
    cdef char **c_arr1 = to_cstring_array(list_str1)
    cdef char **c_arr2 = to_cstring_array(list_str2)
    try:
        for i in range(len(list_str1)):
            for j in range(len(list_str2)):
                if i != j and strcmp(c_arr1[i], c_arr2[j]) == 0:
                    print(i, j, list_str1[i])
    finally:
        # Release the pointer arrays even if the loop body raises.
        free(c_arr1)
        free(c_arr2)
foo(['hello', 'python', 'world'], ['python', 'rules'])
Warning: The pointer returned by PyUnicode_AsUTF8 is cached in the parent unicode-object. Which has two consequences:
this pointer is only valid as long as the parent unicode-object is alive. Accessing it afterwards leads to undefined behavior (e.g. possible segmentation fault).
The caller of PyUnicode_AsUTF8 isn't responsible for freeing the memory.

Categories