I have a number of C functions, and I would like to call them from Python. Cython seems to be the way to go, but I can't really find an example of how exactly this is done. My C function looks like this:
void calculate_daily ( char *db_name, int grid_id, int year,
double *dtmp, double *dtmn, double *dtmx,
double *dprec, double *ddtr, double *dayl,
double *dpet, double *dpar ) ;
All I want to do is specify the first three parameters (a string and two integers) and recover eight numpy arrays (or Python lists; all the double arrays have N elements). My code assumes that the pointers point to an already allocated chunk of memory. Also, the produced C code needs to link against some external libraries.
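A minimal sketch of what such a wrapper could look like (names such as calc.h and calculate_daily_py are illustrative, not from the original code; it allocates the eight output arrays on the Cython side so the C code writes into memory that numpy owns):
# calc_wrap.pyx -- hypothetical sketch, not the poster's actual code
import numpy as np
cimport numpy as np

cdef extern from "calc.h":   # assumed header name
    void calculate_daily( char *db_name, int grid_id, int year,
                          double *dtmp, double *dtmn, double *dtmx,
                          double *dprec, double *ddtr, double *dayl,
                          double *dpet, double *dpar )

def calculate_daily_py( bytes db_name, int grid_id, int year, int N ):
    # allocate the eight output arrays here; np.empty suffices since
    # the C code fills every element
    cdef np.ndarray[np.double_t, ndim=1] dtmp  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dtmn  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dtmx  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dprec = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] ddtr  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dayl  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dpet  = np.empty( N )
    cdef np.ndarray[np.double_t, ndim=1] dpar  = np.empty( N )
    calculate_daily( db_name, grid_id, year,
                     <double*> dtmp.data,  <double*> dtmn.data,
                     <double*> dtmx.data,  <double*> dprec.data,
                     <double*> ddtr.data,  <double*> dayl.data,
                     <double*> dpet.data,  <double*> dpar.data )
    return dtmp, dtmn, dtmx, dprec, ddtr, dayl, dpet, dpar
The external libraries would then be listed in the libraries= / extra_link_args= arguments of the Extension in the setup script, as in the answer below.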
Here's a tiny but complete example of passing numpy arrays
to an external C function, logically
fc( int N, double* a, double* b, double* z ) # z = a + b
using Cython.
(This is surely well-known to those who know it well.
Comments are welcome.
Last change: 23 Feb 2011, for Cython 0.14.)
First read or skim
Cython build
and Cython with NumPy.
2 steps:
python f-setup.py build_ext --inplace
turns f.pyx and fc.cpp -> f.so, a dynamic library
python test-f.py
import f loads f.so; f.fpy( ... ) calls the C fc( ... ).
python f-setup.py uses distutils to run cython, compile and link:
cython f.pyx -> f.cpp
compile f.cpp and fc.cpp
link f.o fc.o -> f.so,
a dynamic lib that python import f will load.
For students, I'd suggest: make a diagram of these steps,
look through the files below, then download and run them.
(distutils is a huge, convoluted package used to
make Python packages for distribution, and install them.
Here we're using just a small part of it to compile and link to f.so.
This step has nothing to do with Cython, but it can be confusing;
simple mistakes in a .pyx can cause pages of obscure error messages from g++ compile and link.
See also
distutils doc
and/or
SO questions on distutils.)
Like make, setup.py will rerun
cython f.pyx and g++ -c ... f.cpp
if f.pyx is newer than f.cpp.
To clean up, rm -r build/.
An alternative to setup.py would be to run the steps separately, in a script or Makefile:
cython --cplus f.pyx -> f.cpp # see cython -h
g++ -c ... f.cpp -> f.o
g++ -c ... fc.cpp -> fc.o
cc-lib f.o fc.o -> dynamic library f.so.
Modify the cc-lib-mac wrapper
below for your platform and installation: it's not pretty, but small.
For real examples of Cython wrapping C,
look at .pyx files in just about any
SciKit.
See also:
Cython for NumPy users
and SO questions/tagged/cython.
To unpack the following files,
cut-paste the lot to one big file, say cython-numpy-c-demo,
then in Unix (in a clean new directory) run sh cython-numpy-c-demo.
#--------------------------------------------------------------------------------
cat >f.pyx <<\!
# f.pyx: numpy arrays -> extern from "fc.h"
# 3 steps:
# cython f.pyx -> f.c
# link: python f-setup.py build_ext --inplace -> f.so, a dynamic library
# py test-f.py: import f gets f.so, f.fpy below calls fc()
import numpy as np
cimport numpy as np
cdef extern from "fc.h":
    int fc( int N, double* a, double* b, double* z )   # z = a + b

def fpy( N,
         np.ndarray[np.double_t,ndim=1] A,
         np.ndarray[np.double_t,ndim=1] B,
         np.ndarray[np.double_t,ndim=1] Z ):
    """ wrap np arrays to fc( a.data ... ) """
    assert N <= len(A) == len(B) == len(Z)
    fcret = fc( N, <double*> A.data, <double*> B.data, <double*> Z.data )
    # fcret = fc( N, A.data, B.data, Z.data )  grr char*
    return fcret
!
#--------------------------------------------------------------------------------
cat >fc.h <<\!
// fc.h: numpy arrays from cython , double*
int fc( int N, const double a[], const double b[], double z[] );
!
#--------------------------------------------------------------------------------
cat >fc.cpp <<\!
// fc.cpp: z = a + b, numpy arrays from cython
#include "fc.h"
#include <stdio.h>
int fc( int N, const double a[], const double b[], double z[] )
{
    printf( "fc: N=%d a[0]=%f b[0]=%f \n", N, a[0], b[0] );
    for( int j = 0; j < N; j ++ ){
        z[j] = a[j] + b[j];
    }
    return N;
}
!
#--------------------------------------------------------------------------------
cat >f-setup.py <<\!
# python f-setup.py build_ext --inplace
# cython f.pyx -> f.cpp
# g++ -c f.cpp -> f.o
# g++ -c fc.cpp -> fc.o
# link f.o fc.o -> f.so
# distutils uses the Makefile distutils.sysconfig.get_makefile_filename()
# for compiling and linking: a sea of options.
# http://docs.python.org/distutils/introduction.html
# http://docs.python.org/distutils/apiref.html 20 pages ...
# https://stackoverflow.com/questions/tagged/distutils+python
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
# from Cython.Build import cythonize
ext_modules = [Extension(
    name="f",
    sources=["f.pyx", "fc.cpp"],
    # extra_objects=["fc.o"],   # if you compile fc.cpp separately
    include_dirs = [numpy.get_include()],   # .../site-packages/numpy/core/include
    language="c++",
    # libraries=
    # extra_compile_args = "...".split(),
    # extra_link_args = "...".split()
)]

setup(
    name = 'f',
    cmdclass = {'build_ext': build_ext},
    ext_modules = ext_modules,
    # ext_modules = cythonize(ext_modules) ? not in 0.14.1
    # version=
    # description=
    # author=
    # author_email=
)
# test: import f
!
#--------------------------------------------------------------------------------
cat >test-f.py <<\!
#!/usr/bin/env python
# test-f.py
import numpy as np
import f # loads f.so from cc-lib: f.pyx -> f.c + fc.o -> f.so
N = 3
a = np.arange( N, dtype=np.float64 )
b = np.arange( N, dtype=np.float64 )
z = np.ones( N, dtype=np.float64 ) * np.NaN
fret = f.fpy( N, a, b, z )
print "fpy -> fc z:", z
!
#--------------------------------------------------------------------------------
cat >cc-lib-mac <<\!
#!/bin/sh
me=${0##*/}
case $1 in
"" )
    set -- f.cpp fc.cpp ;;   # default: g++ these
-h* | --h* )
    echo "
$me [g++ flags] xx.c yy.cpp zz.o ...
    compiles .c .cpp .o files to a dynamic lib xx.so
"
    exit 1
esac
# Logically this is simple, compile and link,
# but platform-dependent, layers upon layers, gloom, doom
base=${1%.c*}
base=${base%.o}
set -x
g++ -dynamic -arch ppc \
    -bundle -undefined dynamic_lookup \
    -fno-strict-aliasing -fPIC -fno-common -DNDEBUG `# -g` -fwrapv \
    -isysroot /Developer/SDKs/MacOSX10.4u.sdk \
    -I/Library/Frameworks/Python.framework/Versions/2.6/include/python2.6 \
    -I${Pysite?}/numpy/core/include \
    -O2 -Wall \
    "$@" \
    -o $base.so
# undefs: nm -gpv $base.so | egrep '^ *U _+[^P]'
!
# 23 Feb 2011 13:38
The following Cython code from
http://article.gmane.org/gmane.comp.python.cython.user/5625 doesn't require explicit casts and also handles non-contiguous arrays:
def fpy(A):
    cdef np.ndarray[np.double_t, ndim=2, mode="c"] A_c
    A_c = np.ascontiguousarray(A, dtype=np.double)
    fc(&A_c[0,0])
Basically you can write your Cython function such that it allocates the arrays (make sure you cimport numpy as np):
cdef np.ndarray[np.double_t, ndim=1] rr = np.zeros((N,), dtype=np.double)
then pass in the .data pointer of each to your C function. That should work. If you don't need to start with zeros you could use np.empty for a small speed boost.
See the Cython for NumPy Users tutorial in the docs.
You should check out ctypes; it's probably the easiest thing to use if all you want is one function.
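For the original calculate_daily, that ctypes route could look roughly like this (a sketch, assuming the C code has been built into a hypothetical libcalc.so and that the caller knows N; np.ctypeslib.ndpointer makes ctypes check dtype and contiguity):
import ctypes
import numpy as np

lib = ctypes.CDLL("./libcalc.so")          # hypothetical library name
dptr = np.ctypeslib.ndpointer(dtype=np.float64, ndim=1, flags="C_CONTIGUOUS")
lib.calculate_daily.restype = None
lib.calculate_daily.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_int] + [dptr] * 8

N = 365                                    # assumed array length
outs = [np.empty(N, dtype=np.float64) for _ in range(8)]
lib.calculate_daily(b"some.db", 1, 2000, *outs)   # illustrative arguments
dtmp, dtmn, dtmx, dprec, ddtr, dayl, dpet, dpar = outs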
I know it is not appropriate to call a C++ function in a dll/so, e.g. int foo(int&), via ctypes, since there is no equivalent concept of a reference variable in C. But I'd like to show a demo of doing this, and I'm really confused about Python's behavior (version 3.7).
Basically, I have a dll from other people, and I believe it is built with VS C++ and exported using extern "C". Some interfaces in the dll take the form int foo(int&). When I use Python, I need a ctypes layer to wrap it up. For example,
int foo(int&)
is translated to (_foo is loaded by ctypes)
_foo.argtypes = [POINTER(c_int)]
_foo.restype = c_int
and I call foo in Python like
i = c_int(1)
_foo(i)
and IT WORKS.
I further tested this with a demo on Linux, with the following code.
demo.cpp
#include <stdio.h>
extern "C" int foo(int&);
int foo(int& i)
{
    printf("foo is called.\n");
    printf("Got i: %d\n", i);
    i += 10;
    printf("Set i: %d\n", i);
    return 0;
}
build
g++ -Wall -fPIC -c demo.cpp
g++ -shared -Wl,-soname,libdemo.so.1 -o libdemo.so demo.o
So, a libdemo.so is built.
demo.py
#!/usr/bin/env python3
from ctypes import CDLL, c_int,POINTER
l = CDLL("libdemo.so")
_foo = l.foo
_foo.argtypes = [POINTER(c_int)]
_foo.restype = c_int
def foo(i):
    print("Input: i",i)
    _i = c_int(i)
    r = _foo(_i)
    i = _i.value
    print("Output: i", i)
foo(1)
If I run demo.py
LD_LIBRARY_PATH=. ./demo.py
it works fine and gives me the right answer, i.e.
Input: i 1
foo is called.
Got i: 1
Set i: 11
Output: i 11
And if I pass i by byref, changing demo.py to
#!/usr/bin/env python3
from ctypes import CDLL, c_int,POINTER,byref
l = CDLL("libdemo.so")
_demo = l.foo
_demo.argtypes = [POINTER(c_int)]
_demo.restype = c_int
def demo(i):
    print("Input: i",i)
    _i = c_int(i)
    r = _demo(byref(_i))   # calling by byref
    i = _i.value
    print("Output: i", i)
demo(1)
demo(1)
it still works and gives the same output.
So, what is happening under the hood? Why do the above two versions of demo.py have the same output? Can I depend on such behavior, using ctypes to call C++ functions that take parameters by reference?
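(A documented ctypes detail explains the Python half of this: when an argtype is POINTER(c_int), ctypes accepts a plain c_int instance and passes it by reference automatically, so both spellings hand foo the same address. The snippet below shows that conversion hook directly. Whether the C++ half is safe is a separate matter; it relies on the compiler implementing int& as a plain pointer in the ABI, which mainstream compilers do but no standard guarantees.)
from ctypes import POINTER, c_int, byref

i = c_int(1)
# from_param is the hook ctypes runs on each argument against argtypes;
# a bare c_int is accepted where POINTER(c_int) is expected and is
# wrapped by reference, just as byref(i) would be
print(POINTER(c_int).from_param(i))   # <cparam 'P' (0x...)>
print(byref(i))                       # <cparam 'P' (0x...)>, same address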
I want to create a .so file from Python and execute the .so file in C.
To do that I used Cython to convert .pyx to .so.
## print_me.pyx
cimport numpy as cnp
import numpy as np
cimport cython
cpdef public char* print_me(f):
    # I know this numpy line does nothing
    cdef cnp.ndarray[cnp.complex128_t, ndim=3] a = np.zeros((3,3,3), dtype=np.complex128)
    return f
Then I used setup.py to actually convert .pyx to .so
## setup.py
from distutils.core import setup
from Cython.Build import cythonize
import numpy as np
setup(
    ext_modules=cythonize("print_me.pyx"),
    include_dirs=[np.get_include()]
)
By running the following command line, I was able to create the .so file:
python setup.py build_ext --inplace
When I tried to run the .so file using the following C code, I got a segmentation fault.
/* toloadso.c */
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <time.h>
#include <python2.7/Python.h>
int main(void)
{
    // define function
    void *handle;
    char* (*print_me)(PyObject*);
    char *error;
    PyObject* filename = PyString_FromString("hello");
    // load so file
    handle = dlopen("./print_me.so", RTLD_LAZY);
    if (!handle) {
        fprintf(stderr, "%s\n", dlerror());
        exit(EXIT_FAILURE);
    }
    dlerror();
    // get function handler from so file
    print_me = (char* (*)(PyObject*))dlsym(handle, "print_me");
    // check if handler got error
    error = dlerror();
    if (error != NULL) {
        fprintf(stderr, "%s\n", error);
        exit(EXIT_FAILURE);
    }
    // execute loaded function
    printf("%s\n", (char*)(*print_me)(filename));
    dlclose(handle);
    exit(EXIT_SUCCESS);
}
I compiled this .c file with the following command:
gcc -fPIC -I/usr/include/ -o toloadso toloadso.c -lpython2.7 -ldl
(It compiled without error or warning)
When I ran it, I got a segmentation fault:
[root@localhost ~]# ./toloadso
Segmentation fault
If I comment out the following line in print_me.pyx
cdef cnp.ndarray[cnp.complex128_t, ndim=3] a = np.zeros((3,3,3), dtype=np.complex128)
my C code runs without error, but once I uncomment this line, it does not work.
I think that trying to use numpy in Cython generates the error somehow.
How can I fix it?
Thank you so much for your replies.
You must initialize the numpy C API by calling import_array().
Add this line to your cython file:
cnp.import_array()
And as pointed out by @user4815162342 and @DavidW in the comments, you must call Py_Initialize() and Py_Finalize() in main().
Thank you for your help first. I got some useful information, even though it did not directly solve my problem.
Following others' advice, rather than calling the print_me function from the .so file via dlopen, I decided to call it directly from C. This is what I did.
# print_me.pyx
import numpy as np
cimport numpy as np
np.import_array()
cdef public char* print_me(f):
    cdef int[2][4] ll = [[1, 2, 3, 4], [5,6,7,8]]
    cdef np.ndarray[np.int_t, ndim=2] nll = np.zeros((4, 6), dtype=np.int)
    print nll
    nll += 1
    print nll
    return f + str(ll[1][0])
This is my .c file
// main.c
#include <python2.7/Python.h>
#include "print_me.h"
int main()
{
    // initialize python
    Py_Initialize();
    PyObject* filename = PyString_FromString("hello");
    // init the Cython-generated module (Python 2 naming convention)
    initprint_me();
    // call python-oriented function
    printf("%s\n", print_me(filename));
    // finalize python
    Py_Finalize();
    return 0;
}
I then compiled as follows:
# to generate print_me.c and print_me.h
cython print_me.pyx
# to build main.c and print_me.c into main.o and print_me.o
cc -c main.c print_me.c -I/usr/include/python2.7 -I/usr/lib64/python2.7/site-packages/numpy/core/include
# to link .o files
cc -lpython2.7 -ldl main.o print_me.o -o main
# execute main
./main
This prints the following:
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]]
[[1 1 1 1 1 1]
[1 1 1 1 1 1]
[1 1 1 1 1 1]
[1 1 1 1 1 1]]
hello5
Thank you for all of your help again!! :)
I read the answer to this question How to profile cython functions line-by-line, but I can't seem to get it to work with my setup.
I have a cumsum.pyx file:
# cython: profile=True
# cython: linetrace=True
# cython: binding=True
DEF CYTHON_TRACE = 1
def cumulative_sum(int n):
    cdef int s=0, i
    for i in range(n):
        s += i
    return s
I compiled it with:
cython cumsum.pyx
gcc cumsum.c $(pkg-config --cflags --libs python3) -o cumsum.so -shared -fPIC
Then I tried to profile it in ipython:
%load_ext line_profiler
from cumsum import cumulative_sum
%lprun -f cumulative_sum cumulative_sum(100)
I don't get an error message, only an empty profile:
Timer unit: 1e-06 s
Total time: 0 s
File: cumsum.pyx
Function: cumulative_sum at line 6
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     6                                           def cumulative_sum(int n):
     7                                               cdef int s=0, i
     8                                               for i in range(n):
     9                                                   s += i
    10
    11                                               return s
How can I get this to work?
PS: I use CMake, not setup.py, so I would appreciate a build-system-agnostic solution.
The documentation on Cython's "Profiling" already includes an example of how to set the CYTHON_TRACE macro:
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
instead of your DEF CYTHON_TRACE = 1.
It worked when I compiled it using %%cython:
%load_ext cython
%%cython
# cython: profile=True
# cython: linetrace=True
# cython: binding=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
def cumulative_sum(int n):
    cdef int s=0, i
    for i in range(n):
        s += i
    return s
And it showed the profiling:
%load_ext line_profiler
%lprun -f cumulative_sum cumulative_sum(100)
[...]
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     7                                           def cumulative_sum(int n):
     8         1            8      8.0      3.5      cdef int s=0, i
     9         1            3      3.0      1.3      for i in range(n):
    10       100          218      2.2     94.4          s += i
    11         1            2      2.0      0.9      return s
Turns out the issue was that DEF CYTHON_TRACE = 1 doesn't actually set the right constant: DEF defines a Cython compile-time constant, not a C preprocessor macro, so the generated C code never sees CYTHON_TRACE.
Workarounds include:
1. MSeifert's answer, using distutils
2. Changing the gcc line to
gcc cumsum.c $(pkg-config --cflags --libs python3) -o cumsum.so -shared -fPIC -DCYTHON_TRACE=1
3. Making an extra header trace.h and setting the constant there
#define CYTHON_TRACE 1
along with adding the following to cumsum.pyx
cdef extern from "trace.h":
    pass
4. With CMake, adding
add_definitions(-DCYTHON_TRACE)
My question is virtually identical to this one. However, I'm looking for a solution that uses Cython instead of ctypes.
I'm wrapping some legacy F77 code for use in Python. I've written wrappers for the subroutines, using a module and iso_c_binding, which I can then use from Cython. This works well for calling the subroutines, passing data as arguments, etc. However, I'd now like to access the common block data in the library directly from Cython.
So my question is two parts:
A) Can I access the common block data using Cython directly, as in the ctypes example above? If so, how? I gather I'm supposed to reference the common block as a struct using cdef extern, but I'm not sure how to point to the library data.
B) Would I be better off, and not sacrifice performance, by simply writing setter/getter functions in my wrapper module? This was suggested in the responses to the ctypes question referenced above.
A) After trial and error, it seems that the following code works with python3.5/gfortran4.8/cython0.25 on Linux x86_64, so could you try it to see whether it works for you?
fort.f90:
module mymod
    use iso_c_binding
    implicit none
contains
    subroutine fortsub() bind(c)
        double precision x( 2 )
        real y( 3 )
        real z( 4 )
        integer n( 5 )
        common /mycom/ x, y, z, n
        data z / 100.0, 200.0, 300.0, 400.0 /   !! initialize only z(:) (for check)
        print *, "(fort) x(:) = ", x(:)
        print *, "(fort) y(:) = ", y(:)
        print *, "(fort) z(:) = ", z(:)
        print *, "(fort) n(:) = ", n(:)
    end subroutine
end module
fort.h:
extern void fortsub( void );   /* or fortsub_() if bind(c) is not used */
extern struct Mycom {
    double x[ 2 ];
    float y[ 3 ];
    float z[ 4 ];
    int n[ 5 ];
} mycom_;
test.pyx:
cdef extern from "fort.h":
    void fortsub()
    struct Mycom:
        double x[ 2 ]
        float y[ 3 ]
        int n[ 5 ]
    Mycom mycom_

def go():
    mycom_.x[:] = [ 1.0, 2.0 ]
    mycom_.y[:] = [ 11.0, 12.0, 13.0 ]
    mycom_.n[:] = [ 1, 2, 3, 4, 5 ]
    fortsub()
setup.py:
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
from os import system

system( 'gfortran -c fort.f90 -o fort.o -fPIC' )

ext_modules = [Extension( 'test', ['test.pyx'],
                          extra_compile_args = ['-fPIC'],
                          extra_link_args = ['fort.o', '-lgfortran'] )]

setup( name = 'test',
       cmdclass = {'build_ext': build_ext},
       ext_modules = ext_modules )
compile:
$ python setup.py build_ext --inplace
test:
$ python
>>> import test
>>> test.go()
(fort) x(:) = 1.0000000000000000 2.0000000000000000
(fort) y(:) = 11.0000000 12.0000000 13.0000000
(fort) z(:) = 100.000000 200.000000 300.000000 400.000000
(fort) n(:) = 1 2 3 4 5
Here, please note that I did not include z in test.pyx to check whether we can declare only selected variables in the common block. Also, some compiler options may be necessary to make the alignment of common variables consistent between C and Fortran (this YoLinux page may be useful).
B) I guess it would depend on the amount of computation performed by the Fortran routine... If the routine is heavy (takes at least a few minutes), the copy operations in getters/setters may be no problem. On the other hand, if the routine finishes quickly while being called a huge number of times, then the overhead may be non-negligible...
For efficiency, it might be useful to pass pointer variables from Cython to Fortran, get the address of selected common variables somehow via c_loc(), and access them via pointers on the Cython side directly (though I'm still not sure whether it works...). But if there is no problem with memory alignment (for the compiler used), it may be more straightforward to use structs as above.
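A rough sketch of that pointer route on the Cython side, assuming a hypothetical Fortran helper get_x_ptr(p) bind(c) that stores c_loc(x) into its argument (untested; the helper name and the double** mapping are assumptions, not from the answer above):
cdef extern from *:
    # assumed C signature of the hypothetical Fortran helper:
    #   subroutine get_x_ptr(p) bind(c)   ! with p = c_loc(x)
    void get_x_ptr(double** p)

def first_x():
    cdef double* xp
    get_x_ptr(&xp)   # xp now points directly at the common-block array x
    return xp[0]     # no copy: reads the Fortran data in place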
Since you're already familiar with modular programming, I suggest you put the common block in a module and import the variables when access is required:
module common_block_mod
    common /myCommonBlock/ var, var2, var3
    save   ! This is not necessary in Fortran 2008+
end module common_block_mod
You can now import the variables when access is required.
subroutine foo()
    use common_block_mod
    !.. do stuff
end subroutine foo
You can read more about this approach at http://iprc.soest.hawaii.edu/users/furue/improve-fortran.html
I was trying to measure the performance of Python dictionaries, cythonized Python dictionaries and a cythonized C++ std::unordered_map, doing only an init procedure. Since the cythonized C++ code is compiled, I thought it should be faster than the pure Python version. I ran a test using 4 different scenario/notation options:
Cython CPP code using std::unordered_map and Cython book notation (defining a pair and using insert method)
Cython CPP code using std::unordered_map and python notation (map[key] = value)
Cython code (typed code) using python dictionaries (map[key] = value)
Pure python code
I was expecting to see the Cython code outperform the pure Python code, but in this case there is no improvement. What could be the reason? I'm using Cython-0.22, python-3.4 and g++-4.8.
I got these execution times (seconds) using timeit:
Cython CPP book notation -> 15.696417249999968
Cython CPP python notation -> 16.481350984999835
Cython python notation -> 18.585355018999962
Pure python -> 18.162724677999904
The code is below; you can run it with:
cython -a map_example.pyx
python3 setup_map.py build_ext --inplace
python3 use_map_example.py
map_example.pyx
from libcpp.unordered_map cimport unordered_map
from libcpp.pair cimport pair
cpdef int example_cpp_book_notation(int limit):
    cdef unordered_map[int, int] mapa
    cdef pair[int, int] entry
    cdef int i
    for i in range(limit):
        entry.first = i
        entry.second = i
        mapa.insert(entry)
    return 0

cpdef int example_cpp_python_notation(int limit):
    cdef unordered_map[int, int] mapa
    cdef pair[int, int] entry
    cdef int i
    for i in range(limit):
        mapa[i] = i
    return 0

cpdef int example_ctyped_notation(int limit):
    mapa = {}
    cdef int i
    for i in range(limit):
        mapa[i] = i
    return 0
setup_map.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
from Cython.Distutils import build_ext
import os

os.environ["CC"] = "g++"
os.environ["CXX"] = "g++"

modules = [Extension("map_example",
                     ["map_example.pyx"],
                     language = "c++",
                     extra_compile_args=["-std=c++11"],
                     extra_link_args=["-std=c++11"])]

setup(name="map_example",
      cmdclass={"build_ext": build_ext},
      ext_modules=modules)
use_map_example.py
import map_example
C_MAXV = 100000000
C_NUMBER = 10
def cython_cpp_book_notation():
    x = 1
    while(x<C_MAXV):
        map_example.example_cpp_book_notation(x)
        x *= 10

def cython_cpp_python_notation():
    x = 1
    while(x<C_MAXV):
        map_example.example_cpp_python_notation(x)
        x *= 10

def cython_ctyped_notation():
    x = 1
    while(x<C_MAXV):
        map_example.example_ctyped_notation(x)
        x *= 10

def pure_python():
    x = 1
    while(x<C_MAXV):
        map_a = {}
        for i in range(x):
            map_a[i] = i
        x *= 10
    return 0

if __name__ == '__main__':
    import timeit
    print("Cython CPP book notation")
    print(timeit.timeit("cython_cpp_book_notation()", setup="from __main__ import cython_cpp_book_notation", number=C_NUMBER))
    print("Cython CPP python notation")
    print(timeit.timeit("cython_cpp_python_notation()", setup="from __main__ import cython_cpp_python_notation", number=C_NUMBER))
    print("Cython python notation")
    print(timeit.timeit("cython_ctyped_notation()", setup="from __main__ import cython_ctyped_notation", number=C_NUMBER))
    print("Pure python")
    print(timeit.timeit("pure_python()", setup="from __main__ import pure_python", number=C_NUMBER))
My timings from your code (after correcting that python *10 indent :) ) are
Cython CPP book notation
21.617647969018435
Cython CPP python notation
21.229907534987433
Cython python notation
24.44413448998239
Pure python
23.609809526009485
Basically everyone is in the same ballpark, with a modest edge for the CPP versions.
Nothing special about my machine: the usual Ubuntu 14.10, Cython 0.20.2, Python 3.4.2.
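One follow-up worth trying: both containers spend much of this loop growing and rehashing, so the C++ map gets little chance to pull ahead. If the libcpp.unordered_map declarations shipped with your Cython version expose reserve() (recent versions do; check the bundled unordered_map.pxd), pre-sizing the table skips the rehashes. A sketch:
# sketch; assumes reserve() is declared in your Cython's
# libcpp/unordered_map.pxd
from libcpp.unordered_map cimport unordered_map

cpdef int example_cpp_reserved(int limit):
    cdef unordered_map[int, int] mapa
    cdef int i
    mapa.reserve(limit)   # allocate buckets once, up front
    for i in range(limit):
        mapa[i] = i
    return 0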