Profiling C extension which calls back into Python - python

Suppose for the purpose of this discussion, I have a function like this:
PyObject* tuple_from_dict(PyObject* ftype, PyObject* factory, PyObject* values) {
PyObject* ttype = PyTuple_GetItem(factory, 1);
PyObject* fmapping = PyTuple_GetItem(factory, 2);
PyObject* key;
PyObject* value;
Py_ssize_t pos = 0;
Py_ssize_t arg_len = 0;
Py_ssize_t field;
PyObject* result;
if (PyDict_Size(fmapping) == 0) {
result = PyObject_Call(ttype, PyTuple_New(0), NULL);
Py_INCREF(result);
return result;
}
while (PyDict_Next(fmapping, &pos, &key, &value)) {
field = PyLong_AsSsize_t(value);
if (field > arg_len) {
arg_len = field;
}
}
PyObject* args = PyTuple_New(arg_len + 1);
pos = 0;
while (pos < arg_len + 1) {
Py_INCREF(Py_None);
PyTuple_SetItem(args, pos, Py_None);
pos++;
}
pos = 0;
while (PyDict_Next(values, &pos, &key, &value)) {
field = PyLong_AsSsize_t(PyDict_GetItem(fmapping, key));
PyTuple_SetItem(args, field, value);
}
result = PyObject_Call(ttype, args, NULL);
if (result) {
Py_INCREF(result);
}
return result;
}
It doesn't matter what exactly does it do, the important point is that it calls PyObject_Call(...), which I suspect to be slow. But, the slowness we are talking about would not be noticeable on per call basis (the code overall does couple thousands calls per 1/100 of second). So... I need an aggregate, or some way of measuring the time with very high precision (so, clock_t doesn't seem like it's a good level of precision).
It's OK if the solution will work only on Linux. It is also OK if I could somehow slow everything down, but get a more precise measurement of the timing in question.

Is clock_gettime() useful? It is POSIX interface to high resolution timer. This post provides this example usage.
#include <iostream>
#include <time.h>
using namespace std;
timespec diff(timespec start, timespec end);
int main()
{
timespec time1, time2;
int temp;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
for (int i = 0; i< 242000000; i++)
temp+=temp;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
cout<<diff(time1,time2).tv_sec<<":"<<diff(time1,time2).tv_nsec<<endl;
return 0;
}
timespec diff(timespec start, timespec end)
{
timespec temp;
if ((end.tv_nsec-start.tv_nsec)<0) {
temp.tv_sec = end.tv_sec-start.tv_sec-1;
temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
} else {
temp.tv_sec = end.tv_sec-start.tv_sec;
temp.tv_nsec = end.tv_nsec-start.tv_nsec;
}
return temp;
}

Related

Python ctypes cannot get long string value returned from .so file

I am learning C and trying to import a .so into my python file for higher performance by using a python package ctypes. So everything going well until I had a hard time when trying to get a string returned from .so file.
C code:
char *convert_to_16(char *characters, int n){
char sub_buffer[3];
char code[3];
char *buffer = (char*)malloc(sizeof(characters) * 2);
for(int i=0; i < n; i++){
strncpy(code, characters+i, 1);
sprintf(sub_buffer, "%x", *code);
strncat(buffer, sub_buffer, 2);
}
return buffer;
}
// main() only for test
int main(){
char param[] = "ABCDEFGHTUIKLL";
printf("%s\n", param);
int length = strlen(param);
printf("%s\n", convert_to_16(param, length));
}
It runs well with output:
41424344454647485455494b4c4c
Python code :
c_convert_to_16 = ctypes.CDLL('./convert_to_16.so').convert_to_16
c_convert_to_16.restype = ctypes.c_char_p
a_string = "ABCDEFGHTUIKLL"
new_16base_string = c_convert_to_16(a_string, len(a_string))
print new_16base_string
It runs but only returns two characters:
41
I read the official doc and set restype as ctypes.c_char_p, and try to set it to other values. But it seems it's the only option, just oddly only two characters were returned.
Is it the problem of my ctypes configuration or my C wasn't written correctly?
Many thanks.
I don't know much about ctypes in python but you should create your string like that c_char_p("ABCDEFGHTUIKLL").
And maybe tell what argument take your function c_convert_to_16.argtypes = [c_char_p, c_size_t]
This will fix your undefined behavior in C:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *convert_to_16(char const *array, size_t const len);
char *convert_to_16(char const *array, size_t const len) {
size_t const len_buffer = len * 2 + 1;
char *buffer = malloc(len_buffer);
if (buffer == NULL) {
return NULL;
}
size_t used = 0;
for (size_t i = 0; i < len; i++) {
if (len_buffer < used || len_buffer - used < 3) {
free(buffer);
return NULL;
}
int ret = snprintf(buffer + used, 3, "%.2x", (unsigned char)array[i]);
if (ret != 2) {
free(buffer);
return NULL;
}
used += 2;
}
return buffer;
}
int main(void) {
char const param[] = "ABCDEFGHTUIKLL";
printf("%s\n", param);
char *ret = convert_to_16(param, sizeof param - 1);
if (ret != NULL) {
printf("%s\n", ret);
}
free(ret);
}

How to Copy PyObject*?

I am calling a Python Function from a C++ function like below.
void CPPFunction(PyObject* pValue)
{
...
pValue = PyObject_CallObject(PythonFunction, NULL);
...
}
int main()
{
PyObject *pValue = NULL;
CPPFunction(PValue);
int result_of_python_function = Pylong_aslong(PValue);
}
I would like to access the return value of python function outside the CPPFunction. since scope of PObject* returned by PyObject_CallObject is within CPPFunction, how to access the value outside CPPFunction?
Return it from the function like you would anywhere else.
PyObject* CPPFunction()
{
// ...
PyObject* pValue = PyObject_CallObject(PythonFunction, NULL);
// ...
return pValue;
}
int main()
{
PyObject *value = CPPFunction();
int result_of_python_function = Pylong_aslong(value);
}
make following changes and u can access the return value of python function outside the CPPFunction.Hope this helps:
PyObject* CPPFunction(PyObject* PythonFunction) // changes return type from void to PyObject and pass PythonFunction to be called
{
pValue = PyObject_CallObject(PythonFunction, NULL);
return pValue;
}
int main()
{
PyObject *pValue = NULL;
pValue = CPPFunction(PythonFunction); // assign return value from CPPFunction call to PyObject pointer pvalue
long int result_of_python_function = Pylong_aslong(PValue);// data type changed from int to long int
cout << result_of_python_function << endl; // just printing the python result
}

Python C-Extension segfaults when accessing through tp_getset

I'm trying to write a C-Extension for python. What I'd like to write is a ModPolynomial class which represents a polynomial on (Z/nZ)[x]/x^r-1[even though you may answer to my question without knowing anything of such polynomials].
I've written some code, which seems to work. Basically I just store three PyObject* in my ModPoly structure. Now I'd like to add the storage for the coefficients of the polynomial.
Since I want the coefficients to be read-only, I'd like to add a getter/setter pair of functions through PyGetSetDef. But when I access the getter from python(e.g print pol.coefficients) I receive a Segmentation Fault.
The original code, without the "coefficients" can be found here.
The code with the coefficients is here.
I hope someone of you can tell me where I'm doing wrong here.
By the way, also comments on the code are welcome. This is my first extension and I know that I'm probably doing things quite badly.
As ecatmur says in the comments PyVarObject store a certain number of "slots" at the end of the struct. So I've decided to avoid them.
The relevant code is:
typedef struct {
PyObject_HEAD
/* Type specific fields */
Py_ssize_t ob_size;
PyObject **ob_item;
Py_ssize_t allocated;
PyObject *r_modulus;
PyObject *n_modulus;
PyObject *degree;
} ModPoly;
static PyObject *
ModPoly_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
ModPoly *self;
self = (ModPoly *)type->tp_alloc(type, 0);
if (self != NULL) {
[...]
self->ob_size = 0;
self->ob_item = NULL;
self->allocated = 0;
}
return (PyObject *)self;
}
static int
ModPoly_init(ModPoly *self, PyObject *args, PyObject *kwds)
{
PyObject *r_modulus=NULL, *n_modulus=NULL, *coefs=NULL, *tmp;
PyObject **tmp_ar;
static char *kwlist[] = {"r_modulus", "n_modulus", "coefficients", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O", kwlist,
&r_modulus, &n_modulus, &coefs))
return -1;
[...]
// The polynomial defaults to "x", so the coefficients should be [0, 1].
tmp_ar = (PyObject **)malloc(2 * sizeof(PyObject*));
if (tmp_ar == NULL) {
Py_DECREF(self->r_modulus);
Py_DECREF(self->n_modulus);
Py_DECREF(self->degree);
return -1;
}
tmp_ar[0] = PyInt_FromLong(0);
if (tmp_ar[0] != NULL) {
tmp_ar[1] = PyInt_FromLong(1);
}
if (tmp_ar[0] == NULL || tmp_ar[0] == NULL) {
Py_DECREF(self->r_modulus);
Py_DECREF(self->n_modulus);
Py_DECREF(self->degree);
Py_XDECREF(tmp_ar[0]);
Py_XDECREF(tmp_ar[1]);
free(tmp_ar);
return -1;
}
self->ob_size = 2;
self->allocated = 2;
return 0;
}
[...]
static PyObject *
ModPoly_getcoefs(ModPoly *self, void *closure)
{
printf("here"); // "here" is never printed
PyTupleObject *res=(PyTupleObject*)PyTuple_New(self->ob_size);
Py_ssize_t i;
PyObject *tmp;
if (res == NULL)
return NULL;
for (i=0; i < self->ob_size; i++) {
tmp = self->ob_item[i];
Py_INCREF(tmp);
PyTuple_SET_ITEM(res, i, tmp);
}
return (PyObject *)res;
}
static PyObject *
ModPoly_setcoefs(ModPoly *self, PyObject *value, void* closure)
{
PyErr_SetString(PyExc_AttributeError,
"Cannot set the coefficients of a polynomial.");
return NULL;
}
[...]
static PyGetSetDef ModPoly_getsetters[] = {
{"coefficients",
(getter)ModPoly_getcoefs, (setter)ModPoly_setcoefs,
"The polynomial coefficients.", NULL},
{NULL, 0, 0, NULL, NULL}
};
static PyTypeObject ModPolyType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
[...]
ModPoly_members, /* tp_members */
ModPoly_getsetters, /* tp_getset */
0, /* tp_base */
[...]
};
[...]
edit
I tried to reimplement the getter instruction by instruction, and I understood what I wasn't doing. In the ModPoly_init function I create the tmp_ar where I store the coefficients, but I do not assign it to self->ob_item.
-facepalm-
You only seem to be assigning to ModPoly.ob_item in ModPoly_new() (setting it to NULL).
ModPoly_getcoefs() then dereferences the null pointer, which would give you your segfault. It looks like you intended to assign to ob_item in ModPoly_init(), but don't actually get around to doing so.

extending global c variable array with swig and python

I have a global variable array in c that I'd like to pull into python. And I'm having difficulties with varout typemap:
/* example.c */
int foo[] = {0, 1};
And here is the very vanilla interface:
/* example.i */
%module example
%{
extern int foo[2];
%}
%typemap(varout) int foo[] {
int i;
//$1, $1_dim0, $1_dim1
$result = PyList_New($1_dim0);
for (i = 0; i < $1_dim0; i++) {
PyObject *o = PyInt_FromLong((double) $1[i]);
PyList_SetItem($result,i,o);
}
}
%include "example.c"
When I try to build it with the following SConstruct:
import distutils.sysconfig
env = Environment(SWIGFLAGS='-python -shadow -Wall'.split(),
CPPPATH=[distutils.sysconfig.get_python_inc()],
SHLIBPREFIX="")
env.SharedLibrary('_example.so', ['example.c', 'example.i'])
$1_dim0 special variable is not populated, resulting in the following non-compilable code in example_wrap.c:
SWIGINTERN PyObject *Swig_var_foo_get(void) {
PyObject *pyobj = 0;
{
int i;
//foo, , foo_dim1
pyobj = PyList_New();
for (i = 0; i < ; i++) {
PyObject *o = PyInt_FromLong((double) foo[i]);
PyList_SetItem(pyobj,i,o);
}
}
return pyobj;
}
So clearly the typemap match has happened, but dimensionality of array is missing. What am I missing? Hard coding the dimension does works.
In general, is there any way to extend global cvar variables with swig?
$ swig -version
SWIG Version 2.0.4
Compiled with g++ [i686-pc-linux-gnu]
Configured options: +pcre
Please see http://www.swig.org for reporting bugs and further information
You're almost there with your varout typemap. You need to make two minor changes:
You need to add the size ANY to the int foo[] typemap:
%typemap(varout) int foo[ANY] {
int i;
//$1, $1_dim0, $1_dim1
$result = PyList_New($1_dim0);
for (i = 0; i < $1_dim0; i++) {
PyObject *o = PyInt_FromLong((double) $1[i]);
PyList_SetItem($result,i,o);
}
}
This makes sure your typemap is a match for arrays of (any) known size, not just equivalent to int *foo.
You need to modify example.c to make the size of foo clearer. It's legal and correct C as it stands but tricky to deduce the size of the array unless you happen to be a complete C compiler. Changing it to:
int foo[2] = {0, 1};
is sufficient to make sure that it matches the varout typemap.
With those two changes the generated code works as you'd hope:
SWIGINTERN PyObject *Swig_var_foo_get(void) {
PyObject *pyobj = 0;
{
int i;
//foo, 2, foo_dim1
pyobj = PyList_New(2);
for (i = 0; i < 2; i++) {
PyObject *o = PyInt_FromLong((double) foo[i]);
PyList_SetItem(pyobj,i,o);
}
}
return pyobj;
}
is what gets generated on my machine with those changes.
For those like me who ponders what to do with arrays of non-simple types -- here is one way to do it:
The non-simple type:
typedef struct {
int a;
float b;
} Foo;
and a global array:
extern Foo *foov[40];
%typemap(varout) Foo *foov[ANY] {
int i;
$result = PyList_New($1_dim0);
for (i = 0; i < $1_dim0; i++) {
PyObject *o = SWIG_NewPointerObj($1[i], SWIGTYPE_p_Foo, 0);
PyList_SetItem($result, i, o);
}
}
Just shared this since it took me forever to find out, and this article helped. Just needed to find out how to allocate the SWIG version of my non-simple type -- found that buried here:
http://www.swig.org/Doc2.0/Python.html#Python_nn64

Python extension module with variable number of arguments

I am trying to figure out how in C extension modules to have a variable (and maybe) quite large number of arguments to a function.
Reading about PyArg_ParseTuple it seems you have to know how many to accept, some mandatory and some optional but all with their own variable. I was hoping PyArg_UnpackTuple would be able to handle this but it seems to just give me bus errors when I try and use it in what appears to be the wrong way.
As an example take the following python code that one might want to make into an extension module (in C).
def hypot(*vals):
if len(vals) !=1 :
return math.sqrt(sum((v ** 2 for v in vals)))
else:
return math.sqrt(sum((v ** 2 for v in vals[0])))
This can be called with any number of arguments or iterated over, hypot(3,4,5), hypot([3,4,5]), and hypot(*[3,4,5]) all give the same answer.
The start of my C function looks like this
static PyObject *hypot_tb(PyObject *self, PyObject *args) {
// lots of code
// PyArg_ParseTuple or PyArg_UnpackTuple
}
Many thinks to yasar11732. Here for the next guy is a fully working extension module (_toolboxmodule.c) that simply takes in any number or integer arguments and returns a list made up of those arguments (with a poor name). A toy but illustrates what needed to be done.
#include <Python.h>
int ParseArguments(long arr[],Py_ssize_t size, PyObject *args) {
/* Get arbitrary number of positive numbers from Py_Tuple */
Py_ssize_t i;
PyObject *temp_p, *temp_p2;
for (i=0;i<size;i++) {
temp_p = PyTuple_GetItem(args,i);
if(temp_p == NULL) {return NULL;}
/* Check if temp_p is numeric */
if (PyNumber_Check(temp_p) != 1) {
PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
return NULL;
}
/* Convert number to python long and than C unsigned long */
temp_p2 = PyNumber_Long(temp_p);
arr[i] = PyLong_AsUnsignedLong(temp_p2);
Py_DECREF(temp_p2);
}
return 1;
}
static PyObject *hypot_tb(PyObject *self, PyObject *args)
{
Py_ssize_t TupleSize = PyTuple_Size(args);
long *nums = malloc(TupleSize * sizeof(unsigned long));
PyObject *list_out;
int i;
if(!TupleSize) {
if(!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
return NULL;
}
if (!(ParseArguments(nums, TupleSize, args)) {
free(nums);
return NULL;
}
list_out = PyList_New(TupleSize);
for(i=0;i<TupleSize;i++)
PyList_SET_ITEM(list_out, i, PyInt_FromLong(nums[i]));
free(nums);
return (PyObject *)list_out;
}
static PyMethodDef toolbox_methods[] = {
{ "hypot", (PyCFunction)hypot_tb, METH_VARARGS,
"Add docs here\n"},
// NULL terminate Python looking at the object
{ NULL, NULL, 0, NULL }
};
PyMODINIT_FUNC init_toolbox(void) {
Py_InitModule3("_toolbox", toolbox_methods,
"toolbox module");
}
In python then it is:
>>> import _toolbox
>>> _toolbox.hypot(*range(4, 10))
[4, 5, 6, 7, 8, 9]
I had used something like this earlier. It could be a bad code as I am not an experienced C coder, but it worked for me. The idea is, *args is just a Python tuple, and you can do anything that you could do with a Python tuple. You can check http://docs.python.org/c-api/tuple.html .
int
ParseArguments(unsigned long arr[],Py_ssize_t size, PyObject *args) {
/* Get arbitrary number of positive numbers from Py_Tuple */
Py_ssize_t i;
PyObject *temp_p, *temp_p2;
for (i=0;i<size;i++) {
temp_p = PyTuple_GetItem(args,i);
if(temp_p == NULL) {return NULL;}
/* Check if temp_p is numeric */
if (PyNumber_Check(temp_p) != 1) {
PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
return NULL;
}
/* Convert number to python long and than C unsigned long */
temp_p2 = PyNumber_Long(temp_p);
arr[i] = PyLong_AsUnsignedLong(temp_p2);
Py_DECREF(temp_p2);
if (arr[i] == 0) {
PyErr_SetString(PyExc_ValueError,"Zero doesn't allowed as argument.");
return NULL;
}
if (PyErr_Occurred()) {return NULL; }
}
return 1;
}
I was calling this function like this:
static PyObject *
function_name_was_here(PyObject *self, PyObject *args)
{
Py_ssize_t TupleSize = PyTuple_Size(args);
Py_ssize_t i;
struct bigcouples *temp = malloc(sizeof(struct bigcouples));
unsigned long current;
if(!TupleSize) {
if(!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
free(temp);
return NULL;
}
unsigned long *nums = malloc(TupleSize * sizeof(unsigned long));
if(!ParseArguments(nums, TupleSize, args)){
/* Make a cleanup and than return null*/
return null;
}

Categories