Python C-Extension segfaults when accessing through tp_getset - python

I'm trying to write a C-Extension for python. What I'd like to write is a ModPolynomial class which represents a polynomial on (Z/nZ)[x]/x^r-1[even though you may answer to my question without knowing anything of such polynomials].
I've written some code, which seems to work. Basically I just store three PyObject* in my ModPoly structure. Now I'd like to add the storage for the coefficients of the polynomial.
Since I want the coefficients to be read-only, I'd like to add a getter/setter pair of functions through PyGetSetDef. But when I access the getter from python(e.g print pol.coefficients) I receive a Segmentation Fault.
The original code, without the "coefficients" can be found here.
The code with the coefficients is here.
I hope one of you can tell me what I'm doing wrong here.
By the way, also comments on the code are welcome. This is my first extension and I know that I'm probably doing things quite badly.
As ecatmur says in the comments, a PyVarObject stores a certain number of "slots" at the end of the struct, so I've decided to avoid them.
The relevant code is:
/* Instance layout of the ModPoly extension type. */
typedef struct {
PyObject_HEAD
/* Type specific fields */
/* Number of coefficients; the getter builds a tuple of this length. */
Py_ssize_t ob_size;
/* Array of coefficient objects, indexed 0..ob_size-1 by the getter. */
PyObject **ob_item;
/* Set together with ob_size in init; presumably the capacity of ob_item -- TODO confirm. */
Py_ssize_t allocated;
/* Object references released with Py_DECREF on init failure. */
PyObject *r_modulus;
PyObject *n_modulus;
PyObject *degree;
} ModPoly;
/*
 * tp_new for ModPoly: allocates the instance through the type's tp_alloc
 * and starts it with empty coefficient storage.
 *
 * Returns the new object, or NULL when tp_alloc fails.
 */
static PyObject *
ModPoly_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
ModPoly *self;
self = (ModPoly *)type->tp_alloc(type, 0);
if (self != NULL) {
[...]
/* No coefficients yet: ob_item stays NULL until some later code assigns it. */
self->ob_size = 0;
self->ob_item = NULL;
self->allocated = 0;
}
return (PyObject *)self;
}
/*
 * tp_init for ModPoly.
 *
 * Parses (r_modulus, n_modulus[, coefficients]) and builds the default
 * coefficient array [0, 1] (the polynomial "x").
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
ModPoly_init(ModPoly *self, PyObject *args, PyObject *kwds)
{
    PyObject *r_modulus=NULL, *n_modulus=NULL, *coefs=NULL, *tmp;
    PyObject **tmp_ar;
    static char *kwlist[] = {"r_modulus", "n_modulus", "coefficients", NULL};
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O", kwlist,
                                     &r_modulus, &n_modulus, &coefs))
        return -1;
[...]
    // The polynomial defaults to "x", so the coefficients should be [0, 1].
    tmp_ar = malloc(2 * sizeof *tmp_ar);
    if (tmp_ar == NULL) {
        /* malloc does not set a Python exception; -1 must come with one. */
        PyErr_NoMemory();
        Py_DECREF(self->r_modulus);
        Py_DECREF(self->n_modulus);
        Py_DECREF(self->degree);
        return -1;
    }
    tmp_ar[0] = PyInt_FromLong(0);
    /* Always give slot 1 a defined value so the cleanup below never reads
     * an uninitialized pointer. */
    tmp_ar[1] = (tmp_ar[0] != NULL) ? PyInt_FromLong(1) : NULL;
    /* Check BOTH slots (the original tested tmp_ar[0] twice). */
    if (tmp_ar[0] == NULL || tmp_ar[1] == NULL) {
        Py_DECREF(self->r_modulus);
        Py_DECREF(self->n_modulus);
        Py_DECREF(self->degree);
        Py_XDECREF(tmp_ar[0]);
        Py_XDECREF(tmp_ar[1]);
        free(tmp_ar);
        return -1;
    }
    /* NOTE(review): tmp_ar is never stored in self->ob_item here, so the
     * getter still sees the NULL set in ModPoly_new. This omission is the
     * defect the surrounding post discusses; it is left as posted. */
    self->ob_size = 2;
    self->allocated = 2;
    return 0;
}
[...]
/*
 * Getter for the "coefficients" attribute.
 *
 * Builds a new tuple of self->ob_size elements, each a new reference to the
 * corresponding entry of self->ob_item. Returns NULL if the tuple cannot be
 * created.
 */
static PyObject *
ModPoly_getcoefs(ModPoly *self, void *closure)
{
    PyTupleObject *coefs;
    Py_ssize_t idx = 0;

    printf("here"); // "here" is never printed
    coefs = (PyTupleObject *)PyTuple_New(self->ob_size);
    if (coefs == NULL) {
        return NULL;
    }
    while (idx < self->ob_size) {
        PyObject *item = self->ob_item[idx];
        /* PyTuple_SET_ITEM takes over a reference, so create one first. */
        Py_INCREF(item);
        PyTuple_SET_ITEM(coefs, idx, item);
        idx++;
    }
    return (PyObject *)coefs;
}
/*
 * Setter for the read-only "coefficients" attribute.
 *
 * Always fails: raises AttributeError and returns -1. The function is
 * installed through a (setter) cast, and a setter reports failure with an
 * int -1, not with a NULL object pointer.
 */
static int
ModPoly_setcoefs(ModPoly *self, PyObject *value, void* closure)
{
    PyErr_SetString(PyExc_AttributeError,
                    "Cannot set the coefficients of a polynomial.");
    return -1;
}
[...]
/* Attribute table for ModPoly: one computed attribute plus the sentinel. */
static PyGetSetDef ModPoly_getsetters[] = {
    {
        "coefficients",                   /* name */
        (getter)ModPoly_getcoefs,         /* get */
        (setter)ModPoly_setcoefs,         /* set */
        "The polynomial coefficients.",   /* doc */
        NULL                              /* closure */
    },
    {NULL, NULL, NULL, NULL, NULL}        /* sentinel */
};
/* Type object for ModPoly; slots elided in this excerpt are marked [...]. */
static PyTypeObject ModPolyType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
[...]
ModPoly_members, /* tp_members */
/* Table that exposes the "coefficients" getter/setter pair. */
ModPoly_getsetters, /* tp_getset */
0, /* tp_base */
[...]
};
[...]
edit
I tried to reimplement the getter instruction by instruction, and I understood what I wasn't doing. In the ModPoly_init function I create the tmp_ar where I store the coefficients, but I do not assign it to self->ob_item.
-facepalm-

You only seem to be assigning to ModPoly.ob_item in ModPoly_new() (setting it to NULL).
ModPoly_getcoefs() then dereferences the null pointer, which would give you your segfault. It looks like you intended to assign to ob_item in ModPoly_init(), but don't actually get around to doing so.

Related

Creating a module subclass in a Python extension

I am trying to create a Python extension module with multi-phase initialization, following the advice I got from a previous question. PEP 489 suggests that it is preferable for the Py_mod_create function to return a module subclass, which presumably means a subclass of PyModule, but I cannot figure out how to do this. In all my attempts, the module segfaults when it is imported. It works fine if Py_mod_create returns some other object, (one which is not a subclass of PyModule), but I am not sure if this will cause problems in future, since isinstance(mymodule, types.ModuleType) returns false in this case.
Following the docs on subclassing built-in types, I set tp_base to PyModule_Type, and my tp_init function calls PyModule_Type.tp_init. The docs also suggest that my structure should contain the superclass structure at the beginning, which in this case is PyModuleObject. This structure is not in the public Python header files (it is defined in moduleobject.c in the Python sources), so for now I copied and pasted the definitions of the PyModuleObject fields at the start of my structure. The complete code looks like this:
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
/*
 * Instance struct for the custom module type. The fields are a hand copy of
 * PyModuleObject, so their order must not be changed.
 */
struct testmod_s {
// Fields copied from PyModuleObject in moduleobject.c
PyObject_HEAD
PyObject *md_dict;
struct PyModuleDef *md_def;
void *md_state;
PyObject *md_weaklist;
PyObject *md_name;
};
static int testmod_init(PyObject *self, PyObject *args, PyObject *kwds);
static PyObject *testmod_create(PyObject *spec, PyModuleDef *def);
static PyModuleDef_Slot testmod_slots[] = {
{Py_mod_create, testmod_create},
{0, 0} /* Sentinel */
};
static struct PyModuleDef testmod_def = {
PyModuleDef_HEAD_INIT, /* m_base */
"testmod", /* m_name */
NULL, /* m_doc */
sizeof(struct testmod_s), /* m_size */
NULL, /* m_methods */
testmod_slots, /* m_slots */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL /* m_free */
};
/*
 * Type object for the custom module type. tp_base is assigned at import
 * time in PyInit_testmod; slots not shown here are zero in the post.
 */
static PyTypeObject testmodtype = {
PyVarObject_HEAD_INIT (NULL, 0)
"testmodtype", /* tp_name */
sizeof (struct testmod_s), /* tp_basicsize */
/* fields omitted for brevity, all set to zero */
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_BASETYPE, /* tp_flags */
/* fields omitted for brevity, all set to zero */
testmod_init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
/*
 * Import entry point: makes testmodtype a subclass of the built-in module
 * type, readies it, and hands the module definition back to the import
 * machinery. Returns NULL on failure.
 */
PyMODINIT_FUNC
PyInit_testmod(void)
{
    testmodtype.tp_base = &PyModule_Type;
    if (PyType_Ready(&testmodtype) != 0) {
        return NULL;
    }
    /* PyModuleDef_Init already yields NULL on error, so pass it through. */
    return PyModuleDef_Init(&testmod_def);
}
/* tp_init: forwards to the base module type's initialiser; 0 on success, -1 on error. */
static int testmod_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    int base_status = PyModule_Type.tp_init((PyObject *)self, args, kwds);
    return (base_status < 0) ? -1 : 0;
}
/*
 * Py_mod_create hook as posted in the question: allocates a testmodtype
 * instance with PyObject_New and returns it (NULL if allocation fails).
 */
static PyObject *testmod_create(PyObject *spec, PyModuleDef *def)
{
    struct testmod_s *instance = PyObject_New(struct testmod_s, &testmodtype);
    return instance != NULL ? (PyObject *) instance : NULL;
}
Importing this module causes a segfault. What am I doing wrong?
I am running Python 3.8.5 on macOS 12.0.1 with a build from Anaconda:
>>> sys.version
'3.8.5 (default, Sep 4 2020, 02:22:02) \n[Clang 10.0.0 ]'
After some tests I could build a custom module type by copying parts of code from moduleobject.c
Your problem is that your code does create an instance of a module subclass, but never initializes it, so key members contain random values. Additionally, modules are expected to be garbage-collectable, so you have to create your custom module with PyObject_GC_New.
The following code replaces your initial testmod_create function with a full initialization of the module:
...
// copied from moduleobject.c
/*
 * Fills a fresh module dictionary with the standard dunder entries and
 * records the module name on the object.
 *
 * md_dict: dictionary to populate; NULL is reported as failure.
 * name:    stored under __name__; also kept in mod->md_name when it is an
 *          exact str.
 * doc:     stored under __doc__; NULL means None.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
module_init_dict(struct testmod_s* mod, PyObject* md_dict,
                 PyObject* name, PyObject* doc)
{
    _Py_IDENTIFIER(__name__);
    _Py_IDENTIFIER(__doc__);
    _Py_IDENTIFIER(__package__);
    _Py_IDENTIFIER(__loader__);
    _Py_IDENTIFIER(__spec__);

    if (md_dict == NULL) {
        return -1;
    }
    PyObject* doc_value = (doc != NULL) ? doc : Py_None;

    /* Short-circuit keeps the original insertion order and stops at the
     * first failing insert. */
    if (_PyDict_SetItemId(md_dict, &PyId___name__, name) != 0
        || _PyDict_SetItemId(md_dict, &PyId___doc__, doc_value) != 0
        || _PyDict_SetItemId(md_dict, &PyId___package__, Py_None) != 0
        || _PyDict_SetItemId(md_dict, &PyId___loader__, Py_None) != 0
        || _PyDict_SetItemId(md_dict, &PyId___spec__, Py_None) != 0) {
        return -1;
    }

    if (PyUnicode_CheckExact(name)) {
        Py_INCREF(name);
        Py_XSETREF(mod->md_name, name);
    }
    return 0;
}
/*
 * Py_mod_create hook: allocates a GC-enabled testmodtype instance and
 * initialises the module fields by hand.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
static PyObject* testmod_create(PyObject* spec, PyModuleDef* def)
{
    /* Create the name first so that no failure path can run the
     * deallocator on a half-built object. */
    PyObject* name = PyUnicode_FromString("testmod");
    if (name == NULL) {
        return NULL;
    }
    struct testmod_s* module = PyObject_GC_New(struct testmod_s, &testmodtype);
    if (module == NULL) {
        Py_DECREF(name);
        return NULL;
    }
    /* The allocator does not initialise type-specific fields; set every one
     * before anything below can Py_DECREF(module). */
    module->md_def = NULL;
    module->md_state = NULL;
    module->md_weaklist = NULL;
    module->md_name = NULL;
    /* A NULL dict is rejected inside module_init_dict. */
    module->md_dict = PyDict_New();
    int cr = module_init_dict(module, module->md_dict, name, NULL);
    Py_DECREF(name);
    if (cr != 0) {
        Py_DECREF(module);
        return NULL;
    }
    /* All fields are valid now, so the collector may start tracking it. */
    PyObject_GC_Track(module);
    return (PyObject*)module;
}

Dynamic attribute in a Python C module

I have a custom Python module written in C, and I want to add an attribute to the module which is dynamically populated. E.g.:
import mymod
print(mymod.x) # At this point, the value of x is computed
The name of the attribute is known in advance.
From what I understand, this should be possible using descriptors, but it is not working as expected. I implemented a custom type, implemented the tp_descr_get function for the type, and assigned an instance of the type to my module, but the tp_descr_get function is never called.
Here is my test module:
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
static struct PyModuleDef testmod = {
PyModuleDef_HEAD_INIT,
"testmod",
NULL,
-1
};
/* Instance struct of the descriptor type; it carries no data beyond the object header. */
typedef struct testattrib_s {
PyObject_HEAD
} testattrib;
static PyObject *testattrib_descr_get(PyObject *self, PyObject *obj, PyObject *type);
static int testattrib_descr_set(PyObject *self, PyObject *obj, PyObject *value);
/*
 * Type object for testattrib. Only the descriptor get/set slots are filled
 * in; the slots between tp_basicsize and tp_descr_get are elided in the post.
 */
PyTypeObject testattribtype = {
PyVarObject_HEAD_INIT (NULL, 0)
"testattrib", /* tp_name */
sizeof (testattrib), /* tp_basicsize */
/* lots of zeros omitted for brevity */
testattrib_descr_get, /* tp_descr_get */
testattrib_descr_set /* tp_descr_set */
};
/*
 * Import entry point: readies testattribtype, creates the module, and
 * stores one testattrib instance on it as "myattrib".
 *
 * Returns the module, or NULL with an exception set on failure. Every
 * failure path releases the objects created so far.
 */
PyMODINIT_FUNC
PyInit_testmod(void)
{
    if (PyType_Ready(&testattribtype) < 0) {
        return NULL;
    }
    testattrib *attrib = PyObject_New(testattrib, &testattribtype);
    if (attrib == NULL) {
        return NULL;
    }
    PyObject *m = PyModule_Create(&testmod);
    if (m == NULL) {
        Py_DECREF(attrib);
        return NULL;
    }
    /* PyModule_AddObject takes over the reference only on success, so on
     * failure both objects are still ours to release. */
    if (PyModule_AddObject(m, "myattrib", (PyObject *) attrib) < 0) {
        Py_DECREF(attrib);
        Py_DECREF(m);
        return NULL;
    }
    return m;
}
/* tp_descr_get: logs the call and returns a new reference to the descriptor itself. */
static PyObject *testattrib_descr_get(PyObject *self, PyObject *obj, PyObject *type)
{
    PyObject *descriptor = self;

    printf("testattrib_descr_get called\n");
    Py_INCREF(descriptor);
    return descriptor;
}
/* tp_descr_set: logs the call, ignores the value, and reports success (0). */
static int testattrib_descr_set(PyObject *self, PyObject *obj, PyObject *value)
{
    const int status_ok = 0;

    printf("testattrib_descr_set called\n");
    return status_ok;
}
I test it like this:
import testmod
print(testmod.myattrib) # should call tp_descr_get
testmod.myattrib = 1 # should call tp_descr_set
The getter/setter functions are never called. What am I doing wrong?
I am running Python 3.8.5 on macOS 12.0.1 with a build from Anaconda:
>>> sys.version
'3.8.5 (default, Sep 4 2020, 02:22:02) \n[Clang 10.0.0 ]'
Descriptors operate only as attributes on a type. You would have to create your module as an instance of a module subclass equipped with the descriptor. The easiest way to do that is to use the Py_mod_create slot (not to be confused with __slots__).

C++ call Python function with several argument

I want to call the Python function in C++ using python.h but there are some problems.
Here is my Python function sample
def function1(tuple1, tuple2, string1, string2 ,string3, double1, string4 = ""):
...
and the C++ sample
double function_adapter(const vector<double> &vec1, const vector<double> &vec2, const string &string1, const string &string2, const string &string3, const double double1, const string &string4)
{
Py_Initial();
PyRun_SimpleString("import sys");
PyRun_SimpleString("sys.path.append(\"./\")");
PyObject *pModule, *pFunc, *pRes;
pModule = PyImport_Import(PyString_FromString("PythonFile"));
pFunc = PyObject_GetAttrString(pModule, "function1");
PyObject *pArgs = PyTuple_New(7);
PyObject *pVec1 = PyTuple_New(vec1.size());
for(size_t i = 0; i < vec1.size(); ++i) {
PyTuple_SetItem(pVec1, i, Py_BuildValue("f", vec1[i]));
}
PyObject *pVec2 = PyTuple_New(vec2.size());
for(size_t i = 0; i < vec2.size(); ++i) {
PyTuple_SetItem(pVec2, i, Py_BuildValue("f", vec2[i]));
}
PyObject *pString1 = Py_BuildValue("s", string1);
PyObject *pString2 = Py_BuildValue("s", string2);
PyObject *pString3 = Py_BuildValue("s", string3);
PyObject *pDouble1 = Py_BuildValue("f", double1);
PyObject *pString4 = Py_BuildValue("s", string4);
PyTuple_SetItem(pArgs, 0, pVec1);
PyTuple_SetItem(pArgs, 1, pVec2);
PyTuple_SetItem(pArgs, 2, pString1);
PyTuple_SetItem(pArgs, 3, pString2);
PyTuple_SetItem(pArgs, 4, pString3);
PyTuple_SetItem(pArgs, 5, pDouble1);
PyTuple_SetItem(pArgs, 6, pString4);
pRes = PyObject_CallObject(pFunc, pArgs);
if(pRes == NULL) {
return -2;
}
int res = PyLong_AsLong(pRes);
Py_DecRef(pModule);
.
.
.
Py_Finalize();
return res;
}
But the pRes returns NULL. And the PyObject_CallObject is just like a blackbox, I can't step in to look how it works.
So I want to ask that is there anything wrong in my code?
And the arguments preparation is right or not? (There are few examples in this case on the internet.)
A CPython API function that returns NULL means that an error has occurred. You can use the PyErr_* set of API functions to query this exception.
For debugging, the most useful is probably PyErr_Print(), which will just print the exception and stack trace (if any) to stderr.
Make sure to double check that pModule and pFunc are not NULL. The exception may have occurred when trying to import the module or get the function from the module. CPython does not raise exceptions, it just returns NULL on an error. So checking the return value of C-API functions is important.

Profiling C extension which calls back into Python

Suppose for the purpose of this discussion, I have a function like this:
/*
 * Builds an instance of factory[1] from a dict of field values.
 *
 * factory: tuple whose item 1 is the callable to instantiate and whose item
 *          2 is a dict mapping each field key to its positional index.
 * values:  dict of field key -> value; fields not present are passed as None.
 * ftype:   unused by this function.
 *
 * Returns a new reference to the call result, or NULL with an exception set.
 */
PyObject* tuple_from_dict(PyObject* ftype, PyObject* factory, PyObject* values) {
    /* Both items are borrowed references owned by `factory`. */
    PyObject* ttype = PyTuple_GetItem(factory, 1);
    PyObject* fmapping = PyTuple_GetItem(factory, 2);
    PyObject* key;
    PyObject* value;
    PyObject* args;
    PyObject* result;
    Py_ssize_t pos = 0;
    Py_ssize_t arg_len = 0;
    Py_ssize_t field;
    Py_ssize_t mapping_size;

    (void)ftype;
    if (ttype == NULL || fmapping == NULL) {
        return NULL;
    }

    mapping_size = PyDict_Size(fmapping);
    if (mapping_size < 0) {
        return NULL;
    }
    if (mapping_size == 0) {
        PyObject* empty = PyTuple_New(0);
        if (empty == NULL) {
            return NULL;
        }
        /* PyObject_Call already returns a new reference (or NULL). */
        result = PyObject_Call(ttype, empty, NULL);
        Py_DECREF(empty);
        return result;
    }

    /* The highest index in the mapping fixes the argument-tuple length. */
    while (PyDict_Next(fmapping, &pos, &key, &value)) {
        field = PyLong_AsSsize_t(value);
        if (field == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (field > arg_len) {
            arg_len = field;
        }
    }

    args = PyTuple_New(arg_len + 1);
    if (args == NULL) {
        return NULL;
    }
    for (pos = 0; pos < arg_len + 1; pos++) {
        Py_INCREF(Py_None);
        PyTuple_SetItem(args, pos, Py_None);
    }

    pos = 0;
    while (PyDict_Next(values, &pos, &key, &value)) {
        PyObject* index = PyDict_GetItem(fmapping, key);  /* borrowed */
        if (index == NULL) {
            PyErr_SetObject(PyExc_KeyError, key);
            Py_DECREF(args);
            return NULL;
        }
        field = PyLong_AsSsize_t(index);
        if (field == -1 && PyErr_Occurred()) {
            Py_DECREF(args);
            return NULL;
        }
        /* `value` is borrowed from the dict and PyTuple_SetItem steals a
         * reference, so hand it one of its own. */
        Py_INCREF(value);
        if (PyTuple_SetItem(args, field, value) < 0) {
            Py_DECREF(args);
            return NULL;
        }
    }

    result = PyObject_Call(ttype, args, NULL);
    Py_DECREF(args);
    return result;
}
It doesn't matter what exactly does it do, the important point is that it calls PyObject_Call(...), which I suspect to be slow. But, the slowness we are talking about would not be noticeable on per call basis (the code overall does couple thousands calls per 1/100 of second). So... I need an aggregate, or some way of measuring the time with very high precision (so, clock_t doesn't seem like it's a good level of precision).
It's OK if the solution will work only on Linux. It is also OK if I could somehow slow everything down, but get a more precise measurement of the timing in question.
Is clock_gettime() useful? It is POSIX interface to high resolution timer. This post provides this example usage.
#include <iostream>
#include <time.h>
using namespace std;
timespec diff(timespec start, timespec end);
// Times a dummy loop with the per-process CPU-time clock and prints the
// elapsed time as "<seconds>:<nanoseconds>".
int main()
{
    timespec time1, time2;
    int temp = 0;  // was read uninitialized, which is undefined behaviour
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
    for (int i = 0; i< 242000000; i++)
        temp+=temp;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
    const timespec elapsed = diff(time1, time2);  // compute the difference once
    cout<<elapsed.tv_sec<<":"<<elapsed.tv_nsec<<endl;
    return 0;
}
// Returns end - start as a timespec, borrowing one second when the
// nanosecond difference would be negative.
timespec diff(timespec start, timespec end)
{
    timespec delta;
    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (delta.tv_nsec < 0) {
        delta.tv_sec -= 1;
        delta.tv_nsec += 1000000000;
    }
    return delta;
}

Python extension module with variable number of arguments

I am trying to figure out how in C extension modules to have a variable (and maybe) quite large number of arguments to a function.
Reading about PyArg_ParseTuple it seems you have to know how many to accept, some mandatory and some optional but all with their own variable. I was hoping PyArg_UnpackTuple would be able to handle this but it seems to just give me bus errors when I try and use it in what appears to be the wrong way.
As an example take the following python code that one might want to make into an extension module (in C).
def hypot(*vals):
    """Return the Euclidean norm of the given numbers.

    Accepts either several numeric arguments (``hypot(3, 4, 5)``) or one
    iterable of numbers (``hypot([3, 4, 5])``).
    """
    if len(vals) != 1:
        return math.sqrt(sum((v ** 2 for v in vals)))
    else:
        # A single argument is treated as an iterable of numbers.
        return math.sqrt(sum((v ** 2 for v in vals[0])))
This can be called with any number of arguments or iterated over, hypot(3,4,5), hypot([3,4,5]), and hypot(*[3,4,5]) all give the same answer.
The start of my C function looks like this
/* Skeleton of the extension function in the question; the body is elided in the post. */
static PyObject *hypot_tb(PyObject *self, PyObject *args) {
// lots of code
// PyArg_ParseTuple or PyArg_UnpackTuple
}
Many thanks to yasar11732. Here, for the next guy, is a fully working extension module (_toolboxmodule.c) that simply takes in any number of integer arguments and returns a list made up of those arguments (with a poor name). It is a toy, but it illustrates what needed to be done.
#include <Python.h>
/*
 * Converts the first `size` items of the tuple `args` to C integers.
 *
 * arr:  output array with room for at least `size` elements.
 * args: tuple of numeric Python objects.
 *
 * Returns 1 on success, 0 with a Python exception set on failure.
 */
int ParseArguments(long arr[],Py_ssize_t size, PyObject *args) {
    /* Get arbitrary number of positive numbers from Py_Tuple */
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        PyObject *item;
        PyObject *as_long;
        unsigned long converted;

        item = PyTuple_GetItem(args, i);  /* borrowed reference */
        if (item == NULL) {
            return 0;
        }
        /* Check if the item is numeric */
        if (PyNumber_Check(item) != 1) {
            PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
            return 0;
        }
        /* Convert number to python long and then C unsigned long */
        as_long = PyNumber_Long(item);
        if (as_long == NULL) {
            return 0;
        }
        converted = PyLong_AsUnsignedLong(as_long);
        Py_DECREF(as_long);
        /* (unsigned long)-1 plus a pending exception signals a failed
         * conversion (for example a negative or too-large value). */
        if (converted == (unsigned long)-1 && PyErr_Occurred()) {
            return 0;
        }
        arr[i] = (long)converted;
    }
    return 1;
}
/*
 * Takes any number of integer arguments and returns them as a new list.
 *
 * Raises TypeError when called with no arguments. Returns NULL with an
 * exception set on any failure.
 */
static PyObject *hypot_tb(PyObject *self, PyObject *args)
{
    Py_ssize_t TupleSize = PyTuple_Size(args);
    Py_ssize_t i;
    long *nums;
    PyObject *list_out;

    /* Validate before allocating so the error path has nothing to free. */
    if (TupleSize <= 0) {
        if(!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
        return NULL;
    }
    nums = malloc(TupleSize * sizeof *nums);
    if (nums == NULL) {
        return PyErr_NoMemory();
    }
    if (!ParseArguments(nums, TupleSize, args)) {
        free(nums);
        return NULL;
    }
    list_out = PyList_New(TupleSize);
    if (list_out == NULL) {
        free(nums);
        return NULL;
    }
    for (i = 0; i < TupleSize; i++) {
        PyObject *item = PyInt_FromLong(nums[i]);
        if (item == NULL) {
            Py_DECREF(list_out);
            free(nums);
            return NULL;
        }
        PyList_SET_ITEM(list_out, i, item);  /* takes over the reference */
    }
    free(nums);
    return list_out;
}
/* Method table of the _toolbox module: a single varargs function. */
static PyMethodDef toolbox_methods[] = {
    {
        "hypot",                  /* ml_name */
        (PyCFunction)hypot_tb,    /* ml_meth */
        METH_VARARGS,             /* ml_flags */
        "Add docs here\n"         /* ml_doc */
    },
    /* All-NULL entry marks the end of the table. */
    { NULL, NULL, 0, NULL }
};
/* Module initialisation: registers "_toolbox" with its method table and docstring. */
PyMODINIT_FUNC init_toolbox(void) {
    static const char module_doc[] = "toolbox module";

    Py_InitModule3("_toolbox", toolbox_methods, module_doc);
}
In python then it is:
>>> import _toolbox
>>> _toolbox.hypot(*range(4, 10))
[4, 5, 6, 7, 8, 9]
I had used something like this earlier. It could be a bad code as I am not an experienced C coder, but it worked for me. The idea is, *args is just a Python tuple, and you can do anything that you could do with a Python tuple. You can check http://docs.python.org/c-api/tuple.html .
/*
 * Converts the first `size` items of the tuple `args` to non-zero
 * unsigned longs.
 *
 * arr:  output array with room for at least `size` elements.
 * args: tuple of numeric Python objects.
 *
 * Returns 1 on success, 0 with a Python exception set on failure
 * (non-numeric item, failed conversion, or a zero value).
 */
int
ParseArguments(unsigned long arr[],Py_ssize_t size, PyObject *args) {
    /* Get arbitrary number of positive numbers from Py_Tuple */
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        PyObject *item;
        PyObject *as_long;

        item = PyTuple_GetItem(args, i);  /* borrowed reference */
        if (item == NULL) {
            return 0;
        }
        /* Check if the item is numeric */
        if (PyNumber_Check(item) != 1) {
            PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
            return 0;
        }
        /* Convert number to python long and then C unsigned long */
        as_long = PyNumber_Long(item);
        if (as_long == NULL) {
            return 0;
        }
        arr[i] = PyLong_AsUnsignedLong(as_long);
        Py_DECREF(as_long);
        /* A failed conversion yields (unsigned long)-1 with an exception set. */
        if (arr[i] == (unsigned long)-1 && PyErr_Occurred()) {
            return 0;
        }
        if (arr[i] == 0) {
            PyErr_SetString(PyExc_ValueError,"Zero doesn't allowed as argument.");
            return 0;
        }
    }
    return 1;
}
I was calling this function like this:
static PyObject *
function_name_was_here(PyObject *self, PyObject *args)
{
Py_ssize_t TupleSize = PyTuple_Size(args);
Py_ssize_t i;
struct bigcouples *temp = malloc(sizeof(struct bigcouples));
unsigned long current;
if(!TupleSize) {
if(!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
free(temp);
return NULL;
}
unsigned long *nums = malloc(TupleSize * sizeof(unsigned long));
if(!ParseArguments(nums, TupleSize, args)){
/* Make a cleanup and than return null*/
return null;
}

Categories