python3 str object cannot pass PyUnicode_Check - python

I was writing a C extension function, which was supposed to accept a str object as argument. The code is shown below:
static PyObject *py_print_chars(PyObject *self, PyObject *o) {
PyObject *bytes;
char *s;
if (!PyUnicode_Check(o)) {
PyErr_SetString(PyExc_TypeError, "Expected string");
return NULL;
}
bytes = PyUnicode_AsUTF8String(o);
s = PyBytes_AsString(bytes);
print_chars(s);
Py_DECREF(bytes);
Py_RETURN_NONE;
}
But as I test the module in python3 console, I find str objects can't pass the PyUnicode_Check:
>>> from sample2 import *
>>> print_chars('Hello world')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: Expected string
As far as I know, Python 3’s str() type is called PyUnicode in C and the C code above was written in refer to "python cookbook3" Char15.13. I just can't work out the problem. Can anybody tell me what's wrong with my code.
Here is what "python cookbook3" said:
If for some reason, you are working directly with a PyObject * and can’t use PyArg_ParseTuple(), the following code samples show how you can check and extract a suitable char * reference, from both a bytes and string object:
/* Some Python Object (obtained somehow) */
PyObject *obj;
/* Conversion from bytes */
{
char *s;
s = PyBytes_AsString(o);
if (!s) {
return NULL; /* TypeError already raised */
}
print_chars(s);
}
/* Conversion to UTF-8 bytes from a string */
{
PyObject *bytes;
char *s;
if (!PyUnicode_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected string");
return NULL;
}
bytes = PyUnicode_AsUTF8String(obj);
s = PyBytes_AsString(bytes);
print_chars(s);
Py_DECREF(bytes);
}
And the whole code:
#include "Python.h"
#include "sample.h"
static PyObject *py_print_chars(PyObject *self, PyObject *o) {
PyObject *bytes;
char *s;
if (!PyUnicode_Check(o)) {
PyErr_SetString(PyExc_TypeError, "Expected string");
return NULL;
}
bytes = PyUnicode_AsUTF8String(o);
s = PyBytes_AsString(bytes);
print_chars(s);
Py_DECREF(bytes);
Py_RETURN_NONE;
}
/* Module method table */
static PyMethodDef SampleMethods[] = {
{"print_chars", py_print_chars, METH_VARARGS, "print character"},
{ NULL, NULL, 0, NULL}
};
/* Module structure */
static struct PyModuleDef samplemodule = {
PyModuleDef_HEAD_INIT,
"sample",
"A sample module",
-1,
SampleMethods
};
/* Module initialization function */
PyMODINIT_FUNC
PyInit_sample2(void) {
return PyModule_Create(&samplemodule);
}

If the goal is to accept exactly one argument, the function should be declared as METH_O, not METH_VARARGS; the former passes along the single argument without wrapping, the latter wraps in a tuple which would need to be unpacked or parsed to get the PyUnicode* inside.

Related

Dynamic attribute in a Python C module

I have a custom Python module written in C, and I want to add an attribute to the module which is dynamically populated. E.g.:
import mymod
print(mymod.x) # At this point, the value of x is computed
The name of the attribute is known in advance.
From what I understand, this should be possible using descriptors, but it is not working as expected. I implemented a custom type, implemented the tp_descr_get function for the type, and assigned an instance of the type to my module, but the tp_descr_get function is never called.
Here is my test module:
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
static struct PyModuleDef testmod = {
PyModuleDef_HEAD_INIT,
"testmod",
NULL,
-1
};
typedef struct testattrib_s {
PyObject_HEAD
} testattrib;
static PyObject *testattrib_descr_get(PyObject *self, PyObject *obj, PyObject *type);
static int testattrib_descr_set(PyObject *self, PyObject *obj, PyObject *value);
PyTypeObject testattribtype = {
PyVarObject_HEAD_INIT (NULL, 0)
"testattrib", /* tp_name */
sizeof (testattrib), /* tp_basicsize */
/* lots of zeros omitted for brevity */
testattrib_descr_get, /* tp_descr_get */
testattrib_descr_set /* tp_descr_set */
};
PyMODINIT_FUNC
PyInit_testmod(void)
{
if (PyType_Ready(&testattribtype)) {
return NULL;
}
testattrib *attrib = PyObject_New(testattrib, &testattribtype);
if (attrib == NULL) {
return NULL;
}
PyObject *m = PyModule_Create(&testmod);
if (m == NULL) {
return NULL;
}
if (PyModule_AddObject(m, "myattrib", (PyObject *) attrib)) {
return NULL;
}
return m;
}
static PyObject *testattrib_descr_get(PyObject *self, PyObject *obj, PyObject *type)
{
printf("testattrib_descr_get called\n");
Py_INCREF(self);
return self;
}
static int testattrib_descr_set(PyObject *self, PyObject *obj, PyObject *value)
{
printf("testattrib_descr_set called\n");
return 0;
}
I test it like this:
import testmod
print(testmod.myattrib) # should call tp_descr_get
testmod.myattrib = 1 # should call tp_descr_set
The getter/setter functions are never called. What am I doing wrong?
I am running Python 3.8.5 on macOS 12.0.1 with a build from Anaconda:
>>> sys.version
'3.8.5 (default, Sep 4 2020, 02:22:02) \n[Clang 10.0.0 ]'
Descriptors operate only as attributes on a type. You would have to create your module as an instance of a module subclass equipped with the descriptor. The easiest way to do that is to use the Py_mod_create slot (not to be confused with __slots__).

How do you correctly pass a pointer between calls with Python C-API

I'm writing a python + c module, and I'm trying to pass a pointer to a certain struct I need. I'm using PyCapsule to encapsulate the pointer, but I'm having problems when retrieving the pointer from it.
The C functions used are like:
static PyObject *
spam_new (PyObject *self, PyObject *args)
{
unsigned int number;
struct spam *pointer;
if (!PyArg_ParseTuple(args, "I", &number)) {
return NULL;
}
state = (struct spam*) malloc(sizeof (struct spam));
if (state == NULL) {
return NULL;
}
spam_init(*pointer, number);
return PyCapsule_New((void*) pointer, "spam", &spam_destroy);
}
static PyObject *
spam_get (PyObject *self, PyObject *args)
{
PyObject *capsule, *result;
void *raw_pointer;
struct spam *pointer;
unsigned long long int number;
if (!PyArg_ParseTuple(args, "OK", capsule, &number)) {
return NULL;
}
printf("[DEBUG] Number: %llu\n", number);
printf("[DEBUG] Capsule pointer: %p\n", capsule);
raw_pointer = PyCapsule_GetPointer(capsule, "spam");
if (raw_pointer == NULL) {
return NULL;
}
pointer = (struct spam*) raw_pointer;
.
.
.
}
They are both declared with METH_VARARGS.
When in python, custom.new(1) returns a capsule as expected, which I store in a variable c.
When calling custom.get(c, 14) python crashes at the PyCapsule_GetPointer function call. Both prints show the same (14), meaning that PyArg_ParseTuple is not getting the capsule passed as a parameter.
For security reasons passing the pointer as a long is not an option.
Thanks.
In the Python documentation is stated that the "O" format string will try to get a pointer to a PyObject (a PyObject*), not a PyObject.
Therefore when using PyArg_ParseTuple to get a PyObject* you have to pass a pointer to a PyObject*.
The provided code was fixed by adding a & to the capsule in the line
if (!PyArg_ParseTuple(args, "OK", &capsule, &number)) {
Fixed thanks to Davis Herring's comment.

How to use basic string operations with PyObject* strings?

I would not like to keep converting every Python String Object from PyObject* to std::string or char* with PyUnicode_DecodeUTF8 and PyUnicode_AsUTF8 because it is an expensive operation.
On my last question How to extend/reuse Python C Extensions/API implementation?, I managed to use the Python open function, to directly give me a PyObject* string. Once doing that, it is very simple to send the string back to the Python program because I can just pass its PyObject* pointer back, instead of doing a full char-by-char copy as PyUnicode_DecodeUTF8 or PyUnicode_AsUTF8 do.
On the regex implementation of CPython API, I can find a function like this:
static void* getstring(PyObject* string, Py_ssize_t* p_length,
int* p_isbytes, int* p_charsize,
Py_buffer *view)
{
/* given a python object, return a data pointer, a length (in
characters), and a character size. return NULL if the object
is not a string (or not compatible) */
/* Unicode objects do not support the buffer API. So, get the data directly. */
if (PyUnicode_Check(string)) {
if (PyUnicode_READY(string) == -1)
return NULL;
*p_length = PyUnicode_GET_LENGTH(string);
*p_charsize = PyUnicode_KIND(string);
*p_isbytes = 0;
return PyUnicode_DATA(string);
}
/* get pointer to byte string buffer */
if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
return NULL;
}
*p_length = view->len;
*p_charsize = 1;
*p_isbytes = 1;
if (view->buf == NULL) {
PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
PyBuffer_Release(view);
view->buf = NULL;
return NULL;
}
return view->buf;
}
It does not seem to be using PyUnicode_DecodeUTF8 or PyUnicode_AsUTF8 to work with the PyObject* coming from the Python Interpreter.
How can I use basic string operations with PyObject* strings without conversion then to std::string or char*?
I would call basic operations the following examples: (Just for exemplifying, I am using Py_BuildValue to build a PyObject* string from a string as a char* or std::string)
static PyObject* PyFastFile_do_concatenation(PyFastFile* self)
{
PyObject* hello = Py_BuildValue( "s", "Hello" );
PyObject* word = Py_BuildValue( "s", "word" );
// I am just guessing the `->value` property
PyObject* hello_world = hello->value + word->value;
hello_world; // return the `PyObject*` string `Hello word`
}
static PyObject* PyFastFile_do_substring(PyFastFile* self)
{
PyObject* hello = Py_BuildValue( "s", "Hello word" );
PyObject* hello_world = hello->value[5:];
hello_world; // return the `PyObject*` string `word`
}
static PyObject* PyFastFile_do_contains(PyFastFile* self)
{
PyObject* hello = Py_BuildValue( "s", "Hello word" );
if( "word" in hello->value ) {
Py_BuildValue( "p", true ); // return the `PyObject*` boolean `true`
}
Py_BuildValue( "p", false ); // return the `PyObject*` boolean `false`
}

Calling a C function from a Python file. Getting error when using Setup.py file

My problem is as follows:
I would like to call a C function from my Python file and return a value back to that Python file.
I have tried the following method of using embedded C in Python (the following code is the C code called "mod1.c). I am using Python3.4 so the format follows that given in the documentation guidelines. The problem comes when I call my setup file (second code below).
#include
#include "sum.h"
static PyObject*
mod_sum(PyObject *self, PyObject *args)
{
int a;
int b;
int s;
if (!PyArg_ParseTuple(args,"ii",&a,&b))
return NULL;
s = sum(a,b);
return Py_BuildValue("i",s);
}
/* DECLARATION OF METHODS */
static PyMethodDef ModMethods[] = {
{"sum", mod_sum, METH_VARARGS, "Descirption"}, // {"methName", modName_methName, METH_VARARGS, "Description.."}, modName is name of module and methName is name of method
{NULL,NULL,0,NULL}
};
// Module Definition Structure
static struct PyModuleDef summodule = {
PyModuleDef_HEAD_INIT,
"sum",
NULL,
-1,
ModMethods
};
/* INITIALIZATION FUNCTION */
PyMODINIT_FUNC initmod(void)
{
PyObject *m;
m = PyModule_Create(&summodule);
if (m == NULL)
return m;
}
Setup.py
from distutils.core import setup, Extension
setup(name='buildsum', version='1.0', \
ext_modules=[Extension('buildsum', ['mod1.c'])])
The result that I get when I compile my code using gcc is the following error: Cannot export PyInit_buildsum: symbol not defined
I would greatly appreciate any insight or help on this problem, or any suggestion in how to call C from Python. Thank you!
---------------------------------------EDIT ---------------------------------
Thank you for the comments:
I have tried the following now:
static PyObject*
PyInit_sum(PyObject *self, PyObject *args)
{
int a;
int b;
int s;
if (!PyArg_ParseTuple(args,"ii",&a,&b))
return NULL;
s = sum(a,b);
return Py_BuildValue("i",s);
}
For the first function; however, I still get the same error of PyInit_sum: symbol not defined
The working code from above in case anyone runs into the same error: the answer from #dclarke is correct. The initialization function in python 3 must have PyInit_(name) as its name.
#include <Python.h>
#include "sum.h"
static PyObject* mod_sum(PyObject *self, PyObject *args)
{
int a;
int b;
int s;
if (!PyArg_ParseTuple(args,"ii",&a,&b))
return NULL;
s = sum(a,b);
return Py_BuildValue("i",s);
}
/* DECLARATION OF METHODS*/
static PyMethodDef ModMethods[] = {
{"modsum", mod_sum, METH_VARARGS, "Descirption"},
{NULL,NULL,0,NULL}
};
// Module Definition Structure
static struct PyModuleDef summodule = {
PyModuleDef_HEAD_INIT,"modsum", NULL, -1, ModMethods
};
/* INITIALIZATION FUNCTION*/
PyMODINIT_FUNC PyInit_sum(void)
{
PyObject *m;
m = PyModule_Create(&summodule);
return m;
}

Python extension module with variable number of arguments

I am trying to figure out how in C extension modules to have a variable (and maybe) quite large number of arguments to a function.
Reading about PyArg_ParseTuple it seems you have to know how many to accept, some mandatory and some optional but all with their own variable. I was hoping PyArg_UnpackTuple would be able to handle this but it seems to just give me bus errors when I try and use it in what appears to be the wrong way.
As an example take the following python code that one might want to make into an extension module (in C).
def hypot(*vals):
if len(vals) !=1 :
return math.sqrt(sum((v ** 2 for v in vals)))
else:
return math.sqrt(sum((v ** 2 for v in vals[0])))
This can be called with any number of arguments or iterated over, hypot(3,4,5), hypot([3,4,5]), and hypot(*[3,4,5]) all give the same answer.
The start of my C function looks like this
static PyObject *hypot_tb(PyObject *self, PyObject *args) {
// lots of code
// PyArg_ParseTuple or PyArg_UnpackTuple
}
Many thinks to yasar11732. Here for the next guy is a fully working extension module (_toolboxmodule.c) that simply takes in any number or integer arguments and returns a list made up of those arguments (with a poor name). A toy but illustrates what needed to be done.
#include <Python.h>
int ParseArguments(long arr[],Py_ssize_t size, PyObject *args) {
/* Get arbitrary number of positive numbers from Py_Tuple */
Py_ssize_t i;
PyObject *temp_p, *temp_p2;
for (i=0;i<size;i++) {
temp_p = PyTuple_GetItem(args,i);
if(temp_p == NULL) {return NULL;}
/* Check if temp_p is numeric */
if (PyNumber_Check(temp_p) != 1) {
PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
return NULL;
}
/* Convert number to python long and than C unsigned long */
temp_p2 = PyNumber_Long(temp_p);
arr[i] = PyLong_AsUnsignedLong(temp_p2);
Py_DECREF(temp_p2);
}
return 1;
}
static PyObject *hypot_tb(PyObject *self, PyObject *args)
{
Py_ssize_t TupleSize = PyTuple_Size(args);
long *nums = malloc(TupleSize * sizeof(unsigned long));
PyObject *list_out;
int i;
if(!TupleSize) {
if(!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
return NULL;
}
if (!(ParseArguments(nums, TupleSize, args)) {
free(nums);
return NULL;
}
list_out = PyList_New(TupleSize);
for(i=0;i<TupleSize;i++)
PyList_SET_ITEM(list_out, i, PyInt_FromLong(nums[i]));
free(nums);
return (PyObject *)list_out;
}
static PyMethodDef toolbox_methods[] = {
{ "hypot", (PyCFunction)hypot_tb, METH_VARARGS,
"Add docs here\n"},
// NULL terminate Python looking at the object
{ NULL, NULL, 0, NULL }
};
PyMODINIT_FUNC init_toolbox(void) {
Py_InitModule3("_toolbox", toolbox_methods,
"toolbox module");
}
In python then it is:
>>> import _toolbox
>>> _toolbox.hypot(*range(4, 10))
[4, 5, 6, 7, 8, 9]
I had used something like this earlier. It could be a bad code as I am not an experienced C coder, but it worked for me. The idea is, *args is just a Python tuple, and you can do anything that you could do with a Python tuple. You can check http://docs.python.org/c-api/tuple.html .
int
ParseArguments(unsigned long arr[],Py_ssize_t size, PyObject *args) {
/* Get arbitrary number of positive numbers from Py_Tuple */
Py_ssize_t i;
PyObject *temp_p, *temp_p2;
for (i=0;i<size;i++) {
temp_p = PyTuple_GetItem(args,i);
if(temp_p == NULL) {return NULL;}
/* Check if temp_p is numeric */
if (PyNumber_Check(temp_p) != 1) {
PyErr_SetString(PyExc_TypeError,"Non-numeric argument.");
return NULL;
}
/* Convert number to python long and than C unsigned long */
temp_p2 = PyNumber_Long(temp_p);
arr[i] = PyLong_AsUnsignedLong(temp_p2);
Py_DECREF(temp_p2);
if (arr[i] == 0) {
PyErr_SetString(PyExc_ValueError,"Zero doesn't allowed as argument.");
return NULL;
}
if (PyErr_Occurred()) {return NULL; }
}
return 1;
}
I was calling this function like this:
static PyObject *
function_name_was_here(PyObject *self, PyObject *args)
{
Py_ssize_t TupleSize = PyTuple_Size(args);
Py_ssize_t i;
struct bigcouples *temp = malloc(sizeof(struct bigcouples));
unsigned long current;
if(!TupleSize) {
if(!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,"You must supply at least one argument.");
free(temp);
return NULL;
}
unsigned long *nums = malloc(TupleSize * sizeof(unsigned long));
if(!ParseArguments(nums, TupleSize, args)){
/* Make a cleanup and than return null*/
return null;
}

Categories