Accessing view of a NumPy array using the C API - python

In a Python extension module I've written in C++, I use the following snippet of code to convert a NumPy array into an Armadillo array for use in the C++ portion of the code:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
The problem is that when I pass this a view of a NumPy array (i.e. my_array[:, 3]), it doesn't seem to handle the strides of the underlying C array correctly. Based on the output, it seems like the array pyarr received by the function is actually the full base array, and not the view (or at least when I access the data using PyArray_DATA, I seem to be getting a pointer to the full base array). If I instead pass this function a copy of the view (i.e. my_array[:, 3].copy()), it works as expected, but I don't want to have to remember to do that every time.
So, is there a way to make PyArray_FromArray copy only the slice of the matrix I want? I tried using the flag NPY_ARRAY_ENSURECOPY, but that didn't help.
Edit 1
As suggested in the comments, here is a full working example:
In file example.cpp:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C" {
#include <Python.h>
#include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
{
public:
WrongDimensions() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The dimensions were incorrect";
};
class NotImplemented : public std::exception
{
public:
NotImplemented() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "Not implemented";
};
class BadArrayLayout : public std::exception
{
public:
BadArrayLayout() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The matrix was not contiguous";
};
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++) {
result.push_back(dims[i]);
}
return result;
}
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1) {
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
}
else {
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
}
}
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1) {
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
}
else {
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
}
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = {nRows, nCols};
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nCols; j++) {
resultDataPtr[i * nCols + j] = matrix(i, j);
}
}
return result;
}
extern "C" {
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
{
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
return NULL;
}
PyObject* output = NULL;
try {
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
}
catch (const std::bad_alloc&) {
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
}
catch (const std::exception& err) {
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
}
return output;
}
static PyMethodDef example_methods[] =
{
{"test_function", example_testFunction, METH_VARARGS, "A test function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef example_module = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
};
PyMODINIT_FUNC
PyInit_example(void)
{
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
}
}
And setup.py for compiling:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
Now assume we have the example array
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
The function seems to work fine for multidimensional slices like a[:, :3], and it returns the matrix unaltered as I'd expect. But if I give it a single-dimensional slice, I get the wrong components unless I make a copy:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])

The view of the array is just another information-wrapper for the same data array. Numpy doesn't copy any data here. Only the information for the interpretation of the data are adjusted and the pointer to the data moved if useful.
In your code you're assuming, that the data of a vector a[:, 3] is represented as a vector in memory which wouldn't differ for NPY_ARRAY_CARRAY and NPY_ARRAY_FARRAY. But this representation you only get after creating a (fortran ordered) copy of the array itself.
To make it work I modified your convertPyArrayToArma() function a little to create a copy even if it's a vector:
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
const auto dims = getPyArrayDimensions(pyarr);
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
// this copies the data from cleanArr
arma::Mat<outT> result;
if (dims.size() == 1) {
result = arma::Col<outT>(dataPtr, dims[0], true);
}
else {
result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
}
Py_DECREF(cleanArr);
return result;
}

Just a short answer that can help you about this. PyArray_DATA is not aware about the state of the view.
Before using it you can use PyArray_GETCONTIGUOUS on the py object reference and then use the return reference in PyArray_DATA. This way, you get a copy of the numpy object as a contiguous one.
=> void* pContiguousInput = PyArray_DATA(PyArray_GETCONTIGUOUS(pyInputData));
Hopes it help !

Related

Cannot assign contents of list attribute in Pybind11 defined class

I have a sparse matrix implementation in C++, and I used pybind11 to expose it to python. Here is the problem:
>>> D1 = phc.SparseMatrix(3, [[0],[1],[2]])
>>> D1.cData
[[0], [1], [2]]
>>> D1.cData[1] = [1,2]
>>> D1.cData
[[0], [1], [2]] #Should be [[0], [1,2], [2]]
In python, I cannot change the contents of the SparseMatrix.cData attribute with the assignment operator. I can change the entire list with D1.cData = [[1],[2],[3]]. This behavior is bewildering to me. D1.cData is just a list, so I would expect that the above code would work.
I suspect it has something to do with my pybind11 code since this behavior is not present in python-defined custom classes. But I have no idea what is wrong (I am a novice programmer). Here is the source code info:
Python Bindings
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace py = pybind11;
#include <SparseMatrix.h>
namespace phc = ph_computation;
using SparseMatrix = phc::SparseMatrix;
using Column = phc::Column;
using CData = phc::CData;
PYBIND11_MODULE(ph_computations, m)
{
m.doc() = "ph_computations python bindings";
using namespace pybind11::literals;
m.def("add_cols", &phc::add_cols);//begin SparseMatrix.h
py::class_<SparseMatrix>(m, "SparseMatrix")
.def(py::init<size_t, CData>())
.def(py::init<std::string>())
.def_readwrite("n_rows", &SparseMatrix::n_rows)
.def_readwrite("n_cols", &SparseMatrix::n_cols)
.def_readwrite("cData", &SparseMatrix::cData)
.def("__neq__", &SparseMatrix::operator!=)
.def("__eq__", &SparseMatrix::operator==)
.def("__add__", &SparseMatrix::operator+)
.def("__mul__", &SparseMatrix::operator*)
.def("transpose", &SparseMatrix::transpose)
.def("__str__", &SparseMatrix::print)
.def("save", &SparseMatrix::save)
;
m.def("identity", &phc::make_identity);
m.def("matching_pivots", &phc::matching_pivots);//end SparseMatrix.h
}
SparseMatrix.h
#pragma once
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <vector>
#include <stdexcept>
namespace ph_computation{
using Int = int;
using Column = std::vector<Int>;//a Column is represented by a vector of indices
using CData = std::vector<Column>;//a matrix is represented by a vector of Columns
//Add columns in Z2
Column add_cols(const Column& c1, const Column& c2);
struct SparseMatrix
{
size_t n_rows{0};
size_t n_cols{0};
CData cData;
SparseMatrix()=default;
SparseMatrix(size_t n_rows_, CData cData_):
n_rows(n_rows_), n_cols(cData_.size()), cData(cData_){}
SparseMatrix(std::string path);
bool operator!=(const SparseMatrix &other) const;
bool operator==(const SparseMatrix &other) const;
SparseMatrix operator+(const SparseMatrix &other) const;
SparseMatrix operator*(const SparseMatrix &other) const;
void transpose();
void print() const;
void save(std::string path);
};
SparseMatrix make_identity(size_t n_cols_);
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b);
}
SparseMatrix.cpp (you probably don't need this)
#include <SparseMatrix.h>
namespace ph_computation {
Column add_cols(const Column& c1, const Column& c2){
Column c3;
int idx1{0};
int idx2{0};
while(idx1 < c1.size() && idx2 < c2.size()){
if(c1[idx1] < c2[idx2]){
c3.push_back(c1[idx1]);
++idx1;
}
else if(c1[idx1] > c2[idx2]){
c3.push_back(c2[idx2]);
++idx2;
}
else {
++idx1;
++idx2;
}
}
if (idx1 < c1.size()){
c3.insert(c3.end(), std::next(c1.begin(), idx1), c1.end());
}
else if (idx2 < c2.size()){
c3.insert(c3.end(), std::next(c2.begin(), idx2), c2.end());
}
return c3;
}
SparseMatrix make_identity(size_t n_cols_){
CData cData_(n_cols_);
for (int j = 0; j < n_cols_; ++j){
cData_[j] = {j};
}
return SparseMatrix(n_cols_, cData_);
}
SparseMatrix::SparseMatrix(std::string path){
std::fstream f_in;
f_in.open(path, std::ios::in);
if(f_in.is_open()){
std::string n_rows_line;
std::getline(f_in, n_rows_line); //first line of file contains number of rows
n_rows = std::stoi(n_rows_line);
std::string n_cols_line;
std::getline(f_in, n_cols_line); //second line of file contains number of cols
n_cols = std::stoi(n_cols_line);
CData cData_(n_cols);
cData = cData_;
std::string line;
int j = 0;
int nnz, data;
while (std::getline(f_in, line)){
std::stringstream line_str = std::stringstream(line);
while (line_str >> nnz){
Column col_j(nnz);
for (int i =0; i < nnz; ++i){
line_str >> data;
col_j[i] = data;
}
cData[j] = col_j;
}
++j;
}
f_in.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool SparseMatrix::operator!=(const SparseMatrix &other) const{
if (n_rows != other.n_rows || cData != other.cData){
return true;
}
return false;
}
bool SparseMatrix::operator==(const SparseMatrix &other) const{
return !(*this != other);
}
SparseMatrix SparseMatrix::operator+(const SparseMatrix &other) const{
if (n_rows != other.n_rows || n_cols != other.n_cols){
throw std::invalid_argument("Matrices must have same dimension to add.");
}
CData ans_cData;
for (int j = 0; j < n_cols; ++j){
ans_cData.push_back(add_cols(cData[j], other.cData[j]));
}
return SparseMatrix(n_rows, ans_cData);
}
SparseMatrix SparseMatrix::operator*(const SparseMatrix &other) const{
if(n_cols != other.n_rows){
throw std::invalid_argument("Matrices must have compatible dimensions.");
}
size_t ans_rows = n_rows;
CData ans_cData(other.n_cols);
SparseMatrix ans(ans_rows, ans_cData);
for(int j =0; j<ans.n_cols; ++j){
for(int idx : other.cData[j]){
ans.cData[j] = add_cols(ans.cData[j], cData[idx]);
}
}
return ans;
}
void SparseMatrix::transpose(){
CData cData_T(n_rows);
for(int j =0; j<n_cols; ++j){
if(!cData[j].empty()){
for(int x: cData[j]){
cData_T[x].push_back(j);
}
}
}
cData = cData_T;
n_rows = n_cols;
n_cols = cData.size();
}
void SparseMatrix::print() const{
for (int i = 0; i < n_rows; ++i){
for (int j = 0; j < n_cols; ++j){
if (cData[j].empty())
{std::cout << " 0";}
else if (std::binary_search(cData[j].begin(), cData[j].end(), i))//Assumes row indices
{std::cout << " 1";} //are ordered
else
{std::cout << " 0";}
if (n_cols-1 == j)
{std::cout << " \n";}
}
}
}
void SparseMatrix::save(std::string path){
std::fstream f_out;
f_out.open(path, std::ios::out);
if(f_out.is_open()){
f_out << n_rows << "\n";
f_out << n_cols << "\n";
for(int j = 0; j < n_cols; ++j){
int col_j_sz = cData[j].size();
f_out << col_j_sz;
for(int i = 0; i < col_j_sz; ++i){
f_out << " " << cData[j][i];
}
f_out << "\n";
}
f_out.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b){
if(a.n_rows != b.n_rows || a.n_cols != b.n_cols){
throw std::invalid_argument("Input matrices must have the same size.");
}
for (int j = 0; j<a.n_cols; ++j){
bool a_j_empty = a.cData[j].empty();
bool b_j_empty = b.cData[j].empty();
if (a_j_empty != b_j_empty){
return false;
}
else if (!a_j_empty){
if(a.cData[j].back() != b.cData[j].back()){
return false;
}
}
}
return true;
}
} // namespace ph_computation
I found the answer in the pybind11 documentation here: https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types
Apparently, STL container data members can be overwritten in their entirety in python, but modification of the data member through list methods does not work. I don't really understand it, but the link above answers the question.

about pybind11 Return to the c++ array modification problem question

c++
How to return the xyz array without changing the definition of the Tile structure? You can use the subscript to modify the value
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
struct Tile {
float xyz[3];
};
struct Vector3d
{
float x;
float y;
float z;
Vector3d(float x, float y, float z) {
this->x = x;
this->y = y;
this->z = z;
}
Tile* pTile = 0;
/*
struct *p01 = 0;
struct *p02 = 0;
....
*/
};
PYBIND11_MODULE(bbbb, m)
{
py::class_<Vector3d>(m, "Vector3d")
.def(py::init<float, float, float>())
.def_property("x", [](Vector3d& p)->float {
return p.x;
}, [](Vector3d &p, float x) {
p.x = x;
if (p.pTile)
p.pTile->xyz[0] = x;
});
py::class_<Tile>(m, "Tile")
.def(py::init <>())
.def_property("xyz", [](Tile& p)->py::array {
auto dtype = pybind11::dtype(pybind11::format_descriptor<float>::format());
return pybind11::array(dtype, { 3 }, { sizeof(float) }, p.xyz, nullptr);
}, [](Tile& p) {})
.def_property("vec_xyz", [](Tile& p)->Vector3d {
Vector3d vec(p.xyz[0], p.xyz[1], p.xyz[2]);
vec.pTile = &p;
return vec;
}, [](Tile& p) {})
.def("__repr__", [](const Tile &p) {
char buff[100] = { 0 };
sprintf(buff, "x:%f y:%f z:%f", p.xyz[0], p.xyz[1], p.xyz[2]);
return std::string(buff, strlen(buff));;
});
}
python
>>> import bbbb
>>> t = bbbb.Tile()
>>> print(t)
x:0.000000 y:0.000000 z:0.000000
>>> t.xyz[0] = 1.5 #Modifying the value does not work
>>> print(t)
x:0.000000 y:0.000000 z:0.000000
>>> t.vec_xyz.x = 2.5 #Values can be modified
>>> print(t)
x:2.500000 y:0.000000 z:0.000000
>>>
How to make t.xyz[0] = 1.5 work without changing the definition of the Tile structure
If there are multiple similar structures, you need to add structure pointers one by one in Vector3D, which is too troublesome
is there a better way than to return to Vector3D
#define LIST_DEFINE_NEW(TT,A,B,M) \
py::class_<LIST_DEFINE(TT,A,B)>(M, "List_"#A"_"#B) \
.def("__getitem__", [](LIST_DEFINE(TT,A,B)& p, int i) { return p.get(i); }) \
.def("__setitem__", [](LIST_DEFINE(TT,A,B)& p, int i, A v) { p.set(i, v); }) \
.def_property_readonly("length", [](LIST_DEFINE(TT,A,B)& p) { return p.size(); })
void init_Tile1(py::module& m){
//......
LIST_DEFINE_NEW(Tile1, float, 3, m);
}
It can also be defined as a macro definition to facilitate adding definitions of different types and lengths
c++
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
struct Tile1 { //Assume that the imported library cannot modify the definition
float xyz[3];
};
struct Tile2 { //Assume that the imported library cannot modify the definition
short xyz[6];
};
template<typename TT, typename A, typename B>
struct List {
TT& m_obj;
B TT::*pp;
List(TT& obj, B TT::*pm) : m_obj(obj), pp(pm) {
}
void set(int i, A v) {
if (i >= size()) {
py::pybind11_fail("IndexError: list index out of range");
}
(m_obj.*pp)[i] = v;
}
A get(int i) {
if (i >= size()) {
py::pybind11_fail("IndexError: list index out of range");
}
return (m_obj.*pp)[i];
}
int size() {
return sizeof((m_obj.*pp)) / sizeof((m_obj.*pp)[0]);
}
};
#define LIST_DEFINE(TT,A,B) List<TT, A, A[B]>
#define LIST_DEFINE_TILE1 LIST_DEFINE(Tile1, float, 3)
#define LIST_DEFINE_TILE2 LIST_DEFINE(Tile2, short, 6)
#define LIST_DEFINE_NEW(TT,A,B,M) \
py::class_<LIST_DEFINE(TT,A,B)>(M, "List_"#A"_"#B) \
.def("__getitem__", [](LIST_DEFINE(TT,A,B)& p, int i) { return p.get(i); }) \
.def("__setitem__", [](LIST_DEFINE(TT,A,B)& p, int i, A v) { p.set(i, v); }) \
.def_property_readonly("length", [](LIST_DEFINE(TT,A,B)& p) { return p.size(); })
void init_Tile1(py::module& m) {
py::class_<Tile1>(m, "Tile1")
.def(py::init <>())
.def_property_readonly("xyz", [](Tile1& p) {
return LIST_DEFINE_TILE1(p, &Tile1::xyz);
})
.def("__repr__", [](const Tile1 &p) {
char buff[100] = { 0 };
sprintf(buff, "x:%f y:%f z:%f", p.xyz[0], p.xyz[1], p.xyz[2]);
return std::string(buff, strlen(buff));;
});
LIST_DEFINE_NEW(Tile1, float, 3, m); //List_flost_3
}
void init_Tile2(py::module& m) {
py::class_<Tile2>(m, "Tile2")
.def(py::init <>())
.def_property_readonly("xyz", [](Tile2& p) {
return LIST_DEFINE_TILE2(p, &Tile2::xyz);
})
.def("__repr__", [](const Tile2 &p) {
char buff[100] = { 0 };
sprintf(buff, "ax:%d ay:%d az:%d\nbx:%d by:%d bz:%d",
p.xyz[0], p.xyz[1], p.xyz[2],
p.xyz[3], p.xyz[4], p.xyz[5]);
return std::string(buff, strlen(buff));
});
LIST_DEFINE_NEW(Tile2, short, 6, m); //List_short_6
}
PYBIND11_MODULE(bbbb, m)
{
init_Tile1(m);
init_Tile2(m);
}
python
>>> import bbbb
>>> a = bbbb.Tile1()
>>> a.xyz.length
3
>>> a
x:0.000000 y:0.000000 z:0.000000
>>> a.xyz[1] = 1.5 #Can modify the value and work
>>> a
x:0.000000 y:1.500000 z:0.000000
>>> b = bbbb.Tile2()
>>> b.xyz.length
6
>>> b
ax:0 ay:0 az:0
bx:0 by:0 bz:0
>>> b.xyz[5] = 100 #Can modify the value and work
>>> b
ax:0 ay:0 az:0
bx:0 by:0 bz:100
A few days after I posted the question and no one replied to me, I came up with a temporary solution during this time, but it is still unavoidable that if there are arrays of different types and lengths, you need to define different types of List_type_n to return, by the way Ask if there is a better solution than this

How to return an array in C into python?

I have been working on a selection sort with C extension on python, which aimed to intake a list in python, sort using C code and return a sorted list in python. Sounds simple, but I just could not get the value of the sorted list correct in python, as I would get a value of 1 when I was trying to print the sorted list.
Here is my code in C:
#include <Python.h>
int selectionSort(int array[], int N){
int i, j, min_element;
for (i = 0; i < N-1; i++) {
min_element = i;
for (j = i+1; j < N; j++)
if (array[j] < array[min_element])
min_element = j;
int temp = array[min_element];
array[min_element] = array[i];
array[i] = temp;
}
return *array;
}
static PyObject* selectSort(PyObject *self, PyObject *args)
{
PyObject* list;
int *array, N;
if (!PyArg_ParseTuple(args, "O", &list))
return NULL;
N = PyObject_Length(list);
if (N < 0)
return NULL;
array = (int *) malloc(sizeof(int *) * N);
if (array == NULL)
return NULL;
for (int index = 0; index < N; index++) {
PyObject *item;
item = PyList_GetItem(list, index);
if (!PyFloat_Check(item))
array[index] = 0;
array[index] = PyFloat_AsDouble(item);
}
return Py_BuildValue("i", selectionSort(array, N));
}
static PyMethodDef myMethods[] = {
{ "selectSort", selectSort, METH_VARARGS, "..." },
{ NULL, NULL, 0, NULL }
};
static struct PyModuleDef myModule = {
PyModuleDef_HEAD_INIT,
"myModule",
"Test Module",
-1,
myMethods
};
PyMODINIT_FUNC PyInit_myModule(void)
{
return PyModule_Create(&myModule);
}
Here is the command line I executed:
>>> import myModule
>>> unsortedList = [1, 4, 3, 90, 22, 34, 32]
>>> sortedList = myModule.selectSort(unsortedList)
>>> print(sortedList)
1
Anyone has any ideas on this? I would really appreciate it! Thanks!
Just change return type from int to int* and return with return array, not return *array.

Using swig for python list input and output

I am using SWIG to build a Python module for some functions' evaluation based on their C code.
The main function I need is defined as follow:
void eval(double *x, int nx, int mx, double *f, int func_id)
And the aimed python function should be:
value_list = module.eval(point_matrix, func_id)
Here, eval will call a benchmark function and return their values. func_id is the id of function eval going to call, nx is the dimension of the function, and mx is the number of points which will be evaluated.
Actually, I did not clearly understand how SWIG pass values between typemaps (like, temp$argnum, why always using $argnum?). But by looking into the wrap code, I finished the typemap.i file:
%module cec17
%{
#include "cec17.h"
%}
%typemap(in) (double *x, int nx, int mx) (int count){
if (PyList_Check($input)) {
$3 = PyList_Size($input);
$2 = PyList_Size(PyList_GetItem($input, 0));
count = $3;
int i = 0, j = 0;
$1 = (double *) malloc($2*$3*sizeof(double));
for (i = 0; i < $3; i++){
for (j = 0; j < $2; j++){
PyObject *o = PyList_GetItem(PyList_GetItem($input, i), j);
if (PyFloat_Check(o))
$1[i*$2+j] = PyFloat_AsDouble(o);
else {
PyErr_SetString(PyExc_TypeError, "list must contrain strings");
free($1);
return NULL;
}
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
%typemap(freearg) double *x {
free((void *) $1);
}
%typemap(in, numinputs=0) double *f (double temp) {
$1 = &temp;
}
%typemap(argout) double *f {
int i = 0;
int s = count1;
printf("pass arg %d", s);
$result = PyList_New(0);
for (i = 0; i < s; i++){
PyList_Append($result, PyFloat_FromDouble($1[i]));
}
}
void eval(double *x, int nx, int mx, double *f, int func_num);
However, strange things happened then. Usually, I test 30 dimensional functions. When evaluating less than 10 points (mx < 10), the module works fine. When evaluating more points, an error occurs:
[1] 13616 segmentation fault (core dumped) python test.py
I'm quite sure the problem is not in the c code, because the only place where 'mx' occurs is in the 'for-loop' line in which are evaluations of each point.
I also tried to read the wrap code and debug, but I just can't find where the problem is. Following is a part of the wrap code generated by SWIG, and I added a 'printf' line. Even this string is not printed before the error.
#ifdef __cplusplus
extern "C" {
#endif
SWIGINTERN PyObject *_wrap_eval(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
PyObject *resultobj = 0;
double *arg1 = (double *) 0 ;
int arg2 ;
int arg3 ;
double *arg4 = (double *) 0 ;
int arg5 ;
int count1 ;
double temp4 ;
int val5 ;
int ecode5 = 0 ;
PyObject * obj0 = 0 ;
PyObject * obj1 = 0 ;
printf("check point 0");
{
arg4 = &temp4;
}
if (!PyArg_ParseTuple(args,(char *)"OO:eval",&obj0,&obj1)) SWIG_fail;
{
if (PyList_Check(obj0)) {
arg3 = PyList_Size(obj0);
arg2 = PyList_Size(PyList_GetItem(obj0, 0));
count1 = arg3;
int i = 0, j = 0;
arg1 = (double *) malloc(arg2*arg3*sizeof(double));
for (i = 0; i < arg3; i++){
for (j = 0; j < arg2; j++){
PyObject *o = PyList_GetItem(PyList_GetItem(obj0, i), j);
if (PyFloat_Check(o))
arg1[i*arg2+j] = PyFloat_AsDouble(o);
else {
PyErr_SetString(PyExc_TypeError, "list must contrain strings");
free(arg1);
return NULL;
}
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
ecode5 = SWIG_AsVal_int(obj1, &val5);
if (!SWIG_IsOK(ecode5)) {
SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "eval" "', argument " "5"" of type '" "int""'");
}
arg5 = (int)(val5);
eval(arg1,arg2,arg3,arg4,arg5);
resultobj = SWIG_Py_Void();
{
int i = 0;
int s = count1;
resultobj = PyList_New(0);
for (i = 0; i < s; i++){
PyList_Append(resultobj, PyFloat_FromDouble(arg4[i]));
}
}
return resultobj;
fail:
return NULL;
}
The problem seems a little tedious. Maybe you could just show me how to write the proper typemap.i code.
I'm not sure what your evaluation function is supposed to do, so I took a guess and implemented a wrapper for it. I took value_list = module.eval(point_matrix, func_id) to mean you want to return a list of result of evaluating some function against each row of data points, and came up with the following. Things I changed:
The typemaps replace the first four parameters with a Python list of lists of numbers.
space for the results in f was malloced.
To accept other numeric types except float, PyFloat_AsDouble was called on each value, and PyErr_Occurred was called to see if it failed to convert.
The freearg typemap now frees both allocations.
The argout typemap now handles the f output parameter correctly.
I added a sample eval implementation.
%module cec17
%typemap(in) (double *x, int nx, int mx, double* f) %{
if (PyList_Check($input)) {
$3 = PyList_Size($input);
$2 = PyList_Size(PyList_GetItem($input, 0));
$1 = malloc($2 * $3 * sizeof(double));
$4 = malloc($3 * sizeof(double));
for (int i = 0; i < $3; i++) {
for (int j = 0; j < $2; j++) {
PyObject *o = PyList_GetItem(PyList_GetItem($input, i), j);
double tmp = PyFloat_AsDouble(o);
if(PyErr_Occurred())
SWIG_fail;
$1[i * $2 + j] = PyFloat_AsDouble(o);
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
%}
%typemap(freearg) (double *x, int nx, int mx, double* f) %{
free($1);
free($4);
%}
%typemap(argout) (double *x, int nx, int mx, double* f) (PyObject* tmp) %{
tmp = PyList_New($3);
for (int i = 0; i < $3; i++) {
PyList_SET_ITEM(tmp, i, PyFloat_FromDouble($4[i]));
}
$result = SWIG_Python_AppendOutput($result, tmp);
%}
%inline %{
void eval(double *x, int nx, int mx, double *f, int func_num)
{
for(int i = 0; i < mx; ++i) {
f[i] = 0.0;
for(int j = 0; j < nx; ++j)
f[i] += x[i*nx+j];
}
}
%}
Output:
>>> import cec17
>>> cec17.eval([[1,2,3],[4,5,6]],99)
[6.0, 15.0]
Error checking could be improved. For example, checking for sequences instead of lists. Only the outer list is checked that it actually is a list, so if [1,2,3] was the first parameter instead of nested lists, it won't behave properly. There is no check that all the sublists are the same size, either.
Hope this helps. Let me know if anything is unclear.

Python ctypes : OSError undefined symbol when loading library

In Ubuntu 14.04, I wrote a C file called hash.c:
/* hash.c: hash table with linear probing */
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
and compiled it with
gcc -shared hash.c -o test.so -fPIC
Afterwards, I tried to load test.so in a Python script (for testing), but I got the following error: "OSError: .../test.so: undefined symbol: hash_fn"
hash_fn is a function pointer in the hashtable struct. It is referenced a number of times by functions later in the file.
I do not understand why this error is happening. I have Googled but all other cases either concern C++ or includes. In my case I just have 1 C file that includes only stdio and stdlib.
here is the FULL code.
When I comment out all but hash_create and print_info, it loads succesfully. When I uncomment find(), it the error happens.
(print_info is just for testing that ctypes works)
/* hash.c: hash table with linear probing */
#include <stdio.h>
#include <stdlib.h>
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
static void close_gap(hashtable *ht, int i);
static int find(hashtable *ht, void *key);
hashtable* hash_create(int len, int (*hash_fn)(void*), int (*key_cmp)(void*, void*))
{
hashtable* ht = (hashtable*) malloc(sizeof(hashtable));
ht->len = len;
ht->table = calloc(len, sizeof(ht_entry));
ht->hash_fn = hash_fn;
ht->key_cmp = key_cmp;
ht->table[0].key = 2;
ht->table[0].value = 3;
return ht;
}
void print_info(hashtable *ht)
{
printf("%d, %d, %d\n", ht->len, ht->table[0].key, ht->table[0].value);
}
void* hash_retrieve(hashtable* ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return NULL;
}
return ht->table[i].value;
}
void hash_insert(hashtable* ht, void *key, void *value)
{
if(ht->num_entries == ht->len) {
return;
}
int i = hash_fn(key) % ht->len;
while(ht->table[i].key != NULL) {
i = (i + i) % ht->len;
}
ht->table[i].key = key;
ht->table[i].value = value;
}
void hash_remove(hashtable *ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return;
}
ht->table[i].key = 0;
ht->table[i].value = 0;
close_gap(ht, i);
}
static int find(hashtable *ht, void *key)
{
int i = hash_fn(key) % ht->len;
int num_checked = 0;
while(ht->table[i].key && num_checked != ht->len) {
if(!ht->key_cmp(ht->table[i].key, key)) {
return i;
}
num_checked++;
i = (i + i) % ht->len;
}
return -1;
}
static void close_gap(hashtable *ht, int i)
{
int j = (i + 1) % ht->len;
while(ht->table[j].key) {
int loc = ht->hash_fn(ht->table[j].key);
if((j > i && (loc <= i || loc > j)) || (j < i && (loc <= i && loc > j))) {
ht->table[i] = ht->table[j];
ht->table[j].key = 0;
ht->table[j].value = 0;
close_gap(ht, j);
return;
}
}
}
When I use your compilation line I get five warnings. There are several problems here. First you are trying to assign an int to void * in several places. That raises a warning, and it would crash at runtime because you are passing 2 and 3 as addresses.
Second, you are calling hash_fn in a couple of places instead of ht->hash_fn. That causes the linker error, but you should consider my other changes, otherwise it will crash at runtime with a SIGSEGV:
/* hash.c: hash table with linear probing */
#include <stdio.h>
#include <stdlib.h>
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
static void close_gap(hashtable *ht, int i);
static int find(hashtable *ht, void *key);
hashtable* hash_create(int len, int (*hash_fn)(void*), int (*key_cmp)(void*, void*))
{
hashtable* ht = (hashtable*) malloc(sizeof(hashtable));
ht->len = len;
ht->table = calloc(len, sizeof(ht_entry));
ht->hash_fn = hash_fn;
ht->key_cmp = key_cmp;
// <<< Code changed here
/*
ht->table[0].key = 2;
ht->table[0].value = 3;
*/
{
int *p = malloc(sizeof(int));
*p = 2;
ht->table[0].key = p;
p = malloc(sizeof(int));
*p = 3;
ht->table[0].value = p;
}
// end of code change
return ht;
}
void print_info(hashtable *ht)
{
// <<<< Code changed
printf("%d, %d, %d\n", ht->len,
*(int *)ht->table[0].key, *(int *)ht->table[0].value);
}
void* hash_retrieve(hashtable* ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return NULL;
}
return ht->table[i].value;
}
void hash_insert(hashtable* ht, void *key, void *value)
{
if(ht->num_entries == ht->len) {
return;
}
// <<< Code changed
int i = ht->hash_fn(key) % ht->len;
while(ht->table[i].key != NULL) {
i = (i + i) % ht->len;
}
ht->table[i].key = key;
ht->table[i].value = value;
}
void hash_remove(hashtable *ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return;
ht->table[i].key = 0;
ht->table[i].value = 0;
close_gap(ht, i);
}
static int find(hashtable *ht, void *key)
{
// <<< Code changed
int i = ht->hash_fn(key) % ht->len;
int num_checked = 0;
while(ht->table[i].key && num_checked != ht->len) {
if(!ht->key_cmp(ht->table[i].key, key)) {
return i;
}
num_checked++;
i = (i + i) % ht->len;
}
return -1;
}
static void close_gap(hashtable *ht, int i)
{
int j = (i + 1) % ht->len;
while(ht->table[j].key) {
int loc = ht->hash_fn(ht->table[j].key);
if((j > i && (loc <= i || loc > j)) || (j < i && (loc <= i && loc > j))) {
ht->table[i] = ht->table[j];
ht->table[j].key = 0;
ht->table[j].value = 0;
close_gap(ht, j);
return;
}
}
}
I only coded around the errors and warnings, I did not check the logic. You will see that I have used malloc to allocate memory for key and value. Obviously you will need memory management on these two (i.e. free()).

Categories