Passing 2 dimensional C array to python numpy - python

I need some help regarding passing C array to python(numpy).
I have 2d array of doubles NumRows x NumInputs, it seems that PyArray_SimpleNewFromData does not convert it right way - it is hard to see because debugger does not show much, only pointers.
What would be the right way to pass 2 dimensional array ?
int NumRows = X_test.size();
int NumInputs = X_test_row.size();
double **X_test2 = new double*[NumRows];
for(int i = 0; i < NumRows; ++i)
{
X_test2[i] = new double[NumInputs];
}
for(int r = 0; r < NumRows; ++r)
{
for(int c = 0; c < NumInputs; ++c)
{
X_test2[r][c] = X_test[r][c];
}
}
const char *ScriptFName = "100-ABN-PREDICT";
char *FunctionName=NULL;
FunctionName="PredictGBC_DBG";
npy_intp Dims[2];
Dims[0]= NumRows;
Dims[1] = NumInputs;
PyObject *ArgsArray;
PyObject *pName, *pModule, *pDict, *pFunc, *pValue, *pArgs;
int row, col, rows, cols, size, type;
const double* outArray;
double ArrayItem;
//===================
Py_Initialize();
pName = PyBytes_FromString(ScriptFName);
pModule = PyImport_ImportModule(ScriptFName);
if (pModule != NULL)
{
import_array(); // Required for the C-API
ArgsArray = PyArray_SimpleNewFromData (2, Dims, NPY_DOUBLE, X_test2);//SOMETHING WRONG
pDict = PyModule_GetDict(pModule);
pArgs = PyTuple_New (1);
PyTuple_SetItem (pArgs, 0, ArgsArray);
pFunc = PyDict_GetItemString(pDict, FunctionName);
if (pFunc && PyCallable_Check(pFunc))
{
pValue = PyObject_CallObject(pFunc, pArgs);//CRASHING HERE
if (pValue != NULL)
{
rows = PyArray_DIM(pValue, 0);
cols = PyArray_DIM(pValue, 1);
size = PyArray_SIZE(pValue);
type = PyArray_TYPE(pValue);
// get direct access to the array data
//PyObject* m_obj;
outArray = static_cast<const double*>(PyArray_DATA(pValue));
for (row=0; row < rows; row++)
{
ArrayItem = outArray[row];
y_pred.push_back(ArrayItem);
}
}
else
{
y_pred.push_back(EMPTY_VAL);
}
}
else
{
PyErr_Print();
}//pFunc && PyCallable_Check(pFunc)
}//(pModule!=NULL
else
{
PyErr_SetString(PyExc_TypeError, "Cannot call function ?!");
PyErr_Print();
}
Py_DECREF(pValue);
Py_DECREF(pFunc);
Py_DECREF(ArgsArray);
Py_DECREF(pModule);
Py_DECREF(pName);
Py_Finalize ();

You'll have to copy your data to a contiguous block of memory. To represent a 2d array, numpy does not use an array of pointers to 1d arrays. Numpy expects the array to be stored in a contiguous block of memory, in (by default) row major order.
If you create your array using PyArray_SimpleNew(...), numpy allocates the memory for you. You have to copy X_test2 to this array, using, say, std::memcpy or std::copy in a loop over the rows.
That is, change this:
ArgsArray = PyArray_SimpleNewFromData (2, Dims, NPY_DOUBLE, X_test2);//SOMETHING WRONG
to something like this:
// PyArray_SimpleNew allocates the memory needed for the array.
ArgsArray = PyArray_SimpleNew(2, Dims, NPY_DOUBLE);
// The pointer to the array data is accessed using PyArray_DATA()
double *p = (double *) PyArray_DATA(ArgsArray);
// Copy the data from the "array of arrays" to the contiguous numpy array.
for (int k = 0; k < NumRows; ++k) {
memcpy(p, X_test2[k], sizeof(double) * NumInputs);
p += NumInputs;
}
(It looks like X_test2 is a copy of X_test, so you might want to modify the above code to copy directly from X_test to the numpy array.)

Related

Trying to use cython to pass a list from a python script to a C++ function, causes segmentation fault [duplicate]

I just want to create a simple C++ struct that has an int index and an int grayValue .The function is given the vector with the gray values. When I try to compile it I get a segmentation fault, does anyone know why? (didn't sort the vector) Thank you.
vector<gray> createStruct( vector<int> grayValues)
{
vector <gray> grayStruct;
for (int i = 0; i<grayValues.size();i++)
{
grayStruct[i].originalIndex= i;
grayStruct[i].grayValue= grayValues[i];
}
return grayStruct;
}
int main() {
vector <int> grayVals={411,21,78,23};
vector <gray> grayStruct=createStruct(grayVals);
// sort(grayStruct);
for (int i = 0; i < grayStruct.size(); i++)
{cout << grayStruct[i].originalIndex<<' '<<grayStruct[i].grayValue;
cout<<endl;
}
return 0;
}
It is because you are using elements of grayStruct, which actually doesn't exist, in the function createStruct.
You have to create elements before use or use push_back() to create elements.
Create elements via the constructor:
vector<gray> createStruct( vector<int> grayValues)
{
vector <gray> grayStruct(grayValues.size()); // add number of elements to create
for (int i = 0; i<grayValues.size();i++)
{
grayStruct[i].originalIndex= i;
grayStruct[i].grayValue= grayValues[i];
}
return grayStruct;
}
Create elements via resize():
vector<gray> createStruct( vector<int> grayValues)
{
vector <gray> grayStruct;
grayStruct.resize(grayValues.size()); // create elements
for (int i = 0; i<grayValues.size();i++)
{
grayStruct[i].originalIndex= i;
grayStruct[i].grayValue= grayValues[i];
}
return grayStruct;
}
Add elements one-by-one via push_back():
vector<gray> createStruct( vector<int> grayValues)
{
vector <gray> grayStruct;
for (int i = 0; i<grayValues.size();i++)
{
gray value;
value.originalIndex= i;
value.grayValue= grayValues[i];
grayStruct.push_back(value); // add an element
}
return grayStruct;
}

Writing my own ufunc; ufunc not supported for the input types

I'm trying to write a C function that coverts numpy string array to a float array. How can I receive numpy's array in C as const char *?
static void double_logitprod(char **args, npy_intp *dimensions,
npy_intp* steps, void* data)
{
npy_intp i;
npy_intp n = dimensions[0];
char *in1 = args[0], *in2 = args[1];
char *out = args[2];
npy_intp in1_step = steps[0];
npy_intp out_step = steps[2];
for (i = 0; i < n; i++) {
/*BEGIN main ufunc computation*/
char *tmp1 = *((char **) in1);
double tmp2 = *((double *)in2);
*((double *) out) = to_float(tmp1, tmp2);
/*END main ufunc computation*/
in1 += in1_step;
out += out_step;
}
}
/*This a pointer to the above function*/
PyUFuncGenericFunction funcs[1] = {&double_logitprod};
/* These are the input and return dtypes of logit.*/
static char types[3] = {NPY_STRING, NPY_DOUBLE,
NPY_DOUBLE};
How to accept a numpy string array in C? NPY_STRING or NPY_UNICODE gives error.
Numpy string array is like this:
x = np.array(['1.0', '2.0', 'N/A'])

C++ lib in Python: custom sorting method

I want to make a custom sorting method in C++ and import it in Python. I am not an expert in C++, here are implementation of "sort_counting"
#include <iostream>
#include <time.h>
using namespace std;
const int MAX = 30;
class cSort
{
public:
void sort( int* arr, int len )
{
int mi, mx, z = 0; findMinMax( arr, len, mi, mx );
int nlen = ( mx - mi ) + 1; int* temp = new int[nlen];
memset( temp, 0, nlen * sizeof( int ) );
for( int i = 0; i < len; i++ ) temp[arr[i] - mi]++;
for( int i = mi; i <= mx; i++ )
{
while( temp[i - mi] )
{
arr[z++] = i;
temp[i - mi]--;
}
}
delete [] temp;
}
private:
void findMinMax( int* arr, int len, int& mi, int& mx )
{
mi = INT_MAX; mx = 0;
for( int i = 0; i < len; i++ )
{
if( arr[i] > mx ) mx = arr[i];
if( arr[i] < mi ) mi = arr[i];
}
}
};
int main( int* arr )
{
cSort s;
s.sort( arr, 100 );
return *arr;
}
and then using it in python
from ctypes import cdll
lib = cdll.LoadLibrary('sort_counting.so')
result = lib.main([3,4,7,5,10,1])
compilation goes nice
How to rewrite a C++ method to receive an array and then return a sorted array?
The error is quite clear: ctypes doesn't know how to convert a python list into a int * to be passed to your function. In fact a python integer is not a simple int and a list is not just an array.
There are limitations on what ctypes can do. Converting a generic python list to an array of ints is not something that can be done automatically.
This is explained here:
None, integers, bytes objects and (unicode) strings are the only
native Python objects that can directly be used as parameters in these
function calls. None is passed as a C NULL pointer, bytes objects and
strings are passed as pointer to the memory block that contains their
data (char * or wchar_t *). Python integers are passed as the
platforms default C int type, their value is masked to fit into the C
type.
If you want to pass an integer array you should read about arrays. Instead of creating a list you have to create an array of ints using the ctypes data types and pass that in instead.
Note that you must do the conversion from python. It doesn't matter what C++ code you write. The alternative way is to use the Python C/API instead of ctypes to only write C code.
A simple example would be:
from ctypes import *
lib = cdll.LoadLibrary('sort_counting.so')
data = [3,4,7,5,10,1]
arr_type = c_int * len(data)
array = arr_type(*data)
result = lib.main(array)
data_sorted = list(result)

Swig and multidimensional arrays

I am using Swig to interface python with C code.
I want to call a C function that takes for argument a struct containing an int** var:
typedef struct
{
(...)
int** my2Darray;
} myStruct;
void myCFunction( myStruct struct );
I am struggling with multi dimensional arrays.
My code looks like this:
In the interface file, I am using carray like this:
%include carrays.i
%array_class( int, intArray );
%array_class( intArray, intArrayArray );
In python, I have:
myStruct = myModule.myStruct()
var = myModule.intArrayArray(28)
for j in range(28):
var1 = myModule.intArray(28)
for i in range(28):
var1[i] = (...) filling var1 (...)
var[j] = var1
myStruct.my2Darray = var
myCFonction( myStruct )
I get an error on the line myStruct.my2Darray = var:
TypeError: in method 'maStruct_monTableau2D_set', argument 2 of type 'int **'
I doubt about the line %array_class( intArray, intArrayArray ).
I tried using a typedef for int* to create my array like this:
%array_class( myTypeDef, intArrayArray );
But it didn't seem to work.
Do you know how to handle multidimensional arrays in Swig ?
Thanks for your help.
Have you considered using numpy for this? I have used numpy with my SWIG-wrapped C++ project for 1D, 2D, and 3D arrays of double and std::complex elements with a lot of success.
You would need to get numpy.i and install numpy in your python environment.
Here is an example of how you would structure it:
.i file:
// Numpy Related Includes:
%{
#define SWIG_FILE_WITH_INIT
%}
// numpy arrays
%include "numpy.i"
%init %{
import_array(); // This is essential. We will get a crash in Python without it.
%}
// These names must exactly match the function declaration.
%apply (int* INPLACE_ARRAY2, int DIM1, int DIM2) \
{(int* npyArray2D, int npyLength1D, int npyLength2D)}
%include "yourheader.h"
%clear (int* npyArray2D, int npyLength1D, int npyLength2D);
.h file:
/// Get the data in a 2D Array.
void arrayFunction(int* npyArray2D, int npyLength1D, int npyLength2D);
.cpp file:
void arrayFunction(int* npyArray2D, int npyLength1D, int npyLength2D)
{
for(int i = 0; i < npyLength1D; ++i)
{
for(int j = 0; j < npyLength2D; ++j)
{
int nIndexJ = i * npyLength2D + j;
// operate on array
npyArray2D[nIndexJ];
}
}
}
.py file:
def makeArray(rows, cols):
return numpy.array(numpy.zeros(shape=(rows, cols)), dtype=numpy.int)
arr2D = makeArray(28, 28)
myModule.arrayFunction(arr2D)
This is how I handled 2d arrays. The trick I used was to write some inline code to handle the creation and mutation of an array. Once that is done, I can use those functions to do my bidding.
Below is the sample code.
ddarray.i
%module ddarray
%inline %{
// Helper function to create a 2d array
int* *int_array(int rows, int cols) {
int i;
int **arr = (int **)malloc(rows * sizeof(int *));
for (i=0; i<rows; i++)
arr[i] = (int *)malloc(cols * sizeof(int));
return arr;
}
void *setitem(int **array, int row, int col, int value) {
array[row][col] = value;
}
%}
ddarray.c
int calculate(int **arr, int rows, int cols) {
int i, j, sum = 0, product;
for(i = 0; i < rows; i++) {
product = 1;
for(j = 0; j < cols; j++)
product *= arr[i][j];
sum += product;
}
return sum;
}
Sample Python script
import ddarray
a = ddarray.int_array(2, 3)
for i in xrange(2):
for j in xrange(3):
ddarray.setitem(a, i, j, i + 1)
print ddarray.calculate(a, 2, 3)

Returning numpy array from a C extension

For the sake of learning something new, I am currently trying to reimplement the numpy.mean() function in C. It should take a 3D array and return a 2D array with the mean of the elements along axis 0. I manage to calculate the mean of all values, but don't really know how I would return a new array to Python.
My code so far:
#include <Python.h>
#include <numpy/arrayobject.h>
// Actual magic here:
static PyObject*
myexts_std(PyObject *self, PyObject *args)
{
PyArrayObject *input=NULL;
int i, j, k, x, y, z, dims[2];
double out = 0.0;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &input))
return NULL;
x = input->dimensions[0];
y = input->dimensions[1];
z = input->dimensions[2];
for(k=0;k<z;k++){
for(j=0;j<y;j++){
for(i=0;i < x; i++){
out += *(double*)(input->data + i*input->strides[0]
+j*input->strides[1] + k*input->strides[2]);
}
}
}
out /= x*y*z;
return Py_BuildValue("f", out);
}
// Methods table - this defines the interface to python by mapping names to
// c-functions
static PyMethodDef myextsMethods[] = {
{"std", myexts_std, METH_VARARGS,
"Calculate the standard deviation pixelwise."},
{NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC initmyexts(void)
{
(void) Py_InitModule("myexts", myextsMethods);
import_array();
}
What I understand so far (and please correct me if I'm wrong) is that I need to create a new PyArrayObject, which will be my output (maybe with PyArray_FromDims ?). Then I need an array of adresses to the memory of this array and fill it with data. How would I go about this?
EDIT:
After doing some more reading on pointers (here: http://pw1.netcom.com/~tjensen/ptr/pointers.htm), I achieved what I was aiming at. Now another question arises: Where would I find the origingal implementation of numpy.mean()? I'd like to see how it is, that the python operation is so much faster than my version. I assume it avoids the ugly looping.
Here is my solution:
static PyObject*
myexts_std(PyObject *self, PyObject *args)
{
PyArrayObject *input=NULL, *output=NULL; // will be pointer to actual numpy array ?
int i, j, k, x, y, z, dims[2]; // array dimensions ?
double *out = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &input))
return NULL;
x = input->dimensions[0];
y = dims[0] = input->dimensions[1];
z = dims[1] = input->dimensions[2];
output = PyArray_FromDims(2, dims, PyArray_DOUBLE);
for(k=0;k<z;k++){
for(j=0;j<y;j++){
out = output->data + j*output->strides[0] + k*output->strides[1];
*out = 0;
for(i=0;i < x; i++){
*out += *(double*)(input->data + i*input->strides[0] +j*input->strides[1] + k*input->strides[2]);
}
*out /= x;
}
}
return PyArray_Return(output);
}
The Numpy API has a function PyArray_Mean that accomplishes what you're trying to do without the "ugly looping" ;).
static PyObject *func1(PyObject *self, PyObject *args) {
PyArrayObject *X, *meanX;
int axis;
PyArg_ParseTuple(args, "O!i", &PyArray_Type, &X, &axis);
meanX = (PyArrayObject *) PyArray_Mean(X, axis, NPY_DOUBLE, NULL);
return PyArray_Return(meanX);
}

Categories