Numpy Python/C API - PyArray_SimpleNewFromData hangs - python

I'm figuring out the Python/C API for a more complex task. Initially, I wrote a simple example of adding two ndarrays of shape = (2,3) and type = float32.
I am able to pass two numpy arrays into c functions, read their dimensions and data and perform custom addion on data. But when I try to wrap the resulting data using PyArray_SimpleNewFromData, code hangs (returns NULL?)
To replicate the issue, create three files: mymath.c, setup.py, test.py in a folder as follows and run test.py (it runs setup.py to compile and install the module and then runs a simple test).
I'm using python in windows, inside an anaconda environment. I'm new to the Python/C API. So, any help would be much appreciated.
​
// mymath.c
#include <Python.h>
#include <stdio.h>
#include "numpy/arrayobject.h"
#include "numpy/npy_math.h"
#include <math.h>
#include <omp.h>
/*
C functions
*/
float* arr_add(float* d1, float* d2, int M, int N){
float * result = (float *) malloc(sizeof(float)*M*N);
for (int m=0; m<M; m++)
for (int n=0; n<N; n++)
result [m*N+ n] = d1[m*N+ n] + d2[m*N+ n];
return result;
}
/*
Unwrap apply and wrap pyObjects
*/
void capsule_cleanup(PyObject *capsule) {
void *memory = PyCapsule_GetPointer(capsule, NULL);
free(memory);
}
// add two 2d arrays (float32)
static PyObject *arr_add_fn(PyObject *self, PyObject *args)
{
PyArrayObject *arr1, *arr2;
if (!PyArg_ParseTuple(args, "OO", &arr1, &arr2))
return NULL;
// get data as flat list
float *d1, *d2;
d1 = (float *) arr1->data;
d2 = (float *) arr2->data;
int M, N;
M = (int)arr1->dimensions[0];
N = (int)arr1->dimensions[1];
printf("Dimensions, %d, %d \n\n", M,N);
PyObject *result, *capsule;
npy_intp dim[2];
dim[0] = M;
dim[1] = N;
float * d3 = arr_add(d1, d2, M, N);
result = PyArray_SimpleNewFromData(2, dim, NPY_FLOAT, (void *)d3);
if (result == NULL)
return NULL;
// -----------This is not executed. code hangs--------------------
for (int m=0; m<M; m++)
for (int n=0; n<N; n++)
printf("%f \n", d3[m*N+n]);
capsule = PyCapsule_New(d3, NULL, capsule_cleanup);
PyArray_SetBaseObject((PyArrayObject *) result, capsule);
return result;
}
/*
Bundle functions into module
*/
static PyMethodDef MyMethods [] ={
{"arr_add", arr_add_fn, METH_VARARGS, "Array Add two numbers"},
{NULL,NULL,0,NULL}
};
/*
Create module
*/
static struct PyModuleDef mymathmodule = {
PyModuleDef_HEAD_INIT,
"mymath", "My doc of mymath", -1, MyMethods
};
PyMODINIT_FUNC PyInit_mymath(void){
return PyModule_Create(&mymathmodule);
}
​
# setup.py
from distutils.core import setup, Extension
import numpy
module1 = Extension('mymath',
sources = ['mymath.c'],
# define_macros = [('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')],
include_dirs=[numpy.get_include()],
extra_compile_args = ['-fopenmp'],
extra_link_args = ['-lgomp'])
setup (name = 'mymath',
version = '1.0',
description = 'My math',
ext_modules = [module1])
​
# test.py
import os
os.system("python .\setup.py install")
import numpy as np
import mymath
a = np.arange(6,dtype=np.float32).reshape(2,3)
b = np.arange(6,dtype=np.float32).reshape(2,3)
c = mymath.arr_add(a,b)
print(c)

Related

How does ctypes define the class for C structure with function pointer only and init the variable in python?

I'm working on the python with ctypes to call the c so file, but the c file define the structure with function pointer
// mem ==================================================================
typedef struct StdMemFunc
{
void* (*const malloc) (unsigned long size);
void (*const free) (void* ptr);
void* (*const realloc) (void* ptr, unsigned long size);
void* (*const calloc) (unsigned long count, unsigned long size);
void* (*const set) (void* ptr, int value, unsigned long num);
void* (*const copy) (void* dest, const void* src, unsigned long num);
}*StdMemFunc;
typedef struct StdLib
{
const uint32_t version;
bool (*const is_version_compatible) (uint32_t version, uint32_t func_mask);
void (*const delay) (int32_t milli_sec);
const StdMemFunc mem;
}*StdLib;
and mock the function in another file as below
void *std_malloc(unsigned long size)
{
return malloc(size);
}
void std_free(void *ptr)
{
free(ptr);
}
void *std_realloc(void *ptr, unsigned long size)
{
return realloc(ptr, size);
}
void *std_calloc(unsigned long count, unsigned long size)
{
return calloc(count, size);
}
void *std_memset(void *ptr, int value, unsigned long num)
{
return memset(ptr, value, num);
}
void *std_memcopy(void *dest, const void *src, unsigned long num)
{
return memcpy(dest, src, num);
}
struct StdMemFunc mem_func =
{
.malloc = std_malloc,
.free = std_free,
.realloc = std_realloc,
.calloc = std_calloc,
.set = std_memset,
.copy = std_memcopy
};
then the python need to call another method with std_lib as paramater, the std_lib with call mem->malloc() method in C part, so how to define the class in the python with ctypes?
I have tried the below one, but it was not work
class StdMemFunc(Structure):
_fields_ = [
("malloc", ctypes.CFUNCTYPE(c_void_p, c_ulong)),
("free", ctypes.CFUNCTYPE(None, c_void_p)),
("realloc", ctypes.CFUNCTYPE(c_void_p, c_void_p, c_ulong)),
("calloc", ctypes.CFUNCTYPE(c_void_p, c_ulong, c_ulong)),
("set", ctypes.CFUNCTYPE(c_void_p, c_void_p, c_int, c_ulong)),
("copy", ctypes.CFUNCTYPE(c_void_p, c_void_p, c_ulong))
]
class StdLib(Structure):
_fields_ = [
("version", c_uint32),
("is_version_compatible", c_bool),
("delay", c_void_p),
("mem", POINTER(StdMemFunc)),
]
libc_std_lib = CDLL('/home/linus/code/galileo/mock_std_lib.so')
std_lib = StdLib()
std_lib.mem.malloc = libc_std_lib.std_malloc
libc_modbus.modbus_create_server_station.argtypes = [POINTER(ModbusNodeDef), c_int, StdLib, PlcDrvAccessor]
libc_modbus.modbus_create_server_station.restype = POINTER(ModbusStation)
libc_modbus.modbus_create_server_station(node_def, node_num, std_lib, plc_drv_accessor)
It looks like there are two problems here:
The is_version_compatible and delay fields in the StdLib struct are functions, but you are defining them as constants.
You are not instantiating all the fields in the struct, meaning that the program might be trying to dereference a null pointer, as null pointers are the default value for pointer types.
The StdLib struct definition should look something like this:
class StdLib(Structure):
_fields_ = [
("version", c_uint32),
("is_version_compatible", CFUNCTYPE(POINTER(c_bool), c_uint32, _uint32)),
("delay", CFUNCTYPE(c_void_p, c_int32)),
("mem", POINTER(StdMemFunc)),
]
For the instantiation, I would do something like this:
libc_std_lib = CDLL('/home/linus/code/galileo/mock_std_lib.so')
std_mem_func = StdMemFunc(
libc_std_lib.std_malloc,
libc_std_lib.std_free,
libc_std_lib.std_realloc,
libc_std_lib.std_calloc,
libc_std_lib.std_set,
libc_std_lib.std_copy
)
std_lib = StdLib(
1,
reference_to_is_version_compatible_func,
reference_to_delay_func,
std_mem_func
)
Of course, you need to pass the correct params/function references to StdLib. Maybe you will need to mock the is_version_compatible and delay functions as well.
Disclaimer: this is entirely untested, so I don't guarantee it will work.
The OP's code isn't quite reproducible, but I was able to get the same error message on the following line:
std_lib.mem.malloc = libc_std_lib.std_malloc
If I am following correctly, the OP wants to initialize the C structure with functions that are provided in C, but libc.std_lib.std_malloc isn't wrapped properly to do that. It is a function that wraps a C function that is callable from Python, not C.
ctypes function prototypes can be instantiated a number of ways, and the one that works is:
prototype(func_spec[, paramflags])
Returns a foreign function exported by a shared library. func_spec must be a 2-tuple
(name_or_ordinal, library). The first item is the name of the exported
function as string, or the ordinal of the exported function as small
integer. The second item is the shared library instance.
For example:
std_lib.mem.malloc = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_ulong)(('std_malloc',libc_std_lib))
Here's a working set of files:
test.cpp
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <stdio.h>
#ifdef _WIN32
# define API __declspec(dllexport)
#else
# define API
#endif
extern "C" {
typedef struct StdMemFunc {
void* (*const malloc)(unsigned long size);
void (*const free)(void* ptr);
void* (*const realloc)(void* ptr, unsigned long size);
void* (*const calloc)(unsigned long count, unsigned long size);
void* (*const set)(void* ptr, int value, unsigned long num);
void* (*const copy)(void* dest, const void* src, unsigned long num);
} *StdMemFuncPtr;
typedef struct StdLib {
const uint32_t version;
bool (*const is_version_compatible)(uint32_t version, uint32_t func_mask);
void (*const delay)(int32_t milli_sec);
const StdMemFunc mem;
} *StdLibPtr;
API void* std_malloc(unsigned long size) {
return malloc(size);
}
API void std_free(void* ptr) {
free(ptr);
}
API void* std_realloc(void* ptr, unsigned long size) {
return realloc(ptr, size);
}
API void* std_calloc(unsigned long count, unsigned long size) {
return calloc(count, size);
}
API void* std_memset(void* ptr, int value, unsigned long num) {
return memset(ptr, value, num);
}
API void* std_memcopy(void* dest, const void* src, unsigned long num) {
return memcpy(dest, src, num);
}
// A couple of test functions that accepts the initialized structure
// and calls sum of the function pointers.
API char* testit(StdLib* test) {
// This is how I debugged this, by comparing the *actual*
// function pointer value to the one received from Python.
// Once they matched the code worked.
printf("%p %p\n", std_malloc, test->mem.malloc);
char* p = static_cast<char*>(test->mem.malloc(10));
test->mem.set(p, 'A', 9);
p[9] = 0;
return p;
}
API void freeit(StdLib* test, char* p) {
test->mem.free(p);
}
}
test.py
import ctypes as ct
# prototypes
MALLOC = ct.CFUNCTYPE(ct.c_void_p,ct.c_ulong)
FREE = ct.CFUNCTYPE(None,ct.c_void_p)
REALLOC = ct.CFUNCTYPE(ct.c_void_p, ct.c_void_p, ct.c_ulong)
CALLOC = ct.CFUNCTYPE(ct.c_void_p, ct.c_ulong, ct.c_ulong)
SET = ct.CFUNCTYPE(ct.c_void_p,ct.c_void_p,ct.c_int,ct.c_ulong)
COPY = ct.CFUNCTYPE(ct.c_void_p, ct.c_void_p, ct.c_ulong)
class StdMemFunc(ct.Structure):
_fields_ = [("malloc", MALLOC),
("free", FREE),
("realloc", REALLOC),
("calloc", CALLOC),
("set", SET),
("copy", COPY)]
class StdLib(ct.Structure):
_fields_ = [("version", ct.c_uint32),
# Note these two fields were function pointers as well.
# Declared correctly now.
("is_version_compatible", ct.CFUNCTYPE(ct.c_bool, ct.c_uint32, ct.c_uint32)),
("delay", ct.CFUNCTYPE(None, ct.c_int32)),
("mem", StdMemFunc)]
dll = ct.CDLL('./test')
dll.testit.argtypes = ct.POINTER(StdLib),
dll.testit.restype = ct.POINTER(ct.c_char)
dll.freeit.argtypes = ct.POINTER(StdLib), ct.c_char_p
dll.freeit.restype = None
lib = StdLib()
lib.mem.malloc = MALLOC(('std_malloc', dll))
lib.mem.realloc = REALLOC(('std_realloc', dll))
lib.mem.calloc = CALLOC(('std_calloc', dll))
lib.mem.free = FREE(('std_free', dll))
lib.mem.set = SET(('std_memset', dll))
lib.mem.copy = COPY(('std_memcopy', dll))
p = dll.testit(lib)
# One way to access the data in the returned pointer is to slice it to the known length
print(p[:10])
# If known to be null-terminated, can also cast to c_char_p, which expects
# null-terminated data, and extract the value.
print(ct.cast(p,ct.c_char_p).value)
dll.freeit(lib,p)
Output:
b'AAAAAAAAA\x00'
b'AAAAAAAAA'

Python Method Call in a C++ Code through Python C-API

I am working on my Project which implies the use of Empirical Mode Decomposition in C++ for EEG Signals. The input Data is Eigen::MatrixXd, where the rows are the Channels and the columns are the samples.
I did not found a good C++ library for EMD so I want to use a Python one (dsatools). I have downloaded the package through Pip installer from the setup.py file on Xubuntu... so it's a system package now.
the problem is that the program can't read the module.
this is the code:
std::vector <Eigen::MatrixXd> DataAquisition::EMD (Eigen::MatrixXd array, int order, int iterations, int locality) {
std::vector <Eigen::MatrixXd> IMFs;
for (int i = 0; i < array.rows(); i++) {
Eigen::MatrixXd Kanals = array.row(i);
Eigen::MatrixXd IMFs_Cpp;
Py_Initialize();
//PyRun_SimpleString("from dsatools._base._imf_decomposition import * ");
PyObject* sys_path = PySys_GetObject("path");
PyObject* ProgrammName = PyUnicode_FromString("/home/user/Schreibtisch/mne-cpp-main/applications/mne_bci/MNE-BCI-QT/dsatools-master/dsatools/_base/_imf_decomposition/_emd.py");
PyList_Append(sys_path, ProgrammName);
PyObject* pModuleString = PyUnicode_FromString ((char*)"_emd.py");
PyObject* pModule = PyImport_Import(pModuleString);
PyObject* pFunction = PyObject_GetAttrString(pModule,(char*)"emd");
//PyObject* pDict = PyModule_GetDict(pModule);
//PyObject* pFunc = PyDict_GetItemString(pDict, (char*)"emd");
if (PyCallable_Check(pFunction))
{
PyObject* Signal = Py_BuildValue("(d)",(double*)Kanals.data());
PyObject* Order = Py_BuildValue("(i)",order);
PyObject* method = Py_BuildValue("(z)",(char*)"cubic");
PyObject* max_itter = Py_BuildValue("(i)",iterations);
PyObject* args = PyTuple_Pack(4,Signal,Order,method,max_itter);
PyErr_Print();
PyObject* IMFs_Py = PyObject_CallObject(pFunction,args);
PyErr_Print();
if (PyArray_Check(IMFs_Py))
std::cout << "EMD Output is NOT Array \n";
PyArrayObject *np_ret = reinterpret_cast <PyArrayObject*> (IMFs_Py);
int Rows = PyArray_SHAPE(np_ret)[0];
int Cols = PyArray_SHAPE(np_ret)[1];
double* c_out = reinterpret_cast<double*>(PyArray_DATA(np_ret));
Eigen::MatrixXd IMFs_Cpp = Eigen::Map <Eigen::MatrixXd> (c_out,Rows,Cols);
IMFs.push_back(IMFs_Cpp);
}
else
std::cout << "Python did not call the function \n";
Py_Finalize();
}
return IMFs;}
this is how the code in Python should look like and I just want to call the emd function:

Writing to hdf5-file in C++ results in data being truncated at some point

Consider the following code:
#include <H5Cpp.h>
#include <vector>
#include <eigen3/Eigen/Dense>
#include <iostream>
double* matrix_to_array(Eigen::MatrixXd const &input){
int const NX = input.rows();
int const NY = input.cols();
double *data = new double[NX*NY];
for(std::size_t i=0; i<NX; i++){
for(std::size_t j=0; j<NY; j++){
data[j+i*NX] = input(i,j);
}
}
return data;
}
int main() {
Eigen::MatrixXd data = Eigen::MatrixXd::Random(124, 4654);
data.fill(3);
H5::H5File file("data.hdf5", H5F_ACC_TRUNC);
hsize_t dimsf[2] = {data.rows(), data.cols()};
H5::DataSpace dataspace(2, dimsf);
H5::DataSet dataset = file.createDataSet("test_data_set",
H5::PredType::NATIVE_DOUBLE,
dataspace);
auto data_arr = matrix_to_array(data);
dataset.write(data_arr, H5::PredType::NATIVE_DOUBLE);
delete[] data_arr;
}
It compiles just fine using the following CMakeLists.txt
cmake_minimum_required(VERSION 2.8)
project(test)
find_package(HDF5 REQUIRED COMPONENTS C CXX)
include_directories(${HDF5_INCLUDE_DIRS})
add_executable(hdf5 hdf5.cpp)
target_link_libraries(hdf5 ${HDF5_HL_LIBRARIES} ${HDF5_CXX_LIBRARIES} ${HDF5_LIBRARIES})
After executing I thought everything was fine, but upon running the following python code (which bscly. just prints the data row by row)
import h5py
import numpy as np
hf = h5py.File("build/data.hdf5", "r")
keys = list(hf.keys())
data_set = hf.get(keys[0])
data_set_np = np.array(data_set)
for row in data_set_np:
print(row)
I realized that the first 18000 or so entries of the matrix were properly written to the hdf5-file, while the rest was set to zero for some reason. I checked data and data_arr in the above C++ code, and all the entries of both matrices are set to 0, so the error must happen somewhere in the writing process to the hdf5-file... The issue is, I don't see where. What exactly am I missing?
After some trying out and consulting the examples of the H5 group, I got it to work.
#include <iostream>
#include <string>
#include "H5Cpp.h"
#include <eigen3/Eigen/Dense>
using namespace H5;
int main (void){
const H5std_string FILE_NAME( "data.h5" );
const H5std_string DATASET_NAME( "DOUBLEArray" );
const int NX = 123; // dataset dimensions
const int NY = 4563;
const int RANK = 2;
Eigen::MatrixXd data = Eigen::MatrixXd::Random(NX, NY);
int i, j;
double data_arr[NX][NY]; // buffer for data to write
for (j = 0; j < NX; j++)
{
for (i = 0; i < NY; i++)
data_arr[j][i] = data(j,i);
}
H5File file( FILE_NAME, H5F_ACC_TRUNC );
hsize_t dimsf[2]; // dataset dimensions
dimsf[0] = NX;
dimsf[1] = NY;
DataSpace dataspace( RANK, dimsf );
/*
* Define datatype for the data in the file.
* We will store little endian DOUBLE numbers.
*/
FloatType datatype( PredType::NATIVE_DOUBLE );
datatype.setOrder( H5T_ORDER_LE );
DataSet dataset = file.createDataSet( DATASET_NAME, datatype, dataspace );
dataset.write( data_arr, PredType::NATIVE_DOUBLE );
}
As far as I can tell the only thing that changes is that we specify the order of elements here explicitly, i.e.
FloatType datatype( PredType::NATIVE_DOUBLE );
datatype.setOrder( H5T_ORDER_LE );
while in the question we just pass PredType::NATIVE_DOUBLE as argument. I can't really comment on why or if this solves the problem...

Using CUDA types in pyCUDA

Let us consider the CUDA code at CUDA's Mersenne Twister for an arbitrary number of threads and suppose that I want to convert it to a pyCUDA application.
I know that I can use ctypes and CDLL, namely,
cudart = CDLL("/usr/local/cuda/lib64/libcudart.so")
to use the cudart routines.
However, I would also need to allocate, for example, a curandStateMtgp32 array whose definition is in curand_mtgp32.h, or else call
curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, devKernelParams);
and use mtgp32dc_params_fast_11213 whose definition is in curand_mtgp32_host.h.
How to deal with CUDA type definitions and values in pyCUDA?
I solved the problem with reference to device side APIs as follows:
I created a .dll containing two functions: MTGP32Setup() to setup the Mersenne Twister Generator and MTGP32Generation() to generate the random numbers;
I called the above functions using ctypes.
Source code for the .dll
// --- Generate random numbers with cuRAND's Mersenne Twister
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <cuda.h>
#include <curand_kernel.h>
/* include MTGP host helper functions */
#include <curand_mtgp32_host.h>
#define BLOCKSIZE 256
#define GRIDSIZE 64
curandStateMtgp32 *devMTGPStates;
/********************/
/* CUDA ERROR CHECK */
/********************/
// --- Credit to http://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) { exit(code); }
}
}
void gpuErrchk(cudaError_t ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*************************/
/* CURAND ERROR CHECKING */
/*************************/
static const char *_curandGetErrorEnum(curandStatus_t error)
{
switch (error)
{
case CURAND_STATUS_SUCCESS:
return "CURAND_SUCCESS";
case CURAND_STATUS_VERSION_MISMATCH:
return "CURAND_STATUS_VERSION_MISMATCH";
case CURAND_STATUS_NOT_INITIALIZED:
return "CURAND_STATUS_NOT_INITIALIZED";
case CURAND_STATUS_ALLOCATION_FAILED:
return "CURAND_STATUS_ALLOCATION_FAILED";
case CURAND_STATUS_TYPE_ERROR:
return "CURAND_STATUS_TYPE_ERROR";
case CURAND_STATUS_OUT_OF_RANGE:
return "CURAND_STATUS_OUT_OF_RANGE";
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
case CURAND_STATUS_LAUNCH_FAILURE:
return "CURAND_STATUS_LAUNCH_FAILURE";
case CURAND_STATUS_PREEXISTING_FAILURE:
return "CURAND_STATUS_PREEXISTING_FAILURE";
case CURAND_STATUS_INITIALIZATION_FAILED:
return "CURAND_STATUS_INITIALIZATION_FAILED";
case CURAND_STATUS_ARCH_MISMATCH:
return "CURAND_STATUS_ARCH_MISMATCH";
case CURAND_STATUS_INTERNAL_ERROR:
return "CURAND_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
inline void __curandSafeCall(curandStatus_t err, const char *file, const int line)
{
if (CURAND_STATUS_SUCCESS != err) {
fprintf(stderr, "CURAND error in file '%s', line %d, error: %s \nterminating!\n", __FILE__, __LINE__, \
_curandGetErrorEnum(err)); \
assert(0); \
}
}
void curandSafeCall(curandStatus_t err) { __curandSafeCall(err, __FILE__, __LINE__); }
/*******************/
/* iDivUp FUNCTION */
/*******************/
__host__ __device__ int iDivUp(int a, int b) { return ((a % b) != 0) ? (a / b + 1) : (a / b); }
/*********************/
/* GENERATION KERNEL */
/*********************/
__global__ void generate_kernel(curandStateMtgp32 * __restrict__ state, float * __restrict__ result, const int N)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
for (int k = tid; k < N; k += blockDim.x * gridDim.x)
result[k] = curand_uniform(&state[blockIdx.x]);
}
extern "C" {
/**************************/
/* MERSENNE TWISTER SETUP */
/**************************/
__declspec(dllexport)
void MTGP32Setup() {
// --- Setup the pseudorandom number generator
gpuErrchk(cudaMalloc(&devMTGPStates, GRIDSIZE * sizeof(curandStateMtgp32)));
mtgp32_kernel_params *devKernelParams; gpuErrchk(cudaMalloc(&devKernelParams, sizeof(mtgp32_kernel_params)));
curandSafeCall(curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, devKernelParams));
curandSafeCall(curandMakeMTGP32KernelState(devMTGPStates, mtgp32dc_params_fast_11213, devKernelParams, GRIDSIZE, time(NULL)));
}
/*******************************/
/* MERSENNE TWISTER GENERATION */
/*******************************/
__declspec(dllexport)
void MTGP32Generation(float * __restrict__ devResults, const int N) {
// --- Generate pseudo-random sequence and copy to the host
generate_kernel << <GRIDSIZE, BLOCKSIZE >> > (devMTGPStates, devResults, N);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
}
} //
Source code for the PyCUDA caller
import os
import sys
import numpy as np
import ctypes
from ctypes import *
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
lib = cdll.LoadLibrary('D:\\Project\\cuRAND\\mersenneTwisterDLL\\x64\\Release\\mersenneTwisterDLL.dll')
N = 10
d_x = gpuarray.zeros((N, 1), dtype = np.float32)
lib.MTGP32Setup()
lib.MTGP32Generation(ctypes.cast(d_x.ptr, POINTER(c_float)), N)
print(d_x)
Host side APIs can be dealt with in a way similar to Calling host functions in PyCUDA.

What methods can I use to return a struct to a Python Ctypes call to the function in a shared object?

I have the following C file that I am compiling to a shared object. I then load the .so shared object via ctypes in python. I can call the function from ctypes, and the function prints the correct temp and humidity, however I can't seem to get the struct back from the main code. How can I get the struct back from the C function and how can I retrieve the fields from it within python.
#!/bin/python
from ctypes import *
class HMTEMP(Structure):
_fields_ = [ ("temp", c_double) , ("humidity", c_double) ]
dhtlib = 'libdht4py.so'
hlibc = CDLL(dhtlib)
HMTEMP = hlibc.readDHT()
print HMTEMP.temp
#define BCM2708_PERI_BASE 0x20000000
#define GPIO_BASE (BCM2708_PERI_BASE + 0x200000) /* GPIO controller */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <bcm2835.h>
#include <unistd.h>
#define MAXTIMINGS 100
struct DHStruct {
double temp;
double humidity;
} ;
struct DHStruct readDHT();
int bits[250], data[100];
int bitidx = 0;
struct DHStruct readDHT() {
bcm2835_init() ;
int type = 11 ;
int pin = 4 ;
struct DHStruct dhts;
int counter = 0;
int laststate = HIGH;
int j=0;
// Set GPIO pin to output
bcm2835_gpio_fsel(pin, BCM2835_GPIO_FSEL_OUTP);
bcm2835_gpio_write(pin, HIGH);
usleep(500000); // 500 ms
bcm2835_gpio_write(pin, LOW);
usleep(20000);
bcm2835_gpio_fsel(pin, BCM2835_GPIO_FSEL_INPT);
data[0] = data[1] = data[2] = data[3] = data[4] = 0;
// wait for pin to drop?
while (bcm2835_gpio_lev(pin) == 1) {
usleep(1);
} //while
// read data!
for (int i=0; i< MAXTIMINGS; i++) {
counter = 0;
while ( bcm2835_gpio_lev(pin) == laststate) {
counter++;
//nanosleep(1); // overclocking might change this?
if (counter == 1000)
break;
}//while
laststate = bcm2835_gpio_lev(pin);
if (counter == 1000) break;
bits[bitidx++] = counter;
if ((i>3) && (i%2 == 0)) {
// shove each bit into the storage bytes
data[j/8] <<= 1;
if (counter > 200)
data[j/8] |= 1;
j++;
}//if
} //for
dhts.temp = data[2] ;
dhts.humidity = data[0] ;
printf("Temp = %5.2f *C, Hum = %5.2f \%\n", dhts.temp , dhts.humidity );
return dhts;
}//function
Ok I got it - and using ctypes was very fast. The python code:
#!/bin/python
from ctypes import *
# define the struct and it's fields
class DHStruct(Structure):
_fields_ = [("temp",c_double),("humidity",c_double)]
#reference the library
dhtlib = CDLL("libdht4py.so")
# set the return type as the object above
dhtlib.readDHT.restype = POINTER(DHStruct)
# dereference the pointer using ctype's -contents and access the struct fields.
print ( dhtlib.readDHT().contents.temp , dhtlib.readDHT().contents.humidity )
The C code : the key was to convert the function to return a pointer.
#define BCM2708_PERI_BASE 0x20000000
#define GPIO_BASE (BCM2708_PERI_BASE + 0x200000) /* GPIO controller */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <bcm2835.h>
#include <unistd.h>
#define MAXTIMINGS 100
//define the struct
struct DHStruct {
double temp;
double humidity;
} ;
struct DHStruct *readDHT(); // define the function prototype to return the pointer
int bits[250], data[100];
int bitidx = 0;
//make sure to return a POINTER!!
struct DHStruct *readDHT() {
bcm2835_init() ;
int type = 11 ;
int pin = 4 ;
struct DHStruct *dhts; // here is the key - define the pointer to the struct
int counter = 0;
int laststate = HIGH;
int j=0;
// Set GPIO pin to output
bcm2835_gpio_fsel(pin, BCM2835_GPIO_FSEL_OUTP);
bcm2835_gpio_write(pin, HIGH);
usleep(500000); // 500 ms
bcm2835_gpio_write(pin, LOW);
usleep(20000);
bcm2835_gpio_fsel(pin, BCM2835_GPIO_FSEL_INPT);
data[0] = data[1] = data[2] = data[3] = data[4] = 0;
// wait for pin to drop?
while (bcm2835_gpio_lev(pin) == 1) {
usleep(1);
} //while
// read data!
for (int i=0; i< MAXTIMINGS; i++) {
counter = 0;
while ( bcm2835_gpio_lev(pin) == laststate) {
counter++;
//nanosleep(1); // overclocking might change this?
if (counter == 1000)
break;
}//while
laststate = bcm2835_gpio_lev(pin);
if (counter == 1000) break;
bits[bitidx++] = counter;
if ((i>3) && (i%2 == 0)) {
// shove each bit into the storage bytes
data[j/8] <<= 1;
if (counter > 200)
data[j/8] |= 1;
j++;
}//if
} //for
dhts->temp = data[2] ;
dhts->humidity = data[0] ;
//for debug printf("Temp = %5.2f *C, Hum = %5.2f \%\n", dhts->temp , dhts->humidity );
return dhts;
}//function
To combine C/C++ and Python I would recommend to use Cython.
With Cython you are able to pass objects (eg. numpy arrays) to C/C++, fill it with your data and get it back to your python-code.
Here is a minmal example:
The C-skript: (c_example.c)
#include <stdlib.h>
#include <math.h>
void c_claculate(double *x, int N) {
int i;
for (i = 0; i<N;i++) {
x[i]+=i*i;
}
}
The python-skript: (example.py)
from numpy import *
from example import *
data=zeros(10)
calculate(data)
print data
The .pyx file: (example.pyx)
import cython
import numpy
cimport numpy
# declare the interface to the C code
cdef extern void c_claculate(double *x, int N)
# Cython interface to C function
def calculate(numpy.ndarray[double, ndim=1, mode='c'] x not None):
cdef int N = x.shape[0]
c_claculate(&x[0],N)
return x
and the setup file: (setup.py)
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy
setup(
cmdclass = {'build_ext': build_ext},
ext_modules = [
Extension("example",
sources=["example.pyx", "c_example.c"],
include_dirs=[numpy.get_include()]
)
],
)
Now you can compile the skript by running
python setup.py build_ext -fi
and then execute the python skript.
Cython should be available via pip on your PI.

Categories