Access a Numpy Recarray via the C-API

If we have a Numpy recarray:
x = np.array([(1.,2.)], dtype=np.dtype([('a','<f8'),('b','<f8')]))
We can access its fields in Python as:
x['a'] or x['b']
But if this array is passed to a C program as a PyArrayObject, how do we access its fields? I realize we can get the dtype and its fields dict in C via:
PyArray_Descr *dtype = PyArray_DTYPE(arr);
PyObject *fields = dtype->fields;
but how can this be used to access the data at x['a']?

I'll try to answer my own question.
It appears that you can use the function PyObject_GetItem() to access fields in your Numpy recarray. To test this I created a simple recarray with three fields:
np.dtype([('field1', '<f8', (1,2)), ('field2', '<f8', (2,2)), ('field3', '<f8', (3,1))])
I send this array to my C++ function and execute two loops: one loop over each field and a nested loop over the array elements in that field (e.g. x['field1'], x['field2'], x['field3']). In the outer loop I use PyObject_GetItem() to access each field. The code is as follows:
C++ Code
#include "Python.h"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "arrayobject.h"
#include <cmath>
#include <iostream>
#include <iomanip>
using namespace std;
static PyObject *readarray(PyObject *self, PyObject *args) {
    PyArrayObject *arr, *x2;
    PyArray_Descr *dtype;
    PyObject *names, *name, *x1 = NULL;
    Py_ssize_t N, i;
    NpyIter *iter;
    NpyIter_IterNextFunc *iternext;
    double **dataptr;
    npy_intp index;

    if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &arr)) {
        return NULL;
    }
    dtype = PyArray_DTYPE(arr);
    names = dtype->names;
    if (names != NULL) {
        names = PySequence_Fast(names, NULL);
        N = PySequence_Fast_GET_SIZE(names);
        for (i = 0; i < N; i++) {
            name = PySequence_Fast_GET_ITEM(names, i);      // borrowed reference
            cout << setw(7) << left << PyString_AsString(name);
            x1 = PyObject_GetItem((PyObject *) arr, name);  // new reference: the field sub-array
            x2 = (PyArrayObject *) x1;
            dtype = PyArray_DTYPE(x2);
            iter = NpyIter_New(x2, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_SAME_KIND_CASTING, dtype);
            if (iter == NULL) {
                Py_DECREF(x1);
                Py_DECREF(names);
                return NULL;
            }
            dataptr = (double **) NpyIter_GetDataPtrArray(iter);
            iternext = NpyIter_GetIterNext(iter, NULL);
            do {
                index = NpyIter_GetIterIndex(iter);
                if (index == 0) {
                    cout << setw(6) << right << index << setw(9)
                         << setiosflags(ios::fixed) << setprecision(4) << **dataptr << endl;
                } else {
                    cout << "       " << setw(6) << right << index << setw(9)
                         << setiosflags(ios::fixed) << setprecision(4) << **dataptr << endl;
                }
            } while (iternext(iter));
            NpyIter_Deallocate(iter);   // free the iterator for this field before moving on
            Py_DECREF(x1);              // release the field sub-array
        }
        Py_DECREF(names);               // release the sequence created by PySequence_Fast
    }
    return Py_BuildValue("i", 0);
}
static PyMethodDef pyproj4methods[] = {
    {"readarray", readarray, METH_VARARGS, "Documentation"},
    {NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC initpyproj4(void) {
    Py_InitModule("pyproj4", pyproj4methods);
    import_array();
}
Python Code
import numpy as np
import pyproj4 as p4
np.random.seed(22)
## Python Implementation ##
dt = np.dtype([('field1', '<f8', (1,2)), ('field2', '<f8', (2,2)), ('field3', '<f8', (3,1))])
x = np.zeros(2, dtype=dt)
for name in x.dtype.names:
    m, n, p = x[name].shape
    x[name] = np.random.randn(m, n, p)
    it = np.nditer(x[name], ['c_index'], ['readonly'])
    for num in it:
        if it.index == 0:
            print '{0:6s} {1:6d} {2: 2.4f}'.format(name, it.index, num.item())
        else:
            print '{0:6s} {1:6d} {2: 2.4f}'.format(' ', it.index, num.item())
    print '-----------------------'
## C-API Implementation ##
p4.readarray(x)
The output in both cases looks like:
field1 0 -0.0919
1 -1.4634
2 1.0818
3 -0.2393
field2 0 -0.4911
1 -1.0023
2 0.9188
3 -1.1036
4 0.6265
5 -0.5615
6 0.0289
7 -0.2308
field3 0 0.5878
1 0.7523
2 -1.0585
3 1.0560
4 0.7478
5 1.0647
If you know a better way to accomplish this, please don't hesitate to post your solution.
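For completeness, the field data can also be reached without building a sub-array per field, by reading the byte offsets stored in dtype->fields, which is a dict mapping each field name to a tuple of (field dtype, byte offset). The following is only a hedged sketch of that idea for a 1-D record array with a scalar '<f8' field named 'a', written against the same Python 2-era API as the module above; it is not tested code:
static void print_field_a(PyArrayObject *arr) {
    PyArray_Descr *dtype = PyArray_DTYPE(arr);
    PyObject *item = PyDict_GetItemString(dtype->fields, "a");  // borrowed: (field dtype, offset)
    long offset = PyInt_AsLong(PyTuple_GetItem(item, 1));       // byte offset of 'a' within each record
    char *base = PyArray_BYTES(arr);                            // start of the record data
    npy_intp stride = PyArray_STRIDE(arr, 0);                   // bytes from one record to the next
    for (npy_intp i = 0; i < PyArray_DIM(arr, 0); i++) {
        double value = *(double *)(base + i * stride + offset);
        cout << value << endl;
    }
}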

Related

Cannot assign contents of list attribute in Pybind11 defined class

I have a sparse matrix implementation in C++, and I used pybind11 to expose it to python. Here is the problem:
>>> D1 = phc.SparseMatrix(3, [[0],[1],[2]])
>>> D1.cData
[[0], [1], [2]]
>>> D1.cData[1] = [1,2]
>>> D1.cData
[[0], [1], [2]] #Should be [[0], [1,2], [2]]
In python, I cannot change the contents of the SparseMatrix.cData attribute with the assignment operator. I can change the entire list with D1.cData = [[1],[2],[3]]. This behavior is bewildering to me. D1.cData is just a list, so I would expect that the above code would work.
I suspect it has something to do with my pybind11 code since this behavior is not present in python-defined custom classes. But I have no idea what is wrong (I am a novice programmer). Here is the source code info:
Python Bindings
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace py = pybind11;
#include <SparseMatrix.h>
namespace phc = ph_computation;
using SparseMatrix = phc::SparseMatrix;
using Column = phc::Column;
using CData = phc::CData;
PYBIND11_MODULE(ph_computations, m)
{
m.doc() = "ph_computations python bindings";
using namespace pybind11::literals;
m.def("add_cols", &phc::add_cols);//begin SparseMatrix.h
py::class_<SparseMatrix>(m, "SparseMatrix")
.def(py::init<size_t, CData>())
.def(py::init<std::string>())
.def_readwrite("n_rows", &SparseMatrix::n_rows)
.def_readwrite("n_cols", &SparseMatrix::n_cols)
.def_readwrite("cData", &SparseMatrix::cData)
.def("__neq__", &SparseMatrix::operator!=)
.def("__eq__", &SparseMatrix::operator==)
.def("__add__", &SparseMatrix::operator+)
.def("__mul__", &SparseMatrix::operator*)
.def("transpose", &SparseMatrix::transpose)
.def("__str__", &SparseMatrix::print)
.def("save", &SparseMatrix::save)
;
m.def("identity", &phc::make_identity);
m.def("matching_pivots", &phc::matching_pivots);//end SparseMatrix.h
}
SparseMatrix.h
#pragma once
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <vector>
#include <stdexcept>
namespace ph_computation{
using Int = int;
using Column = std::vector<Int>;//a Column is represented by a vector of indices
using CData = std::vector<Column>;//a matrix is represented by a vector of Columns
//Add columns in Z2
Column add_cols(const Column& c1, const Column& c2);
struct SparseMatrix
{
size_t n_rows{0};
size_t n_cols{0};
CData cData;
SparseMatrix()=default;
SparseMatrix(size_t n_rows_, CData cData_):
n_rows(n_rows_), n_cols(cData_.size()), cData(cData_){}
SparseMatrix(std::string path);
bool operator!=(const SparseMatrix &other) const;
bool operator==(const SparseMatrix &other) const;
SparseMatrix operator+(const SparseMatrix &other) const;
SparseMatrix operator*(const SparseMatrix &other) const;
void transpose();
void print() const;
void save(std::string path);
};
SparseMatrix make_identity(size_t n_cols_);
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b);
}
SparseMatrix.cpp (you probably don't need this)
#include <SparseMatrix.h>
namespace ph_computation {
Column add_cols(const Column& c1, const Column& c2){
Column c3;
int idx1{0};
int idx2{0};
while(idx1 < c1.size() && idx2 < c2.size()){
if(c1[idx1] < c2[idx2]){
c3.push_back(c1[idx1]);
++idx1;
}
else if(c1[idx1] > c2[idx2]){
c3.push_back(c2[idx2]);
++idx2;
}
else {
++idx1;
++idx2;
}
}
if (idx1 < c1.size()){
c3.insert(c3.end(), std::next(c1.begin(), idx1), c1.end());
}
else if (idx2 < c2.size()){
c3.insert(c3.end(), std::next(c2.begin(), idx2), c2.end());
}
return c3;
}
SparseMatrix make_identity(size_t n_cols_){
CData cData_(n_cols_);
for (int j = 0; j < n_cols_; ++j){
cData_[j] = {j};
}
return SparseMatrix(n_cols_, cData_);
}
SparseMatrix::SparseMatrix(std::string path){
std::fstream f_in;
f_in.open(path, std::ios::in);
if(f_in.is_open()){
std::string n_rows_line;
std::getline(f_in, n_rows_line); //first line of file contains number of rows
n_rows = std::stoi(n_rows_line);
std::string n_cols_line;
std::getline(f_in, n_cols_line); //second line of file contains number of cols
n_cols = std::stoi(n_cols_line);
CData cData_(n_cols);
cData = cData_;
std::string line;
int j = 0;
int nnz, data;
while (std::getline(f_in, line)){
std::stringstream line_str = std::stringstream(line);
while (line_str >> nnz){
Column col_j(nnz);
for (int i =0; i < nnz; ++i){
line_str >> data;
col_j[i] = data;
}
cData[j] = col_j;
}
++j;
}
f_in.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool SparseMatrix::operator!=(const SparseMatrix &other) const{
if (n_rows != other.n_rows || cData != other.cData){
return true;
}
return false;
}
bool SparseMatrix::operator==(const SparseMatrix &other) const{
return !(*this != other);
}
SparseMatrix SparseMatrix::operator+(const SparseMatrix &other) const{
if (n_rows != other.n_rows || n_cols != other.n_cols){
throw std::invalid_argument("Matrices must have same dimension to add.");
}
CData ans_cData;
for (int j = 0; j < n_cols; ++j){
ans_cData.push_back(add_cols(cData[j], other.cData[j]));
}
return SparseMatrix(n_rows, ans_cData);
}
SparseMatrix SparseMatrix::operator*(const SparseMatrix &other) const{
if(n_cols != other.n_rows){
throw std::invalid_argument("Matrices must have compatible dimensions.");
}
size_t ans_rows = n_rows;
CData ans_cData(other.n_cols);
SparseMatrix ans(ans_rows, ans_cData);
for(int j =0; j<ans.n_cols; ++j){
for(int idx : other.cData[j]){
ans.cData[j] = add_cols(ans.cData[j], cData[idx]);
}
}
return ans;
}
void SparseMatrix::transpose(){
CData cData_T(n_rows);
for(int j =0; j<n_cols; ++j){
if(!cData[j].empty()){
for(int x: cData[j]){
cData_T[x].push_back(j);
}
}
}
cData = cData_T;
n_rows = n_cols;
n_cols = cData.size();
}
void SparseMatrix::print() const{
for (int i = 0; i < n_rows; ++i){
for (int j = 0; j < n_cols; ++j){
if (cData[j].empty())
{std::cout << " 0";}
else if (std::binary_search(cData[j].begin(), cData[j].end(), i))//Assumes row indices
{std::cout << " 1";} //are ordered
else
{std::cout << " 0";}
if (n_cols-1 == j)
{std::cout << " \n";}
}
}
}
void SparseMatrix::save(std::string path){
std::fstream f_out;
f_out.open(path, std::ios::out);
if(f_out.is_open()){
f_out << n_rows << "\n";
f_out << n_cols << "\n";
for(int j = 0; j < n_cols; ++j){
int col_j_sz = cData[j].size();
f_out << col_j_sz;
for(int i = 0; i < col_j_sz; ++i){
f_out << " " << cData[j][i];
}
f_out << "\n";
}
f_out.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b){
if(a.n_rows != b.n_rows || a.n_cols != b.n_cols){
throw std::invalid_argument("Input matrices must have the same size.");
}
for (int j = 0; j<a.n_cols; ++j){
bool a_j_empty = a.cData[j].empty();
bool b_j_empty = b.cData[j].empty();
if (a_j_empty != b_j_empty){
return false;
}
else if (!a_j_empty){
if(a.cData[j].back() != b.cData[j].back()){
return false;
}
}
}
return true;
}
} // namespace ph_computation
I found the answer in the pybind11 documentation here: https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types
Apparently, STL container data members can be overwritten in their entirety in Python, but modifying them in place through list methods does not work. The reason is that with pybind11/stl.h every access to D1.cData converts the C++ vector into a brand-new Python list, so D1.cData[1] = [1,2] only mutates that temporary copy; assigning a whole list goes through the property setter and is converted back into the C++ vector, which is why full replacement works. The page above describes how to declare the container as an opaque type so that it is exposed by reference instead.
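For reference, here is a minimal sketch of what the opaque-type approach from that page could look like for this class (assuming the same CData alias; this is untested and not the original binding code). Making CData opaque and binding it with py::bind_vector exposes cData by reference, so item assignment reaches the underlying C++ vector:
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>        // still used to convert Column (std::vector<int>) from Python lists
#include <pybind11/stl_bind.h>
namespace py = pybind11;
#include <SparseMatrix.h>
using CData = ph_computation::CData;

PYBIND11_MAKE_OPAQUE(CData);     // disable the automatic copy-to-list conversion for CData

PYBIND11_MODULE(ph_computations, m)
{
    py::bind_vector<CData>(m, "CData");   // expose CData as a mutable sequence type
    // ... the SparseMatrix bindings from above stay the same;
    // .def_readwrite("cData", &SparseMatrix::cData) now returns a reference
    // to the C++ vector instead of a copied Python list.
}
Note that with CData opaque, a call like SparseMatrix(3, [[0],[1],[2]]) would need an explicit ph_computations.CData([[0],[1],[2]]) (or a py::implicitly_convertible declaration), since the automatic list conversion no longer applies to that parameter.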

LLDB customize print of template class

I use LLDB as my debugger, and want it to print my template class MyArray<N> in a customized format.
I read the LLDB documentation and came up with a Python script that can get the public and private data members of MyArray<N>. However, I don't know how to get N (the template parameter), nor do I know how to get the result returned by MyArray<N>::size().
Here is the code
#include <stdio.h>
#include <iostream>
template<int N>
class MyArray
{
public:
    MyArray() { data = new int[N]; }
    ~MyArray() { if (data) delete[] data; }
    int size() const { return N; }
    int& operator[](size_t i) { return data[i]; }
    int const& operator[](size_t i) const { return data[i]; }
private:
    int* data = nullptr;
};

template<int N>
std::ostream& operator <<(std::ostream& os, const MyArray<N>& arr)
{
    os << "N = " << arr.size() << std::endl;
    os << "elements in array:" << std::endl;
    for (int i = 0; i < arr.size(); i++) {
        if (i > 0) os << ", ";
        os << arr[i];
    }
    return os << std::endl;
}

int main()
{
    MyArray<10> arr;
    for (int i = 0; i < arr.size(); i++)
        arr[i] = 10 + i;
    std::cout << arr << std::endl; // Yeah, I can use this for printing, but I want it during an LLDB debug session
    return 0;
}
//// Update: Add corresponding lldb config
~/.lldbinit:
command script import ~/.lldbcfg/print_my_array.py
~/.lldbcfg/print_my_array.py:
def print_my_array(valobj, internal_dict):
    #N = valobj.GetChildMemberWithName("size") # failed
    N = 10
    data = valobj.GetChildMemberWithName("data")
    info = '('
    for i in range(N):
        if i > 0:
            info += ', '
        info += str(data.GetChildAtIndex(i).GetValueAsSigned(0))
    info += ')'
    return info

def __lldb_init_module(debugger, internal_dict):
    debugger.HandleCommand('type summary add -P MyArray<10> -F ' + __name__ + '.print_my_array')
The simple way would be to store the value of N as a static member:
template<int N>
class MyArray
{
public:
    static constexpr int n = N;
};
Supposing MyArray is not your type, you can infer the template argument via a trait:
template <typename T>
struct get_value;

template <int N>
struct get_value<MyArray<N>> {
    static constexpr int n = N;
};
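A quick usage sketch for the trait (assuming the member is written as static constexpr int n = N, as above):
MyArray<10> arr;
static_assert(get_value<decltype(arr)>::n == 10, "template parameter recovered at compile time");
int runtime_n = get_value<MyArray<10>>::n;   // usable wherever the concrete type is visible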

Reading variable length array in HDF5 C++

I am trying to read an HDF5 file containing variable-length vectors of doubles in C++. I used the following code to create the HDF5 file; it contains one dataset called "test" with 100 rows of varying lengths. I had to make a couple of changes to the code in the link, so for convenience here is the exact code I used to write the data to HDF5:
#include <iostream>
#include <string>
#include <H5Cpp.h>
#include <vector>
#include <random>
const hsize_t n_dims = 1;
const hsize_t n_rows = 100;
const std::string dataset_name = "test";
int main () {
H5::H5File file("vlen_cpp.hdf5", H5F_ACC_TRUNC);
H5::DataSpace dataspace(n_dims, &n_rows);
// target dtype for the file
auto item_type = H5::PredType::NATIVE_DOUBLE;
auto file_type = H5::VarLenType(&item_type);
// dtype of the generated data
auto mem_type = H5::VarLenType(&item_type);
H5::DataSet dataset = file.createDataSet(dataset_name, file_type, dataspace);
std::vector<std::vector<double>> data;
data.reserve(n_rows);
// this structure stores length of each varlen row and a pointer to
// the actual data
std::vector<hvl_t> varlen_spec(n_rows);
std::mt19937 gen;
std::normal_distribution<double> normal(0.0, 1.0);
std::poisson_distribution<hsize_t> poisson(20);
for (hsize_t idx=0; idx < n_rows; idx++) {
data.emplace_back();
hsize_t size = poisson(gen);
data.at(idx).reserve(size);
varlen_spec.at(idx).len = size;
varlen_spec.at(idx).p = (void*) &data.at(idx).front();
for (hsize_t i = 0; i < size; i++) {
data.at(idx).push_back(normal(gen));
}
}
dataset.write(&varlen_spec.front(), mem_type);
return 0;
}
I am very new to C++ and my issue is trying to read the data back out of this file in C++. I tried to mimic what I would do in Python, but didn't have any luck. In Python, I would do this:
import h5py
import numpy as np
data = h5py.File("vlen_cpp.hdf5", "r")
i = 0 # This is the row I would want to read
arr = data["test"][i] # <-- This is the simplest way.
# Now trying to mimic something closer to C++
did = data["test"].id
dataspace = did.get_space()
dataspace.select_hyperslab(start=(i, ), count=(1, ))
memspace = h5py.h5s.create_simple(dims_tpl=(1, ))
memspace.select_hyperslab(start=(0, ), count=(1, ))
arr = np.zeros((1, ), dtype=object)
did.read(memspace, dataspace, arr)
print(arr) # This gives back the correct data
The Python code seems to work fine, so I tried to mimic those steps in C++:
#include <H5Cpp.h>
#include <string>
#include <vector>
#include <stdio.h>
int main(int argc, char **argv) {
std::string filename = argv[1];
// memtype of the file
auto itemType = H5::PredType::NATIVE_DOUBLE;
auto memType = H5::VarLenType(&itemType);
// get dataspace
H5::H5File file(filename, H5F_ACC_RDONLY);
H5::DataSet dataset = file.openDataSet("test");
H5::DataSpace dataspace = dataset.getSpace();
// get the size of the dataset
hsize_t rank;
hsize_t dims[1];
rank = dataspace.getSimpleExtentDims(dims); // rank = 1
std::cout << "Data size: "<< dims[0] << std::endl; // this is the correct number of values
// create memspace
hsize_t memDims[1] = {1};
H5::DataSpace memspace(rank, memDims);
// container to store read data
std::vector<std::vector<double>> data;
// Select hyperslabs
hsize_t dataCount[1] = {1};
hsize_t dataOffset[1] = {0}; // this should be i
hsize_t memCount[1] = {1};
hsize_t memOffset[1] = {0};
dataspace.selectHyperslab(H5S_SELECT_SET, dataCount, dataOffset);
memspace.selectHyperslab(H5S_SELECT_SET, memCount, memOffset);
// vector to store read data
std::vector<double> temp;
temp.reserve(20);
dataset.read(temp.data(), memType, memspace, dataspace);
for (int i = 0; i < temp.size(); i++) {
std::cout << temp[i] << ", ";
}
std::cout << "\n";
return 0;
}
Nothing crashes when I run the C++ program, and the correct number of rows in the "test" dataset is printed (100), but the dataset.read() step isn't working: the first row isn't being read into the vector I want it read into (temp). I would greatly appreciate it if someone could let me know what I'm doing wrong. Thanks so much.
My goal is to eventually read all 100 rows of the dataset in a loop (placing each row of data into the std::vector temp) and store each one in the std::vector<std::vector<double>> called data. But for now I'm just trying to make sure I can even read the first row.
EDIT: link to hdf5 file
"test" dataset looks like this:
[ 0.16371168 -0.21425339 0.29859526 -0.82794418 0.01021543 1.05546644
-0.546841 1.17456768 0.66068215 -1.04944273 1.48596426 -0.62527598
-2.55912244 -0.82908105 -0.53978052 -0.88870719]
[ 0.33958656 -0.48258915 2.10885699 -0.12130623 -0.2873894 -0.37100313
-1.05934898 -2.3014427 1.45502412 -0.06152739 0.92532768 1.35432642
1.51560926 -0.24327452 1.00886476 0.19749707 0.43894484 0.4394992
-0.12814881]
[ 0.64574273 0.14938582 -0.10369248 1.53727461 0.62404949 1.07824824
1.17066933 1.17196281 -2.05005927 0.13639514 -1.45473056 -1.71462623
-1.11552074 -1.73985207 1.12422121 -1.58694009]
...
EDIT 2:
I've additionally tried, without any luck, to read the data into a plain array, an Armadillo vector, and an Eigen VectorXd. The program does not crash, but what is read into the containers is garbage:
#include <H5Cpp.h>
#include <string>
#include <vector>
#include <stdio.h>
#include <Eigen/Dense>
#include <Eigen/Core>
#include <armadillo>
int main(int argc, char **argv) {
std::string filename = argv[1];
// memtype of the file
auto itemType = H5::PredType::NATIVE_DOUBLE;
auto memType = H5::VarLenType(&itemType);
// get dataspace
H5::H5File file(filename, H5F_ACC_RDONLY);
H5::DataSet dataset = file.openDataSet("test");
H5::DataSpace dataspace = dataset.getSpace();
// get the size of the dataset
hsize_t rank;
hsize_t dims[1];
rank = dataspace.getSimpleExtentDims(dims); // rank = 1
std::cout << "Data size: "<< dims[0] << std::endl; // this is the correct number of values
std::cout << "Data rank: "<< rank << std::endl; // this is the correct rank
// create memspace
hsize_t memDims[1] = {1};
H5::DataSpace memspace(rank, memDims);
// Select hyperslabs
hsize_t dataCount[1] = {1};
hsize_t dataOffset[1] = {0}; // this would be i if reading in a loop
hsize_t memCount[1] = {1};
hsize_t memOffset[1] = {0};
dataspace.selectHyperslab(H5S_SELECT_SET, dataCount, dataOffset);
memspace.selectHyperslab(H5S_SELECT_SET, memCount, memOffset);
// Create storage to hold read data
int i;
int NX = 20;
double data_out[NX];
for (i = 0; i < NX; i++)
data_out[i] = 0;
arma::vec temp(20);
Eigen::VectorXd temp2(20);
// Read data into data_out (array)
dataset.read(data_out, memType, memspace, dataspace);
std::cout << "data_out: " << "\n";
for (i = 0; i < NX; i++)
std::cout << data_out[i] << " ";
std::cout << std::endl;
// Read data into temp (arma vec)
dataset.read(temp.memptr(), memType, memspace, dataspace);
std::cout << "arma vec: " << "\n";
std::cout << temp << std::endl;
// Read data into temp (eigen vec)
dataset.read(temp2.data(), memType, memspace, dataspace);
std::cout << "eigen vec: " << "\n";
std::cout << temp2 << std::endl;
return 0;
}
(ONE) SOLUTION:
After struggling with this a lot, I was able to get a solution working, though admittedly I'm too new to C++ to really understand why this works and why the previous attempts didn't:
#include <H5Cpp.h>
#include <string>
#include <vector>
#include <stdio.h>
int main(int argc, char **argv) {
std::string filename = argv[1];
// Set memtype of the file
auto itemType = H5::PredType::NATIVE_DOUBLE;
auto memType = H5::VarLenType(&itemType);
// Get dataspace
H5::H5File file(filename, H5F_ACC_RDONLY);
H5::DataSet dataset = file.openDataSet("test");
H5::DataSpace dataspace = dataset.getSpace();
// Get the size of the dataset
hsize_t rank;
hsize_t dims[1];
rank = dataspace.getSimpleExtentDims(dims); // rank = 1
std::cout << "Data size: "<< dims[0] << std::endl; // this is the correct number of values
std::cout << "Data rank: "<< rank << std::endl; // this is the correct rank
// Create memspace
hsize_t memDims[1] = {1};
H5::DataSpace memspace(rank, memDims);
// Initialize hyperslabs
hsize_t dataCount[1];
hsize_t dataOffset[1];
hsize_t memCount[1];
hsize_t memOffset[1];
// Create storage to hold read data
hvl_t *rdata = new hvl_t[1];
std::vector<std::vector<double>> dataOut;
for (hsize_t i = 0; i < dims[0]; i++) {
// Select hyperslabs
dataCount[0] = 1;
dataOffset[0] = i;
memCount[0] = 1;
memOffset[0] = 0;
dataspace.selectHyperslab(H5S_SELECT_SET, dataCount, dataOffset);
memspace.selectHyperslab(H5S_SELECT_SET, memCount, memOffset);
// Read out the data
dataset.read(rdata, memType, memspace, dataspace);
double* ptr = (double*)rdata[0].p;
std::vector<double> thisRow;
for (int j = 0; j < rdata[0].len; j++) {
double* val = (double*)&ptr[j];
thisRow.push_back(*val);
}
dataOut.push_back(thisRow);
}
// Confirm data read out properly
for (int i = 0; i < dataOut.size(); i++) {
std::cout << "Row " << i << ":\n";
for (int j = 0; j < dataOut[i].size(); j++) {
std::cout << dataOut[i][j] << " ";
}
std::cout << "\n";
}
return 0;
}
If anyone knows a more efficient way that doesn't involve looping over the elements of each row (i.e. pulling out an entire row in one go), that would be really helpful, but for now this works fine for me.
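One small follow-up, sketched under the same dataset/memspace/dataspace setup as the loop above (untested against the posted file): since rdata[0].p already points at a contiguous run of rdata[0].len doubles, the whole row can be copied with the vector's range constructor, and the buffer that HDF5 allocated for the variable-length row can then be released with H5Dvlen_reclaim:
hvl_t rdata[1];
dataset.read(rdata, memType, memspace, dataspace);

double* ptr = static_cast<double*>(rdata[0].p);
std::vector<double> thisRow(ptr, ptr + rdata[0].len);   // copy the whole row in one go
dataOut.push_back(thisRow);

// Release the memory HDF5 allocated for this variable-length row.
H5Dvlen_reclaim(memType.getId(), memspace.getId(), H5P_DEFAULT, rdata);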

Python crashes on returning an C++ map value

I am writing a test to keep data in a C++ map, using the Python C-API.
Adding int values and getting the length of the map is no problem.
But when I get a value from the map and want to return it to Python, the interpreter crashes.
This is my code:
//some includes
int VerboseLog = 99;
typedef map<const char*, int> SessionKeeper;
static SessionKeeper openSessions;
static PyObject* getSession(PyObject* self, PyObject* args) {
cout << "get" << endl;
const char *key;
if (!PyArg_ParseTuple(args, "z*", &key)) {
PyErr_SetString(PyExc_Exception, "UUID konnte nicht ausgelesen werden");
PyErr_PrintEx(1);
return NULL;
}
int r = openSessions.at(key);
return Py_BuildValue("i", r);
}
static PyObject *addSession(PyObject* self, PyObject* args) {
cout << "Hello" << endl;
const char *key;
int toAppend;
if (!PyArg_ParseTuple(args, "si", &key, &toAppend)) {
PyErr_SetString(PyExc_Exception, "Ein Parameter konnte nicht ausgelesen werden");
PyErr_PrintEx(1);
return NULL;
}
openSessions[key] = toAppend;
cout << openSessions.size() << endl;
cout << openSessions[key] << endl;
Py_RETURN_NONE;
}
static PyObject* length(PyObject *self){
cout << "length: ";
int i = openSessions.size();
return Py_BuildValue("i",i);
}
static PyMethodDef SessionKeeper_methods[] = {
/*Note the third entry (METH_VARARGS). This is a flag telling the interpreter the calling convention
to be used for the C function. It should normally always be METH_VARARGS or METH_VARARGS | METH_KEYWORDS;
a value of 0 means that an obsolete variant of PyArg_ParseTuple() is used.*/
{ "length", (PyCFunction)length, METH_VARARGS, "return length of a Session" },
{ "addSession", (PyCFunction)addSession, METH_VARARGS, "add a Session." },
{ "getSession", (PyCFunction)getSession, METH_VARARGS, "get a Session" },
{ NULL }
};
static PyModuleDef sessionKeeperMod = {
PyModuleDef_HEAD_INIT,
u8"SessionKeeper",
NULL,
-1,
SessionKeeper_methods
};
static PyModuleDef CpluplusModules_ModDef = {
PyModuleDef_HEAD_INIT,
u8"CPlusPlusMouldes",
u8"Enthält alle Erweietrungen",
-1,
NULL, NULL, NULL, NULL, NULL
};
PyMODINIT_FUNC PyInit_CPlusPlusModules(void) {
PyObject* m, *sessionMod;
if (PyType_Ready(&TAModulType) < 0)
return NULL;
if (PyType_Ready(&DatabaseReader_Type) < 0)
return NULL;
if (!(sessionMod = PyModule_Create(&sessionKeeperMod))) {
cout << "fail" << endl;
return NULL;
}
m = PyModule_Create(&CpluplusModules_ModDef);
if (m == NULL)
return NULL;
Py_INCREF(&TAModulType);
Py_INCREF(&DatabaseReader_Type);
PyModule_AddObject(m, "TAModul", (PyObject *)&TAModulType);
PyModule_AddObject(m, "DBReader", (PyObject *)&DatabaseReader_Type);
PyModule_AddObject(m, "SessionKeeper", sessionMod);
return m;
}
The other modules (DBReader and TAModule) work fine. The goal will be to store Python objects (containing DBReader objects and TAModul objects) in the map.
But back to my question: why does getSession crash the interpreter on return? And why does the length function work fine?
map<const char*,...> will match based on the address of the string rather than its contents. Therefore it's pretty likely that at() will fail and throw a C++ out_of_range exception.
You should start by wrapping the line
int r = openSessions.at(key);
in a try/catch block, to stop the out_of_range exception propagating through the Python interpreter (which isn't C++, so can't handle it).
You should then change the map to match the key on the string contents. The easiest way would be to use map<std::string, int>.
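A minimal sketch of what that fix could look like (not the original module; it also assumes the plain "s" format is sufficient for the keys):
#include <map>
#include <string>

typedef std::map<std::string, int> SessionKeeper;   // keys compared by contents, not by pointer
static SessionKeeper openSessions;

static PyObject* getSession(PyObject* self, PyObject* args) {
    const char* key;
    if (!PyArg_ParseTuple(args, "s", &key)) {
        return NULL;
    }
    try {
        int r = openSessions.at(key);           // throws std::out_of_range if the key is missing
        return Py_BuildValue("i", r);
    } catch (const std::out_of_range&) {
        PyErr_SetString(PyExc_KeyError, key);   // report the missing session to Python instead of crashing
        return NULL;
    }
}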

Accessing view of a NumPy array using the C API

In a Python extension module I've written in C++, I use the following snippet of code to convert a NumPy array into an Armadillo array for use in the C++ portion of the code:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
The problem is that when I pass this a view of a NumPy array (i.e. my_array[:, 3]), it doesn't seem to handle the strides of the underlying C array correctly. Based on the output, it seems like the array pyarr received by the function is actually the full base array, and not the view (or at least when I access the data using PyArray_DATA, I seem to be getting a pointer to the full base array). If I instead pass this function a copy of the view (i.e. my_array[:, 3].copy()), it works as expected, but I don't want to have to remember to do that every time.
So, is there a way to make PyArray_FromArray copy only the slice of the matrix I want? I tried using the flag NPY_ARRAY_ENSURECOPY, but that didn't help.
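To make the stride issue concrete, here is a small hedged illustration (an editorial sketch, not part of the posted module) of how a 1-D view such as my_array[:, 3] looks from the C side; PyArray_DATA points into the parent's buffer and the element spacing comes from the view's strides, so treating the data as densely packed reads the wrong values:
char* data = static_cast<char*>(PyArray_DATA(pyarr));
npy_intp n = PyArray_DIM(pyarr, 0);
npy_intp stride = PyArray_STRIDE(pyarr, 0);   // bytes between consecutive elements of the view
for (npy_intp i = 0; i < n; ++i) {
    double v = *reinterpret_cast<double*>(data + i * stride);
    // v is element i of the view, even though the buffer belongs to the full base array
}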
Edit 1
As suggested in the comments, here is a full working example:
In file example.cpp:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C" {
#include <Python.h>
#include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
{
public:
WrongDimensions() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The dimensions were incorrect";
};
class NotImplemented : public std::exception
{
public:
NotImplemented() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "Not implemented";
};
class BadArrayLayout : public std::exception
{
public:
BadArrayLayout() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The matrix was not contiguous";
};
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++) {
result.push_back(dims[i]);
}
return result;
}
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1) {
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
}
else {
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
}
}
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1) {
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
}
else {
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
}
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = {nRows, nCols};
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nCols; j++) {
resultDataPtr[i * nCols + j] = matrix(i, j);
}
}
return result;
}
extern "C" {
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
{
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
return NULL;
}
PyObject* output = NULL;
try {
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
}
catch (const std::bad_alloc&) {
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
}
catch (const std::exception& err) {
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
}
return output;
}
static PyMethodDef example_methods[] =
{
{"test_function", example_testFunction, METH_VARARGS, "A test function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef example_module = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
};
PyMODINIT_FUNC
PyInit_example(void)
{
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
}
}
And setup.py for compiling:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
Now assume we have the example array
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
The function seems to work fine for multidimensional slices like a[:, :3], and it returns the matrix unaltered as I'd expect. But if I give it a single-dimensional slice, I get the wrong components unless I make a copy:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])
The view of the array is just another information wrapper around the same data array. NumPy doesn't copy any data here; only the information for interpreting the data is adjusted, and the pointer to the data is moved if useful.
In your code you're assuming that the data of a vector a[:, 3] is represented as a vector in memory, which wouldn't differ between NPY_ARRAY_CARRAY and NPY_ARRAY_FARRAY. But you only get this representation after creating a (Fortran-ordered) copy of the array itself.
To make it work I modified your convertPyArrayToArma() function a little to create a copy even if it's a vector:
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
const auto dims = getPyArrayDimensions(pyarr);
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
// this copies the data from cleanArr
arma::Mat<outT> result;
if (dims.size() == 1) {
result = arma::Col<outT>(dataPtr, dims[0], true);
}
else {
result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
}
Py_DECREF(cleanArr);
return result;
}
Just a short answer that might help you with this: PyArray_DATA is not aware of the state of the view.
Before using it, you can call PyArray_GETCONTIGUOUS on the array reference and then pass the returned reference to PyArray_DATA. This way, you get a copy of the NumPy object as a contiguous one.
=> void* pContiguousInput = PyArray_DATA(PyArray_GETCONTIGUOUS(pyInputData));
Hope it helps!
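One caveat worth adding to that one-liner (an editorial note, not from the original answer): PyArray_GETCONTIGUOUS returns a new reference, so keeping it in a variable allows the temporary contiguous copy to be released once the data has been used:
PyArrayObject* contig = PyArray_GETCONTIGUOUS(pyInputData);
if (contig == NULL) {
    return NULL;   // propagate the NumPy error
}
void* pContiguousInput = PyArray_DATA(contig);
/* ... use pContiguousInput ... */
Py_DECREF(contig);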
