Related
I have a sparse matrix implementation in C++, and I used pybind11 to expose it to python. Here is the problem:
>>> D1 = phc.SparseMatrix(3, [[0],[1],[2]])
>>> D1.cData
[[0], [1], [2]]
>>> D1.cData[1] = [1,2]
>>> D1.cData
[[0], [1], [2]] #Should be [[0], [1,2], [2]]
In python, I cannot change the contents of the SparseMatrix.cData attribute with the assignment operator. I can change the entire list with D1.cData = [[1],[2],[3]]. This behavior is bewildering to me. D1.cData is just a list, so I would expect that the above code would work.
I suspect it has something to do with my pybind11 code since this behavior is not present in python-defined custom classes. But I have no idea what is wrong (I am a novice programmer). Here is the source code info:
Python Bindings
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace py = pybind11;
#include <SparseMatrix.h>
namespace phc = ph_computation;
using SparseMatrix = phc::SparseMatrix;
using Column = phc::Column;
using CData = phc::CData;
PYBIND11_MODULE(ph_computations, m)
{
m.doc() = "ph_computations python bindings";
using namespace pybind11::literals;
m.def("add_cols", &phc::add_cols);//begin SparseMatrix.h
py::class_<SparseMatrix>(m, "SparseMatrix")
.def(py::init<size_t, CData>())
.def(py::init<std::string>())
.def_readwrite("n_rows", &SparseMatrix::n_rows)
.def_readwrite("n_cols", &SparseMatrix::n_cols)
.def_readwrite("cData", &SparseMatrix::cData)
.def("__neq__", &SparseMatrix::operator!=)
.def("__eq__", &SparseMatrix::operator==)
.def("__add__", &SparseMatrix::operator+)
.def("__mul__", &SparseMatrix::operator*)
.def("transpose", &SparseMatrix::transpose)
.def("__str__", &SparseMatrix::print)
.def("save", &SparseMatrix::save)
;
m.def("identity", &phc::make_identity);
m.def("matching_pivots", &phc::matching_pivots);//end SparseMatrix.h
}
SparseMatrix.h
#pragma once
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <vector>
#include <stdexcept>
namespace ph_computation{
using Int = int;
using Column = std::vector<Int>;//a Column is represented by a vector of indices
using CData = std::vector<Column>;//a matrix is represented by a vector of Columns
//Add columns in Z2
Column add_cols(const Column& c1, const Column& c2);
struct SparseMatrix
{
size_t n_rows{0};
size_t n_cols{0};
CData cData;
SparseMatrix()=default;
SparseMatrix(size_t n_rows_, CData cData_):
n_rows(n_rows_), n_cols(cData_.size()), cData(cData_){}
SparseMatrix(std::string path);
bool operator!=(const SparseMatrix &other) const;
bool operator==(const SparseMatrix &other) const;
SparseMatrix operator+(const SparseMatrix &other) const;
SparseMatrix operator*(const SparseMatrix &other) const;
void transpose();
void print() const;
void save(std::string path);
};
SparseMatrix make_identity(size_t n_cols_);
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b);
}
SparseMatrix.cpp (you probably don't need this)
#include <SparseMatrix.h>
namespace ph_computation {
Column add_cols(const Column& c1, const Column& c2){
Column c3;
int idx1{0};
int idx2{0};
while(idx1 < c1.size() && idx2 < c2.size()){
if(c1[idx1] < c2[idx2]){
c3.push_back(c1[idx1]);
++idx1;
}
else if(c1[idx1] > c2[idx2]){
c3.push_back(c2[idx2]);
++idx2;
}
else {
++idx1;
++idx2;
}
}
if (idx1 < c1.size()){
c3.insert(c3.end(), std::next(c1.begin(), idx1), c1.end());
}
else if (idx2 < c2.size()){
c3.insert(c3.end(), std::next(c2.begin(), idx2), c2.end());
}
return c3;
}
SparseMatrix make_identity(size_t n_cols_){
CData cData_(n_cols_);
for (int j = 0; j < n_cols_; ++j){
cData_[j] = {j};
}
return SparseMatrix(n_cols_, cData_);
}
SparseMatrix::SparseMatrix(std::string path){
std::fstream f_in;
f_in.open(path, std::ios::in);
if(f_in.is_open()){
std::string n_rows_line;
std::getline(f_in, n_rows_line); //first line of file contains number of rows
n_rows = std::stoi(n_rows_line);
std::string n_cols_line;
std::getline(f_in, n_cols_line); //second line of file contains number of cols
n_cols = std::stoi(n_cols_line);
CData cData_(n_cols);
cData = cData_;
std::string line;
int j = 0;
int nnz, data;
while (std::getline(f_in, line)){
std::stringstream line_str = std::stringstream(line);
while (line_str >> nnz){
Column col_j(nnz);
for (int i =0; i < nnz; ++i){
line_str >> data;
col_j[i] = data;
}
cData[j] = col_j;
}
++j;
}
f_in.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool SparseMatrix::operator!=(const SparseMatrix &other) const{
if (n_rows != other.n_rows || cData != other.cData){
return true;
}
return false;
}
bool SparseMatrix::operator==(const SparseMatrix &other) const{
return !(*this != other);
}
SparseMatrix SparseMatrix::operator+(const SparseMatrix &other) const{
if (n_rows != other.n_rows || n_cols != other.n_cols){
throw std::invalid_argument("Matrices must have same dimension to add.");
}
CData ans_cData;
for (int j = 0; j < n_cols; ++j){
ans_cData.push_back(add_cols(cData[j], other.cData[j]));
}
return SparseMatrix(n_rows, ans_cData);
}
SparseMatrix SparseMatrix::operator*(const SparseMatrix &other) const{
if(n_cols != other.n_rows){
throw std::invalid_argument("Matrices must have compatible dimensions.");
}
size_t ans_rows = n_rows;
CData ans_cData(other.n_cols);
SparseMatrix ans(ans_rows, ans_cData);
for(int j =0; j<ans.n_cols; ++j){
for(int idx : other.cData[j]){
ans.cData[j] = add_cols(ans.cData[j], cData[idx]);
}
}
return ans;
}
void SparseMatrix::transpose(){
CData cData_T(n_rows);
for(int j =0; j<n_cols; ++j){
if(!cData[j].empty()){
for(int x: cData[j]){
cData_T[x].push_back(j);
}
}
}
cData = cData_T;
n_rows = n_cols;
n_cols = cData.size();
}
void SparseMatrix::print() const{
for (int i = 0; i < n_rows; ++i){
for (int j = 0; j < n_cols; ++j){
if (cData[j].empty())
{std::cout << " 0";}
else if (std::binary_search(cData[j].begin(), cData[j].end(), i))//Assumes row indices
{std::cout << " 1";} //are ordered
else
{std::cout << " 0";}
if (n_cols-1 == j)
{std::cout << " \n";}
}
}
}
void SparseMatrix::save(std::string path){
std::fstream f_out;
f_out.open(path, std::ios::out);
if(f_out.is_open()){
f_out << n_rows << "\n";
f_out << n_cols << "\n";
for(int j = 0; j < n_cols; ++j){
int col_j_sz = cData[j].size();
f_out << col_j_sz;
for(int i = 0; i < col_j_sz; ++i){
f_out << " " << cData[j][i];
}
f_out << "\n";
}
f_out.close();
}
else{
throw std::runtime_error("File did not open.");
}
}
bool matching_pivots(const SparseMatrix& a, const SparseMatrix& b){
if(a.n_rows != b.n_rows || a.n_cols != b.n_cols){
throw std::invalid_argument("Input matrices must have the same size.");
}
for (int j = 0; j<a.n_cols; ++j){
bool a_j_empty = a.cData[j].empty();
bool b_j_empty = b.cData[j].empty();
if (a_j_empty != b_j_empty){
return false;
}
else if (!a_j_empty){
if(a.cData[j].back() != b.cData[j].back()){
return false;
}
}
}
return true;
}
} // namespace ph_computation
I found the answer in the pybind11 documentation here: https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types
Apparently, STL container data members can be overwritten in their entirety in python, but modification of the data member through list methods does not work. I don't really understand it, but the link above answers the question.
pat = "TATAG"
DSIGMA = 4 //i used 4 character 'ACGT'
m = len(pat)
string = "ACGT"
shift = [[0] * DSIGMA]*m
for l in range(m):
U[l] = 1
for s in range(DSIGMA):
shift[l][string[s]] = 1'''
Output
TypeError: list indices must be integers or slices, not str
I build this code in c++ and run perfectly, but when i convert into python it doesn't work. How to fix my code? Thanks!
You are writing python now :) so do not comment like // or /**/
and Use # or ''' ''' for commenting.
Be careful about Spaces/Tabs :)
Where you defined U parameter? You should define this somewhere; you can find and read about similar error here.
Why you passed string[s] as index? if you need to access via string/key, you can use something like a DICT. You can read about this here.
Remove ``` and also output in the last line.
I do not know what the result should be, but changing the code in this way does not give an error:
pat = "TATAG"
DSIGMA = 4 # i used 4 character 'ACGT' ****
m = len(pat)
string = "ACGT"
shift = [[0] * DSIGMA]*m
U=[0,0,0,0,0] # DEFINE SOMETHING ****
for l in range(m):
U[l] = 1
for s in range(DSIGMA):
# Change this to something else ****
# string[s] is not ok as an index ****
# shift[l][string[s]] = 1
shift[l]=1
I'm sorry, this is my first time using stack overflow.
I using string[s] as index because that string would be useful in my next code. There is my c++ program.
#include <iostream>
#include <stdlib.h>
#include <string.h>
#define DSIGMA 4
#define SIGMA 256
#include <fstream>
using namespace std;
void MAS_Search(char *P,char *T){
int shift[100][SIGMA],U[100],l,s,k,safe[100],i;
int mas,avr_shift,scan[100],max_pos,mas_shift[100][100];
int w;
int count=0;
char dna[DSIGMA]={ 'A','C','G','T' };
int freq[100];
freq['A'] = 293; freq['C'] = 207;
freq['G'] = 207; freq['T'] = 293;
int n=strlen(T);
int m=strlen(P);
/*Preproses*/
for (l = 0; l<m; l++) {
U[l] = 1; // Deklarasi U = {0,1,...,m-1}.
for (s = 0; s<DSIGMA; s++) {
shift[l][dna[s]] = 1;
}
}
for (k = 1; k <= m; k++) {
safe[k] = 0;
}
/* Preproses */
for (i = 0; i <m; i++) {
for (l = 0; l <m; l++) {
if (U[l] == 1) {
for (s = 0; s <DSIGMA; s++) {
for (k = shift[l][dna[s]]; k <= m; k++) {
if (safe[k] == 0 && dna[s] == P[l - k]) {
shift[l][dna[s]] = k;
break;
}
}
}
}
}
//Max
mas=0;
for (l = 0; l <m; l++) {
if (U[l] == 1) {
avr_shift = 0;
for (s = 0; s <DSIGMA; s++) {
avr_shift = avr_shift + shift[l][dna[s]] * freq[dna[s]];
}
if ((mas < avr_shift) || (mas == avr_shift && freq[P[max_pos]] > freq[P[l]])) {
mas = avr_shift;
max_pos = l;
cout<<max_pos;
}
}
}
scan[i]=max_pos;
U[max_pos]=0;
for (k=1;k<=max_pos-1;k++){
if (P[max_pos]!=P[max_pos-k]){
safe[k]=1;
}
}
}
//Found pattern and text
w = 0;
while (w<=n-m){
i=0;
while (i<=m && T[w+scan[i]] == P[scan[i]]){
i=i+1;
}
if (i>m){
cout<<"Pattern found at: "<<w+1<<endl;
w=w+shift[scan[m]][T[w+scan[m]]];
}
else{
w=w+shift[scan[i]][T[w+scan[i]]];
}
}
}
//Input
main(){
ifstream teks;
string Data;
int i=0;
char T[40000];
teks.open("Data.txt");
while (!teks.eof()){
teks.get(T[i]);
i++;
}
char P[] = "TATA";
MAS_Search(P, T);
return 0;
teks.close();
cin.get();
}
I'm using SWIG to call a C++ program from python with three arguments. The last argument is also being used for the return value.
analysis.i
%module coverage
%{
//#include "../SeqMCMC/src/funeval_base.hpp"
#include "coverage.hpp"
%}
%include "std_vector.i"
void coverage(std::vector<double> *Pypar, std::vector<std::vector<double> > *Pyxpass, std::vector<std::vector<double> > *Pyypass);
coverage.hpp
#ifndef _COVERAGE_
#define _COVERAGE_
#include<vector>
void coverage(std::vector<double> *Pypar,
std::vector<std::vector<double>> *Pyxpass,
std::vector<std::vector<double>> *Pyypass);
#endif
coverage.cpp
#include "isofuneval.hpp"
#include "funeval_base.hpp"
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>
void coverage (std::vector<double> *Pypar,
std::vector<std::vector<double>> *Pyxpass,
std::vector<std::vector<double>> *Pyypass){
//Convert the par, xpass and ypass vectors as ublas
boost::numeric::ublas::vector<double> par((*Pypar).size());
boost::numeric::ublas::matrix<double> xpass((*Pyxpass).size(),(*Pyxpass)[0].size());
boost::numeric::ublas::matrix<double> ypass((*Pyypass).size(),(*Pyypass)[0].size());
for (size_t i = 0; i < (*Pypar).size(); i++){
par(i) = (*Pypar)[i];
}
for (size_t i = 0; i < (*Pyxpass).size(); i++){
for (size_t j = 0; j <(*Pyxpass)[0].size(); j++){
xpass(i,j) = (*Pyxpass)[i][j] ;}
}
for (size_t i = 0; i < (*Pyypass).size(); i++){
for (size_t j = 0; j <(*Pyypass)[0].size(); j++){
ypass(i,j) = (*Pyypass)[i][j];}
}
isofuneval CoveragePlot;
CoveragePlot.function(par, xpass, ypass); //These should actually be references and or pointers if I expect ypass to have the result
for (size_t i = 0; i < (*Pyypass).size(); i++){
for (size_t j = 0; j <(*Pyypass)[0].size(); j++){
(*Pyypass)[i][j] = ypass(i,j) ;
}
}
}
It compiles, and the module loads, but when I run it:
import _coverage as coverage
coverage.coverage([3, 2, 1 ],[[4, 8423] , [4, 12] ],[[24,234 ], [23, 23] ])
I get the following error:
TypeError: in method 'coverage', argument 1 of type 'std::vector< double,std::allocator< double > > *'
In your analysis.i file declare your template specializations explicitly
%template(DoubleVector1D) std::vector<double>;
%template(DoubleVector2D) std::vector<std::vector<double>>;
I am using SWIG to build a Python module for some functions' evaluation based on their C code.
The main function I need is defined as follow:
void eval(double *x, int nx, int mx, double *f, int func_id)
And the aimed python function should be:
value_list = module.eval(point_matrix, func_id)
Here, eval will call a benchmark function and return their values. func_id is the id of function eval going to call, nx is the dimension of the function, and mx is the number of points which will be evaluated.
Actually, I did not clearly understand how SWIG pass values between typemaps (like, temp$argnum, why always using $argnum?). But by looking into the wrap code, I finished the typemap.i file:
%module cec17
%{
#include "cec17.h"
%}
%typemap(in) (double *x, int nx, int mx) (int count){
if (PyList_Check($input)) {
$3 = PyList_Size($input);
$2 = PyList_Size(PyList_GetItem($input, 0));
count = $3;
int i = 0, j = 0;
$1 = (double *) malloc($2*$3*sizeof(double));
for (i = 0; i < $3; i++){
for (j = 0; j < $2; j++){
PyObject *o = PyList_GetItem(PyList_GetItem($input, i), j);
if (PyFloat_Check(o))
$1[i*$2+j] = PyFloat_AsDouble(o);
else {
PyErr_SetString(PyExc_TypeError, "list must contrain strings");
free($1);
return NULL;
}
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
%typemap(freearg) double *x {
free((void *) $1);
}
%typemap(in, numinputs=0) double *f (double temp) {
$1 = &temp;
}
%typemap(argout) double *f {
int i = 0;
int s = count1;
printf("pass arg %d", s);
$result = PyList_New(0);
for (i = 0; i < s; i++){
PyList_Append($result, PyFloat_FromDouble($1[i]));
}
}
void eval(double *x, int nx, int mx, double *f, int func_num);
However, strange things happened then. Usually, I test 30 dimensional functions. When evaluating less than 10 points (mx < 10), the module works fine. When evaluating more points, an error occurs:
[1] 13616 segmentation fault (core dumped) python test.py
I'm quite sure the problem is not in the c code, because the only place where 'mx' occurs is in the 'for-loop' line in which are evaluations of each point.
I also tried to read the wrap code and debug, but I just can't find where the problem is. Following is a part of the wrap code generated by SWIG, and I added a 'printf' line. Even this string is not printed before the error.
#ifdef __cplusplus
extern "C" {
#endif
SWIGINTERN PyObject *_wrap_eval(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
PyObject *resultobj = 0;
double *arg1 = (double *) 0 ;
int arg2 ;
int arg3 ;
double *arg4 = (double *) 0 ;
int arg5 ;
int count1 ;
double temp4 ;
int val5 ;
int ecode5 = 0 ;
PyObject * obj0 = 0 ;
PyObject * obj1 = 0 ;
printf("check point 0");
{
arg4 = &temp4;
}
if (!PyArg_ParseTuple(args,(char *)"OO:eval",&obj0,&obj1)) SWIG_fail;
{
if (PyList_Check(obj0)) {
arg3 = PyList_Size(obj0);
arg2 = PyList_Size(PyList_GetItem(obj0, 0));
count1 = arg3;
int i = 0, j = 0;
arg1 = (double *) malloc(arg2*arg3*sizeof(double));
for (i = 0; i < arg3; i++){
for (j = 0; j < arg2; j++){
PyObject *o = PyList_GetItem(PyList_GetItem(obj0, i), j);
if (PyFloat_Check(o))
arg1[i*arg2+j] = PyFloat_AsDouble(o);
else {
PyErr_SetString(PyExc_TypeError, "list must contrain strings");
free(arg1);
return NULL;
}
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
ecode5 = SWIG_AsVal_int(obj1, &val5);
if (!SWIG_IsOK(ecode5)) {
SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "eval" "', argument " "5"" of type '" "int""'");
}
arg5 = (int)(val5);
eval(arg1,arg2,arg3,arg4,arg5);
resultobj = SWIG_Py_Void();
{
int i = 0;
int s = count1;
resultobj = PyList_New(0);
for (i = 0; i < s; i++){
PyList_Append(resultobj, PyFloat_FromDouble(arg4[i]));
}
}
return resultobj;
fail:
return NULL;
}
The problem seems a little tedious. Maybe you could just show me how to write the proper typemap.i code.
I'm not sure what your evaluation function is supposed to do, so I took a guess and implemented a wrapper for it. I took value_list = module.eval(point_matrix, func_id) to mean you want to return a list of result of evaluating some function against each row of data points, and came up with the following. Things I changed:
The typemaps replace the first four parameters with a Python list of lists of numbers.
space for the results in f was malloced.
To accept other numeric types except float, PyFloat_AsDouble was called on each value, and PyErr_Occurred was called to see if it failed to convert.
The freearg typemap now frees both allocations.
The argout typemap now handles the f output parameter correctly.
I added a sample eval implementation.
%module cec17
%typemap(in) (double *x, int nx, int mx, double* f) %{
if (PyList_Check($input)) {
$3 = PyList_Size($input);
$2 = PyList_Size(PyList_GetItem($input, 0));
$1 = malloc($2 * $3 * sizeof(double));
$4 = malloc($3 * sizeof(double));
for (int i = 0; i < $3; i++) {
for (int j = 0; j < $2; j++) {
PyObject *o = PyList_GetItem(PyList_GetItem($input, i), j);
double tmp = PyFloat_AsDouble(o);
if(PyErr_Occurred())
SWIG_fail;
$1[i * $2 + j] = PyFloat_AsDouble(o);
}
}
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
%}
%typemap(freearg) (double *x, int nx, int mx, double* f) %{
free($1);
free($4);
%}
%typemap(argout) (double *x, int nx, int mx, double* f) (PyObject* tmp) %{
tmp = PyList_New($3);
for (int i = 0; i < $3; i++) {
PyList_SET_ITEM(tmp, i, PyFloat_FromDouble($4[i]));
}
$result = SWIG_Python_AppendOutput($result, tmp);
%}
%inline %{
void eval(double *x, int nx, int mx, double *f, int func_num)
{
for(int i = 0; i < mx; ++i) {
f[i] = 0.0;
for(int j = 0; j < nx; ++j)
f[i] += x[i*nx+j];
}
}
%}
Output:
>>> import cec17
>>> cec17.eval([[1,2,3],[4,5,6]],99)
[6.0, 15.0]
Error checking could be improved. For example, checking for sequences instead of lists. Only the outer list is checked that it actually is a list, so if [1,2,3] was the first parameter instead of nested lists, it won't behave properly. There is no check that all the sublists are the same size, either.
Hope this helps. Let me know if anything is unclear.
In Ubuntu 14.04, I wrote a C file called hash.c:
/* hash.c: hash table with linear probing */
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
and compiled it with
gcc -shared hash.c -o test.so -fPIC
Afterwards, I tried to load test.so in a Python script (for testing), but I got the following error: "OSError: .../test.so: undefined symbol: hash_fn"
hash_fn is a function pointer in the hashtable struct. It is referenced a number of times by functions later in the file.
I do not understand why this error is happening. I have Googled but all other cases either concern C++ or includes. In my case I just have 1 C file that includes only stdio and stdlib.
here is the FULL code.
When I comment out all but hash_create and print_info, it loads succesfully. When I uncomment find(), it the error happens.
(print_info is just for testing that ctypes works)
/* hash.c: hash table with linear probing */
#include <stdio.h>
#include <stdlib.h>
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
static void close_gap(hashtable *ht, int i);
static int find(hashtable *ht, void *key);
hashtable* hash_create(int len, int (*hash_fn)(void*), int (*key_cmp)(void*, void*))
{
hashtable* ht = (hashtable*) malloc(sizeof(hashtable));
ht->len = len;
ht->table = calloc(len, sizeof(ht_entry));
ht->hash_fn = hash_fn;
ht->key_cmp = key_cmp;
ht->table[0].key = 2;
ht->table[0].value = 3;
return ht;
}
void print_info(hashtable *ht)
{
printf("%d, %d, %d\n", ht->len, ht->table[0].key, ht->table[0].value);
}
void* hash_retrieve(hashtable* ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return NULL;
}
return ht->table[i].value;
}
void hash_insert(hashtable* ht, void *key, void *value)
{
if(ht->num_entries == ht->len) {
return;
}
int i = hash_fn(key) % ht->len;
while(ht->table[i].key != NULL) {
i = (i + i) % ht->len;
}
ht->table[i].key = key;
ht->table[i].value = value;
}
void hash_remove(hashtable *ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return;
}
ht->table[i].key = 0;
ht->table[i].value = 0;
close_gap(ht, i);
}
static int find(hashtable *ht, void *key)
{
int i = hash_fn(key) % ht->len;
int num_checked = 0;
while(ht->table[i].key && num_checked != ht->len) {
if(!ht->key_cmp(ht->table[i].key, key)) {
return i;
}
num_checked++;
i = (i + i) % ht->len;
}
return -1;
}
static void close_gap(hashtable *ht, int i)
{
int j = (i + 1) % ht->len;
while(ht->table[j].key) {
int loc = ht->hash_fn(ht->table[j].key);
if((j > i && (loc <= i || loc > j)) || (j < i && (loc <= i && loc > j))) {
ht->table[i] = ht->table[j];
ht->table[j].key = 0;
ht->table[j].value = 0;
close_gap(ht, j);
return;
}
}
}
When I use your compilation line I get five warnings. There are several problems here. First you are trying to assign an int to void * in several places. That raises a warning, and it would crash at runtime because you are passing 2 and 3 as addresses.
Second, you are calling hash_fn in a couple of places instead of ht->hash_fn. That causes the linker error, but you should consider my other changes, otherwise it will crash at runtime with a SIGSEGV:
/* hash.c: hash table with linear probing */
#include <stdio.h>
#include <stdlib.h>
typedef struct {
void *key;
void *value;
} ht_entry;
typedef struct {
ht_entry *table;
int len;
int num_entries;
int (*hash_fn)(void *key);
int (*key_cmp)(void *k1, void *k2);
} hashtable;
static void close_gap(hashtable *ht, int i);
static int find(hashtable *ht, void *key);
hashtable* hash_create(int len, int (*hash_fn)(void*), int (*key_cmp)(void*, void*))
{
hashtable* ht = (hashtable*) malloc(sizeof(hashtable));
ht->len = len;
ht->table = calloc(len, sizeof(ht_entry));
ht->hash_fn = hash_fn;
ht->key_cmp = key_cmp;
// <<< Code changed here
/*
ht->table[0].key = 2;
ht->table[0].value = 3;
*/
{
int *p = malloc(sizeof(int));
*p = 2;
ht->table[0].key = p;
p = malloc(sizeof(int));
*p = 3;
ht->table[0].value = p;
}
// end of code change
return ht;
}
void print_info(hashtable *ht)
{
// <<<< Code changed
printf("%d, %d, %d\n", ht->len,
*(int *)ht->table[0].key, *(int *)ht->table[0].value);
}
void* hash_retrieve(hashtable* ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return NULL;
}
return ht->table[i].value;
}
void hash_insert(hashtable* ht, void *key, void *value)
{
if(ht->num_entries == ht->len) {
return;
}
// <<< Code changed
int i = ht->hash_fn(key) % ht->len;
while(ht->table[i].key != NULL) {
i = (i + i) % ht->len;
}
ht->table[i].key = key;
ht->table[i].value = value;
}
void hash_remove(hashtable *ht, void *key)
{
int i = find(ht, key);
if(i < 0) {
return;
ht->table[i].key = 0;
ht->table[i].value = 0;
close_gap(ht, i);
}
static int find(hashtable *ht, void *key)
{
// <<< Code changed
int i = ht->hash_fn(key) % ht->len;
int num_checked = 0;
while(ht->table[i].key && num_checked != ht->len) {
if(!ht->key_cmp(ht->table[i].key, key)) {
return i;
}
num_checked++;
i = (i + i) % ht->len;
}
return -1;
}
static void close_gap(hashtable *ht, int i)
{
int j = (i + 1) % ht->len;
while(ht->table[j].key) {
int loc = ht->hash_fn(ht->table[j].key);
if((j > i && (loc <= i || loc > j)) || (j < i && (loc <= i && loc > j))) {
ht->table[i] = ht->table[j];
ht->table[j].key = 0;
ht->table[j].value = 0;
close_gap(ht, j);
return;
}
}
}
I only coded around the errors and warnings, I did not check the logic. You will see that I have used malloc to allocate memory for key and value. Obviously you will need memory management on these two (i.e. free()).