I am working on an image reconstruction algorithm and I found this repo online that would work great with my code, but unfortunately it doesn't seem to support complex-valued calculations. I've read up on Cython over the past couple of days, but I'm pressed for time and I wanted to ask for advice before bulldozing through the code.
To be more exact, this is the Cython file:
from libcpp.vector cimport vector
from libcpp cimport bool
cimport numpy as np
import numpy as np
# Random-number helper declared in the bundled mt19937ar header.
cdef extern from "../bm3d_src/mt19937ar.h":
    double mt_genrand_res53()


# Denoiser entry point; a return value of 0 is treated as success by bm3d().
cdef extern from "../bm3d_src/bm3d.h":
    int run_bm3d(const float sigma,
                 vector[float] &img_noisy,
                 vector[float] &img_basic,
                 vector[float] &img_denoised,
                 const unsigned width,
                 const unsigned height,
                 const unsigned chnls,
                 const bool useSD_h,
                 const bool useSD_w,
                 const unsigned tau_2D_hard,
                 const unsigned tau_2D_wien,
                 const unsigned color_space)


# Image-writing helper declared in the bundled utilities header.
cdef extern from "../bm3d_src/utilities.h":
    int save_image(char * name,
                   vector[float] & img,
                   const unsigned width,
                   const unsigned height,
                   const unsigned chnls)
def hello():
    """Return the constant greeting string."""
    greeting = "Hello World"
    return greeting
def random():
    """Return one value from the C function ``mt_genrand_res53``."""
    cdef double sample = mt_genrand_res53()
    return sample
cpdef float[:, :, :] bm3d(float[:, :, :] input_array,
                          float sigma,
                          bool useSD_h = True,
                          bool useSD_w = True,
                          str tau_2D_hard = "DCT",
                          str tau_2D_wien = "DCT"
                          ):
    """
    Denoise an image by calling the C++ ``run_bm3d`` routine.

    sigma: value of assumed noise of the noisy image;

    input_array : input image, H x W x channum

    useSD_h (resp. useSD_w): if true, use weight based
    on the standard variation of the 3D group for the
    first (resp. second) step, otherwise use the number
    of non-zero coefficients after Hard Thresholding
    (resp. the norm of Wiener coefficients);

    tau_2D_hard (resp. tau_2D_wien): 2D transform to apply
    on every 3D group for the first (resp. second) part.
    Allowed values are 'DCT' and 'BIOR';

    Returns a new float32 array with the same H x W x channum shape.

    Raises ValueError for an unknown transform name and RuntimeError when
    ``run_bm3d`` reports a failure.

    # FIXME : add color space support; right now just RGB
    """
    cdef vector[float] input_image
    cdef vector[float] basic_image
    cdef vector[float] output_image

    # Typed sizes and indices keep the copy loops below in plain C.
    cdef Py_ssize_t height = input_array.shape[0]
    cdef Py_ssize_t width = input_array.shape[1]
    cdef Py_ssize_t chnls = input_array.shape[2]
    cdef Py_ssize_t total = height * width * chnls
    cdef Py_ssize_t i, j, k, pos
    cdef unsigned tau_2D_hard_i, tau_2D_wien_i
    cdef unsigned color_space = 0  # FIXME someday we'll have color support
    cdef int ret
    cdef np.ndarray output_array
    cdef float[:, :, ::1] out_view

    # Translate the transform names into the integer codes passed to run_bm3d.
    if tau_2D_hard == "DCT":
        tau_2D_hard_i = 4
    elif tau_2D_hard == "BIOR":
        tau_2D_hard_i = 5
    else:
        raise ValueError("Unknown tau_2d_hard, must be DCT or BIOR")

    if tau_2D_wien == "DCT":
        tau_2D_wien_i = 4
    elif tau_2D_wien == "BIOR":
        tau_2D_wien_i = 5
    else:
        raise ValueError("Unknown tau_2d_wien, must be DCT or BIOR")

    # Flatten the input image row by row, channels innermost.
    input_image.resize(total)
    pos = 0
    for i in range(height):
        for j in range(width):
            for k in range(chnls):
                input_image[pos] = input_array[i, j, k]
                pos += 1

    ret = run_bm3d(sigma, input_image, basic_image, output_image,
                   <unsigned>width, <unsigned>height, <unsigned>chnls,
                   useSD_h, useSD_w,
                   tau_2D_hard_i, tau_2D_wien_i,
                   color_space)
    if ret != 0:
        raise RuntimeError("run_bm3d returned an error, retval=%d" % ret)

    # Indexing a std::vector is unchecked, so refuse a short result instead
    # of reading past its end.
    if <Py_ssize_t>output_image.size() < total:
        raise RuntimeError("run_bm3d returned %d values, expected %d"
                           % (output_image.size(), total))

    output_array = np.zeros([height, width, chnls], dtype=np.float32)
    out_view = output_array
    pos = 0
    for i in range(height):
        for j in range(width):
            for k in range(chnls):
                out_view[i, j, k] = output_image[pos]
                pos += 1

    return output_array
How would I go about making the most minimal changes so that it works with NumPy arrays with dtype='complex'?
Cheers!
Related
I'm trying to make a function that is defined in a C library callable from Python.
I'm a beginner with Cython and C.
In short, I ended up with the following problem:
#.pyx file
# Declaration of the C function lsd_scale from lsd.h.
cdef extern from "lsd.h":
double *lsd_scale(int *n_out, double *img, int X, int Y, double scale)
# NOTE(review): n_out and img are declared as plain values here, but lsd_scale
# expects pointers for both, so the call below is rejected by the compiler
# ("Cannot assign type 'int' to 'int *'").
def detector(int n_out, double img, int X, int Y, double scale):
return lsd_scale(n_out, img, X, Y, scale)
The definition from library.h file
@param n_out Pointer to an int where LSD will store the number of
line segments detected.
@param img Pointer to input image data. It must be an array of
doubles of size X x Y, and the pixel at coordinates
(x,y) is obtained by img[x+y*X].
@param X X size of the image: the number of columns.
@param Y Y size of the image: the number of rows.
@return A double array of size 7 x n_out, containing the list
of line segments detected. The array contains first
7 values of line segment number 1, then the 7 values
of line segment number 2, and so on, and it finish
by the 7 values of line segment number n_out.
The seven values are:
- x1,y1,x2,y2,width,p,-log10(NFA)
.
for a line segment from coordinates (x1,y1) to (x2,y2),
a width 'width', an angle precision of p in (0,1) given
by angle_tolerance/180 degree, and NFA value 'NFA'.
If 'out' is the returned pointer, the 7 values of
line segment number 'n+1' are obtained with
'out[7*n+0]' to 'out[7*n+6]'.
So, in short, the question is how to indicate in the Python function that n_out is a pointer to an integer and img is a pointer to an array.
I tried the following
# Pointer aliases introduced so the def signature below can name pointer types.
ctypedef int*p_int
ctypedef double*p_double
cdef extern from "lsd.h":
double *lsd_scale(int *n_out, double *img, int X, int Y, double scale)
# NOTE(review): presumably this still cannot work, because a def function
# receives Python objects and those cannot be converted to raw C pointers;
# confirm against the compiler output for this variant.
def detector(p_int n_out, p_double img, int X, int Y, double scale):
return lsd_scale(n_out, img, X, Y, scale)
But throws same error
Error compiling Cython file:
------------------------------------------------------------
...
cdef extern from "lsd.h":
double *lsd_scale(int *n_out, double *img, int X, int Y, double scale)
def detector(int n_out, double img, int X, int Y, double scale):
return lsd_scale(n_out, img, X, Y, scale)
^
------------------------------------------------------------
detector.pyx:6:21: Cannot assign type 'int' to 'int *'
Error compiling Cython file:
------------------------------------------------------------
...
cdef extern from "lsd.h":
double *lsd_scale(int *n_out, double *img, int X, int Y, double scale)
def detector(int n_out, double img, int X, int Y, double scale):
return lsd_scale(n_out, img, X, Y, scale)
^
------------------------------------------------------------
detector.pyx:6:28: Cannot assign type 'double' to 'double *'
Error compiling Cython file:
------------------------------------------------------------
...
cdef extern from "lsd.h":
double *lsd_scale(int *n_out, double *img, int X, int Y, double scale)
def detector(int n_out, double img, int X, int Y, double scale):
return lsd_scale(n_out, img, X, Y, scale)
^
------------------------------------------------------------
Thanks for helping
The idea should be to create a useful Python interface, not directly reproduce the C interface. Some of these arguments are pointless to the Python interface because they're already stored in (say) a Numpy array.
double *img, int X, int Y all describe the input array. Essentially the requirement is "2D, contiguous block of memory". In Cython this can be expressed using a typed memoryview:
def detector(..., double[:,::1] img, ...):
    # lsd.h addresses pixel (x, y) as img[x + y*X]: X is the number of columns
    # and Y the number of rows. For a C-contiguous 2D array that means
    # X = img.shape[1] and Y = img.shape[0].
    ... = lsd_scale(..., &img[0,0],  # address of first element
                    <int>img.shape[1],  # X: number of columns
                    <int>img.shape[0],  # Y: number of rows
                    ...)
The return value and n_out combine to describe the output array, which is (7xn_out). You could get Numpy to take ownership of this data, or you could copy the data. I recommend the latter. Therefore, you don't actually want n_out to be an input from Python. Instead:
def detector(...):
    cdef double[:,::1] result_memview  # temporary view over the C result buffer
    cdef int n_out = 0
    cdef double* result = lsd_scale(&n_out, ...)
    try:
        # A memoryview cast needs a non-NULL pointer and positive extents, so
        # the "nothing detected" case is answered with an empty (0, 7) array.
        if result == NULL or n_out <= 0:
            return np.empty((0, 7), dtype=np.float64)
        # The buffer holds the 7 values of segment 1, then the 7 values of
        # segment 2, and so on (out[7*n + 0] .. out[7*n + 6]). In C order that
        # is n_out rows of 7 columns.
        result_memview = <double[:n_out, :7]>result
        # np.array copies by default, so the returned array stays valid after
        # the C buffer is freed below.
        return np.array(result_memview)
    finally:
        free(result)  # must be cimported: from libc.stdlib cimport free
I'm trying to iterate over a 2D image containing floating-point depth data, it has a somewhat normal resolution (640, 480), but python has been too slow, so I've been trying to optimize the problem by using cython.
I've tried to move the looping to other functions, shifting around the nogil statement, didn't seem to work, after reworking the problem, I was able to get a portion of it working. But this last part is escaping me to no avail.
I've attempted to get rid of python objects from the prange() loop by moving them to the with gil section beforehand, hence:
cdef int[:] w_list = array.array(range(0, w_inc, interpolation))
instead of
for r in range(0, w_inc, interpolation):
but the error persists
My code works in two parts:
The split_data() method subsections the image into num quadrants that are stored in a 3D array bits. These are used to make splitting the work across multiple threads/processes easier. This part works okay.
#cython.cdivision(True)
#cython.boundscheck(False)
cpdef split_data(double[:, :] frame, int h, int w, int num):
    """Copy ``num`` sub-blocks of ``frame`` into a new 3D array.

    Block ``i`` has shape ``(h // num, w // num)`` and is read from ``frame``
    starting at row ``i * (h // num)`` and column ``i * (w // num)``.

    Returns a ``(num, h // num, w // num)`` array of doubles.
    """
    # The block size is loop-invariant, so compute it once.
    cdef int block_h = h // num
    cdef int block_w = w // num
    cdef double[:, :, :] bits = np.zeros(shape=(num, block_h, block_w), dtype=float)
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to one thread rather than failing the conversion to a C int.
    cdef int c_count = os.cpu_count() or 1
    cdef int i, j, k
    # Only the outer loop is distributed across threads; the inner loops are
    # plain C loops running inside each thread.
    for i in prange(num, nogil=True, num_threads=c_count):
        for j in range(block_h):
            for k in range(block_w):
                bits[i, j, k] = frame[i * block_h + j, i * block_w + k]
    return bits
The scatter_data() method takes the bits array from the previous function and then creates another 3D array with length num where num is the length of bits, called points which is a series of 3D coordinates representing valid depth points. It then uses prange() to extract the valid depth data from each of these bits and stores them into points
#cython.cdivision(True)
#cython.boundscheck(False)
# Intended to collect (w - r, c, depth) triples for the non-zero entries of
# `depths` into a (c_count, max_num, 3) array, one slot per prange iteration.
# NOTE(review): this version does not compile; see the notes at the loops.
cpdef scatter_data(double[:, :] depths, object validator=None,
int h=-1, int w=-1, int interpolation=1):
# Handles if h or w is -1 (default)
if h < 0 or w < 0:
h = depths.shape[0] if h < 0 else h
w = depths.shape[1] if w < 0 else w
cdef int max_num = w * h
cdef int c_count = os.cpu_count()
cdef int h_inc = h // c_count, w_inc = w // c_count
cdef double[:, :, :] points = np.zeros(shape=(c_count, max_num, 3), dtype=float)
# NOTE(review): `bits` is computed here but never read below.
cdef double[:, :, :] bits = split_data(depths, h, w, c_count)
cdef int count = 0
cdef int i, r, c
# NOTE(review): array.array normally takes a typecode as its first argument
# (e.g. array.array('i', ...)) -- confirm this call works as written.
cdef int[:] w_list = array.array(range(0, w_inc, interpolation))
cdef int[:] h_list = array.array(range(0, h_inc, interpolation))
for i in prange(c_count, nogil=True, num_threads=c_count):
count = 0
# Iterating directly over a memoryview is compiled as Python-object iteration,
# which the compiler rejects inside a nogil block ("Iterating over Python
# object not allowed without gil").
for r in w_list:
for c in h_list:
if depths[c, r] != 0:
points[i, count, 0] = w - r
points[i, count, 1] = c
points[i, count, 2] = depths[c, r]
count = count + 1
# NOTE(review): this slices the first axis, whose length is c_count, using
# `count`, which is assigned inside the prange body -- verify this is intended.
points = points[:count]
return points
and for completeness
3. Here are my import statements
import cython
from cython.parallel import prange
from cpython cimport array
import array
cimport numpy as np
import numpy as np
import os
When compiling the code I keep getting error messages something along the lines of:
Error compiling Cython file:
------------------------------------------------------------
...
cdef int[:] w_list = array.array(range(0, w_inc, interpolation))
cdef int[:] h_list = array.array(range(0, h_inc, interpolation))
for i in prange(c_count, nogil=True, num_threads=c_count):
count = 0
for r in w_list:
^
------------------------------------------------------------
data_util/cy_scatter.pyx:70:17: Iterating over Python object not allowed without gil
and
Error compiling Cython file:
------------------------------------------------------------
...
cdef int[:] w_list = array.array(range(0, w_inc, interpolation))
cdef int[:] h_list = array.array(range(0, h_inc, interpolation))
for i in prange(c_count, nogil=True, num_threads=c_count):
count = 0
for r in w_list:
^
------------------------------------------------------------
data_util/cy_scatter.pyx:70:17: Coercion from Python not allowed without the GIL
and
Error compiling Cython file:
------------------------------------------------------------
...
cdef int[:] w_list = array.array(range(0, w_inc, interpolation))
cdef int[:] h_list = array.array(range(0, h_inc, interpolation))
for i in prange(c_count, nogil=True, num_threads=c_count):
count = 0
for r in w_list:
^
------------------------------------------------------------
data_util/cy_scatter.pyx:70:17: Converting to Python object not allowed without gil
Is there a way to do this? And if so, how do I do this?
You just want to iterate by index rather than by iterating over a Python iterator:
# Loop over integer positions and index the memoryview, instead of iterating
# over the memoryview object itself.
for ri in range(w_list.shape[0]):
r = w_list[ri]
This is somewhere where best practice in Python differs from best practice in Cython - Cython only accelerates iterating over numeric loops. The way you're trying to do it will fall back to being a Python iterator which is both slower, and requires the GIL.
I am using PyOpenCL to process images in Python and to send a 3D numpy array (height x width x 4) to the kernel. I am having trouble indexing the 3D array inside the kernel code. For now I am only able to copy the whole input array to the output. The current code looks like this, where img is the image with img.shape = (320, 512, 4):
/*
 * Copy img to results element by element.
 *
 * The host launches one work-item per array element, using the NumPy shape
 * (d0, d1, d2) as the global size. A C-ordered NumPy array stores element
 * [x, y, z] at flat offset (x * d1 + y) * d2 + z, so the same formula here
 * makes results[index] correspond to results[x, y, z] on the Python side.
 */
__kernel void part1(__global float* img, __global float* results)
{
    size_t x = get_global_id(0);
    size_t y = get_global_id(1);
    size_t z = get_global_id(2);

    /* Extents come from the launch size instead of hard-coded constants. */
    size_t index = (x * get_global_size(1) + y) * get_global_size(2) + z;

    results[index] = img[index];
}
However, I do not quite understand how this works. For example, how do I index the Python equivalent of img[1, 2, 3] inside this kernel? And further, which index into results should I use to store an item so that it ends up at position results[1, 2, 3] in the numpy array when I get the results back in Python?
To run this I am using this Python code:
import pyopencl as cl
import numpy as np
class OpenCL:
    """Minimal PyOpenCL helper: owns a context, a queue and one built program."""

    def __init__(self):
        self.ctx = cl.create_some_context()
        self.queue = cl.CommandQueue(self.ctx)

    def loadProgram(self, filename):
        """Read the kernel source in *filename* and build it as ``self.program``."""
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as f:
            fstr = f.read()
        self.program = cl.Program(self.ctx, fstr).build()

    def opencl_energy(self, img):
        """Run the ``part1`` kernel on *img* and return its output.

        *img* is converted to float32 and the kernel is launched with
        ``img.shape`` as the global size. The result is a new array with the
        same shape and dtype as the converted input.
        """
        mf = cl.mem_flags
        self.img = img.astype(np.float32)

        self.img_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.img)
        self.dest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.img.nbytes)

        self.program.part1(self.queue, self.img.shape, None, self.img_buf, self.dest_buf)

        c = np.empty_like(self.img)
        # enqueue_copy replaces the legacy enqueue_read_buffer call.
        cl.enqueue_copy(self.queue, c, self.dest_buf).wait()
        return c
# Build the kernel from get_energy.cl, push a random (320, 512, 4) float32
# image through it and check that the output matches the input element-wise.
example = OpenCL()
example.loadProgram("get_energy.cl")
image = np.random.rand(320, 512, 4)
image = image.astype(np.float32)
results = example.opencl_energy(image)
print("All items are equal:", (results==image).all())
Update:
The OpenCL docs state (in 3.5), that
"Memory objects are categorized into two types: buffer objects, and image objects. A buffer
object stores a one-dimensional collection of elements whereas an image object is used to store a
two- or three- dimensional texture, frame-buffer or image."
so, a buffer always is linear, or linearized as you can see from my sample below.
import pyopencl as cl
import numpy as np
# 3x3x3 float32 array whose values are 0..26 in order.
h_a = np.arange(27).reshape((3,3,3)).astype(np.float32)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# Device buffer initialised with a copy of the host array.
d_a = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=h_a)
# The kernel reads the buffer through a flat index and prints element 10.
prg = cl.Program(ctx, """
__kernel void p(__global const float *d_a) {
printf("Array element is %f ",d_a[10]);
}
""").build()
# Launch a single work-item.
prg.p(queue, (1,), None, d_a)
Gives me
"Array element is 10"
as output. So, the buffer actually is the linearized array. Nevertheless, the naive [x,y,z] approach known from numpy doesn't work that way. Using an 2 or 3-D Image instead of a buffer should work nevertheless.
Although this is not the optimal solution, I linearized the array in Python and sent it as 1D. In the kernel code I calculated x, y and z from the linear index. When it was returned to Python, I reshaped it back to the original shape.
I encountered the same problem.
On https://lists.tiker.net/pipermail/pyopencl/2009-October/000134.html
is a simple example how to use 3d arrays with PyOpenCL that worked for me. I quote the code here for future reference:
import pyopencl as cl
import numpy
import numpy.linalg as la
# Example: run a kernel over a 3D float32 array, one work-item per element.
sizeX = 4
sizeY = 2
sizeZ = 5
a = numpy.random.rand(sizeX, sizeY, sizeZ).astype(numpy.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, a.nbytes)

# The global size is a.shape, so (x, y, z) are the NumPy indices of the
# element handled by each work-item. A C-ordered array stores a[x, y, z] at
# flat offset (x * sizeY + y) * sizeZ + z, which is what idx computes.
prg = cl.Program(ctx, """
    __kernel void sum(__global const float *a, __global float *b)
    {
      int x = get_global_id(0);
      int y = get_global_id(1);
      int z = get_global_id(2);

      int idx = (x * %d + y) * %d + z;
      b[idx] = a[idx] * x + 3 * y + 5 * z;
    }
    """ % (sizeY, sizeZ)).build()

# Kernel call arguments: queue, global size, local size (None lets the
# implementation choose), then the kernel arguments.
prg.sum(queue, a.shape, None, a_buf, dest_buf)

# Copy the result back into the host array, overwriting the input values.
cl.enqueue_copy(queue, a, dest_buf)
print(a)
I am trying to match a template with a binary image (only black and white) by shifting the template along the image. And return the minimum distance between the template and the image with the corresponding position on which this minimum distance did occur. For example:
img:
0 1 0
0 0 1
0 1 1
template:
0 1
1 1
This template matches the image best at position (1,1) and the distance will then be 0. So far things are not too difficult and I already got some code that does the trick.
def match_template(img, template):
    """Slide *template* over *img* and return the best-matching position.

    Every placement where the template fits entirely inside the image is
    scored with the Euclidean distance between the template and the image
    patch it covers.

    Returns ``[mindist, (x, y)]`` where ``x`` indexes axis 0 and ``y`` axis 1
    of *img*. On ties the first placement found wins (axis 1 is the outer
    loop). If the template does not fit, returns ``[inf, (-1, -1)]``.
    """
    # Work in float64: unsigned inputs such as uint8 would otherwise wrap
    # around in the subtraction and the squaring.
    img = np.asarray(img, dtype=np.float64)
    template = np.asarray(template, dtype=np.float64)
    t_rows, t_cols = template.shape

    mindist = float('inf')
    idx = (-1, -1)
    for y in range(img.shape[1] - t_cols + 1):
        for x in range(img.shape[0] - t_rows + 1):
            diff = template - img[x:x + t_rows, y:y + t_cols]
            dist = np.sqrt(np.sum(np.square(diff)))
            if dist < mindist:
                mindist = dist
                idx = (x, y)
    return [mindist, idx]
But for images of the size I need (image among 500 x 200 pixels and template among 250 x 100) this already takes approximately 4.5 seconds, which is way too slow. And I know the same thing can be done much quicker using matrix multiplications (in matlab I believe this can be done using im2col and repmat). Can anyone explain me how to do it in python/numpy?
btw. I know there is an opencv matchTemplate function that does exactly what I need, but since I might need to alter the code slightly later on I would prefer a solution which I fully understand and can alter.
Thanks!
edit: If anyone can explain me how opencv does this in less than 0.2 seconds that would also be great. I have had a short look at the source code, but those things somehow always look quite complicated to me.
edit2: Cython code
import numpy as np
cimport numpy as np
# np.int_ is the runtime dtype paired with the compile-time type np.int_t;
# the bare np.int alias has been removed from NumPy.
DTYPE = np.int_
ctypedef np.int_t DTYPE_t
def match_template(np.ndarray img, np.ndarray template):
    """Return ``[mindist, (x, y)]`` for the best placement of template in img.

    Each placement where the template fits inside the image is scored with the
    Euclidean distance between the template and the covered patch; ``x``
    indexes axis 0 and ``y`` axis 1. If the template does not fit, the result
    is ``[inf, (-1, -1)]``.
    """
    cdef double mindist = float('inf')
    cdef double dist
    cdef Py_ssize_t x_coord = -1
    cdef Py_ssize_t y_coord = -1
    # Signed counters: the ranges below are negative when the template is
    # larger than the image, and an unsigned comparison would turn that into a
    # huge loop bound.
    cdef Py_ssize_t x, y
    cdef Py_ssize_t img_width = img.shape[0]
    cdef Py_ssize_t img_height = img.shape[1]
    cdef Py_ssize_t template_width = template.shape[0]
    cdef Py_ssize_t template_height = template.shape[1]
    cdef Py_ssize_t range_x = img_width - template_width + 1
    cdef Py_ssize_t range_y = img_height - template_height + 1

    for y in range(range_y):
        for x in range(range_x):
            # calculate euclidean distance
            dist = np.sqrt(np.sum(np.square(
                template - img[x:x + template_width, y:y + template_height])))
            if dist < mindist:
                mindist = dist
                x_coord = x
                y_coord = y
    return [mindist, (x_coord, y_coord)]
# Convert both inputs to the DTYPE integer dtype before calling the function.
img = np.asarray(img, dtype=DTYPE)
template = np.asarray(template, dtype=DTYPE)
match_template(img, template)
One possible way of doing what you want is via convolution (which can be brute force or FFT). Matrix multiplications AFAIK won't work. You need to convolve your data with the template. And find the maximum (you'll also need to do some scaling to make it work properly).
# Interactive transcript: xs is the 3x3 example image and ys the 2x2 template,
# both as floats; the lines starting with ">>>" show the printed results.
xs=np.array([[0,1,0],[0,0,1],[0,1,1]])*1.
ys=np.array([[0,1],[1,1]])*1.
print scipy.ndimage.convolve(xs,ys,mode='constant',cval=np.inf)
>>> array([[ 1., 1., inf],
[ 0., 2., inf],
[ inf, inf, inf]])
print scipy.signal.fftconvolve(xs,ys,mode='valid')
>>> array([[ 1., 1.],
[ 0., 2.]])
There may be a fancy way to get this done using pure numpy/scipy magic. But it might be easier (and more understandable when you look at the code in the future) to just drop into Cython to get this done. There's a good tutorial for integrating Cython with numpy at http://docs.cython.org/src/tutorial/numpy.html.
EDIT:
I did a quick test with your Cython code and it ran ~15 sec for a 500x400 img with a 100x200 template. After some tweaks (eliminating the numpy method calls and numpy bounds checking), I got it down under 3 seconds. That may not be enough for you, but it shows the possibility.
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport sqrt
# np.int_ is the runtime dtype paired with the compile-time type np.int_t;
# the bare np.int alias has been removed from NumPy.
DTYPE = np.int_
ctypedef np.int_t DTYPE_t
@cython.boundscheck(False)
@cython.wraparound(False)
def match_template(np.ndarray[DTYPE_t, ndim=2] img, np.ndarray[DTYPE_t, ndim=2] template):
    """Return ``[mindist, (x, y)]`` for the best placement of template in img.

    Each placement where the template fits inside the image is scored with the
    Euclidean distance between the template and the covered patch, computed in
    plain C loops. ``x`` indexes axis 0 and ``y`` axis 1. If the template does
    not fit, the result is ``[inf, (-1, -1)]``.
    """
    cdef double mindist = float('inf')
    cdef double dist
    cdef Py_ssize_t x_coord = -1
    cdef Py_ssize_t y_coord = -1
    # Signed counters: with bounds checking off, an unsigned comparison against
    # a negative range (template larger than image) would read out of bounds.
    cdef Py_ssize_t x, y, j, k
    cdef Py_ssize_t img_width = img.shape[0]
    cdef Py_ssize_t img_height = img.shape[1]
    cdef Py_ssize_t template_width = template.shape[0]
    cdef Py_ssize_t template_height = template.shape[1]
    cdef Py_ssize_t range_x = img_width - template_width + 1
    cdef Py_ssize_t range_y = img_height - template_height + 1
    # Same width as the array elements, so differences are not truncated.
    cdef DTYPE_t total, delta

    for y in range(range_y):
        for x in range(range_x):
            # Sum of squared differences over the patch at (x, y).
            total = 0
            for j in range(template_width):
                for k in range(template_height):
                    delta = template[j, k] - img[x + j, y + k]
                    total += delta * delta
            dist = sqrt(<double>total)
            if dist < mindist:
                mindist = dist
                x_coord = x
                y_coord = y
    return [mindist, (x_coord, y_coord)]
I have made some Numpy C-extensions before with great help from this site, but as far as I can see the returned parameters are all fixed length.
Is there any way to have a Numpy C-extension return a variable length numpy array instead?
You may find it easier to make numpy extensions in Cython using the Numpy C-API, which simplifies the process as it allows you to mix Python and C objects. In that case there is little difficulty in making a variable-length array: you can simply specify an array with an arbitrary shape.
The Cython numpy tutorial is probably the best source on this topic.
For example, here is a function I recently wrote:
import numpy as np
cimport numpy as np
cimport cython
dtype = np.double
ctypedef double dtype_t
np.import_ufunc()
np.import_array()
def ewma(a, d, axis):
    """Exponentially weighted moving average of ``a`` along ``axis``.

    The smoothing weight is ``exp(-d)``; it is handed to the C worker through
    a one-element ``void*`` argument array.
    """
    cdef double weight[1]
    cdef void *args[1]
    weight[0] = <double>np.exp(-d)
    args[0] = &weight[0]
    # The worker reads doubles, so the input is converted to a float array.
    values = np.array(a, dtype = float)
    return apply_along_axis(&ewma_func, values, np.double, np.double, False, &(args[0]), <int>axis)
cdef void ewma_func(int n, void* aData,int astride, void* oData, int ostride, void** args):
    # 1-D worker: writes the running exponentially weighted average of the n
    # doubles starting at aData into oData. Strides are in bytes; args[0]
    # points at the weight.
    cdef double alpha = (<double*>(args[0]))[0]
    cdef double running = 0.0
    cdef char* src = <char*>aData
    cdef char* dst = <char*>oData
    cdef int step
    for step in range(n):
        running = (<double*>src)[0] * alpha + running * (1.0 - alpha)
        (<double*>dst)[0] = running
        # Advance both cursors by one element along the axis.
        src += astride
        dst += ostride
# Signature of a 1-D worker: (length, input pointer, input stride,
# output pointer, output stride, extra arguments).
ctypedef void (*func_1d)(int, void*, int, void*, int, void **)
cdef apply_along_axis(func_1d function, a, adtype, odtype, reduce, void** args, int axis):
# Generic function for applying a Cython 1-D worker along one axis of `a`.
# Returns a new array of dtype `odtype`; when `reduce` is true the chosen
# axis is removed from the result. `adtype` is not used in this body.
oshape = list(a.shape)
if reduce :
# A reduced axis keeps a length of 1 while the output is being filled.
oshape[axis] = 1
out = np.empty(oshape, odtype)
cdef np.flatiter ita, ito
# The two iterators are advanced in lock-step in the loop below.
# NOTE(review): `axis` is passed by address, so these calls may modify it --
# confirm the behaviour for negative axis values.
ita = np.PyArray_IterAllButAxis(a, &axis)
ito = np.PyArray_IterAllButAxis(out, &axis)
cdef int axis_length = a.shape[axis]
cdef int a_axis_stride = a.strides[axis]
cdef int o_axis_stride = out.strides[axis]
if reduce:
# With a zero stride every write of the worker goes to the same output slot.
o_axis_stride = 0
while np.PyArray_ITER_NOTDONE(ita):
function(axis_length, np.PyArray_ITER_DATA (ita), a_axis_stride, np.PyArray_ITER_DATA (ito), o_axis_stride, args)
np.PyArray_ITER_NEXT(ita)
np.PyArray_ITER_NEXT(ito)
if reduce:
# Drop the length-1 axis from the result shape.
oshape.pop(axis)
out.shape = oshape
return out
If this doesn't suit you, there is a function for making a new empty array with arbitrary shape (link).
I am interpreting your question to mean "I have a function that takes a NumPy array of length n, but it will return another array of length m different from n." If that is the case, you will need to malloc a new C array in the extension, e.g.
new_array = malloc(m * sizeof(int64)); // or whatever your data type is
then create a new NumPy array with that. This example assumes a 1D array:
int npy_intp dims[1];
dims[0] = m;
PyArrayObject *out = (PyArrayObject *)PyArray_SimpleNewFromData(1, // 1D array
dims, // dimensions
NPY_INT64, // type
new_array);
PyArray_ENABLEFLAGS(out, NPY_ARRAY_OWNDATA);
Then return the new array. The important part here is to set the NPY_ARRAY_OWNDATA flag so that the memory you allocated is freed when the Python object is garbage collected.