Convert OpenCV IplImage* data to numpy array - python

I'm building a Python wrapper for a C++ DLL by means of ctypes. The aforementioned library makes extensive use of OpenCV 2.2 (using the old C API).
I want to convert the char* imageData field of OpenCV's IplImage structure to a numpy array. I've searched SO and the web for a few days, but no solution seems to solve my problem.
Here's the problem. I've tested my implementation with 2 images: one of size 600x599 (and it's all good) and the other one of 602x600 (and here's the problem). Both are color images (3-channel images). I've tested the implementation with several images of size 602x600 and always get the image distorted.
I'm guessing there might be something weird going on with the padding added to the image by OpenCV (although I think I took care of it in my implementation), but I can't quite put my finger on it.
The thing is that the second image shows all distorted after the "processing" performed in the c++ dll (none for the moment) and i can only think i'm doing something wrong converting back the IplImage data (imageData) to the numpy array.
Here goes the C++ source code:
// Exported entry point: performs no processing yet and simply forwards the
// image to get_data_from_iplimage, returning whatever that call returns.
char* do_something(IplImage *img, int* image_size)
{
    // zero processing for now: the input image itself is serialised
    char* raw_bytes = get_data_from_iplimage(img, image_size);
    return raw_bytes;
}
// Copies img->imageSize bytes of pixel data out of img into a newly allocated
// array, reports that byte count through image_size, releases img and returns
// the copy.
// NOTE(review): img and image_size are dereferenced without a NULL check.
char* get_data_from_iplimage(IplImage* img, int* image_size)
{
// getting the image total size
*image_size = img->imageSize;
// copying data
// NOTE(review): allocated with new[]; no matching delete[] is visible in this
// snippet, so the caller presumably has to free it through the DLL - confirm.
char* image_data = new char[img->imageSize];
memcpy(image_data, img->imageData, img->imageSize);
// releasing the Iplimage*
// NOTE(review): this releases an image that was created by the caller; verify
// that the caller does not own or reuse the header/data afterwards.
cvReleaseImage(&img);
// returning the image data
return image_data;
}
Here goes the Python source code:
# Image type (IplImage)
IPL_DEPTH_SIGN = 0x80000000
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64
IPL_DEPTH_8S = IPL_DEPTH_SIGN + IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN + IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN + 32

# (IplImage depth constant, numpy dtype name) pairs known to depth2dtype
_DEPTH_DTYPE_NAMES = (
    (IPL_DEPTH_8U, 'uint8'),
    (IPL_DEPTH_8S, 'int8'),
    (IPL_DEPTH_16U, 'uint16'),
    (IPL_DEPTH_16S, 'int16'),
    (IPL_DEPTH_32S, 'int32'),
    (IPL_DEPTH_32F, 'float32'),
    (IPL_DEPTH_64F, 'float64'),
)


def depth2dtype(depth):
    """Map an IplImage ``depth`` code to the matching numpy dtype.

    The code is compared by value (``==``), not by identity: large integers
    such as ``IPL_DEPTH_8S`` are not guaranteed to be the same object, so an
    ``is`` test silently falls through to the default.

    A negative integer is treated as the signed 32-bit view of the same bit
    pattern (which is what a ``c_int`` field yields for the ``*S`` depths).

    Unknown depths fall back to ``uint8``.
    """
    if isinstance(depth, int) and depth < 0:
        # reinterpret a signed 32-bit value as the unsigned constant
        depth += 1 << 32
    for known_depth, dtype_name in _DEPTH_DTYPE_NAMES:
        if depth == known_depth:
            return np.dtype(dtype_name)
    # This is probably a good default
    return np.dtype('uint8')
def get_iplimage_ptr(img):
    """Build an OpenCV 2.x ``IplImage`` structure around a numpy image.

    ``None`` stands for the NULL pointer and is returned unchanged.
    Otherwise an 8-bit image header of the array's size is created and its
    ``imageData`` is pointed at the array's bytes.

    NOTE(review): the header object and the byte string are locals of this
    function; whether they stay alive after it returns is not visible here -
    confirm before relying on the returned structure.
    """
    if img is None:
        return None

    rows, cols, channels = get_ndarray_dimensions(img)
    raw_pixels = img.tostring()

    header = cv2.cv.CreateImageHeader(
        (cols, rows), cv2.cv.IPL_DEPTH_8U, channels)
    # bytes per row of the tightly packed numpy buffer (no row padding)
    row_bytes = img.dtype.itemsize * channels * cols
    cv2.cv.SetData(header, None, row_bytes)

    # overlay the ctypes layout on the cv.iplimage Python object to reach
    # the underlying IplImage
    py_wrapper = iplimage_t.from_address(id(header))
    ipl_struct = py_wrapper.ipl_ptr.contents
    ipl_struct.imageData = raw_pixels
    return ipl_struct
def get_ndarray_dimensions(img):
    """Return ``(height, width, n_channels)`` of an image array.

    A 2-D array counts as single-channel; for a 3-D array the channel count
    is the size of the last axis. Any other dimensionality raises
    ``ArgumentError``.
    """
    shape = img.shape
    n_dims = len(shape)
    if not 2 <= n_dims <= 3:
        raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
    if n_dims == 2:
        channels = 1
    else:
        channels = shape[2]
    return shape[0], shape[1], channels
def build_ndarray_from_data(str_data, img_shape):
    """Rebuild a ``(height, width, n_channels)`` uint8 image from raw bytes.

    ``str_data`` holds ``height`` rows of equal length; each row may carry
    trailing padding bytes, which are dropped.

    The result is a fresh, writable array that does not alias ``str_data``.
    """
    height, width, n_channels = img_shape
    dtype = depth2dtype(IPL_DEPTH_8U)
    # np.frombuffer is the supported replacement for binary-mode
    # np.fromstring (deprecated, rejected by recent numpy releases)
    flat = np.frombuffer(str_data, dtype=dtype)
    # one row per image line; the row length is the width step in elements
    padded_rows = flat.reshape(height, -1)
    # after the dtype view the columns count elements, not bytes, so the
    # item size must not be multiplied in again
    cols = width * n_channels
    # copy first: frombuffer yields a read-only view of str_data
    return padded_rows[:, :cols].copy().reshape((height, width, n_channels))
# python wrapper function to the c++ function
def do_something(img):
    """Send ``img`` through the C++ ``do_something`` and return the result.

    The image is wrapped as an IplImage, the native function returns a byte
    buffer plus its size, and the bytes are rebuilt into an array with the
    same dimensions as ``img``.
    """
    ipl_arg = get_iplimage_ptr(img)
    # filled in by the native call with the size of the returned buffer
    n_bytes = c_int(0)
    raw_ptr = __c_do_something(ipl_arg, byref(n_bytes))
    payload = string_at(raw_ptr, n_bytes.value)
    return build_ndarray_from_data(payload, get_ndarray_dimensions(img))
# does something ('pointer' to the c function)
# Binds the exported symbol 'do_something' of _cdll through the c_func helper
# (defined outside this snippet), declaring a c_byte_p result and the two
# arguments listed below.
# NOTE(review): the meaning of the trailing 1 in each argument tuple depends
# on c_func - presumably the ctypes "input parameter" flag; confirm.
__c_do_something = c_func(
'do_something', _cdll, c_byte_p,
('img', POINTER(IplImage), 1), # IplImage *img
('image_size', c_int_p, 1), # int* image_size
)
I apologize for the length of source code (although there are a few definitions missing), but i guess that "explicit is better than implicit", jeje.
Any help would be appreciated.
PD: If it helps i'm using Python 2.7, Numpy 1.7.1, OpenCV2.2 (precompiled), Visual Studio 2013 (Visual C++) and Windows 8.1.

I might be wrong, but it seems to me you can just convert the IplImage to a Mat and then convert that to a Python numpy array. Of course you can go the other way too: numpy array to Mat, and Mat to IplImage. Here is code which works great for OpenCV 2.x (tested on OpenCV 2.4.10, but it should work for 2.2 as well). If it doesn't work for your version, it should at least be a good hint and help you write Boost.Python converters for your version.
If - for some reason - you can't convert IplImage to Mat, let us know why so we can try to make other solution.

After being stuck on this problem for a few days, I think I finally reached a solution. Instead of passing the imageData (char*), I decided to replicate the OpenCV IplImage structure in Python with ctypes and then build the numpy array from the received IplImage pointer.
By the way i still don't know what was happening before, but i guess that there was something crazy going on with the conversion of the char* imageData to a string in Python (0 values-translated as null characters-, etc., etc.).
The C++ snippet is a bit simpler now as i don't need to "extract" the imageData from the image. Here it goes:
// Pass-through: hands the received image back to the caller untouched.
IplImage* do_something(IplImage *img)
{
    IplImage* result = img;  // no processing yet
    return result;
}
In the Python side, the code is somewhat similar to the old one. There are, however some key aspects:
First, a 'cv.iplimage' is built.
The 'cv.iplimage' is then converted to a 'cv.cvmat'.
Finally, the 'cv.cvmat' is converted to a numpy array.
Here goes the code:
# Image type (IplImage): pixel depth codes.
# The sign flag is the top bit of a 32-bit value; signed depths are the flag
# combined with the corresponding bit count.
IPL_DEPTH_SIGN = 0x80000000

# unsigned integer and floating point depths
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64

# signed integer depths
IPL_DEPTH_8S = IPL_DEPTH_SIGN | IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN | IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN | 32
# subclassing the ctypes.Structure class to add new features
class _Structure(Structure):
    """``ctypes.Structure`` base class with a field-listing ``repr``."""

    def __repr__(self):
        """Return ``ClassName(field=value,...)`` for every declared field."""
        names = [entry[0] for entry in self._fields_]
        shown = ['%s=%s' % (name, repr(getattr(self, name))) for name in names]
        return '%s(%s)' % (self.__class__.__name__, ','.join(shown))
# Placeholder structure declared without any fields; in this snippet it is
# only used as the target type of the IplImage 'tileInfo' pointer field.
class IplTileInfo(_Structure):
_fields_ = []
# Region-of-interest record referenced by the IplImage 'roi' pointer field:
# a channel-of-interest selector followed by an offset and a size, all c_int.
class IplROI(_Structure):
_fields_ = \
[
# 0 - no COI (all channels are selected)
# 1 - 0th channel is selected ...
('coi', c_int),
('xOffset', c_int),
('yOffset', c_int),
('width', c_int),
('height', c_int),
]
# ipl image header
class IplImage(_Structure):
    """ctypes declaration of the IplImage header used by the wrapped DLL."""

    def __repr__(self):
        """Return ``IplImage(field=value,...)`` without the data pointers."""
        hidden = ('imageData', 'imageDataOrigin')
        parts = [
            '%s=%s' % (entry[0], repr(getattr(self, entry[0])))
            for entry in self._fields_
            if entry[0] not in hidden
        ]
        return '%s(%s)' % (self.__class__.__name__, ','.join(parts))


# The field list is attached after the class statement because 'maskROI'
# is a pointer back to IplImage itself.
IplImage._fields_ = [
    ("nSize", c_int),
    ("ID", c_int),
    ("nChannels", c_int),
    ("alphaChannel", c_int),
    ("depth", c_int),
    ("colorModel", c_char * 4),
    ("channelSeq", c_char * 4),
    ("dataOrder", c_int),
    ("origin", c_int),
    ("align", c_int),
    ("width", c_int),
    ("height", c_int),
    ("roi", POINTER(IplROI)),
    ("maskROI", POINTER(IplImage)),
    ("imageID", c_void_p),
    ("tileInfo", POINTER(IplTileInfo)),
    ("imageSize", c_int),
    ("imageData", c_byte_p),
    ("widthStep", c_int),
    ("BorderMode", c_int * 4),
    ("BorderConst", c_int * 4),
    ("imageDataOrigin", c_char_p)]
# ctypes overlay applied to a cv2.cv image object via
# iplimage_t.from_address(id(...)): a reference count and type slot first,
# then the IplImage pointer, the data object and an offset.
# NOTE(review): this assumes the in-memory layout of the cv2.cv iplimage
# Python object for the OpenCV/Python build in use - confirm per version.
class iplimage_t(_Structure):
_fields_ = \
[
('ob_refcnt', c_ssize_t),
('ob_type', py_object),
('ipl_ptr', POINTER(IplImage)),
('data', py_object),
('offset', c_size_t)
]
# gets the dimensions of a numpy ndarray
def get_ndarray_dimensions(img):
    """Return ``(height, width, n_channels)`` of an image array.

    2-D input is reported with one channel, 3-D input with the size of its
    last axis; any other number of dimensions raises ``ArgumentError``.
    """
    dims = img.shape
    rank = len(dims)
    if rank < 2 or rank > 3:
        raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
    height, width = dims[0], dims[1]
    channels = dims[2] if rank == 3 else 1
    return height, width, channels
def build_ndarray_from_data(iplimage_ptr, img_shape):
    """Convert the image behind an ``IplImage`` pointer to a numpy array.

    The pixel bytes are copied out of the pointed-to structure, attached to
    a fresh 8-bit cv image header using the source ``widthStep``, and the
    header is converted to an array through its matrix slice.
    """
    height, width, n_channels = img_shape
    source = iplimage_ptr.contents
    header = cv2.cv.CreateImageHeader((width, height), IPL_DEPTH_8U, n_channels)
    # copy imageSize bytes starting at imageData into a Python string
    pixel_bytes = string_at(source.imageData, source.imageSize)
    cv2.cv.SetData(header, pixel_bytes, source.widthStep)
    # slicing the header yields the matrix object that numpy can consume
    return np.asarray(header[:, :])
# python wrapper function to the c++ function
def do_something(img):
    """Run ``img`` through the C++ ``do_something`` and return an ndarray.

    The array is wrapped as an IplImage, handed to the native function, and
    the returned IplImage pointer is converted back using the dimensions of
    the input.
    """
    returned_ptr = __c_do_something(get_iplimage_ptr(img))
    dimensions = get_ndarray_dimensions(img)
    return build_ndarray_from_data(returned_ptr, dimensions)
# does something ('pointer' to the c function)
# Binds the exported symbol 'do_something' of _cdll through the c_func helper
# (defined outside this snippet); both the argument and the result are
# declared as POINTER(IplImage).
# NOTE(review): the meaning of the trailing 1 in the argument tuple depends on
# c_func - presumably the ctypes "input parameter" flag; confirm.
__c_do_something = c_func(
'do_something', _cdll, POINTER(IplImage),
('img', POINTER(IplImage), 1), # IplImage *img
)
Hope it helps ;).
PS: I apologize for the length of the code, but i tried to provide the closest to a working example. Loading the compiled C++ .dll with ctypes is up to you (:.

you don't need Python IplImage ,just do this
C file:
// Buffer handed to Python; it is filled by the statements at the end of this
// snippet, which belong inside an initialisation function.
void *return_frame;

// Returns the address of the shared frame buffer.
extern "C" void* get_rgb_frame(){
    return return_frame;
}

#define FRAME_BUFSIZE (1920 * 1080 * 3 + 1)

return_frame = malloc(FRAME_BUFSIZE);
// clear exactly the allocated size (FRAME_BUFSIZE + 1 wrote one byte past the
// end of the allocation)
memset(return_frame, 0x00, FRAME_BUFSIZE);
IplImage* pImg = cvLoadImage("test.jpg",-1);
// memcpy takes (destination, source, count): copy the pixel data of the
// loaded image.
// NOTE(review): assumes test.jpg decodes to a 1920x1080, 3-channel, 8-bit
// image without row padding, and that cvLoadImage succeeded - confirm.
memcpy(return_frame, pImg->imageData, 1920 * 1080 * 3);
cvReleaseImage(&pImg);
Python file:
# Fetch the frame buffer address from the DLL and display it with OpenCV.
# restype must be c_void_p so the returned address is not truncated to a C int.
dll.get_rgb_frame.restype = c_void_p
yv12_img = dll.get_rgb_frame()
# copy 1920 * 1080 * 3 bytes starting at the returned address
imagedata = string_at(yv12_img, 1920 * 1080 * 3)
cv_img = cv2.cv.CreateImageHeader((1920, 1080), cv2.cv.IPL_DEPTH_8U, 3)
# the third argument is the row step in bytes: 3 bytes per pixel, 1920 pixels
cv2.cv.SetData(cv_img, imagedata, 3 * 1920)
cv_mat = cv_img[:]
array = np.asarray(cv_mat)
cv2.imshow('jinfeng', array)
and you can show image in Python

Related

Python API to C++ - return pointer to object instead of full object

I created a python module to call some c++ functions of my library.
It use opencv to manipulate some matrix.
My python code
# NOTE: cols, rows and ratio are expected to be defined before this snippet.
# cv2.resize needs pixel data, so the file is loaded first.
image = cv2.imread("zebre_1080p.png")
input_image = cv2.resize(image, (int(cols - cols % ratio), int(rows - rows % ratio)))
output_image = image
cv2.imshow('input', input_image)
cv2.waitKey(0)
test = py_module.testProcess(input_image, output_image)  # increase the resolution of the input image
cv2.imshow('output', output_image)  # same as image
cv2.waitKey(0)
cv2.imshow('output', test)  # input_image with new resolution
cv2.waitKey(0)
My C++ code
// Python-callable wrapper: parses two array arguments, wraps the input buffer
// in a cv::Mat, runs Process and returns a new numpy array over out.data.
// NOTE(review): the snippet as posted has no closing brace for the function.
static PyObject* testProcess(PyObject* self, PyObject* args) {
PyArrayObject *input, *output;
// "OO" accepts any two Python objects; no check that they are numpy arrays
if ( ! PyArg_ParseTuple(args, "OO", &input, &output) ) return NULL;
// NOTE(review): in_dims and out_dims are not declared anywhere in this snippet
cv::Mat in = cv::Mat(cv::Size(in_dims[0], in_dims[1]), CV_8UC3, PyArray_DATA(input));
cv::Mat out = cv::Mat(cv::Size(out_dims[0], out_dims[1]), CV_8UC3);
// NOTE(review): "in ret" is presumably a typo for "int ret"
in ret = Process(in, out);
if(ret < 0){
printf("Erreur Process : %x\n", ret);
// NOTE(review): NULL is returned without setting a Python exception
return NULL;
}
npy_intp dimens[] = {out.rows, out.cols};
// NOTE(review): the returned array borrows out.data, while out is a local
// cv::Mat - verify the buffer outlives the returned object.
PyObject* obj = PyArray_SimpleNewFromData(out.dims, dimens, NPY_UINT8, out.data);
return obj;
So this solution works, but I want the Python function to return the same value as my C++ function and, like the C++ function, to write the out matrix into the output_image object.
Like that :
ret = py_module.testModule(input_image, output_image) //increase the resolution of the input image
cv2.imshow('output',output_image) // intput_image with new resolution
cv2.waitKey(0)
I don't know if it is possible, maybe with :
Py_BuildValue("O", obj);
or something else ?
Thanks.
I succeeded to do it like that :
// Python-callable wrapper around Process(in, out).
//
// Arguments (Python side): two numpy arrays, input and output, each laid out
// as (rows, cols, channels). Both buffers are wrapped in cv::Mat headers of
// type CV_8UC3 without copying, Process fills `out`, and the result is moved
// into `output`.
//
// Returns a Python int 0 on success. On failure a Python exception is set and
// NULL is returned (returning NULL without an exception set makes the
// interpreter raise SystemError).
//
// NOTE(review): the CV_8UC3 headers assume uint8, 3-channel, C-contiguous
// arrays; only the number of dimensions is validated here - confirm callers.
static PyObject* testProcess(PyObject* self, PyObject* args) {
    // _import_array reports failure with a negative value and a set exception
    if (_import_array() < 0) return NULL;

    PyArrayObject *input, *output;
    // "O!" makes the parser reject anything that is not a numpy array
    if ( ! PyArg_ParseTuple(args, "O!O!", &PyArray_Type, &input, &PyArray_Type, &output) ) return NULL;

    if (PyArray_NDIM(input) != 3 || PyArray_NDIM(output) != 3) {
        PyErr_SetString(PyExc_ValueError, "testProcess expects (rows, cols, channels) arrays");
        return NULL;
    }

    npy_intp* in_shape = PyArray_SHAPE(input);
    npy_intp* out_shape = PyArray_SHAPE(output);
    cv::Mat in = cv::Mat(in_shape[0], in_shape[1], CV_8UC3, PyArray_DATA(input));
    cv::Mat out = cv::Mat(out_shape[0], out_shape[1], CV_8UC3, PyArray_DATA(output));

    int ret = Process(in, out);
    if(ret < 0){
        printf("Erreur Process : %x\n", ret);
        PyErr_Format(PyExc_RuntimeError, "Erreur Process : %x", ret);
        return NULL;
    }

    // Temporary array header over out.data; it does not own the buffer.
    // The dimension count is the array rank of `output`, not the channel count.
    PyArrayObject *obj = (PyArrayObject *)PyArray_SimpleNewFromData(PyArray_NDIM(output), out_shape, PyArray_TYPE(output), out.data);
    if (obj == NULL) return NULL;

    int moved = PyArray_MoveInto(output, obj);
    // release the temporary header; the original never dropped this reference
    Py_DECREF(obj);
    if (moved < 0) return NULL;

    return PyLong_FromLong( 0 );
}
My python script :
def main():
    """Load an image, build a half-size copy and run ``testProcess`` on it.

    The source image is cropped to a multiple of ``ratio``, downscaled by
    ``ratio``, and the downscaled image is passed to
    ``py_module.testProcess`` together with a zero-filled output array of
    the cropped image's shape and dtype.

    Raises:
        IOError: if the image file cannot be read.
    """
    ratio = 2
    image = "zebre.png"
    # Read image
    orgHRImage = cv2.imread(image)
    if orgHRImage is None:
        # cv2.imread signals failure by returning None
        raise IOError("cannot read image: %s" % image)
    # ndarray.shape is (rows, cols, channels); unpacking it the other way
    # round hands cv2.resize a (height, width) size and distorts the image
    rows, cols, dim = orgHRImage.shape
    # crop to %2 -- cv2.resize takes the target size as (width, height)
    orgHRImage = cv2.resize(orgHRImage, (int(cols - cols % ratio), int(rows - rows % ratio)))
    # downscale image
    downSize = (cols // ratio, rows // ratio)
    downScaledImage = cv2.resize(orgHRImage, downSize)
    # create empty output image
    output = np.zeros(orgHRImage.shape, orgHRImage.dtype)
    #cv2.imshow('downScaledImage',downScaledImage)
    #cv2.waitKey(0)
    #cv2.imshow('output',output)
    #cv2.waitKey(0)
    py_module.testProcess(downScaledImage, output)
    #cv2.imshow('output',output)
    #cv2.waitKey(0)
So I create one input matrix (cv::Mat in) from the information in &input (which is an image) and one output matrix (cv::Mat out) from the information in &output (which was initialized to zeros in my Python script).
I call the Process function to fill my output matrix.
I create a new PyArrayObject with the same properties of &output and with the fresh data of the matrix out.
I move the pointer to the new object to my old output pointer to have the new object.
I was able to see the picture resized its size divided by 2, next a black image and at the end the resized image from output.

Ctypes Cuda - pointer multiplication does not result in product

I implemented a Cuda matrix multiplication solely in C which successfully runs. Now I am trying to shift the Matrix initialization to numpy and use Python's ctypes library to execute the c code. It seems like the array with the pointer does not contain the multiplied values. I am not quite sure where the problem lies, but already in the CUDA code - even after calling the Kernel method and loading back the values from device to host, values are still zeroes.
The CUDA code:
#include <stdio.h>
#include <stdlib.h>
#define BLOCK_SIZE 16
#define RANDOM_MN_RANGE 64
// Plain matrix descriptor passed by value to the kernel and by pointer to
// mMul: the dimensions plus a pointer to width * height floats.
struct Matrix {
int width;
int height;
// contiguously stored Matrix, in row first order
float *elements;
};
// Computes one element of C = A * B per thread.
// A, B and C are row-major; their `elements` pointers must be device memory.
__global__ void MatMulKernel(Matrix A, Matrix B, Matrix C){
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    // The grid is rounded up to whole blocks, so threads that fall outside
    // the result matrix must neither read nor write.
    if (row >= C.height || col >= C.width)
        return;
    float tmpVal = 0;
    for (int i = 0; i < A.width; ++i)
        tmpVal += A.elements[row * A.width + i] *
                  B.elements[i * B.width + col];
    C.elements[ row * C.width + col ] = tmpVal;
}

extern "C" {
// Host entry point: multiplies the host matrices *A and *B on the GPU and
// stores the product in C->elements (host memory, C->width * C->height floats).
//
// The shapes must satisfy A->width == B->height, C->height == A->height and
// C->width == B->width; otherwise an error is printed to stderr and C is left
// untouched, because the kernel would index outside the buffers.
// CUDA failures are reported on stderr as well.
void mMul( Matrix *A, Matrix *B, Matrix *C ){
    if (A->width != B->height || C->height != A->height || C->width != B->width) {
        fprintf(stderr, "mMul: incompatible shapes A(%dx%d) B(%dx%d) C(%dx%d)\n",
                A->height, A->width, B->height, B->width, C->height, C->width);
        return;
    }

    Matrix d_A, d_B, d_C;
    // Matrix d_A
    d_A.width = A->width;
    d_A.height = A->height;
    size_t sizeA = A->width * A->height * sizeof(float);
    // dynamically allocate cudaMemory for the elements array
    cudaMalloc(&d_A.elements, sizeA);
    cudaMemcpy(d_A.elements, A->elements, sizeA, cudaMemcpyHostToDevice);
    // Matrix d_B
    d_B.width = B->width;
    d_B.height = B->height;
    size_t sizeB = B->width * B->height * sizeof(float);
    // dynamically allocate cudaMemory for the elements array
    cudaMalloc(&d_B.elements, sizeB);
    cudaMemcpy(d_B.elements, B->elements, sizeB, cudaMemcpyHostToDevice);
    // Matrix d_C
    d_C.width = C->width;
    d_C.height = C->height;
    size_t sizeC = C->width * C->height * sizeof(float);
    // dynamically allocate cudaMemory for the elements array
    cudaMalloc(&d_C.elements, sizeC);

    // 16 * 16 = 256 threads per block
    dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE);
    // Blocks per grid, rounded up: a truncating division yields a grid
    // dimension of 0 (an invalid launch) whenever a side is smaller than
    // BLOCK_SIZE, and skips the edge otherwise.
    dim3 dimGrid((B->width + dimBlock.x - 1) / dimBlock.x,
                 (A->height + dimBlock.y - 1) / dimBlock.y);
    // calling the Kernel
    MatMulKernel<<<dimGrid, dimBlock>>>(d_A, d_B, d_C);
    // a failed launch is only visible through the runtime error state
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        fprintf(stderr, "mMul: kernel launch failed: %s\n", cudaGetErrorString(err));

    // copy results from result matrix C to the host again
    err = cudaMemcpy(C->elements, d_C.elements, sizeC, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        fprintf(stderr, "mMul: copying the result failed: %s\n", cudaGetErrorString(err));

    printf("A is %f\n", A->elements[0]);
    printf("B is %f\n", B->elements[0]);
    printf("C is %f\n", C->elements[0]);
    // free the cuda memory
    cudaFree(d_A.elements);
    cudaFree(d_B.elements);
    cudaFree(d_C.elements);
}
}
To compile the code I use the following command
nvcc --shared --compiler-options '-fPIC' -o Sequential_Cuda_Python.so Sequential_Cuda_Python.cu
ctypes Python code
import numpy as np
from numpy.ctypeslib import ndpointer
from ctypes import *
# ctypes counterpart of the C 'struct Matrix': the field order and types must
# match the C declaration (int width, int height, float *elements).
class Matrix(Structure):
_fields_ = [("width", c_int),
("height", c_int),
("elements", POINTER(c_float))]
# load the compiled CUDA library and declare mMul(Matrix*, Matrix*, Matrix*) -> void
libc = CDLL("./Sequential_Cuda_Python.so")
libc.mMul.argtypes = [ POINTER(Matrix), POINTER(Matrix), POINTER(Matrix) ]
libc.mMul.restype = None
# Converts a numpy array to float32 and wraps it in a Matrix structure,
# returning (pointer to the Matrix, shape of the array).
def npArrtoMatrixPtr(data: np.ndarray) -> (POINTER(Matrix), tuple):
"""
numpy arr to Matrix pointer
#return (pointer to arr, shape)
"""
c_float_p = POINTER(c_float)
# astype returns a new array; the Matrix points at this copy, not at the
# caller's array
data = data.astype(np.float32)
# NOTE(review): np.shape returns (rows, columns), i.e. (height, width), so
# this unpacking stores the row count as the width and vice versa.
w, h = np.shape(data)
# print((w, h))
mXp = Matrix(height=h, width=w, elements=data.ctypes.data_as(c_float_p))
return (pointer(mXp), np.shape(data))
# Takes three (Matrix pointer, shape) tuples as produced by npArrtoMatrixPtr,
# calls the native mMul and prints the result matrix; always returns 0.
def matMulSeqCuda( _mA, _mB, _mC ):
"""
multiplies mA with mB sequentually using mC
"""
# each argument is a (pointer, shape) pair
pmA, mASz = ( _mA[0], _mA[-1] )
pmB, mBSz = ( _mB[0], _mB[-1] )
pmC, mCSz = ( _mC[0], _mC[-1] )
# the shape tuples must have exactly two entries
assert np.shape( mASz )[0] == 2 and \
np.shape( mBSz )[0] == 2 and \
np.shape( mCSz )[0] == 2, "Only 2D arrays accepted"
#assert np.shape(mA)[0] == np.shape(mB)[1], "Rows of mA need to be the same as Cols of mB"
libc.mMul( pmA, pmB, pmC )
c = pmC.contents
# NOTE(review): the shape uses c.height for both axes; (c.height, c.width)
# is presumably what was intended.
mtxC = np.ctypeslib.as_array(c.elements, shape=(c.height, c.height))
# The array still contains only 0. values
print(mtxC)
return 0
# Script entry: builds two 16x8 matrices of ones and a 16x16 zero matrix,
# wraps each one and runs the multiplication.
# NOTE(review): a and b have the same (16, 8) shape, so as given they are not
# conformable for a matrix product without a transpose.
if __name__ == '__main__':
a = np.ones( (16, 8) )
b = np.ones( (16, 8) )
c = np.zeros( (16, 16) )
mA = npArrtoMatrixPtr(a)
mB = npArrtoMatrixPtr(b)
mC = npArrtoMatrixPtr(c)
matMulSeqCuda(mA, mB, mC)
Solution:
As @Mark Tolonen pointed out, the error lay in the Python script. By calling npArrtoMatrixPtr (which creates and returns a pointer to a Matrix struct) within the same scope as the CUDA function libc.mMul, I was able to retrieve the correct resulting matrix mtxC.
import numpy as np
from numpy.ctypeslib import ndpointer
from ctypes import *
# ctypes counterpart of the C 'struct Matrix': the field order and types must
# match the C declaration (int width, int height, float *elements).
class Matrix(Structure):
_fields_ = [("width", c_int),
("height", c_int),
("elements", POINTER(c_float))]
# load the compiled CUDA library and declare mMul(Matrix*, Matrix*, Matrix*) -> void
libc = CDLL("./Sequential_Cuda_Python.so")
libc.mMul.argtypes = [ POINTER(Matrix), POINTER(Matrix), POINTER(Matrix) ]
libc.mMul.restype = None
def npArrtoMatrixPtr(data: np.ndarray) -> (POINTER(Matrix), tuple):
    """Wrap a 2-D numpy array in a ``Matrix`` structure for the C library.

    The array is copied to a row-major (C-ordered) float32 buffer, which is
    the layout the C code indexes with ``row * width + col``.

    Returns:
        ``(pointer to the Matrix, shape of the array)``; the Matrix refers
        to the float32 copy, not to ``data`` itself.
    """
    # order='C' matters: the default keeps the memory order of the input, so
    # a transposed array (e.g. ``b.T``) would stay column-major and the C
    # side would read it as the untransposed matrix
    row_major = data.astype(np.float32, order='C')
    # numpy shapes are (rows, columns) == (height, width)
    h, w = row_major.shape
    mXp = Matrix(w, h, row_major.ctypes.data_as(POINTER(c_float)))
    return (pointer(mXp), np.shape(row_major))
def matMulSeqCuda( npMa, npMb, npMc ):
    """Multiply ``npMa`` by the transpose of ``npMb`` with the CUDA library.

    The three arrays are wrapped as Matrix structures (``npMb`` transposed
    first), the native ``mMul`` is called, and the result matrix is printed.
    Always returns 0.
    """
    all_2d = all(len(np.shape(m)) == 2 for m in (npMa, npMb, npMc))
    assert all_2d, "Only 2D arrays accepted"

    ptr_a, _ = npArrtoMatrixPtr(npMa)
    ptr_b, _ = npArrtoMatrixPtr(npMb.T)
    ptr_c, _ = npArrtoMatrixPtr(npMc)  # the resulting array

    libc.mMul(ptr_a, ptr_b, ptr_c)

    result = ptr_c.contents
    mtxC = np.ctypeslib.as_array(
        result.elements, shape=(result.height, result.width))
    print(mtxC)
    return 0
# Script entry: two 16x8 matrices of ones and a 16x16 zero matrix are passed
# as plain numpy arrays; matMulSeqCuda transposes the second one itself.
if __name__ == '__main__':
a = np.ones( (16, 8) )
b = np.ones( (16, 8) )
c = np.zeros( (16, 16) )
matMulSeqCuda(a, b, c)
I can't compile your code as is, but the problem is that np.shape returns (rows,columns) or the equivalent (height,width), not (width,height):
w, h = np.shape(data) # should be h,w
And also on visual inspection this line is wrong (c.height twice).
mtxC = np.ctypeslib.as_array(c.elements, shape=(c.height, c.height))
The CUDA code is extraneous to the question, which is passing a Matrix* correctly and receiving back the modifications. I made a minimal reproducible example below that concentrates on passing a Matrix correctly:
test.cpp - receives a Matrix structure and doubles the values in it.
// Export decoration: __declspec(dllexport) on Windows, nothing elsewhere.
#ifdef _WIN32
#   define API __declspec(dllexport)
#else
#   define API
#endif

// Row-major matrix descriptor shared with the Python side.
struct Matrix {
    int width;
    int height;
    float *elements;
};

// Doubles every element of *A in place, visiting the matrix row by row.
extern "C" API
void doubleit(Matrix *A) {
    for (int row = 0; row < A->height; ++row) {
        const int row_start = row * A->width;
        for (int col = 0; col < A->width; ++col)
            A->elements[row_start + col] *= 2;
    }
}
test.py
import numpy as np
from ctypes import *
# ctypes counterpart of the C 'struct Matrix' in test.cpp: the field order and
# types must match (int width, int height, float *elements).
class Matrix(Structure):
_fields_ = [("width", c_int),
("height", c_int),
("elements", POINTER(c_float))]
libc = CDLL('./test')
# the trailing comma makes argtypes a one-element tuple
libc.doubleit.argtypes = POINTER(Matrix),
libc.doubleit.restype = None
def doubleit(a):
    """Double every element of the 2-D array ``a`` in place via the C library.

    The Matrix handed to C points directly at ``a``'s own buffer.
    NOTE(review): presumably ``a`` must be a C-contiguous float32 array for
    the C code to read it correctly - confirm at the call sites.
    """
    rows, cols = a.shape  # numpy order is (height, width)
    element_ptr = a.ctypes.data_as(POINTER(c_float))
    # Matrix fields are (width, height, elements)
    matrix = Matrix(cols, rows, element_ptr)
    libc.doubleit(matrix)
# Demo: a 2x3 float32 matrix is printed, doubled in place, and printed again.
a = np.arange(0.0,0.6,0.1,dtype=np.float32).reshape((2,3))
print(a)
doubleit(a)
print(a)
Output:
[[0. 0.1 0.2]
[0.3 0.4 0.5]]
[[0. 0.2 0.4]
[0.6 0.8 1. ]]

reading an opencv image in python through a socket

I am trying to read an opencv image in a python socket that is sent from c++.
I am able to read the image into another c++ program or VB program and build an image but with python I don't understand what's happening.
The sending code where I send the mat.data:
// Sends one image as three consecutive messages: a textual header, the raw
// cv::Mat pixel buffer, and an end marker.
char *start_s = "<S><size>43434234<cols>64<rows>64<SE>";//plus I send the image size, cols, rows, which varies, not like the static char string shown
char *end_e = "<E>";
cv::Mat image_send = some_mat;
// NOTE(review): iResult is overwritten by each call and never inspected in
// this snippet; send() may report an error or a short write - confirm handling.
iResult = send( ConnectSocket, start_s, (int)strlen(start_s), 0 );
// NOTE(review): i_buffer_size is defined outside this snippet; presumably the
// byte size of image_send.data, which also assumes a continuous Mat.
iResult = send( ConnectSocket, (const char *) image_send.data, i_buffer_size, 0 );
iResult = send( ConnectSocket, end_e, (int)strlen(end_e), 0 );
This is what I have tried with the python, but haven't had any success yet. The image_cols and Image_rows are filtered from the socket, not shown here, and only the image_mat.data from the c++ mat is in the socket that I am trying to put into the image:
# Attempt to rebuild the image from the bytes read off the socket.
# NOTE(review): recv returns at most the requested number of bytes and may
# return fewer, so one call is not guaranteed to contain the whole image.
data = conn.recv(4757560)
if(i_Read_Image == 2) & (image_cols != 0) & (image_rows != 0):
print ("Entering")
#print(data)
data2 = np.fromstring(data, dtype='uint8')
# NOTE(review): cv2.imdecode expects an encoded image file (e.g. PNG/JPEG
# bytes); the sender transmits raw Mat pixels, which is presumably why this fails.
img_np = cv2.imdecode(data2,cv2.IMREAD_COLOR )
cv2.imshow('image',img_np)
cv2.waitKey(0)
#Also tried this
#img = Image.new('RGB', (image_cols, image_rows))
#img.putdata(data)
#img5 = np.reshape(data2,(image_rows,image_cols))
i_Read_Image = 0
With the help of the comments I was able to get a working answer. The original image is in a single array RGB, this needs to be reshaped and placed into a 'RGB' image, it can be done in one line:
# reshape the flat byte array to (rows, cols, 3) and wrap it as an 'RGB' image
img = Image.fromarray(data2.reshape(image_rows,image_cols,3), 'RGB')
and when reading an opencv data array from a socket: this works:
# Read the raw pixel bytes and show them as an RGB image.
# NOTE(review): recv may return fewer bytes than requested; the reshape below
# raises if fewer than image_rows * image_cols * 3 bytes arrived - confirm
# that the surrounding code reads until the whole image is received.
data = conn.recv(567667)
if i_Read_Image == 2 and image_cols != 0 and image_rows != 0:
    # np.frombuffer is the supported replacement for binary-mode np.fromstring
    data2 = np.frombuffer(data, dtype='uint8')
    img = Image.fromarray(data2.reshape(image_rows, image_cols, 3), 'RGB')
    img.show()

Convert PyQt5 QPixmap to numpy ndarray

I have pixmap:
# grab the rectangle (x, y, width, height) of window id 0 from the screen
pixmap = self._screen.grabWindow(0,
self._x, self._y,
self._width, self._height)
I want to convert it to OpenCV format.
I tried to convert it to numpy.ndarray as described here but I got error sip.voidptr object has an unknown size
Is there any way to get numpy array (same format as cv2.VideoCapture read method returns)?
I got numpy array using this code:
# Convert the grabbed pixmap to a (height, width, 4) uint8 array by copying.
# NOTE(review): assumes the QImage uses 4 bytes per pixel and has exactly
# self._width x self._height pixels - confirm the image format and size.
channels_count = 4
pixmap = self._screen.grabWindow(0, self._x, self._y, self._width, self._height)
image = pixmap.toImage()
s = image.bits().asstring(self._width * self._height * channels_count)
# np.frombuffer replaces binary-mode np.fromstring; .copy() keeps the result
# writable and independent of the byte string, as fromstring did
arr = np.frombuffer(s, dtype=np.uint8).reshape((self._height, self._width, channels_count)).copy()
The copy can be avoided by doing:
# Zero-copy variant: the array is built directly over the QImage buffer.
# NOTE(review): arr presumably stays valid only while 'image' is alive,
# because no pixel data is copied - confirm before storing arr.
channels_count = 4
pixmap = self._screen.grabWindow(0, self._x, self._y, self._width, self._height)
image = pixmap.toImage()
b = image.bits()
# sip.voidptr must know size to support python buffer interface
b.setsize(self._height * self._width * channels_count)
arr = np.frombuffer(b, np.uint8).reshape((self._height, self._width, channels_count))
Heres a function:
def QPixmapToArray(pixmap):
    """Convert a ``QPixmap`` to a ``(height, width, 4)`` uint8 numpy array.

    The pixel data is copied, so the result stays valid after the
    intermediate ``QImage`` is gone.

    NOTE(review): assumes the image returned by ``toImage()`` uses 4 bytes
    per pixel; other formats make the final reshape fail - confirm, or
    convert the image to a 32-bit format first.
    """
    qimg = pixmap.toImage()
    height = qimg.height()
    width = qimg.width()
    stride = qimg.bytesPerLine()

    bits = qimg.bits()
    if hasattr(bits, 'setsize'):
        # PyQt returns a sip.voidptr, which has no tobytes() and only
        # supports the buffer interface once its size is known
        bits.setsize(stride * height)

    # one row per scan line, using the stride reported by Qt
    scan_lines = np.frombuffer(bits, dtype=np.uint8).reshape((height, stride))
    # copy before returning: the buffer belongs to qimg, a local object
    return scan_lines[:, :width * 4].copy().reshape((height, width, 4))

data type errors for input images of cv2.calcOpticalFlowPyrLK

I'm running opencv 2.4.1 using python bindings and am having difficulty calculating the optical flow.
Specifically this section of code:
#calculate the opticalflow
# on the first pass there is no previous frame yet, so the current saturation
# image is used in its place
if prev_saturation_thresh_img==None:
prev_saturation_thresh_img=saturation_img
if i >=0:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
# NOTE(review): prev_img and next_img are images created through the legacy
# 'cv' API, while the cv2 function expects numpy arrays - this matches the
# "<unknown> is not a numpy array" error quoted below.
p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img,next_img,tracks_np,**lk_params)
Returns the error:
<unknown> is not a numpy array
So then I try to convert the images to numpy arrays:
# NOTE(review): as posted, these two assignments perform no conversion to
# numpy arrays; the conversion call was presumably lost from the snippet.
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
Now I have a new error:
<unknown> data type = 17 is not supported
In a last-ditch effort I convert the images to cvmat (from iplimage) before converting it to a numpy array, just to see what happens
error: ..\..\..\OpenCV-2.4.1\modules\video\src\lkpyramid.cpp:607: error: (-215) nextPtsMat.checkVector(2, CV_32F, true) == npoints
So now I'm stuck. Below is the code in its entirety for reference:
import cv
import cv2
import numpy as np
# Reads a video file frame by frame, thresholds the saturation channel, picks
# features on the first frame and tries to track them with pyramidal
# Lucas-Kanade optical flow, showing the intermediate images in windows.
# The code uses Python 2 syntax (print statements) and the legacy 'cv' module.
class Target:
def __init__(self):
self.capture = cv.CaptureFromFile("raw_gait_cropped.avi")
def run(self):
#initiate font
font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 3, 8)
#instantiate images
# note: querying a frame here consumes the first frame of the capture
img_size=cv.GetSize(cv.QueryFrame(self.capture))
hsv_img=cv.CreateImage(img_size,8,3)
saturation_img=cv.CreateImage(img_size,8,1)
saturation_thresh_img=cv.CreateImage(img_size,8,1)
prev_saturation_thresh_img=None
#create params for GoodFeaturesToTrack and calcOpticalFlowPyrLK
gftt_params = dict( cornerCount=11,
qualityLevel=0.2,
minDistance=5,
mask=None,
useHarris=True
)
# NOTE(review): OPTFLOW_USE_INITIAL_FLOW is set here, but the call below
# passes no initial next-points array; presumably related to the
# nextPtsMat.checkVector assertion quoted in the question - confirm.
lk_params = dict( winSize = (15, 15),
maxLevel = 2,
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
flags = cv2.OPTFLOW_USE_INITIAL_FLOW,
minEigThreshold=1
)
tracks=[]
# NOTE(review): no frame is written to this writer anywhere in the snippet
writer=cv.CreateVideoWriter("angle_tracking.avi",cv.CV_FOURCC('M','J','P','G'),30,cv.GetSize(hsv_img),1)
i=0
while True:
#grab a frame from the video capture
img=cv.QueryFrame(self.capture)
#break the loop when the video is over
if img == None:
break
#convert the image to HSV
cv.CvtColor(img,hsv_img,cv.CV_BGR2HSV)
#Get Saturation channel
cv.MixChannels([hsv_img],[saturation_img],[(1,0)])
#Apply threshold to saturation channel
cv.InRangeS(saturation_img,145,255,saturation_thresh_img)
#locate initial features to track
if i==0:
# both names are bound to the same matrix object
eig_image=temp_image = cv.CreateMat(img.height, img.width, cv.CV_32FC1)
for (x,y) in cv.GoodFeaturesToTrack(saturation_thresh_img, eig_image, temp_image, **gftt_params):
tracks.append([(x,y)])
cv.Circle(saturation_thresh_img,(int(x),int(y)),5,(255,255,255),-1,cv.CV_AA,0)
# flatten the list of one-point tracks to an (N, 2) float32 array
tracks_np=np.float32(tracks).reshape(-1,2)
print tracks
#calculate the opticalflow
if prev_saturation_thresh_img==None:
prev_saturation_thresh_img=saturation_img
if i >=0:
prev_img=prev_saturation_thresh_img
next_img=saturation_thresh_img
# NOTE(review): both images are legacy 'cv' images, not numpy arrays
p1, st, err = cv2.calcOpticalFlowPyrLK(prev_img,next_img,tracks_np,**lk_params)
# NOTE(review): the "previous" image is taken from saturation_img (the
# unthresholded channel), and that object is reused for every frame, so it
# does not hold an independent copy of the earlier frame - confirm intent.
prev_saturation_thresh_img=saturation_img
i=i+1
print i
#display frames to users
cv.ShowImage("Raw Video",img)
cv.ShowImage("Saturation Channel",saturation_img)
cv.ShowImage("Saturation Thresholded",saturation_thresh_img)
# Listen for ESC or ENTER key
c = cv.WaitKey(7) % 0x100
if c == 27 or c == 10:
break
#close all windows once video is done
cv.DestroyAllWindows()
# script entry point: build the tracker and process the video
if __name__=="__main__":
t = Target()
t.run()
OpenCV can be very picky about the data formats it accepts. The following code extract works for me:
# Load two images with the legacy API, convert each to a numpy array through
# a full [:,:] slice, convert to grayscale and run the optical flow.
# file_list, file, good_features and lk_params are defined outside this extract.
prev = cv.LoadImage('images/'+file_list[0])
prev = np.asarray(prev[:,:])
prev_gs = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
current = cv.LoadImage('images/'+file)
current = np.asarray(current[:,:])
current_gs = cv2.cvtColor(current, cv2.COLOR_BGR2GRAY)
# None is passed in the fourth (next points) position
features, status, track_error = cv2.calcOpticalFlowPyrLK(prev_gs, current_gs, good_features, None,
**lk_params)
Note the [:,:] when converting from images to numpy arrays, I have found that they are required.
I hope that this may solve your problem.

Categories