I have pixmap:
pixmap = self._screen.grabWindow(0,
self._x, self._y,
self._width, self._height)
I want to convert it to OpenCV format.
I tried to convert it to numpy.ndarray as described here but I got error sip.voidptr object has an unknown size
Is there any way to get numpy array (same format as cv2.VideoCapture read method returns)?
I got numpy array using this code:
channels_count = 4
pixmap = self._screen.grabWindow(0, self._x, self._y, self._width, self._height)
image = pixmap.toImage()
s = image.bits().asstring(self._width * self._height * channels_count)
arr = np.fromstring(s, dtype=np.uint8).reshape((self._height, self._width, channels_count))
The copy can be avoided by doing:
channels_count = 4
pixmap = self._screen.grabWindow(0, self._x, self._y, self._width, self._height)
image = pixmap.toImage()
b = image.bits()
# sip.voidptr must know size to support python buffer interface
b.setsize(self._height * self._width * channels_count)
arr = np.frombuffer(b, np.uint8).reshape((self._height, self._width, channels_count))
Heres a function:
def QPixmapToArray(pixmap):
## Get the size of the current pixmap
size = pixmap.size()
h = size.width()
w = size.height()
## Get the QImage Item and convert it to a byte string
qimg = pixmap.toImage()
byte_str = qimg.bits().tobytes()
## Using the np.frombuffer function to convert the byte string into an np array
img = np.frombuffer(byte_str, dtype=np.uint8).reshape((w,h,4))
return img
Related
I have a json file which contain polygons of filled circles. How can i implement this json file to the below code to get masks?
def polygons_to_mask_array_labelme(polygons, width : int = 300, height : int = 300) -> np.ndarray:
'''
This function takes a list of lists that contains polygon masks for each building. Example;
[[x11,y11,x12,y12,...],...,[xn1,yn1,xn2,yn2,...]]
The return of this function is an array of size width x height which contains a binary mask
as defined by the list of polygons
Example usage:
import json
with open(json_names[0], encoding = 'utf-8') as f:
data = json.load(f)
plt.imshow(polygons_to_mask_array(data['shapes'], 898, 559))
'''
img = Image.new('L', (width, height), 0)
for polygon in polygons:
nested_lst_of_tuples = [tuple(l) for l in polygon['points']]
try:
ImageDraw.Draw(img).polygon(nested_lst_of_tuples, outline=1, fill=1)
except:
print(nested_lst_of_tuples)
mask = np.array(img)
return mask
and the json file is:
name is f90.json, which shows
{"polygons": ["220.5,43.0 224.3625,42.38749999999999 227.85,40.61250000000001 230.6125,37.849999999999994 232.3875,34.36250000000001 233.0,30.5 232.3875,26.63749999999999 230.6125,23.150000000000006 227.85,20.38749999999999 224.3625,18.61250000000001 220.5,18.0 216.6375,18.61250000000001 213.15,20.38749999999999 210.3875,23.150000000000006 208.6125,26.63749999999999 208.0,30.5 208.6125,34.36250000000001 210.3875,37.849999999999994 213.15,40.61250000000001 216.6375,42.38749999999999", "351.5,116.0 355.3625,115.38749999999999 358.85,113.61250000000001 361.6125,110.85 363.3875,107.36250000000001 364.0,103.5 363.3875,99.63749999999999 361.6125,96.15 358.85,93.38749999999999 355.3625,91.61250000000001 351.5,91.0 347.6375,91.61250000000001 344.15,93.38749999999999 341.3875,96.15 339.6125,99.63749999999999 339.0,103.5 339.6125,107.36250000000001 341.3875,110.85 344.15,113.61250000000001 347.6375,115.38749999999999", "314.5,368.0 318.3625,367.3875 321.85,365.6125 324.6125,362.85 326.3875,359.3625 327.0,355.5 326.3875,351.6375 324.6125,348.15 321.85,345.3875 318.3625,343.6125 314.5,343.0 310.6375,343.6125 307.15,345.3875 304.3875,348.15 302.6125,351.6375 302.0,355.5 302.6125,359.3625 304.3875,362.85 307.15,365.6125 310.6375,367.3875", "475.5,353.0 479.3625,352.3875 482.85,350.6125 485.6125,347.85 487.3875,344.3625 488.0,340.5 487.3875,336.6375 485.6125,333.15 482.85,330.3875 479.3625,328.6125 475.5,328.0 471.6375,328.6125 468.15,330.3875 465.3875,333.15 463.6125,336.6375 463.0,340.5 463.6125,344.3625 465.3875,347.85 468.15,350.6125 471.6375,352.3875"]}
My output doesn't show anything and I honestly can't find out why
This is the full code, but I think the problem is when I'm passing the argument to aRed, aGreen, aBlue, originalImage = openImage(response.content)
When I run that code in collab python notebook, my image isn't showing up for some reason! Maybe it's the way I'm passing the URL as an argument in the line above?
import numpy
from PIL import Image
import requests
from io import BytesIO
# FUNCTION DEFINTIONS:
# open the image and return 3 matrices, each corresponding to one channel (R, G and B channels)
def openImage(imagePath):
imOrig = Image.open(BytesIO(imagePath))
im = numpy.array(imOrig)
aRed = im[:, :, 0]
aGreen = im[:, :, 1]
aBlue = im[:, :, 2]
return [aRed, aGreen, aBlue, imOrig]
# compress the matrix of a single channel
def compressSingleChannel(channelDataMatrix, singularValuesLimit):
uChannel, sChannel, vhChannel = numpy.linalg.svd(channelDataMatrix)
aChannelCompressed = numpy.zeros((channelDataMatrix.shape[0], channelDataMatrix.shape[1]))
k = singularValuesLimit
leftSide = numpy.matmul(uChannel[:, 0:k], numpy.diag(sChannel)[0:k, 0:k])
aChannelCompressedInner = numpy.matmul(leftSide, vhChannel[0:k, :])
aChannelCompressed = aChannelCompressedInner.astype('uint8')
return aChannelCompressed
# MAIN PROGRAM:
response = requests.get('https://i.imgur.com/BIOFZNo.png')
print ('*** Image Compression using SVD - a demo')
aRed, aGreen, aBlue, originalImage = openImage(response.content)
# image width and height:
imageWidth = 1000
imageHeight = 1000
#number of singular values to use for reconstructing the compressed image
singularValuesLimit = 160
aRedCompressed = compressSingleChannel(aRed, singularValuesLimit)
aGreenCompressed = compressSingleChannel(aGreen, singularValuesLimit)
aBlueCompressed = compressSingleChannel(aBlue, singularValuesLimit)
imr=Image.fromarray(aRedCompressed,mode=None)
img=Image.fromarray(aGreenCompressed,mode=None)
imb=Image.fromarray(aBlueCompressed,mode=None)
newImage = Image.merge("RGB", (imr,img,imb))
originalImage.show()
newImage.show()
There are no errors in compiling the program, it just doesn't show up anything.
Thank you all!
Here is the link to my file: https://colab.research.google.com/drive/12K0nWKRdOpZ3gSfTn0wuP8Y0_UUeUxEE
You don't need to specify .show() in interactive modes. Just remove that part, and it will work fine.
import numpy
from PIL import Image
import requests
from io import BytesIO
# FUNCTION DEFINTIONS:
# open the image and return 3 matrices, each corresponding to one channel (R, G and B channels)
def openImage(imagePath):
imOrig = Image.open(BytesIO(imagePath))
im = numpy.array(imOrig)
aRed = im[:, :, 0]
aGreen = im[:, :, 1]
aBlue = im[:, :, 2]
return [aRed, aGreen, aBlue, imOrig]
# compress the matrix of a single channel
def compressSingleChannel(channelDataMatrix, singularValuesLimit):
uChannel, sChannel, vhChannel = numpy.linalg.svd(channelDataMatrix)
aChannelCompressed = numpy.zeros((channelDataMatrix.shape[0], channelDataMatrix.shape[1]))
k = singularValuesLimit
leftSide = numpy.matmul(uChannel[:, 0:k], numpy.diag(sChannel)[0:k, 0:k])
aChannelCompressedInner = numpy.matmul(leftSide, vhChannel[0:k, :])
aChannelCompressed = aChannelCompressedInner.astype('uint8')
return aChannelCompressed
# MAIN PROGRAM:
response = requests.get('https://i.imgur.com/BIOFZNo.png')
print ('*** Image Compression using SVD - a demo')
aRed, aGreen, aBlue, originalImage = openImage(response.content)
# image width and height:
imageWidth = 1000
imageHeight = 1000
#number of singular values to use for reconstructing the compressed image
singularValuesLimit = 160
aRedCompressed = compressSingleChannel(aRed, singularValuesLimit)
aGreenCompressed = compressSingleChannel(aGreen, singularValuesLimit)
aBlueCompressed = compressSingleChannel(aBlue, singularValuesLimit)
imr=Image.fromarray(aRedCompressed,mode=None)
img=Image.fromarray(aGreenCompressed,mode=None)
imb=Image.fromarray(aBlueCompressed,mode=None)
newImage = Image.merge("RGB", (imr,img,imb))
originalImage
OriginalImage will be displayed. For new image, in next code cell:
newImage
How to write the code? I could only come up with this:
def rotateImage90CW(image):
pic = FileImage(image)
oldw = pic.getWidth()
oldh = pic.getHeight()
newIm = EmptyImage(oldw,oldh)
for row in range (oldh):
for col in range(oldw):
oldPixel = pic.getPixel(col,row)
newIm.setPixel(oldw-row,col,oldPixel)
newIm.draw(myWin)
If you use PIL/pillow:
from PIL import Image
im = Image.open(image)
im.rotate(90).show()
In your example, oldw-row should be „row”
in my case, there are 2 ways of getting image to resize/crop.
upload normal image file
giving base64 string data of image
in 1. case, resize and crop is working well:
f = Image.open(uploaded_image)
new_width, new_height = 1200, 630
wpercent = (new_width / float(f.size[0]))
hsize = int((float(f.size[1]) * float(wpercent)))
if f.mode != "RGB":
f = f.convert('RGB')
og_img = None
if f.size[0] < new_width:
#upscale
og_img = f.resize((new_width, hsize), Image.BICUBIC)
elif f.size[0] >= new_width:
#downscale
og_img = f.resize((new_width, hsize), Image.ANTIALIAS)
og_img = og_img.crop((0, 0, 1200, 630))
resized/cropped image:
in 2. case, the code is the same as above with slight change in:
base64_image = str(request.POST.get('base64_image')).split(',')[1]
imgfile = open('/'.join([settings.MEDIA_ROOT, 'test.png' ]), 'w+b')
imgfile.write(decodestring(base64_image))
imgfile.seek(0)
f = Image.open(imgfile)
#.. as above
but the resized/cropped image:
why is it in 2.case bad in quality and size? (black bottom part..) what am I doing wrong? am I reading the base64 string in wrong way?
I found a website which has many interesting things in it.It has 2(there are many) tools which maybe can help you.The 1th tool converts image to base64 and the 2th tool minifies the size of image (up to 70% save).
http://www.w3docs.com/tools/minimage/
http://www.w3docs.com/tools/image-base64
I'm building a python wrapper of a c++ dll by means of ctypes. The afore mentioned library makes an extensive use of OpenCV2.2 (using the old C Api).
I want to convert the char* imageData field of the OpenCV's IplImage structure to a numpy array. I've search SO and the web for a few days but no solution seems to solve my problem.
Here's the problem. I've tested my implementation with 2 images: one of size 600x599 (and it's all good) and the other one of 602x600 (and here's the problem). Both are color images (3-channel images). I've tested the implementation with several images of size 602x600 and always get the image distorted.
I'm guessing there might be something weird going on with the padding added to the image by OpenCV (although i think took care of it in my implementation), but i can't quite put my finger on it.
The thing is that the second image shows all distorted after the "processing" performed in the c++ dll (none for the moment) and i can only think i'm doing something wrong converting back the IplImage data (imageData) to the numpy array.
Here goes the C++ source code:
char* do_something(IplImage *img, int* image_size)
{
// returning the image data
return get_data_from_iplimage
(
img, // doing zero processing for now
image_size
);
}
char* get_data_from_iplimage(IplImage* img, int* image_size)
{
// getting the image total size
*image_size = img->imageSize;
// copying data
char* image_data = new char[img->imageSize];
memcpy(image_data, img->imageData, img->imageSize);
// releasing the Iplimage*
cvReleaseImage(&img);
// returning the image data
return image_data;
}
Here goes the Python source code:
# Image type (IplImage)
IPL_DEPTH_SIGN = 0x80000000
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64
IPL_DEPTH_8S = IPL_DEPTH_SIGN + IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN + IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN + 32
def depth2dtype(depth):
if depth is IPL_DEPTH_8U:
return np.dtype('uint8')
elif depth is IPL_DEPTH_8S:
return np.dtype('int8')
elif depth is IPL_DEPTH_16U:
return np.dtype('uint16')
elif depth is IPL_DEPTH_16S:
return np.dtype('int16')
elif depth is IPL_DEPTH_32S:
return np.dtype('int32')
elif depth is IPL_DEPTH_32F:
return np.dtype('float32')
elif depth is IPL_DEPTH_64F:
return np.dtype('float64')
else:
# This is probably a good default
return np.dtype('uint8')
def get_iplimage_ptr(img):
# None is considered as the NULL pointer
if img is None:
return None # the same thing as 'return img'
# getting image dimensions and data
height, width, n_channels = get_ndarray_dimensions(img)
img_data = img.tostring()
# creating the image header
cv_img = cv2.cv.CreateImageHeader((width, height), cv2.cv.IPL_DEPTH_8U, n_channels)
width_step = img.dtype.itemsize * n_channels * width # creating the famous 'width_step' parameter
cv2.cv.SetData(cv_img, None, width_step)
# setting the data (img is a numpy array)
ipl = iplimage_t.from_address(id(cv_img))
ipl_img_ptr = ipl.ipl_ptr.contents
ipl_img_ptr.imageData = img_data
# returning the OpenCV2.2 compatible image (IplImage*)
return ipl_img_ptr
def get_ndarray_dimensions(img):
# getting image shape information
img_shape = img.shape
img_shape_length = len(img_shape)
# validating parameters
if img_shape_length <= 1 or img_shape_length > 3:
raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
# getting the amount of channels
nc = 1 if img_shape_length == 2 else img_shape[2]
# building the processed image
h, w = img_shape[0], img_shape[1]
# returning the height, width and nChannels
return h, w, nc
def build_ndarray_from_data(str_data, img_shape):
# getting image dimensions
height, width, n_channels = img_shape
# getting the ndarray datatype
dtype = depth2dtype(IPL_DEPTH_8U)
# building a numpy ndarray from the string data
ndarray = np.fromstring(str_data, dtype)
# reshaping to 'height' rows
# width_step = len(str_data) / height
ndarray = ndarray.reshape(height, -1)
# removing the padding added by opencv to each row
cols = dtype.itemsize * width * n_channels
ndarray = ndarray[:, :cols]
# reshaping to the final ndarray dimensions
ndarray = ndarray.reshape((height, width, n_channels))
# returning the numpy array that represents the image
return ndarray
# python wrapper function to the c++ function
def do_something(img):
# getting the IplImage*
iplimage_ptr = get_iplimage_ptr(img)
# calling the c++ function
image_size = c_int(0)
byte_data = __c_do_something(iplimage_ptr, byref(image_size))
str_data = string_at(byte_data, image_size.value)
# getting the image dimensions
img_shape = get_ndarray_dimensions(img)
# building the processed image
proc_img = build_ndarray_from_data(str_data, img_shape)
# returning the processed image
return proc_img
# does something ('pointer' to the c function)
__c_do_something = c_func(
'do_something', _cdll, c_byte_p,
('img', POINTER(IplImage), 1), # IplImage *img
('image_size', c_int_p, 1), # int* image_size
)
I apologize for the length of source code (although there are a few definitions missing), but i guess that "explicit is better than implicit", jeje.
Any help would be appreciated.
PD: If it helps i'm using Python 2.7, Numpy 1.7.1, OpenCV2.2 (precompiled), Visual Studio 2013 (Visual C++) and Windows 8.1.
I might be wrong, but... for me you can just convert IPlImage to Mat and than convert it to python numpy array. Of course you can do this the other way too - numpy array to Mat and Mat to IPlImage. Here there is code which works great for OpenCV 2.x (tested on Opencv 2.4.10, but should work for 2.2 as well). If it won't work for you version, it should at least be a good hint and help you write boost python converters for your version.
If - for some reason - you can't convert IplImage to Mat, let us know why so we can try to make other solution.
After a few days stucked with this problem i think i finally reached a solution. Instead of passing the imageData (char*) i decided to replicate the OpenCV IplImage structure in Python with ctypes. Then, built the numpy array from the received IplImage pointer.
By the way i still don't know what was happening before, but i guess that there was something crazy going on with the conversion of the char* imageData to a string in Python (0 values-translated as null characters-, etc., etc.).
The C++ snippet is a bit simpler now as i don't need to "extract" the imageData from the image. Here it goes:
IplImage* do_something(IplImage *img)
{
// doing nothing
return img;
}
In the Python side, the code is somewhat similar to the old one. There are, however some key aspects:
First, a 'cv.iplimage' is built.
The 'cv.iplimage' is then converted to a 'cv.cvmat'.
Finally, the 'cv.cvmat' is converted to a numpy array.
Here goes the code:
# Image type (IplImage)
IPL_DEPTH_SIGN = 0x80000000
IPL_DEPTH_1U = 1
IPL_DEPTH_8U = 8
IPL_DEPTH_16U = 16
IPL_DEPTH_32F = 32
IPL_DEPTH_64F = 64
IPL_DEPTH_8S = IPL_DEPTH_SIGN + IPL_DEPTH_8U
IPL_DEPTH_16S = IPL_DEPTH_SIGN + IPL_DEPTH_16U
IPL_DEPTH_32S = IPL_DEPTH_SIGN + 32
# subclassing the ctypes.Structure class to add new features
class _Structure(Structure):
def __repr__(self):
"""
Print the fields
"""
res = []
for field in self._fields_:
res.append('%s=%s' % (field[0], repr(getattr(self, field[0]))))
return self.__class__.__name__ + '(' + ','.join(res) + ')'
class IplTileInfo(_Structure):
_fields_ = []
class IplROI(_Structure):
_fields_ = \
[
# 0 - no COI (all channels are selected)
# 1 - 0th channel is selected ...
('coi', c_int),
('xOffset', c_int),
('yOffset', c_int),
('width', c_int),
('height', c_int),
]
# ipl image header
class IplImage(_Structure):
def __repr__(self):
"""
Print the fields
"""
res = []
for field in self._fields_:
if field[0] in ['imageData', 'imageDataOrigin']:
continue
res.append('%s=%s' % (field[0], repr(getattr(self, field[0]))))
return self.__class__.__name__ + '(' + ','.join(res) + ')'
IplImage._fields_ = [
("nSize", c_int),
("ID", c_int),
("nChannels", c_int),
("alphaChannel", c_int),
("depth", c_int),
("colorModel", c_char * 4),
("channelSeq", c_char * 4),
("dataOrder", c_int),
("origin", c_int),
("align", c_int),
("width", c_int),
("height", c_int),
("roi", POINTER(IplROI)),
("maskROI", POINTER(IplImage)),
("imageID", c_void_p),
("tileInfo", POINTER(IplTileInfo)),
("imageSize", c_int),
("imageData", c_byte_p),
("widthStep", c_int),
("BorderMode", c_int * 4),
("BorderConst", c_int * 4),
("imageDataOrigin", c_char_p)]
class iplimage_t(_Structure):
_fields_ = \
[
('ob_refcnt', c_ssize_t),
('ob_type', py_object),
('ipl_ptr', POINTER(IplImage)),
('data', py_object),
('offset', c_size_t)
]
# gets the dimensions of a numpy ndarray
def get_ndarray_dimensions(img):
# getting image shape information
img_shape = img.shape
img_shape_length = len(img_shape)
# validating parameters
if img_shape_length <= 1 or img_shape_length > 3:
raise ArgumentError('Invalid image information. We support images with 1, 2 or 3 channels only.')
# getting the amount of channels
nc = 1 if img_shape_length == 2 else img_shape[2]
# building the processed image
h, w = img_shape[0], img_shape[1]
# returning the height, width and nChannels
return h, w, nc
def build_ndarray_from_data(iplimage_ptr, img_shape):
# getting image dimensions
height, width, n_channels = img_shape
# getting the IplImage*
iplimage = iplimage_ptr.contents
# creating the image header
cv_img = cv2.cv.CreateImageHeader((width, height), IPL_DEPTH_8U, n_channels)
# getting the char* from byte data
str_data = string_at(iplimage.imageData, iplimage.imageSize)
# setting the image data
cv2.cv.SetData(cv_img, str_data, iplimage.widthStep)
# building a CvMat image
cv_mat = cv_img[:, :]
# building the ndarray from the CvMat image
ndarray = np.asarray(cv_mat)
# returing the built ndarray
return ndarray
# python wrapper function to the c++ function
def do_something(img):
# getting the IplImage*
iplimage_ptr = get_iplimage_ptr(img)
# calling the c++ function
ipl_ptr = __c_do_something(iplimage_ptr)
# getting the image dimensions
img_shape = get_ndarray_dimensions(img)
# building the processed image
proc_img = build_ndarray_from_data(ipl_ptr, img_shape)
# returning the processed image
return proc_img
# does something ('pointer' to the c function)
__c_do_something = c_func(
'do_something', _cdll, POINTER(IplImage),
('img', POINTER(IplImage), 1), # IplImage *img
)
Hope it helps ;).
PS: I apologize for the length of the code, but i tried to provide the closest to a working example. Loading the compiled C++ .dll with ctypes is up to you (:.
you don't need Python IplImage ,just do this
C file:
void *return_frame;
extern "C" void* get_rgb_frame(){
return return_frame;
}
#define FRAME_BUFSIZE (1920 * 1080 * 3 + 1)
return_frame = malloc(FRAME_BUFSIZE);
memset(return_frame, 0x00, FRAME_BUFSIZE + 1);
IplImage* pImg = cvLoadImage("test.jpg",-1);
memcpy(return_frame, 1920 * 1080 * 3);
cvReleaseImage(&pImg);
Python file:
dll.get_rgb_frame.restype = c_void_p
yv12_img = dll.get_rgb_frame()
imagedata = string_at(yv12_img, 1920 * 1080 * 3)
cv_img = cv2.cv.CreateImageHeader((1920, 1080), cv2.cv.IPL_DEPTH_8U, 3)
cv2.cv.SetData(cv_img, imagedata, 3 * 1920)
cv_mat = cv_img[:]
array = np.asarray(cv_mat)
cv2.imshow('jinfeng', array)
and you can show image in Python