Memory leak when using device contexts - Python

I'm using a combination of Python and C++ to take a snapshot of a screen area, use that screenshot as part of a video feed (a pixmap in a QLabel, PyQt5), and also save it as a .bmp, currently at 30 fps, for building a video file later. The code works fine so far, aside from the memory leak I'm getting.
contextcreator.cpp, compiled into libccreator.so:
#include "contextcreator.h"
#include <cstring>
#include <stdio.h>
#include <shlwapi.h>
#include <typeinfo>
BYTE* createContext(int x, int y, int width, int height){
    HDC hdesktop = GetDC(NULL);
    HDC memDC = CreateCompatibleDC(hdesktop);
    HBITMAP hbitmap = CreateCompatibleBitmap(hdesktop, width, height);
    HGDIOBJ hbitmapOld = (HBITMAP)SelectObject(memDC, hbitmap);
    BitBlt(memDC, 0, 0, width, height, hdesktop, x, y, SRCCOPY|CAPTUREBLT);
    SelectObject(memDC, hbitmapOld);

    BITMAPINFO bmi = {0};
    bmi.bmiHeader.biSize = sizeof(bmi.bmiHeader);
    GetDIBits(hdesktop, hbitmap, 0, 0, NULL, &bmi, DIB_RGB_COLORS);  // fill in the header only

    BYTE* stream = new BYTE[bmi.bmiHeader.biSizeImage];
    bmi.bmiHeader.biCompression = BI_RGB;
    GetDIBits(hdesktop, hbitmap, 0, bmi.bmiHeader.biHeight, (LPVOID)stream, &bmi, DIB_RGB_COLORS);

    // Leave 14 zeroed bytes of room for the BITMAPFILEHEADER; the Python side fills it in.
    BYTE* data = new BYTE[14 + sizeof(bmi) + bmi.bmiHeader.biSizeImage];
    memcpy(data + 14, &bmi, sizeof(bmi));
    memcpy(data + 14 + sizeof(bmi), stream, bmi.bmiHeader.biSizeImage);
    for(int i = 0; i < 14; i++){
        data[i] = 0;
    }
    delete[] stream;
    ReleaseDC(NULL, hdesktop);
    DeleteDC(memDC);
    return data;
}
void releaseData(BYTE* stream){
    delete[] stream;
}
The Python code that uses libccreator.so:
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from ctypes import *
import ctypes.wintypes as wintypes
import time
import os
os.add_dll_directory("C:/msys64/mingw64/bin")
mylib = cdll.LoadLibrary('C:/Users/amish_ac2c1jm/OneDrive/Documents/blahblah/libccreator.so')
create_context = mylib.createContext
create_context.argtypes = [c_int, c_int, c_int, c_int]
create_context.restype = POINTER(wintypes.BYTE)
release_stream = mylib.releaseData
release_stream.argtypes = [POINTER(wintypes.BYTE)]
release_stream.restype = None
class CaptureThread(QObject):
    finished = pyqtSignal()
    update_image = pyqtSignal([bytearray])

    def __init__(self, x, y, w, h, parent=None):
        super().__init__(parent)
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.stopthread = False
        self.framenumber = 0

    def run(self):
        test_timer = time.time()
        while not self.stopthread:
            if time.time() - test_timer >= 1000/30/1000:  # ~33 ms, i.e. 30 fps
                test_timer = time.time()
                self.capture()
        self.finished.emit()

    def capture(self):
        bmpptr = create_context(self.x, self.y, self.w, self.h)
        # biSizeImage sits at offset 0x22 of the buffer (14-byte file header + BITMAPINFOHEADER)
        data = bytearray(string_at(addressof(bmpptr.contents) + 0x22, 0x4))
        size = int.from_bytes(data, byteorder='little', signed=False) + 0x36
        data = bytearray(string_at(bmpptr, size))
        release_stream(bmpptr)
        # Fill in the 14-byte BITMAPFILEHEADER the C++ side left zeroed
        data[0:2] = b'BM'
        value = int.from_bytes(data[0x22:0x26], byteorder='little', signed=False)
        data[2:6] = (value + 0x36).to_bytes(4, byteorder='little', signed=False)
        data[6:10] = b'\x00\x00\x00\x00'
        data[10:14] = b'\x36\x00\x00\x00'
        with open(f"images/frame{self.framenumber}.bmp", "wb") as f:
            f.write(data)
        self.framenumber += 1
        self.update_image.emit(data)
Originally I had some memory leaks from not deleting the byte arrays created with the new keyword, and that leak became apparent quite quickly when my monitors started blinking and Chrome crashed along with PyCharm. I also wasn't originally using ReleaseDC for the [hardware?] DC, but was instead using DeleteDC for both that DC and the memory DC. I could watch that leak in Task Manager as my project very quickly overtook Chrome and PyCharm in memory usage (these bmps aren't compressed, after all; something I'll look into later). Still, a memory leak persists, but it doesn't show up for my app in Task Manager; my overall memory usage just gradually increases until I run out of memory. That takes about 7 minutes (I have 16 GB of RAM).
I feel it has something to do with the DCs, but I'm not entirely sure. I have some experience with C++ from a while back, but once I learned Python I didn't miss compiler and linker issues, to say the least.
I use PyCharm as my Python IDE and Qt Creator for C++. Thanks in advance for any help :)

As per the comment from Igor Tandetnik, the leak was from not deleting the HBITMAP returned by CreateCompatibleBitmap.
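For reference, a minimal sketch of the same acquire/release pattern driven entirely from Python with ctypes; the function name is hypothetical, but the Win32 calls are the ones createContext uses. The key line is the DeleteObject on the bitmap handle, which is what the original code was missing:
from ctypes import windll
from ctypes import wintypes as w

user32, gdi32 = windll.user32, windll.gdi32
user32.GetDC.restype = w.HDC                      # declare handle types so nothing
gdi32.CreateCompatibleDC.restype = w.HDC          # gets truncated on 64-bit Python
gdi32.CreateCompatibleBitmap.restype = w.HBITMAP
gdi32.SelectObject.restype = w.HGDIOBJ

SRCCOPY, CAPTUREBLT = 0x00CC0020, 0x40000000

def capture_once(x, y, width, height):
    hdesktop = user32.GetDC(None)                 # GetDC pairs with ReleaseDC
    memdc = gdi32.CreateCompatibleDC(hdesktop)    # CreateCompatibleDC pairs with DeleteDC
    hbmp = gdi32.CreateCompatibleBitmap(hdesktop, width, height)  # pairs with DeleteObject
    old = gdi32.SelectObject(memdc, hbmp)
    gdi32.BitBlt(memdc, 0, 0, width, height, hdesktop, x, y, SRCCOPY | CAPTUREBLT)
    gdi32.SelectObject(memdc, old)                # deselect before deleting
    # ... GetDIBits / copy the pixels out here ...
    gdi32.DeleteObject(hbmp)                      # the call createContext was missing
    gdi32.DeleteDC(memdc)
    user32.ReleaseDC(None, hdesktop)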

Related

Get address of buffer for video capture

I am trying to use Python to capture an image from a camera.
I have been looking at the capture example code (which is in C) on linuxtv.org, and I've hit a bit of an impasse.
I'm using the USERPTR IO method, because mmap seems too complicated and I can't use the read method.
In the original C there is a struct like so:
struct buffer {
    void *start;
    size_t length;
};
Which I have pythonised into:
class buffer_location():
    def __init__(self):
        self.start = v4l2.c_int(0)
        self.length = 0
Where v4l2 is a Python module that wraps linux/videodev2.h.
In order to initialise the buffers for the video capture I have done this:
buflocs = []
buffers = []
buffer_size = ...  # size is taken from another ioctl call
for i in range(4):
    bl = buffer_location()
    buflocs.append(bl)
    bl.length = buffer_size
    buff = create_string_buffer(buffer_size)
    buffers.append(buff)
    bl.start = pointer(buff)
This is an attempt to replicate what happens in init_userp in the original C. Then I tried to replicate start_capturing like so:
for i in range(4):
    v4l2buf = v4l2.v4l2_buffer()
    v4l2buf.type = v4l2.V4L2_BUF_TYPE_VIDEO_CAPTURE
    v4l2buf.memory = v4l2.V4L2_MEMORY_USERPTR
    v4l2buf.index = i
    v4l2buf.m.usrptr = buflocs[i].start
    v4l2buf.length = buffer_size
    if -1 == ioctl(cam, v4l2.VIDIOC_QBUF, v4l2buf):
        print('cannot qbuf')
However this gives the error
OSError: [Errno 14] Bad address
I assume this is coming from v4l2buf.m.usrptr.
How can I correctly assign the address to the start of the buffer?
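For what it's worth, a sketch of one way to get the address across with plain ctypes. The m.userptr field of a v4l2_buffer is an unsigned long holding the raw address of the buffer, and ctypes.addressof gives exactly that integer; assigning a ctypes pointer object instead (as pointer(buff) does above) is the kind of thing that produces EFAULT. The field spelling (userptr vs. usrptr) and the buffer size here are assumptions; check your v4l2 module against linux/videodev2.h:
import ctypes

buffer_size = 4096                                 # hypothetical; use the size from the ioctl call
buff = ctypes.create_string_buffer(buffer_size)    # keep a Python reference alive while queued
addr = ctypes.addressof(buff)                      # integer address of the first byte

# then, when queuing:
#   v4l2buf.m.userptr = addr
#   v4l2buf.length = buffer_size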

How to pass array of ctypes structures to pyOpenCL?

I have been following along with an online OpenCL tutorial, doing everything in Python with pyOpenCL. As a stripped-down example of my problem, I need to pass an array of C structs as an argument to an OpenCL kernel.
Here is an artificial example of OpenCL code:
typedef struct Test{
    float a;
    float3 b;
} Test;

__kernel void render_kernel(__constant Test *tests, const int width, const int height, const int num_structs, __global float3* output)
{
    unsigned int work_item_id = get_global_id(0);
    unsigned int x_coord = work_item_id % width;
    unsigned int y_coord = work_item_id / width;
    Test test = tests[0];
    output[work_item_id] = test.b;
}
This does something deliberately silly (every output element just gets one of the float3 values), but it lets me check that the data is actually reaching the kernel properly.
I am trying to mimic an array of these structures on the Python side with the following code:
class Test(ctypes.Structure):
    _fields_ = [
        ("a", ctypes.c_float),
        ("b", (ctypes.c_float * 4))
    ]

class Test_Array(ctypes.Structure):
    _fields_ = [("TEST_ARRAY", ctypes.POINTER(Test))]

    def __init__(self, num_structs):
        elems = (Test * num_structs)()
        self.TEST_ARRAY = ctypes.cast(elems, ctypes.POINTER(Test))
        self.elements = num_structs
        for num in range(0, num_structs):
            self.TEST_ARRAY[num].a = 1.0
            self.TEST_ARRAY[num].b = (1.0, 0.0, 0.0, 1.0)
num_structs = 2
test_arr = Test_Array(num_structs)

# host buffer
color_out = np.empty((win.width * win.height, 4), dtype=np.float32)

cl_prog = CL()
cl_prog.load_program("shaders/struct.cl")

# device buffers
cl_structs = cl_prog.create_input_buffer(num_structs * ctypes.sizeof(Test))
cl_output = cl_prog.create_output_buffer(color_out.nbytes)

cl.enqueue_fill_buffer(cl_prog.queue, cl_structs, test_arr.TEST_ARRAY,
                       0, num_structs * ctypes.sizeof(Test))

global_work_size = (win.width * win.height,)
cl_prog.program.render_kernel(cl_prog.queue, global_work_size, None,
                              cl_structs, np.int32(win.width), np.int32(win.height),
                              np.int32(num_structs), cl_output)
cl_prog.retrieve_data(color_out, cl_output)
print(color_out)
This isn't really relevant, as the functions in this class are just wrappers around pyOpenCL functions, but here is the CL class that gets instantiated.
class CL:
    def __init__(self):
        self.platform = cl.get_platforms()[0]
        self.device = self.platform.get_devices()[0]
        self.ctx = cl.Context([self.device])
        self.queue = cl.CommandQueue(self.ctx)

    def load_program(self, file_path):
        with open(file_path) as f:
            src = f.read()
        self.program = cl.Program(self.ctx, src).build()

    def create_output_buffer(self, size):
        """
        Creates and returns a write-only cl.Buffer of size bytes.
        """
        mf = cl.mem_flags
        return cl.Buffer(self.ctx, mf.WRITE_ONLY, size)

    def create_input_buffer(self, size):
        """
        Returns a read-only cl.Buffer of size bytes.
        """
        mf = cl.mem_flags
        return cl.Buffer(self.ctx, mf.READ_ONLY, size)

    def retrieve_data(self, host_buffer, device_buffer):
        """
        Retrieves data from a buffer on the device, device_buffer, and copies it
        over to host_buffer.
        """
        cl.enqueue_copy(self.queue, host_buffer, device_buffer)

    def fill_buffer(self, memory, pattern, offset, size, wait_for=None):
        """
        A wrapper around cl.enqueue_fill_buffer which uses self.queue.
        """
        cl.enqueue_fill_buffer(self.queue, memory, pattern, offset, size, wait_for)

    def enqueue_copy(self, device_buffer, host_buffer):
        cl.enqueue_copy(self.queue, device_buffer, host_buffer)
When I run the above code, it compiles and runs fine, but what I read back from the buffer is just garbage that was already in memory. I can't tell whether my problem is the alignment of the data, the way I am creating the array of ctypes structs, or something else.
I am not attached to using a C array of C structs. I suspect there is a way to do this with numpy arrays, but I can't figure it out. Any way to properly get the data from the host to the device would be greatly appreciated.
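Two things stand out here, offered as a sketch rather than a verified fix. First, cl.enqueue_fill_buffer fills a buffer by repeating a short pattern; copying host data into a buffer is cl.enqueue_copy's job. Second, an OpenCL float3 occupies 16 bytes (the same as float4), so struct Test is laid out with b at offset 16 and a total size of 32 bytes. A numpy structured dtype can mirror that layout explicitly; the dtype below is an assumption to check against the device's actual struct layout, and cl_structs/cl_prog.queue are the objects from the code above:
import numpy as np
import pyopencl as cl

# mirror: struct Test { float a; float3 b; }  (float3 is 16-byte aligned, 16 bytes wide)
test_dtype = np.dtype({
    "names":    ["a", "b"],
    "formats":  [np.float32, (np.float32, 4)],
    "offsets":  [0, 16],
    "itemsize": 32,
})

num_structs = 2
host_structs = np.zeros(num_structs, dtype=test_dtype)
host_structs["a"] = 1.0
host_structs["b"] = (1.0, 0.0, 0.0, 1.0)   # float3 carried as 4 floats; the last is padding

# copy the actual bytes instead of filling with a pattern:
# cl.enqueue_copy(cl_prog.queue, cl_structs, host_structs)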
Someone suggested back in 2014 that this could perhaps be done like so:
__kernel void render_kernel(struct Params Test, ...){
}
You can see this post.
Otherwise, something called Attributes of Variables may be an option?
Hope you got this sorted out; please share the experience. I would love to see how this is done, as I may want to try passing a SQL query over to a kernel to process.

Python ctypes: how to allocate output buffer for C function in callback

I have the following callback as one of the arguments to a function in my C code:
typedef unsigned char* (*my_callback)(int size);
//for example:
unsigned char * tmp_buff = nullptr;
tmp_buff = i_alloc_fn(10);
printf("Tmp buff addr = %d.\n", tmp_buff);
*tmp_buff = 111;
printf("I am still alive");
I am trying to provide this callback from Python (the C code is loaded as an .so library). I tried two ways.
ALLOC_CALLBACK_FUNC = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_int)
#...
def py_alloc_callback(size):
    libc = ctypes.CDLL("libc.so.6")
    mem_ptr = libc.malloc(ctypes.c_uint(size))
    return mem_ptr
And
ALLOC_CALLBACK_FUNC = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_int)
stringbuffer = ''
#...
def py_alloc_callback(size):
    global stringbuffer
    stringbuffer = ctypes.create_string_buffer(size)
    return ctypes.POINTER(ctypes.c_ubyte)(stringbuffer)
But both variants led to a segmentation fault in the C code when it tried to write to the allocated memory. Please help me fix it.
mem_ptr = libc.malloc(ctypes.c_uint(size))
is clearly wrong. The parameter to malloc is of type size_t, and, just as importantly, without a declared restype ctypes assumes malloc returns a C int, which truncates the pointer on 64-bit platforms.
Now it works:
def py_alloc_callback(size):
    libc = ctypes.CDLL("libc.so.6")
    alloc_f = libc.malloc
    alloc_f.restype = ctypes.c_void_p
    alloc_f.argtypes = [ ctypes.c_uint ]
    return alloc_f(ctypes.c_uint(size))
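As a further sketch, and an addition of mine rather than part of the answer above: hoisting the prototype setup out of the callback avoids redoing it on every allocation, and using c_void_p (rather than c_char_p) as the callback's return type hands the raw address back to C unchanged, since ctypes gives c_char_p results special string treatment:
import ctypes

libc = ctypes.CDLL("libc.so.6")
libc.malloc.restype = ctypes.c_void_p      # keep the full 64-bit pointer
libc.malloc.argtypes = [ctypes.c_size_t]   # malloc takes a size_t

ALLOC_CALLBACK_FUNC = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_int)

@ALLOC_CALLBACK_FUNC
def py_alloc_callback(size):
    return libc.malloc(size)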

camera image incorrectly formatted in ctypes pointer (python)

I am using a DLL library to call functions that operate a camera from Python, and I'm able to retrieve the image using ctypes, but it's formatted incorrectly: the returned image is duplicated and half of it is blank. What do I need to do to fix this?
I have a LabVIEW program that correctly takes images from the camera, so I know how they are supposed to look.
Correct image retrieved using LabVIEW
Image retrieved using Python:
The image is duplicated and also sideways in Python.
Python code:
from ctypes import *
import numpy as np
import matplotlib.pyplot as plt

mydll = windll.LoadLibrary('StTrgApi.dll')
hCamera = mydll.StTrg_Open()
print(hCamera)

im_height = 1200
im_width = 1600
dwBufferSize = im_height * im_width
pbyteraw = np.zeros((im_height, im_width), dtype=np.uint16)
dwNumberOfByteTrans = 0
dwNumberOfByteTrans = (c_ubyte * dwNumberOfByteTrans)()
dwFrameNo = 0
dwFrameNo = (c_ubyte * dwFrameNo)()
dwMilliseconds = 3000
mydll.StTrg_TakeRawSnapShot(hCamera,
                            pbyteraw.ctypes.data_as(POINTER(c_int16)), dwBufferSize*2,
                            dwNumberOfByteTrans, dwFrameNo, dwMilliseconds)
print(pbyteraw)
plt.matshow(pbyteraw)
plt.show()
C++ code for taking the image:
DWORD dwBufferSize = 0;
if(!StTrg_GetRawDataSize(hCamera, &dwBufferSize))
{
    _tprintf(TEXT("Get Raw Data Size Failed.\n"));
    return(-1);
}
PBYTE pbyteRaw = new BYTE[dwBufferSize];
if(NULL != pbyteRaw)
{
    DWORD dwNumberOfByteTrans = 0;
    DWORD dwFrameNo = 0;
    DWORD dwMilliseconds = 3000;
    for(DWORD dwPos = 0; dwPos < 10; dwPos++)
    {
        if(StTrg_TakeRawSnapShot(hCamera, pbyteRaw, dwBufferSize,
                                 &dwNumberOfByteTrans, &dwFrameNo, dwMilliseconds))
        {
            TCHAR szFileName[MAX_PATH];
            if(is2BytesMode)
            {
                _stprintf_s(szFileName, _countof(szFileName), TEXT("%s\\%u.tif"), szBitmapFilePath, dwFrameNo);
                StTrg_SaveImage(dwWidth, dwHeight, STCAM_PIXEL_FORMAT_16_MONO_OR_RAW, pbyteRaw, szFileName, 0);
            }
            else
            {
                _stprintf_s(szFileName, _countof(szFileName), TEXT("%s\\%u.bmp"), szBitmapFilePath, dwFrameNo);
                StTrg_SaveImage(dwWidth, dwHeight, STCAM_PIXEL_FORMAT_08_MONO_OR_RAW, pbyteRaw, szFileName, 0);
            }
            _tprintf(TEXT("Save Image:%s\n"), szFileName);
        }
        else
        {
            _tprintf(TEXT("Fail:StTrg_TakeRawSnapShot\n"));
            break;
        }
    }
    delete[] pbyteRaw;
}
Based on your C code, something like this should work, though it is untested since I don't have your camera library. If you are using 32-bit Python, make sure the library calls are __stdcall before using WinDLL; otherwise use CDLL. With 64-bit Python it doesn't matter. Defining the argument types and return type helps catch errors. For output parameters, create instances of the correct ctypes type, then pass byref(). The way you were passing the output parameters was likely the cause of your crash; setting argtypes would have caught that the values weren't pointers to DWORDs.
from ctypes import *
from ctypes import wintypes as w

mydll = WinDLL('StTrgApi')
mydll.StTrg_Open.argtypes = None
mydll.StTrg_Open.restype = w.HANDLE
mydll.StTrg_GetRawDataSize.argtypes = w.HANDLE, w.PDWORD
mydll.StTrg_GetRawDataSize.restype = None
mydll.StTrg_TakeRawSnapShot.argtypes = w.HANDLE, w.PBYTE, w.DWORD, w.PDWORD, w.PDWORD, w.DWORD
mydll.StTrg_TakeRawSnapShot.restype = None

hCamera = mydll.StTrg_Open()
print(hCamera)

dwBufferSize = w.DWORD()
mydll.StTrg_GetRawDataSize(hCamera, byref(dwBufferSize))
pbyteraw = (w.BYTE * dwBufferSize.value)()  # size the array with the DWORD's .value
dwNumberOfByteTrans = w.DWORD()  # output parameter, pass byref()
dwFrameNo = w.DWORD()            # output parameter, pass byref()
dwMilliseconds = 3000
mydll.StTrg_TakeRawSnapShot(hCamera,
                            pbyteraw,
                            dwBufferSize,
                            byref(dwNumberOfByteTrans),
                            byref(dwFrameNo),
                            dwMilliseconds)
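One more thought on the duplicated/half-blank symptom, as an assumption I can't test without the camera: if the camera is actually delivering 8-bit pixels, reading them into a uint16 array packs two pixels into each element, which produces exactly that kind of doubled, half-empty picture. Viewing the transferred bytes with the dtype matching the camera's pixel format (see is2BytesMode in the C code) avoids it:
import numpy as np

im_height, im_width = 1200, 1600

# pbyteraw is the ctypes array filled by StTrg_TakeRawSnapShot above;
# choose uint8 or uint16 to match the camera's actual pixel format
img = np.frombuffer(pbyteraw, dtype=np.uint8, count=im_height * im_width)
img = img.reshape(im_height, im_width)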

CreateCompatibleDC fails after calling it exactly 4,984 times

I've encountered a strange bug in my program. It's a little odd, as it occurs on exactly the 4984th call to the function; I've been tweaking this all day, and without fail, that's the number at which it fails.
The code in question is a small convenience function which creates and returns a DC and bitmap. This little function is a piece of my stab at a screen recorder, so it gets called tons and tons of times.
When I first noticed the error, after some sleuthing around, I found a very similar Stack Overflow question, so the code below is modeled after the answer in that thread. However, even after following the suggested deletion and releasing pattern, the problem remains, right on that 4984th iteration.
This is the specific failure point of the program:
def _createDcAndBitmap(self, size, input_bitmap=None):
    hwnd = win32gui.GetDesktopWindow()
    zhwndDevice = win32gui.GetWindowDC(hwnd)
    zmfcDC = win32ui.CreateDCFromHandle(zhwndDevice)
    zsaveDC = zmfcDC.CreateCompatibleDC()
    zsaveBitMap = win32ui.CreateBitmap()
    zsaveBitMap.CreateCompatibleBitmap(zmfcDC, *size)
    hOldBmp = zsaveDC.SelectObject(zsaveBitMap)
    return zsaveDC, zsaveBitMap, hOldBmp, hwnd
The error is always thrown from the line:
zsaveBitMap.CreateCompatibleBitmap(zmfcDC, *size)
With the error reported by Python as:
error: CreateCompatibleDC failed
Calling FormatMessage from the win32api gives further information:
Invalid device context (DC) handle.
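(For reference, that lookup is just the standard GetLastError/FormatMessage pair:)
import win32api
print(win32api.FormatMessage(win32api.GetLastError()))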
The Full Code:
class Bitmap(object):
    # (excerpt -- these two calls run inside a method of the class)
    _sourceDC, _sourceBitmap, hOldBmp, hwnd = self._bytesToDcAndBitmap(bytestring, sourceSize)
    _bytes, _size = self._scaleBitmap(_sourceDC, _sourceBitmap, hOldBmp, hwnd, sourceSize)

    def _scaleBitmap(self, sourceDC, sourceBitmap, sourceHOldBmp, sourceHwnd, sourceSize):
        '''
        Resizes the current bitmap down to a target size
        of (X, 540), where the X is varied depending on the
        aspect ratio of the input bitmap
        '''
        target_size = self._getTargetSize(sourceSize)
        destDC, destBitmap, hOldBmp, hwnd = self._createDcAndBitmap(target_size)
        win32gui.SetStretchBltMode(destDC.GetHandleAttrib(), 4)
        win32gui.StretchBlt(pywintypes.HANDLE(destDC.GetHandleAttrib()), 0, 0, target_size[0], target_size[1],
                            sourceDC.GetHandleAttrib(), 0, 0, sourceSize[0], sourceSize[1], win32con.SRCCOPY)
        new_bytestring = destBitmap.GetBitmapBits(True)
        new_size = self._bitmapSize(destBitmap)
        self._deleteDCBitmapOldBmpAndHwmn(sourceDC, sourceBitmap, sourceHOldBmp, sourceHwnd)
        self._deleteDCBitmapOldBmpAndHwmn(destDC, destBitmap, hOldBmp, hwnd)

    def _bytesToDcAndBitmap(self, bytestring, sourceSize):
        a = (ctypes.c_int * (sourceSize[0]*sourceSize[1]))()
        ctypes.memmove(a, bytestring, len(bytestring))
        hwnd = win32gui.GetDesktopWindow()
        zhwndDevice = win32gui.GetWindowDC(hwnd)
        zmfcDC = win32ui.CreateDCFromHandle(zhwndDevice)
        zsaveDC = zmfcDC.CreateCompatibleDC()
        zsaveBitMap = win32ui.CreateBitmap()
        zsaveBitMap.CreateCompatibleBitmap(zmfcDC, sourceSize[0], sourceSize[1])
        hOldBmp = zsaveDC.SelectObject(zsaveBitMap)
        ctypes.windll.gdi32.SetBitmapBits(zsaveBitMap.GetHandle(), len(bytestring), ctypes.byref(a))
        return zsaveDC, zsaveBitMap, hOldBmp, hwnd

    def _createDcAndBitmap(self, size, input_bitmap=None):
        hwnd = win32gui.GetDesktopWindow()
        zhwndDevice = win32gui.GetWindowDC(hwnd)
        zmfcDC = win32ui.CreateDCFromHandle(zhwndDevice)
        zsaveDC = zmfcDC.CreateCompatibleDC()
        zsaveBitMap = win32ui.CreateBitmap()
        zsaveBitMap.CreateCompatibleBitmap(zmfcDC, *size)
        hOldBmp = zsaveDC.SelectObject(zsaveBitMap)
        return zsaveDC, zsaveBitMap, hOldBmp, hwnd

    def _deleteDCBitmapOldBmpAndHwmn(self, dc, bitmap, old_bitmap, hwnd):
        win32gui.SelectObject(dc.GetHandleAttrib(), old_bitmap.GetHandle())
        win32gui.DeleteDC(dc.GetHandleAttrib())
        win32gui.DeleteObject(bitmap.GetHandle())
        win32gui.ReleaseDC(win32gui.GetDesktopWindow(), hwnd)
The code is a little peculiar, as it's running on the 'exit' end of a pipe. Its job is reconstructing a serialized byte string (obtained from GetBitmapBits()) back into a bitmap, scaling it, then going back to a byte string. Doing it this way is about a solid order of magnitude faster than using higher-level Python libraries :)
So I'm guessing this is due to a memory leak somewhere, but as far as I can tell, I'm closing everything down correctly. And yet, it still fails right around the 5000th call.
Am I missing a leak somewhere?
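One observation, as a sketch rather than a confirmed diagnosis: win32gui.GetWindowDC returns a DC handle that must be given back via ReleaseDC(hwnd, hdc), but _deleteDCBitmapOldBmpAndHwmn passes the window handle in the DC slot, so zhwndDevice is never released. Windows caps a process at 10,000 GDI handles by default, and leaking two window DCs per frame lines up well with a failure near the ~5,000th call. A corrected shape might look like this, with the extra handle threaded through the return tuples (the variable names are mine):
def _createDcAndBitmap(self, size, input_bitmap=None):
    hwnd = win32gui.GetDesktopWindow()
    hwndDC = win32gui.GetWindowDC(hwnd)      # keep this handle so it can be released
    mfcDC = win32ui.CreateDCFromHandle(hwndDC)
    saveDC = mfcDC.CreateCompatibleDC()
    saveBitMap = win32ui.CreateBitmap()
    saveBitMap.CreateCompatibleBitmap(mfcDC, *size)
    hOldBmp = saveDC.SelectObject(saveBitMap)
    return saveDC, saveBitMap, hOldBmp, hwnd, hwndDC

def _deleteDCBitmapOldBmpAndHwmn(self, dc, bitmap, old_bitmap, hwnd, hwndDC):
    win32gui.SelectObject(dc.GetHandleAttrib(), old_bitmap.GetHandle())
    win32gui.DeleteDC(dc.GetHandleAttrib())
    win32gui.DeleteObject(bitmap.GetHandle())
    win32gui.ReleaseDC(hwnd, hwndDC)         # release the DC handle against its own window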
