Converting a WinAPI screenshot to an OpenCV-compatible form - Python

So I originally asked a question here about taking faster screen captures using the Windows API as compared to PIL. I was able to successfully capture the screen via BitBlt.
Now I am unsure how to convert the bitmap into a form that can be used with OpenCV. OpenCV doesn't have any support for bitmaps, and when I print(im) it is a ~14k long 1D array. OpenCV can't do anything with a 1D array, and I have tried to reshape it with NumPy with no success.
def take_screenshot1(hwnd):
    """Copy a fixed 765x503 region of window ``hwnd`` into a bitmap.

    The raw pixel values are read into ``im`` with GetBitmapBits(False).
    Nothing is returned; the function only releases the GDI objects again.
    """
    capture_size = (765, 503)
    window_dc = win32gui.GetWindowDC(hwnd)
    source_dc = win32ui.CreateDCFromHandle(window_dc)
    memory_dc = source_dc.CreateCompatibleDC()

    bitmap = win32ui.CreateBitmap()
    bitmap.CreateCompatibleBitmap(source_dc, capture_size[0], capture_size[1])
    memory_dc.SelectObject(bitmap)
    memory_dc.BitBlt((0, 0), capture_size, source_dc, (0, 0), win32con.SRCCOPY)

    im = bitmap.GetBitmapBits(False)
    # Conversion attempts that did not work:
    # img = np.array(im)
    # cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Release the GDI resources.
    source_dc.DeleteDC()
    memory_dc.DeleteDC()
    win32gui.ReleaseDC(hwnd, window_dc)
    win32gui.DeleteObject(bitmap.GetHandle())
edit:
Here is the working code:
def take_screenshot1(hwnd, width=765, height=503):
    """Capture the top-left ``width`` x ``height`` area of a window.

    Args:
        hwnd: Handle of the window to capture.
        width: Width of the captured area in pixels (default 765).
        height: Height of the captured area in pixels (default 503).

    Returns:
        A ``(height, width, 4)`` uint8 NumPy array holding the bitmap bytes
        in the order GetBitmapBits delivers them (B, G, R, X), i.e. the
        first three channels are already in OpenCV's BGR order.
    """
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, width, height)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (width, height), dcObj, (0, 0), win32con.SRCCOPY)
        im = dataBitMap.GetBitmapBits(False)
    finally:
        # Always hand the GDI objects back, even if the copy failed.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

    # GetBitmapBits(False) yields signed ints; reinterpret them as unsigned
    # bytes before giving the flat data its image shape.
    img = np.array(im).astype(dtype="uint8")
    # No cvtColor call here: its result was previously discarded, and the
    # data is already in BGR(X) order.
    return img.reshape(height, width, 4)

GetBitmapBits() in its Python incarnation returns an array of signed ints instead of unsigned bytes. You should first convert it to unsigned bytes and then do as @Dan Mašek said.

Related

python screengrab problem (on yolov5, pytorch)

I'm trying to grab a screenshot of a window as fast as possible and then run YOLOv5 inference on it.
It works, but sometimes it doesn't detect very well compared to running detect.py on the same image. I think it's probably because of the image shape or array layout, but I don't know where or how to change those to make it work. Can anyone help me with this, please?
import torch
import numpy as np
import win32gui
import win32ui
import win32con
# Capture settings read by screenshot():
#   w, h            - size in pixels of the region copied from the window
#   bmpfilenamename - file name used only by the commented-out SaveBitmapFile call
#   windowname      - window title passed to win32gui.FindWindow
w = 800 # set this
h = 600 # set this
bmpfilenamename = "color.bmp" #set this
windowname = 'put windowname'
def screenshot():
    """Grab the top-left ``w`` x ``h`` pixels of the window titled ``windowname``.

    Returns:
        An ``(h, w, 4)`` uint8 NumPy array built on the bytes returned by
        GetBitmapBits(True); all four channels are kept.
    """
    target = win32gui.FindWindow(None, windowname)
    window_dc = win32gui.GetWindowDC(target)
    source_dc = win32ui.CreateDCFromHandle(window_dc)
    memory_dc = source_dc.CreateCompatibleDC()

    bitmap = win32ui.CreateBitmap()
    bitmap.CreateCompatibleBitmap(source_dc, w, h)
    memory_dc.SelectObject(bitmap)
    memory_dc.BitBlt((0, 0), (w, h), source_dc, (0, 0), win32con.SRCCOPY)

    # Debugging aid: write the capture to disk.
    # bitmap.SaveBitmapFile(memory_dc, bmpfilenamename)

    raw_bytes = bitmap.GetBitmapBits(True)
    frame = np.frombuffer(raw_bytes, dtype='uint8').reshape((h, w, 4))

    # Free resources
    source_dc.DeleteDC()
    memory_dc.DeleteDC()
    win32gui.ReleaseDC(target, window_dc)
    win32gui.DeleteObject(bitmap.GetHandle())

    # frame = frame[..., ::-1]
    # frame = np.ascontiguousarray(frame)
    return frame
# Load the custom YOLOv5 weights ('yolov5s.pt') from the local repository
# checkout in the current directory.
model = torch.hub.load('./', 'custom', path='yolov5s.pt', source='local')
# Run inference on a single captured frame.
# NOTE(review): `test` is the 4-channel array returned by screenshot(); it is
# passed to the model as-is, without any channel reordering.
test = screenshot()
results = model(test)
# Take the detections for the first (only) image as a pandas DataFrame.
boxes = results.pandas().xyxy[0]
print (boxes)
Edit: I figured out that it works if I change the code from this
results = model(test)
to this
results = model(cv.cvtColor(test, cv.COLOR_BGR2RGB))
But isn't this code supposed to do the same thing? For some reason this one doesn't work:
img[: ,: ,::-1]

use win32gui to capture grayscale screen

I use the following code to capture a window screen in python:
def get_screenshot(self):
    """Capture a region of the window and return it as an OpenCV-ready image.

    Copies a ``self.w`` x ``self.h`` area of the window ``self.hwnd``,
    starting at (``self.cropped_x``, ``self.cropped_y``).

    Returns:
        A C-contiguous ``(self.h, self.w, 3)`` uint8 NumPy array; the fourth
        (alpha) channel of the captured bitmap is dropped.
    """
    # get the window image data
    wDC = win32gui.GetWindowDC(self.hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        raw_bytes = dataBitMap.GetBitmapBits(True)
    finally:
        # free resources, even when the capture itself raised
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

    # convert the raw data into a format opencv can read;
    # np.frombuffer replaces the deprecated binary mode of np.fromstring
    img = np.frombuffer(raw_bytes, dtype='uint8').reshape(self.h, self.w, 4)

    # drop the alpha channel, or cv.matchTemplate() will throw an error like:
    #   error: (-215:Assertion failed) (depth == CV_8U || depth == CV_32F) && type == _templ.type()
    #   && _img.dims() <= 2 in function 'cv::matchTemplate'
    img = img[..., :3]

    # make image C_CONTIGUOUS to avoid errors that look like:
    #   File ... in draw_rectangles
    #   TypeError: an integer is required (got type tuple)
    # see the discussion here:
    # https://github.com/opencv/opencv/issues/14866#issuecomment-580207109
    img = np.ascontiguousarray(img)
    return img
I took the code from some YouTube tutorials and the code works perfectly.
However, I'm trying to convert that image into grayscale so I could later use it for Homography without any success.
I tried to use things like cv.cvtColor but nothing worked.
Is there any way to make it grayscale right on the capture?
Thank you
Use the Pillow package's img.convert() function to convert the image to grayscale:
from PIL import Image
# Load the source image from disk.
img = Image.open('download.png')
# Mode 'L' requests the grayscale conversion.
imgGray = img.convert('L')
# Write the grayscale result to a new file.
imgGray.save('test_gray.png')

OpenCV: imdecode image from memory returns None

Answered (edit): Dima solved my issue. I was unnecessarily calling cv2.imdecode on data that could be passed directly to OpenCV.
First, I'm pretty new to Python and OpenCV so apologies if I'm missing something obvious, which I'm almost certain I am. I'm trying to use Python to grab a screenshot from a specific window then pass that to OpenCV. If I write the screenshot out to disk then read it back, everything is fine, but as I want to be analyzing multiple screenshots per second, waiting on disk IO seems silly. Unfortunately I've spent about 4 hours trying everything I came up with searching around and nothing has solved the problem. Here is my code:
from PIL import ImageGrab
from PIL import Image
import win32gui
import win32ui
from ctypes import windll
import os
import time
import cv2
import numpy as np
from matplotlib import pyplot
def enumHandler(hwnd, lParam):
    """EnumWindows callback collecting visible windows titled '...Sword...'.

    Handles of matching windows are appended to ``lParam``, the list that
    was handed to win32gui.EnumWindows.
    """
    is_match = (win32gui.IsWindowVisible(hwnd)
                and 'Sword' in win32gui.GetWindowText(hwnd))
    if is_match:
        lParam.append(hwnd)
def screenGrab(rect):
    """Grab the screen area ``rect`` and save it as a timestamped PNG.

    The file is written into the current working directory and is named
    ``game__<unix time in seconds>.png``.
    """
    im = ImageGrab.grab(rect)
    folder = os.getcwd()
    stamp = str(int(time.time()))
    target = ''.join([folder, '\\game__', stamp, '.png'])
    im.save(target, 'PNG')
def _show_frame(frame):
    """Print shape and type of ``frame`` and display it until a key is pressed."""
    print(frame.shape)
    print(type(frame))
    cv2.imshow('FRAME', frame)
    cv2.waitKey()


def main():
    """Capture the first matching window and try to display it two ways.

    First the capture is written to 'thistest.png' and read back with
    cv2.imread; then the in-memory bitmap bytes are handed to cv2.imdecode.
    """
    hwnds = []
    win32gui.EnumWindows(enumHandler, hwnds)
    target = hwnds[0]

    left, top, right, bottom = win32gui.GetWindowRect(target)
    w = right - left
    h = bottom - top

    hwndDC = win32gui.GetWindowDC(target)
    mfcDC = win32ui.CreateDCFromHandle(hwndDC)
    saveDC = mfcDC.CreateCompatibleDC()
    saveBitMap = win32ui.CreateBitmap()
    saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
    saveDC.SelectObject(saveBitMap)
    windll.user32.PrintWindow(target, saveDC.GetSafeHdc(), 0)

    bmpinfo = saveBitMap.GetInfo()
    bmpstr = saveBitMap.GetBitmapBits(True)
    bmp_width = bmpinfo['bmWidth']
    bmp_height = bmpinfo['bmHeight']

    # Route 1: round trip through a PNG file on disk.
    im = Image.frombuffer('RGB', (bmp_width, bmp_height), bmpstr, 'raw', 'BGRX', 0, 1)
    im.save('thistest.png')
    img = cv2.imread('thistest.png', cv2.IMREAD_UNCHANGED)
    if img is not None:
        _show_frame(img)

    # Route 2: straight from memory.
    # NOTE(review): bmpstr holds raw pixels, not an encoded image file, and
    # this is the imdecode call the question reports as returning None.
    im2 = np.frombuffer(bmpstr, dtype='uint8')
    im2.shape = (bmp_height, bmp_width, 4)
    img2 = cv2.imdecode(im2, cv2.IMREAD_GRAYSCALE)
    if img2 is not None:
        _show_frame(img2)

    # Release the GDI resources.
    win32gui.DeleteObject(saveBitMap.GetHandle())
    saveDC.DeleteDC()
    mfcDC.DeleteDC()
    win32gui.ReleaseDC(target, hwndDC)


if __name__ == '__main__':
    main()
The only piece that sticks out to me is that if I print out the size/shape of bmpstr, it shows something like 1000,1600,3. However, after using numpy.frombuffer I'm unable to reshape to 1000,1600,3 and instead have to use 4 as it will complain about the size. When looking at the len of bmpstr it would show ~6400000 so the math makes sense that you need "4" dimensions, but bmpstr shows the same size with 3. I figured this meant somehow bmpstr was counting from 0, fine, but why can't I do the same with numpy.shape/reshape?
Anyway that's my only guess as to what's going wrong and could be entirely off base. Any help is appreciated.
Edit: I believe Dima is on the right track pointing out I need to convert from RGB to BGR. Unfortunately I tried more than a few methods but still cannot make this work. Here is my current code. If I uncomment im.save, the image is written successfully.
from PIL import Image
import win32gui
import win32ui
from ctypes import windll
import os
import time
import cv2
import numpy as np
from matplotlib import pyplot
def enumHandler(hwnd, lParam):
    """EnumWindows callback: remember visible windows with 'Sword' in the title.

    ``lParam`` is the list passed to win32gui.EnumWindows; matching window
    handles are appended to it.
    """
    if not win32gui.IsWindowVisible(hwnd):
        return
    if 'Sword' not in win32gui.GetWindowText(hwnd):
        return
    lParam.append(hwnd)
def main():
    """Capture the first matching window and try to show it via cv2.imdecode.

    The capture is converted to a PIL image, turned into a NumPy array with
    its channel axis reversed, and then passed to cv2.imdecode.
    """
    hwnds = []
    win32gui.EnumWindows(enumHandler, hwnds)
    target = hwnds[0]

    left, top, right, bottom = win32gui.GetWindowRect(target)
    w = right - left
    h = bottom - top

    hwndDC = win32gui.GetWindowDC(target)
    mfcDC = win32ui.CreateDCFromHandle(hwndDC)
    saveDC = mfcDC.CreateCompatibleDC()
    saveBitMap = win32ui.CreateBitmap()
    saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
    saveDC.SelectObject(saveBitMap)

    result = windll.user32.PrintWindow(target, saveDC.GetSafeHdc(), 0)
    if result:
        bmpinfo = saveBitMap.GetInfo()
        bmpstr = saveBitMap.GetBitmapBits(True)
        size = (bmpinfo['bmWidth'], bmpinfo['bmHeight'])
        im = Image.frombuffer('RGB', size, bmpstr, 'raw', 'BGRX', 0, 1)
        # im.save('thistest.png')
        # im2 = im2[..., :2]
        im2 = np.array(im)[:, :, ::-1].copy()
        # NOTE(review): im2 holds raw pixels, not an encoded image file, and
        # this is the imdecode call the question reports as failing.
        img2 = cv2.imdecode(im2, cv2.IMREAD_UNCHANGED)
        if img2 is None:
            print('img2 was empty')
        else:
            cv2.imshow('FRAME', img2)
            cv2.waitKey()

    # Release the GDI resources.
    win32gui.DeleteObject(saveBitMap.GetHandle())
    saveDC.DeleteDC()
    mfcDC.DeleteDC()
    win32gui.ReleaseDC(target, hwndDC)


if __name__ == '__main__':
    main()
As soon as you have im, which is of type PIL.Image, you can instantly convert it to numpy and thus OpenCV like this
img = np.array(im)
# Keep only the first three channels. This removes an alpha channel if one
# is present; a slice of ``:2`` would also discard one of the colour channels.
img = img[..., :3]
# Reverse the channel axis to convert RGB to BGR; copy() gives OpenCV a
# contiguous array instead of a reversed view.
img = img[:, :, ::-1].copy()
cv2.imshow('FRAME', img)
cv2.waitKey()
In general, using PIL to decode images and then OpenCV to process them is a common practice.

Python - Screenshot of background/inactive window

I am attempting to take fast screenshots ready for processing with PIL/Numpy (~0.01s per screenshot) with Python 3.6. Ideally the window would not need to be in the foreground, i.e. even when another window is covering it, the screenshot is still successful.
So far I've modified the code for python 3 from this question: Python Screenshot of inactive window PrintWindow + win32gui
However, all it gets is black images.
import win32gui
import win32ui
from ctypes import windll
from PIL import Image
# Locate the target window and work out its on-screen size.
hwnd = win32gui.FindWindow(None, 'Calculator')
left, top, right, bot = win32gui.GetWindowRect(hwnd)
width, height = right - left, bot - top

# Build a memory DC with a bitmap of the same size to draw into.
window_dc = win32gui.GetWindowDC(hwnd)
source_dc = win32ui.CreateDCFromHandle(window_dc)
memory_dc = source_dc.CreateCompatibleDC()
bitmap = win32ui.CreateBitmap()
bitmap.CreateCompatibleBitmap(source_dc, width, height)
memory_dc.SelectObject(bitmap)

# Ask the window to draw itself into the memory DC.
# NOTE(review): the flag passed here is 1, whereas the other PrintWindow
# snippets on this page pass 0.
result = windll.user32.PrintWindow(hwnd, memory_dc.GetSafeHdc(), 1)
print(result)

bmp_info = bitmap.GetInfo()
bmp_str = bitmap.GetBitmapBits(True)
print(bmp_str)

image_size = (bmp_info['bmWidth'], bmp_info['bmHeight'])
im = Image.frombuffer('RGB', image_size, bmp_str, 'raw', 'BGRX', 0, 1)

# Release the GDI resources before saving.
win32gui.DeleteObject(bitmap.GetHandle())
memory_dc.DeleteDC()
source_dc.DeleteDC()
win32gui.ReleaseDC(hwnd, window_dc)

if result == 1:
    im.save("screenshot.png")
This code worked for me with applications in background, not minimized.
import win32gui
import win32ui
def background_screenshot(hwnd, width, height, filename='screenshot.bmp'):
    """Save the top-left ``width`` x ``height`` area of a window as a BMP file.

    Args:
        hwnd: Handle of the window to capture.
        width: Width of the captured area in pixels.
        height: Height of the captured area in pixels.
        filename: Path of the bitmap file to write (default 'screenshot.bmp').
    """
    # win32con supplies SRCCOPY but is not imported at the top of this
    # snippet, so bring it into scope here.
    import win32con

    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, width, height)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (width, height), dcObj, (0, 0), win32con.SRCCOPY)
        dataBitMap.SaveBitmapFile(cDC, filename)
    finally:
        # Always hand the GDI objects back, even if the copy or save failed.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())
# NOTE(review): `windowname` is not defined anywhere in this snippet; assign
# the title of the target window to it before this line runs.
hwnd = win32gui.FindWindow(None, windowname)
# Capture a 1280x780 region of that window to 'screenshot.bmp'.
background_screenshot(hwnd, 1280, 780)

Fastest way to take screenshot of a window

def take_screenshot(hwnd):
    """Capture a window and return the pixels as a 3-channel NumPy array.

    The capture size is taken from the window's client rectangle.

    Args:
        hwnd: Handle of the window to capture.

    Returns:
        A ``(height, width, 3)`` uint8 view onto the captured bytes with the
        fourth channel sliced off; the remaining channels keep the order in
        which GetBitmapBits delivers them.
    """
    left, top, right, bot = win32gui.GetClientRect(hwnd)
    # NOTE(review): the size comes from the client rectangle, while the DC
    # below comes from GetWindowDC; switch to win32gui.GetWindowRect(hwnd) if
    # the capture should be sized to the whole window instead.
    width = right - left
    height = bot - top

    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, width, height)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (width, height), dcObj, (0, 0), win32con.SRCCOPY)
        im = dataBitMap.GetBitmapBits(True)
    finally:
        # Always hand the GDI objects back, even if the copy failed.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

    img = np.frombuffer(im, dtype='uint8').reshape(height, width, 4)
    # The former cv2.cvtColor call is gone: its result was discarded, so it
    # only cost a full-frame conversion on every capture.
    return img[:, :, :3]
I'm using this code at the moment. Is there any way to make it faster? I want to be able to capture 1080p at 60 fps live. Can I use my GPU for this problem? I have a GTX 1070.

Categories