I'm trying to do an exercise of an application windows capture using pywin32, so that I get the exact mirror image of the window(like a video game) The issue is - the below code keep capturing/ keep returning only the first frame/old frame of the window and not the current frame of the window. So there is no change reflecting in the mirror image. I tried changing the pywin32 different version but same problem. Please help is there any issue with the lines of code
Python Version== 3.8.10
pywin32 == 227
Windows 10
import cv2 as cv
import numpy as np
from time import time
import win32gui, win32ui, win32con
def get_screenshot(win_name):
hwnd = win32gui.FindWindow(None, win_name)
print('hwnd',hwnd)
window_rect = win32gui.GetWindowRect(hwnd)
print('window_rec',window_rect)
w = window_rect[2] - window_rect[0]
h = window_rect[3] - window_rect[1]
wDC = win32gui.GetWindowDC(hwnd)
print('wDC',wDC)
dcObj = win32ui.CreateDCFromHandle(wDC)
print('dcObj',dcObj)
cDC = dcObj.CreateCompatibleDC()
print('cDC',cDC)
dataBitMap = win32ui.CreateBitmap()
print('dataBitMap',dataBitMap)
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (w, h), dcObj, (0,0), win32con.SRCCOPY)
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (h, w, 4)
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[...,:3]
return img
loop_time = time()
while(True):
# get an updated image of the window
frame = get_screenshot("<<Window Name>>")
cv.imshow('Computer Vision', frame)
# debug the loop rate
#print('FPS {}'.format(1 / (time() - loop_time)))
loop_time = time()
# press 'q' with the output window focused to exit.
# waits 1 ms every loop to process key presses
if cv.waitKey(1) == ord('q'):
cv.destroyAllWindows()
break
I had the same issue, following the same online tutorial. He covers this issue in the 5th video. see here: https://www.youtube.com/watch?v=7k4j-uL8WSQ&list=PL1m2M8LQlzfKtkKq2lK5xko4X-8EZzFPI&index=5
Instead of:
hwnd = win32gui.FindWindow()
use:
hwnd = win32gui.GetDesktopWindow()
Which unfortunately has the side effect of capturing the whole desktop window, but functions great still.
Related
im trying to get a screenshot of a window as fast and then inference on yolov5
it works but sometimes it doesnt detect very well compared to using detect.py on the same image. i think its probably because of img shape or array but i dont know where or how to edit those to make it work. can anyone help me with this please?
import torch
import numpy as np
import win32gui
import win32ui
import win32con
w = 800 # set this
h = 600 # set this
bmpfilenamename = "color.bmp" #set this
windowname = 'put windowname'
def screenshot():
hwnd = win32gui.FindWindow(None, windowname)
wDC = win32gui.GetWindowDC(hwnd)
dcObj=win32ui.CreateDCFromHandle(wDC)
cDC=dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0,0),(w, h) , dcObj, (0,0), win32con.SRCCOPY)
#save the screenshot
#dataBitMap.SaveBitmapFile(cDC, bmpfilenamename)
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.frombuffer(signedIntsArray, dtype='uint8')
img.shape = (h,w,4)
# Free Resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
#img = img[..., ::-1]
#img = np.ascontiguousarray(img)
return img
#load
model = torch.hub.load('./', 'custom', path='yolov5s.pt', source='local')
#inference
test = screenshot()
results = model(test)
boxes = results.pandas().xyxy[0]
print (boxes)
edit : i figured you can do it by changing the code from this
results = model(test)
to this
results = model(cv.cvtColor(test, cv.COLOR_BGR2RGB))
but isnt this code supposed to do the same? for some reason this one wont work
img[: ,: ,::-1]
I am feeding an opencv window in a loop with this specific window screen capture routine below.
PROBLEM: after hundreds of cycles in the loop, it suddenly fail at either one of the two FAIL POINTS marked below in the code.
I am suspecting possible memory leak, but if I am not mistaken, I do delete and release what's required as well as I (re)select object before I delete it.
(The reason I am using this method, because it is important for me to be able to capture the specific window even if it is inactive and in the background and I did not found any other module/method actually works.)
What am I overlooking?
import win32gui
import win32ui
from PIL import Image
import numpy as np
import cv2
while True:
target_window = win32gui.FindWindow(None, ("Analytics dashboard - Google Chrome"))
hwndDC = win32gui.GetWindowDC(target_window)
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC() #### <-- RANDOM FAIL POINT 1: win32ui.error: CreateCompatibleDC failed
saveBitMap = win32ui.CreateBitmap()
saveBitMap.CreateCompatibleBitmap(mfcDC, screen_width, screen_height)
saveDC.SelectObject(saveBitMap)
result = windll.user32.PrintWindow(target_window, saveDC.GetSafeHdc(), 3)
bmpinfo = saveBitMap.GetInfo()
bmpstr = saveBitMap.GetBitmapBits(True)
screen_image = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
mfcDC.DeleteDC() #### <-- RANDOM FAIL POINT 2: win32ui.error: DeleteDC failed
saveDC.DeleteDC()
win32gui.DeleteObject(saveBitMap.GetHandle())
win32gui.ReleaseDC(target_window, hwndDC)
image = cv2.cvtColor(np.array(screen_image), cv2.IMREAD_ANYCOLOR)
I tried the above code with ctypes.windll.user32.PrintWindow and there were no GDI leaks. PrintWindow's third argument should be PW_CLIENTONLY (1), or there is the undocumented PW_RENDERFULLCONTENT (2) option. Undocumented code is not reliable. I don't know what the constant (3) refers to.
If Chrome is the top window you should just take screen shot of desktop. This would be compliant.
It might help if you remove some of the code outside the loop, it will be more efficient at least.
import ctypes
import win32gui
import win32ui
import win32con
from PIL import Image
hdesktop = win32gui.GetDesktopWindow()
(l, r, width, height) = win32gui.GetClientRect(hdesktop)
hdc = win32gui.GetWindowDC(hdesktop)
dc = win32ui.CreateDCFromHandle(hdc)
memdc = dc.CreateCompatibleDC()
bitmap = win32ui.CreateBitmap()
bitmap.CreateCompatibleBitmap(dc, width, height)
memdc.SelectObject(bitmap)
while True:
hwnd = win32gui.FindWindow("Chrome_WidgetWin_1", None)
if hwnd == 0:
break
result = ctypes.windll.user32.PrintWindow(hwnd, memdc.GetSafeHdc(), 2)
if result == 1:
bytes = bitmap.GetBitmapBits(True)
img = Image.frombuffer('RGB', (width, height), bytes, 'raw', 'BGRX', 0, 1)
img.save("file.bmp")
#break
dc.DeleteDC()
memdc.DeleteDC()
win32gui.DeleteObject(bitmap.GetHandle())
win32gui.ReleaseDC(hwnd, hdc)
You can also add ctypes.windll.shcore.SetProcessDpiAwareness(2) on top
I've used the following code examples to capture a screenshot:
https://stackoverflow.com/a/3260811
https://stackoverflow.com/a/24352388/5858697
When taking a screenshot of Firefox or chrome, they return a blank black image. Capturing a screenshot of notepad works fine. I've done some research on this and I think it's because they're gpu accelerated. Other screenshot libraries work but I need to have it so I can capture a screenshot of an application even if it's not currently visible.
Has anyone solved a similar problem or could someone point me in the right direction? Thank you.
Based on the #Barmak's previous answer, I converted C + + code to python, and now it works.
import win32gui
import win32ui
import win32con
from ctypes import windll
from PIL import Image
import time
import ctypes
hwnd_target = 0x00480362 #Chrome handle be used for test
left, top, right, bot = win32gui.GetWindowRect(hwnd_target)
w = right - left
h = bot - top
win32gui.SetForegroundWindow(hwnd_target)
time.sleep(1.0)
hdesktop = win32gui.GetDesktopWindow()
hwndDC = win32gui.GetWindowDC(hdesktop)
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC()
saveBitMap = win32ui.CreateBitmap()
saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
saveDC.SelectObject(saveBitMap)
result = saveDC.BitBlt((0, 0), (w, h), mfcDC, (left, top), win32con.SRCCOPY)
bmpinfo = saveBitMap.GetInfo()
bmpstr = saveBitMap.GetBitmapBits(True)
im = Image.frombuffer(
'RGB',
(bmpinfo['bmWidth'], bmpinfo['bmHeight']),
bmpstr, 'raw', 'BGRX', 0, 1)
win32gui.DeleteObject(saveBitMap.GetHandle())
saveDC.DeleteDC()
mfcDC.DeleteDC()
win32gui.ReleaseDC(hdesktop, hwndDC)
if result == None:
#PrintWindow Succeeded
im.save("test.png")
Please note: Firefox uses Windowless Controls.
If you want to get the handle of Firefox, you may need UI Automation.
For a detailed explanation, please refer to #IInspectable's answer.
Answered Edit: Dima solved my issue, I was unnecessarily calling cv2.decode for data that could be passed directly to OpenCV.
First, I'm pretty new to Python and OpenCV so apologies if I'm missing something obvious, which I'm almost certain I am. I'm trying to use Python to grab a screenshot from a specific window then pass that to OpenCV. If I write the screenshot out to disk then read it back, everything is fine, but as I want to be analyzing multiple screenshots per second, waiting on disk IO seems silly. Unfortunately I've spent about 4 hours trying everything I came up with searching around and nothing has solved the problem. Here is my code:
from PIL import ImageGrab
from PIL import Image
import win32gui
import win32ui
from ctypes import windll
import os
import time
import cv2
import numpy as np
from matplotlib import pyplot
def enumHandler(hwnd, lParam):
if win32gui.IsWindowVisible(hwnd):
if 'Sword' in win32gui.GetWindowText(hwnd):
lParam.append(hwnd)
def screenGrab(rect):
im = ImageGrab.grab(rect)
im.save(os.getcwd() + '\\game__' + str(int(time.time())) +
'.png', 'PNG')
def main():
hwnds = []
win32gui.EnumWindows(enumHandler, hwnds)
rect = win32gui.GetWindowRect(hwnds[0])
w = rect[2] - rect[0]
h = rect[3] - rect[1]
hwndDC = win32gui.GetWindowDC(hwnds[0])
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC()
saveBitMap = win32ui.CreateBitmap()
saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
saveDC.SelectObject(saveBitMap)
result = windll.user32.PrintWindow(hwnds[0], saveDC.GetSafeHdc(), 0)
bmpinfo = saveBitMap.GetInfo()
bmpstr = saveBitMap.GetBitmapBits(True)
im = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
im.save('thistest.png')
img = cv2.imread('thistest.png', cv2.IMREAD_UNCHANGED)
if img is not None:
print(img.shape)
print(type(img))
cv2.imshow('FRAME', img)
cv2.waitKey()
im2 = np.frombuffer(bmpstr, dtype='uint8')
im2.shape = (bmpinfo['bmHeight'], bmpinfo['bmWidth'], 4)
img2 = cv2.imdecode(im2, cv2.IMREAD_GRAYSCALE)
if img2 is not None:
print(img2.shape)
print(type(img2))
cv2.imshow('FRAME', img2)
cv2.waitKey()
win32gui.DeleteObject(saveBitMap.GetHandle())
saveDC.DeleteDC()
mfcDC.DeleteDC()
win32gui.ReleaseDC(hwnds[0], hwndDC)
if __name__ == '__main__':
main()
The only piece that sticks out to me is that if I print out the size/shape of bmpstr, it shows something like 1000,1600,3. However, after using numpy.frombuffer I'm unable to reshape to 1000,1600,3 and instead have to use 4 as it will complain about the size. When looking at the len of bmpstr it would show ~6400000 so the math makes sense that you need "4" dimensions, but bmpstr shows the same size with 3. I figured this meant somehow bmpstr was counting from 0, fine, but why can't I do the same with numpy.shape/reshape?
Anyway that's my only guess as to what's going wrong and could be entirely off base. Any help is appreciated.
Edit: I believe Dima is on the right track pointing out I need to convert from RGB to BGR. Unfortunately I tried more than a few methods but still cannot make this work. Here is my current code. If I uncomment im.save, the image is written successfully.
from PIL import Image
import win32gui
import win32ui
from ctypes import windll
import os
import time
import cv2
import numpy as np
from matplotlib import pyplot
def enumHandler(hwnd, lParam):
if win32gui.IsWindowVisible(hwnd):
if 'Sword' in win32gui.GetWindowText(hwnd):
lParam.append(hwnd)
def main():
hwnds = []
win32gui.EnumWindows(enumHandler, hwnds)
rect = win32gui.GetWindowRect(hwnds[0])
w = rect[2] - rect[0]
h = rect[3] - rect[1]
hwndDC = win32gui.GetWindowDC(hwnds[0])
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC()
saveBitMap = win32ui.CreateBitmap()
saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
saveDC.SelectObject(saveBitMap)
result = windll.user32.PrintWindow(hwnds[0], saveDC.GetSafeHdc(), 0)
if (result):
bmpinfo = saveBitMap.GetInfo()
bmpstr = saveBitMap.GetBitmapBits(True)
im = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
#im.save('thistest.png')
im2 = np.array(im)
#im2 = im2[..., :2]
im2 = im2[:, :, ::-1].copy()
img2 = cv2.imdecode(im2, cv2.IMREAD_UNCHANGED)
if img2 is not None:
cv2.imshow('FRAME', img2)
cv2.waitKey()
else:
print('img2 was empty')
win32gui.DeleteObject(saveBitMap.GetHandle())
saveDC.DeleteDC()
mfcDC.DeleteDC()
win32gui.ReleaseDC(hwnds[0], hwndDC)
if __name__ == '__main__':
main()
As soon as you have im, which is of type PIL.Image, you can instantly convert it to numpy and thus OpenCV like this
img = np.array(im)
# In case it is needed to get rid of alpha channel, if it is present
img = img[..., :2]
# To convert RGB to BGR
img = img[:, :, ::-1].copy()
cv2.imshow('FRAME', img)
cv2.waitKey()
In general, using PIL to decode images and then OpenCV to process them is a common practice.
I am attempting to take fast screenshots ready for processing with PIL/Numpy (~0.01s per screenshot) with Python 3.6. Ideally the window would not need to be in the foreground, i.e. even when another window is covering it, the screenshot is still successful.
So far I've modified the code for python 3 from this question: Python Screenshot of inactive window PrintWindow + win32gui
However, all it gets is black images.
import win32gui
import win32ui
from ctypes import windll
from PIL import Image
hwnd = win32gui.FindWindow(None, 'Calculator')
# Get window bounds
left, top, right, bot = win32gui.GetWindowRect(hwnd)
w = right - left
h = bot - top
hwndDC = win32gui.GetWindowDC(hwnd)
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC()
saveBitMap = win32ui.CreateBitmap()
saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
saveDC.SelectObject(saveBitMap)
result = windll.user32.PrintWindow(hwnd, saveDC.GetSafeHdc(), 1)
print(result)
bmp_info = saveBitMap.GetInfo()
bmp_str = saveBitMap.GetBitmapBits(True)
print(bmp_str)
im = Image.frombuffer(
'RGB',
(bmp_info['bmWidth'], bmp_info['bmHeight']),
bmp_str, 'raw', 'BGRX', 0, 1)
win32gui.DeleteObject(saveBitMap.GetHandle())
saveDC.DeleteDC()
mfcDC.DeleteDC()
win32gui.ReleaseDC(hwnd, hwndDC)
if result == 1:
im.save("screenshot.png")
This code worked for me with applications in background, not minimized.
import win32gui
import win32ui
def background_screenshot(hwnd, width, height):
wDC = win32gui.GetWindowDC(hwnd)
dcObj=win32ui.CreateDCFromHandle(wDC)
cDC=dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, width, height)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0,0),(width, height) , dcObj, (0,0), win32con.SRCCOPY)
dataBitMap.SaveBitmapFile(cDC, 'screenshot.bmp')
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
hwnd = win32gui.FindWindow(None, windowname)
background_screenshot(hwnd, 1280, 780)