ValueError: too many values to unpack (expected 4) during ORB detection - python

I'm detecting Unicode words on a starscape background. They all look a little like the example images below. I have a folder with 183 images; if any of those images is detected, I want to click a button on the screen. If no images are detected, I want to repeat the process.
So far I've been having the most success with Template Matching. I load the images into an array, loop through the entire array and if anything returns a >90% match I click the button.
This, however, is returning a large number of false positives. To improve my detection I've tried:
Canny Edge detection
HSV Thresholding
MatchTemplate
SIFT and SURF
and, Brute-Force Matching with ORB Descriptors
The best results by a long shot have been with ORB, hands down, not even close.
I've followed this tutorial and all of the tutorials on opencv.org; however, I'm getting the following error, seemingly at random. Usually it happens when the background application image changes significantly, but I have no idea why that would matter.
Traceback (most recent call last):
File "c:\Users\keypoint_detection_test1.py", line 63, in <module>
keypoint_detection(ships_to_avoid)
File "c:\Users\keypoint_detection_test1.py", line 39, in keypoint_detection
kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_image)
ValueError: too many values to unpack (expected 4)
What does this error mean and how do I fix it?
import cv2 as cv
import os
import glob
# Change the working directory to the folder this script is in.
os.chdir('C:\\Users\\')
avoid = glob.glob(r"C:\Users\*.png")
def loadImages(directory):
    """Read every image path in *directory* and pair each image with its path.

    Args:
        directory: Iterable of image file paths (the caller passes the
            result of ``glob.glob``).

    Returns:
        A list of ``(image, path)`` tuples in input order. Paths that
        ``cv.imread`` could not read are left out, so every returned image
        is a real array.
    """
    # Initialise empty list
    image_list = []
    # Add images to the list
    for path in directory:
        img = cv.imread(path, cv.IMREAD_UNCHANGED)
        if img is None:
            # cv.imread reports an unreadable file by returning None instead
            # of raising; skip it so later code can rely on img.shape.
            continue
        image_list.append((img, path))
    return image_list
# initialize the WindowCapture class
wincap = WindowCapture()
# For every needle image, grab a fresh screen capture, crop it, run ORB
# keypoint matching against the needle and show the result in an OpenCV window.
# image_list: sequence whose items hold the image at index 0 and its name/path
# at index 1 (the shape produced by loadImages()).
# NOTE(review): indentation was lost in this paste, so the exact nesting of the
# statements below cannot be confirmed from the text alone.
def keypoint_detection(image_list):
for i in image_list:
needle_img = i[0]
# NOTE(review): needle_name is assigned but never used in this function.
needle_name = i[1]
# load image to find
objectToFind = Vision(needle_img)
# get an updated image of the screen
keypoint_image = wincap.get_haystack()
# crop the image
# the list is ordered x, w, y, h: the crop starts at x=600, y=20 and is
# 700 wide by 50 high
x, w, y, h = [600,700,20,50]
keypoint_image = keypoint_image[y:y+h, x:x+w]
# This unpacking requires match_keypoints() to return exactly four values;
# any other tuple length raises ValueError here.
kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_image)
match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_image, kp2, matches, None)
if match_points:
# find the center point of all the matched features
# NOTE(review): centeroid() and draw_crosshairs() are not defined in the Vision
# class shown with this script - presumably they live in the full tutorial
# class; verify.
center_point = objectToFind.centeroid(match_points)
# account for the width of the needle image that appears on the left
center_point[0] += objectToFind.needle_w
# draw the found center point on the output image
match_image = objectToFind.draw_crosshairs(match_image, [center_point])
# move somewhere/do something
#py.moveTo(center_point)
# display the processed image
cv.imshow('Keypoint Search', match_image)
# press 'q' with the output window focused to exit.
# NOTE(review): this only closes the OpenCV windows; nothing here breaks out of
# the loop or stops the caller.
if cv.waitKey(1) == ord('q'):
cv.destroyAllWindows()
# Load the needle images once, outside the loop, so the files are not
# re-read from disk on every detection pass.
ships_to_avoid = loadImages(avoid)
while True:
    keypoint_detection(ships_to_avoid)
# ORB-based matcher: holds one needle image and matches its keypoints against
# a haystack image.
class Vision:
# properties
needle_img = None
needle_w = 0
needle_h = 0
# constructor
# NOTE(review): despite its name, needle_img_path is used as an already-loaded
# image here (its .shape is read directly; nothing is read from disk).
def __init__(self, needle_img_path):
self.needle_img = needle_img_path
# Save the dimensions of the needle image
self.needle_w = self.needle_img.shape[1]
self.needle_h = self.needle_img.shape[0]
# Detect ORB keypoints in the needle and in original_image, match them with a
# FLANN (LSH) matcher and keep the matches that pass the ratio test.
# Normal return: (keypoints_needle, keypoints_haystack, good, points), where
# points stays empty unless more than min_match_count good matches were found.
def match_keypoints(self, original_image, patch_size=32):
min_match_count = 35
orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(original_image, None)
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
search_params = dict(checks=50)
try:
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
except cv.error:
# NOTE(review): this error path returns FIVE values while the normal return at
# the end of the method returns FOUR. A caller that unpacks four names gets
# "ValueError: too many values to unpack (expected 4)" whenever cv.error is
# raised above.
return None, None, [], [], None
# store all the good matches as per Lowe's ratio test.
good = []
points = []
for pair in matches:
# only pairs that actually contain two neighbours can be ratio-tested
if len(pair) == 2:
if pair[0].distance < 0.7*pair[1].distance:
good.append(pair[0])
if len(good) > min_match_count:
print('match %03d, kp %03d' % (len(good), len(keypoints_needle)))
for match in good:
# trainIdx indexes into the haystack keypoints; .pt is that keypoint's position
points.append(keypoints_haystack[match.trainIdx].pt)
return keypoints_needle, keypoints_haystack, good, points
class WindowCapture:
    """Capture the pixels of a Win32 window (or the desktop) as a NumPy array.

    The enclosing module must provide ``np`` (numpy), ``win32gui``,
    ``win32ui`` and ``win32con``.
    """

    # properties: class-level defaults, replaced per instance in __init__
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        """Look up the target window and record its capture geometry.

        Args:
            window_name: Title of the window to capture. ``None`` captures
                the desktop window.

        Raises:
            Exception: If ``window_name`` is given but no window is found.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 0
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # position of the captured area in screen coordinates
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_haystack(self):
        """Grab the current window contents.

        Returns:
            A C-contiguous ``uint8`` array of shape ``(h, w, 3)`` holding the
            first three of the four bytes captured per pixel.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # np.fromstring is deprecated for binary data; np.frombuffer is its
        # replacement. reshape() avoids assigning to .shape in place.
        raw_bits = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(raw_bits, dtype='uint8').reshape((self.h, self.w, 4))

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # keep the first three channels and return them as a contiguous array
        return np.ascontiguousarray(img[..., :3])

In match_keypoints you have (at least) two return statements.
One of them, in the except block, returns 5 elements: None, None, [], [], None.
The other returns 4 elements: return keypoints_needle, keypoints_haystack, good, points
Thus, whenever match_keypoints encounters cv.error in that try block, it will return 5 elements, which is one more than you attempt to unpack in the line that is failing: kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_image)
"Too many values to unpack" is the error that occurs when the returned tuple has more elements than the number of variables on the left-hand side of the assignment. To fix it, make both return statements return the same number of values (for example, drop the trailing None in the except block).

Related

OpenCV wont object detect with MSS screen capture

I'm new to Python and want to learn it bit by bit, so I decided to write a simple program that would, in real time, capture my screen and do object detection. Through a lot of googling and reading, I was able to make this script, however, no matter what I do, it won't do object detection (m1.png).
Can you please assist me with the reason why it is like this?
import time
import cv2
import mss
import numpy
# Real-time template matching on an mss screen capture.
# NOTE(review): indentation was lost in this paste; the nesting of the
# statements below cannot be confirmed from the text alone.
#template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
# shape is (rows, cols); reversing it yields (width, height)
template_w, template_h = template_gray.shape[::-1]
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 523, "left": 247, "width": 875, "height": 679}
while True:
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
img = numpy.array(sct.grab(monitor))
# Display the picture
# NOTE(review): the frame is shown here, BEFORE the match rectangle is drawn
# further down, and img is re-grabbed on the next pass - so the rectangle is
# never visible in this window.
cv2.imshow("Normal", img)
# Convert the frame to grayscale for matching
img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
res = cv2.matchTemplate(
image = img_gray,
templ = template_gray,
method= cv2.TM_CCOEFF_NORMED
)
# max_val / max_loc are the best match score and its top-left corner
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
#threshold
if max_val >= 0.5:
img = cv2.rectangle(
img = img,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
),
color = (0,255,0),
thickness = 3 # outline width; NOTE(review): a negative thickness is what fills - confirm in cv2.rectangle docs
)
print("fps: {}".format(1 / (time.time() - last_time)))
# Press "q" to quit
if cv2.waitKey(25) & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
I spent three days trying to figure it out, and the only similar code that works but with a poor frame rate is this one:
#imports
from re import template
import cv2
import pyautogui
from time import sleep
# Template matching on screenshots taken with pyautogui.
# NOTE(review): indentation was lost in this paste; the nesting of the
# statements below cannot be confirmed from the text alone.
#No cooldown time
pyautogui.PAUSE = 0
#template and dimensions
template = cv2.imread("b1.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_RGB2GRAY)
# shape is (rows, cols); reversing it yields (width, height)
template_w, template_h = template_gray.shape[::-1]
# game window dimensions
x, y, w, h = 523, 247, 875, 679
#wait
sleep(3)
#main
while True:
#screenshot = img
# The screenshot is written to image.png and then read back from disk.
# NOTE(review): this disk round trip is a likely cause of the poor frame rate
# mentioned in the question - confirm by profiling.
pyautogui.screenshot("image.png", (x, y, w, h))
image = cv2.imread("image.png")
# Inner loop: match again on the same screenshot; each accepted match is
# painted over with a filled rectangle, and the loop ends once the best score
# drops below the threshold.
while True:
image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
result = cv2.matchTemplate(
image = image_gray,
templ = template_gray,
method = cv2.TM_CCOEFF_NORMED
)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
#threshold
if max_val >= 0.1:
#pyautogui.click(
# x = max_loc[0] + x, #screen x
# y = max_loc[1] + y #screen y
#)
image = cv2.rectangle(
img = image,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
),
color = (0,0,255),
thickness = -1 #fill the rectangle
)
else:
break
The structure looks similar; the only difference is that this version uses pyautogui with OpenCV, whereas I'm trying to use mss. So does that mean the issue in my code is that the screen capture has no physical location? If so, does that mean it's impossible to do object detection with mss? You would make my day if you could solve this mystery with the code!!
UPD: I was able to solve this! The issue was, firstly, that I had misspelled the .png file name; and secondly, to see the detected object, cv2.imshow has to be placed after the if statement. Although it works, it's not perfect, so I'm trying to use cv2.Canny(), but now I don't get any output. So I'm raising the question of whether a different approach is needed when Canny is used:
import time
import Options.settings as set
import time
import pyautogui as pt
from time import sleep
import cv2
import mss
import numpy
# Template matching on Canny edge maps of an mss screen capture.
# NOTE(review): indentation was lost in this paste; the nesting of the
# statements below cannot be confirmed from the text alone.
# x counts how many frames produced a detection
x = 0
# NOTE(review): offset and create_logs are read from the settings module but
# are not used anywhere in the script shown here.
offset = set.offset
create_logs = set.create_logs
#template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
# NOTE(review): the template uses Canny thresholds (79, 100) while each frame
# below uses (100, 115), so the two edge maps are produced under different
# settings - presumably this lowers the match score; verify.
template_canny = cv2.Canny(template_gray, 79, 100)
# shape is (rows, cols); reversing it yields (width, height)
template_w, template_h = template_canny.shape[::-1]
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 523, "left": 1600, "width": 230, "height": 359}
while True:
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
img = numpy.array(sct.grab(monitor))
# Display the picture (raw frame, before any rectangle is drawn)
cv2.imshow("Normal", img)
# Convert the frame to grayscale, then to an edge map, for matching
img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
img_canny = cv2.Canny(img_gray, 100, 115)
res = cv2.matchTemplate(
image = img_canny,
templ = template_canny,
method= cv2.TM_CCOEFF_NORMED
)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
#threshold
if max_val >= 0.6:
x = x + 1
print(f'{x} is detected')
img = cv2.rectangle(
img = img,
pt1 = max_loc,
pt2 = (
max_loc[0] + template_w, # = pt2 x
max_loc[1] + template_h # = pt2 y
),
color = (0,255,0),
thickness = 3 # outline width; NOTE(review): a negative thickness is what fills - confirm in cv2.rectangle docs
)
# Display the picture (second imshow on the same window, after drawing)
cv2.imshow("Normal", img)
#print("fps: {}".format(1 / (time.time() - last_time)))
# Press "q" to quit
if cv2.waitKey(25) & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
UPD 2:
As @fmw42 suggested, I tried different OpenCV methods, but whichever I try, they constantly report a match whenever there is any object in the screen-capture field, no matter how I change the threshold in if max_val >= ...
Please find attached
m2.png = https://ibb.co/Xb5tCPZ
Example of what the screen capture look like = https://ibb.co/Xb5tCPZ
Alright, I finally understood the cause of the issue:
I had to move the following lines after the if statement, and now it's working ^^
# Display the picture
cv2.imshow("Normal", img)

How Can I Run This Thread in Python?

I'm trying to turn my screen capture into a thread, but I don't understand the error (I'm new to threads). Also, any idea why I only get a random still image or a blank screen in return when I initialize WindowCapture() with a window name?
main.py
wincap = WindowCapture()
wincap.start()
windowcapture.py
import numpy as np
import win32gui, win32ui, win32con
from threading import Thread, Lock
class WindowCapture:
    """Capture a Win32 window (or the desktop) continuously on a worker thread.

    ``start()`` launches a thread that keeps storing the latest frame in
    ``self.screenshot``; ``stop()`` asks that thread to finish. The enclosing
    module must provide ``np`` (numpy), ``win32gui``, ``win32ui``,
    ``win32con``, ``Thread`` and ``Lock``.
    """

    # threading properties
    stopped = True
    lock = None
    screenshot = None
    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        """Look up the target window and record its capture geometry.

        Args:
            window_name: Title of the window to capture. ``None`` captures
                the desktop window.

        Raises:
            Exception: If ``window_name`` is given but no window is found.
        """
        # create a thread lock object
        self.lock = Lock()

        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Grab the current window contents.

        Returns:
            A C-contiguous ``uint8`` array of shape ``(h, w, 3)``: the
            captured pixels with the fourth (alpha) channel dropped.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        #dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        # np.fromstring is deprecated for binary data; np.frombuffer is its
        # replacement. reshape() avoids assigning to .shape in place.
        raw_bits = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(raw_bits, dtype='uint8').reshape((self.h, self.w, 4))

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel, or cv.matchTemplate() will throw an error like:
        #   error: (-215:Assertion failed) (depth == CV_8U || depth == CV_32F) && type == _templ.type()
        #   && _img.dims() <= 2 in function 'cv::matchTemplate'
        img = img[..., :3]

        # make image C_CONTIGUOUS to avoid errors that look like:
        #   File ... in draw_rectangles
        #   TypeError: an integer is required (got type tuple)
        # see the discussion here:
        # https://github.com/opencv/opencv/issues/14866#issuecomment-580207109
        return np.ascontiguousarray(img)

    # find the name of the window you're interested in.
    # once you have it, update window_capture()
    # https://stackoverflow.com/questions/55547940/how-to-get-a-list-of-the-name-of-every-open-window
    @staticmethod
    def list_window_names():
        """Print the handle and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    # WARNING: if you move the window being captured after execution is started, this will
    # return incorrect coordinates, because the window position is only calculated in
    # the __init__ constructor.
    def get_screen_position(self, pos):
        """Return *pos* (x, y) shifted by the capture offset recorded in __init__."""
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)

    # threading methods

    def start(self):
        """Clear the stop flag and run the capture loop on a new thread."""
        self.stopped = False
        t = Thread(target=self.run)
        t.start()

    def stop(self):
        """Ask the capture loop to exit after its current iteration."""
        self.stopped = True

    def run(self):
        """Capture frames until ``stopped`` is set, publishing each one."""
        # TODO: you can write your own time/iterations calculation to determine how fast this is
        while not self.stopped:
            # get an updated image of the game
            screenshot = self.get_screenshot()
            # hold the lock only while swapping in the new frame
            with self.lock:
                self.screenshot = screenshot

How do I translate x, y coordinates with matchTemplate once I've cropped the template area?

I'm using python 3.9.6 and OpenCV 4.5.1
I'm trying to detect objects in a template. My template is a real-time feed of my monitor and my objects are JPGs.
The issue: When I crop my template to speed up detection my mouse starts clicking in the wrong location.
This only happens after I've cropped my template. I think it's because I'm cropping my template at the wrong time in my script. My full monitor is (0 , 0, 1920, 1080) but I only want to capture [220:900, 270:1590]
I've followed the OpenCV documentation and a few online tutorials so far but I'm now stuck.
How do I click on img (third code block) rather than an incorrect off-set caused by cropping my template incorrectly?
I'm using win32gui to grab my template:
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
    """Capture the pixels of a Win32 window (or the desktop) as a NumPy array.

    The enclosing module must provide ``np`` (numpy), ``win32gui``,
    ``win32ui`` and ``win32con``.
    """

    # properties: class-level defaults, replaced per instance in __init__
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        """Look up the target window and record its capture geometry.

        Args:
            window_name: Title of the window to capture. ``None`` captures
                the desktop window.

        Raises:
            Exception: If ``window_name`` is given but no window is found.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 0
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Grab the current window contents.

        Returns:
            A C-contiguous ``uint8`` array of shape ``(h, w, 3)`` holding the
            first three of the four bytes captured per pixel.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        # np.fromstring is deprecated for binary data; np.frombuffer is its
        # replacement. reshape() avoids assigning to .shape in place.
        raw_bits = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(raw_bits, dtype='uint8').reshape((self.h, self.w, 4))

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # keep the first three channels and return them as a contiguous array
        return np.ascontiguousarray(img[..., :3])

    @staticmethod
    def list_window_names():
        """Print the handle and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)
And OpenCV and numpy for my object detection:
import cv2 as cv
import numpy as np
class Vision:
    """Locate a needle image inside a haystack image with template matching.

    The enclosing module must provide ``cv`` (cv2) and ``np`` (numpy).
    """

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCORR_NORMED):
        """Load the needle image and remember its size and the match method.

        Args:
            needle_img_path: Path of the image to search for.
            method: One of the six ``cv.matchTemplate`` methods: TM_CCOEFF,
                TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF,
                TM_SQDIFF_NORMED.

        Raises:
            FileNotFoundError: If the needle image cannot be read.
        """
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        if self.needle_img is None:
            # cv.imread returns None instead of raising; fail here with a
            # clear message rather than on the .shape access below.
            raise FileNotFoundError(
                'Could not read needle image: {}'.format(needle_img_path))

        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]

        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        """Return the centre points of all needle matches in *haystack_img*.

        Args:
            haystack_img: Image to search. In a debug mode it is drawn on
                in place.
            threshold: Minimum match score a location needs to be kept.
            debug_mode: ``'rectangles'`` draws a box around each match,
                ``'points'`` draws a cross at each centre; any truthy value
                also shows the image in a window named ``'Matches'``.

        Returns:
            A list of ``(center_x, center_y)`` tuples. The coordinates are
            relative to *haystack_img* as passed in, so for a cropped image
            they are relative to the crop, not to the full screen.
        """
        # run the OpenCV algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # Get all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        # np.where yields (rows, cols); reverse and zip to get (x, y) pairs
        locations = list(zip(*locations[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single
            # (non-overlapping) boxes
            rectangles.append(rect)
            rectangles.append(rect)
        # Apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # Determine the center position
                center_x = x + int(w/2)
                center_y = y + int(h/2)
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        ############ DISPLAYS MATCHES #############
        if debug_mode:
            cv.imshow('Matches', haystack_img)

        return points
And then passing in both variables in a separate script here:
import cv2 as cv
import pyautogui as py
from windowcapture import WindowCapture
from vision import Vision
# Driver loop: capture the screen, crop it, search the crop for the needle and
# move the mouse to the first match.
# NOTE(review): indentation was lost in this paste; the nesting of the
# statements below cannot be confirmed from the text alone.
# initialize the WindowCapture class
# leave blank to capture the whole screen
haystack = WindowCapture()
# initialize the Vision class
needle = Vision('needle.jpg')
while(True):
# get an updated image of the game
# NOTE(review): `template` is not defined anywhere in this script; the
# WindowCapture instance created above is bound to `haystack`.
screenshot = template.get_screenshot()
# crop rows 220..900 (y) and columns 270..1590 (x)
screenshotCropped = screenshot[220:900, 270:1590]
# despite its name, img receives what find() returns; here it is indexed and
# handed to moveTo as a position
img = needle.find(screenshotCropped, 0.85, 'points')
if img:
# NOTE(review): the search ran on the cropped image, and the crop origin
# (x=270, y=220) is not added back before moving the mouse.
py.moveTo(img[0])
The line causing the issue is: screenshotCropped = screenshot[220:900, 270:1590] If it's removed I click on the object correctly.
I also tried adding border_pixels and titlebar_pixels to allow me to crop directly from WindowCapture but I run into the same issue detailed above.
If I understand your code correctly, when you crop the image, you're not (yet) accounting for the X/Y offset introduced through that crop.
If I understand your example correctly, your code
screenshotCropped = screenshot[220:900, 270:1590]
is cropping from 220-900 along the y-axis (height) and 270-1590 along the x-axis (width), yes? If so, try
x_0, x_1 = 270,1590
y_0, y_1 = 220,900
screenshotCropped = screenshot[y_0:y_1, x_0:x_1]
...
if img:
x_coord = img[0][0] + x_0
y_coord = img[0][1] + y_0
py.moveTo(x_coord,y_coord)
If your cropping region changes, update your (x_0, x_1, y_0, y_1) values accordingly (in both the crop operation and the py.moveTo operation)?

How to take Screen shot in real-time(like video capture) on mac os?

I use opencv for automating games therefore i need to take screen shots of what is going on screen in real time. I use pywin32 for it while I am on windows.
Is there a way to get similar effect on macos? or any other alternatives
class WindowCapture:
    """Capture the pixels of a named Win32 window as a NumPy array.

    The enclosing module must provide ``np`` (numpy), ``win32gui``,
    ``win32ui`` and ``win32con``.
    """

    # properties: class-level defaults, replaced per instance in __init__
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name):
        """Look up the target window and record its capture geometry.

        Args:
            window_name: Title of the window to capture.

        Raises:
            Exception: If no window with that title is found.
        """
        # find the handle for the window we want to capture
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Grab the current window contents.

        Returns:
            A C-contiguous ``uint8`` array of shape ``(h, w, 3)``: the
            captured pixels with the fourth (alpha) channel dropped.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        #dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        # np.fromstring is deprecated for binary data; np.frombuffer is its
        # replacement. reshape() avoids assigning to .shape in place.
        raw_bits = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(raw_bits, dtype='uint8').reshape((self.h, self.w, 4))

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel, or cv.matchTemplate() will throw an error like:
        #   error: (-215:Assertion failed) (depth == CV_8U || depth == CV_32F) && type == _templ.type()
        #   && _img.dims() <= 2 in function 'cv::matchTemplate'
        img = img[..., :3]

        # make image C_CONTIGUOUS to avoid errors that look like:
        #   File ... in draw_rectangles
        #   TypeError: an integer is required (got type tuple)
        # see the discussion here:
        # https://github.com/opencv/opencv/issues/14866#issuecomment-580207109
        return np.ascontiguousarray(img)

    # find the name of the window you're interested in.
    # once you have it, update window_capture()
    # https://stackoverflow.com/questions/55547940/how-to-get-a-list-of-the-name-of-every-open-window
    def list_window_names(self):
        """Print the handle and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    # WARNING: if you move the window being captured after execution is started, this will
    # return incorrect coordinates, because the window position is only calculated in
    # the __init__ constructor.
    def get_screen_position(self, pos):
        """Return *pos* (x, y) shifted by the capture offset recorded in __init__."""
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
I use above code for windows(found it on stackoverflow just copy/pasted it and edited it a little)

How can I capture detected image of object Yolov3 and display in flask

I am working on real-time object detection using YOLOv3 with OpenCV and Python. It works well. Currently I am trying to capture the image of each detected object and display it in Flask. Does someone know how to implement this feature? I hope someone can help. I followed the tutorial at this link: https://medium.com/analytics-vidhya/real-time-object-detection-using-yolov3-with-opencv-and-python-64c985e14786
import cv2
import numpy as np
import os
import time
import detect as dt
from PIL import Image
# Real-time YOLOv3 detection on two webcams using OpenCV's dnn module.
# NOTE(review): indentation was lost in this paste; the nesting of the
# statements below cannot be confirmed from the text alone.
labelsPath = os.path.sep.join(["yolo-coco", "coco.names"])
weightsPath = os.path.sep.join(["yolo-coco", "yolov3.weights"])
configPath = os.path.sep.join(["yolo-coco", "yolov3.cfg"])
# NOTE(review): labelsPath is assigned a second time with the same value.
labelsPath = os.path.sep.join(["yolo-coco", "coco.names"])
LABELS = open(labelsPath).read().strip().split("\n")
net = cv2.dnn.readNet(configPath, weightsPath)
layer_names = net.getLayerNames()
# NOTE(review): i[0] assumes getUnconnectedOutLayers() yields index sequences;
# presumably newer OpenCV releases return a flat array of ints - confirm
# against the installed version.
outputlayers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# one random colour per label
colors= np.random.uniform(0,255,size=(len(LABELS),3))
#loading image
cap0=cv2.VideoCapture(0) #0 for 1st webcam
cap1=cv2.VideoCapture(1)
font = cv2.FONT_HERSHEY_PLAIN
starting_time= time.time()
frame_id = 0
count = 0
use_cuda = 1
# configPath.cuda()
while True:
ret0,frame0= cap0.read() #
ret1,frame1= cap1.read() #
# NOTE(review): frame0 is used here before ret0 is checked below.
image = cv2.cvtColor(frame0, cv2.COLOR_BGR2RGB)
im_pil = Image.fromarray(image)
im_pil = im_pil.resize((200, 200))
# NOTE(review): this result is overwritten by `boxes=[]` further down before
# it is read.
boxes = dt.do_detect(image, im_pil, 0.5, 0.4, use_cuda)
if (ret0):
frame_id+=1
#print(frame_id)
height,width,channels = frame0.shape
#print (frame.shape)
#detecting objects
blob = cv2.dnn.blobFromImage(frame0,0.00392,(320,320),(0,0,0),True,crop=False) #reduce 416 to 320
net.setInput(blob)
outs = net.forward(outputlayers)
#print(outs)
print(outs[1])
#Showing info on screen/ get confidence score of algorithm in detecting an object in blob
class_ids=[]
confidences=[]
boxes=[]
for out in outs:
#print(out)
for detection in out:
# entries from index 5 on are used as per-class scores; indices 0-3 are read
# below as centre x, centre y, width, height (scaled by the frame size)
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
print(confidence)
if confidence > 0.3:
#object detected
center_x= int(detection[0]*width)
center_y= int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
#cv2.circle(img,(center_x,center_y),10,(0,255,0),2)
#rectangle co-ordinates (top-left corner from the centre)
x=int(center_x - w/2)
y=int(center_y - h/2)
#cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
boxes.append([x,y,w,h]) #put all rectangle areas
confidences.append(float(confidence)) #how confident the detection was
class_ids.append(class_id) #class index of the object that was detected
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.6)
result = open('C:/Users/HP/Miniconda3/envs/count_vechicle/coding/images/frame%04d.txt'%(count), 'w')
for i in range(len(boxes)):
# NOTE(review): boxes[i] is a list ([x, y, w, h]); a text-mode file's write()
# requires a str, so this call raises TypeError as written.
result.write(boxes[i])
count = count + 1
# NOTE(review): vidcap is not defined anywhere in the script shown here.
success, image = vidcap.read()
# NOTE(review): result is closed here although statements that read like the
# loop body follow; with the indentation lost it is unclear whether the close
# was meant to happen inside or after the loop.
result.close()
if i in indexes:
x,y,w,h = boxes[i]
label = str(LABELS[class_ids[i]])
confidence= confidences[i]
color = colors[class_ids[i]]
cv2.rectangle(frame0,(x,y),(x+w,y+h),color,2)
cv2.putText(frame0,label+" "+str(round(confidence,2)),(x,y+30),font,1,(255,255,255),2)
elapsed_time = time.time() - starting_time
fps=frame_id/elapsed_time
cv2.putText(frame0,"FPS:"+str(round(fps,2)),(10,50),font,2,(0,0,0),1)
cv2.imshow("Image0",frame0)
key = cv2.waitKey(1) #wait 1ms the loop will start again and we will process the next frame
if (ret1):
cv2.imshow("Image1",frame1)
key = cv2.waitKey(1) #wait 1ms the loop will start again and we will process the next frame
if key == 27: #esc key stops the process
break;
cap0.release()
cap1.release()
cv2.destroyAllWindows()
Using the bounding box coordinates of the detected object, you can crop a new image out of it and then save it to display.
Try this:
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
crop_img = frame[y:y + h, x:x + w] #frame of video you are looping through
cv2.imwrite(<filename>, crop_img)
Update: according to your code :
if i in indexes:
x,y,w,h = boxes[i]
crop_img = frame0[y:y + h, x:x + w]
cv2.imwrite(<filename>, crop_img)
Using the box coordinates you can crop that part
Hope it helps.

Categories