ive been trying to use a script with cv2 and numpy to search an image for an icon, th start button for a stress test software, and that all works fine but it doesnt work together with any mouse movement packages, ive tried mouse, pyautogui, autoit and a few more with no results. the mouse movement works if i put it before the cv2 image recognition script but that wont work cause i need the coordinates.
P.S. 100, 500 is not the coordinates i want to move to, i want to move to the "coords" variable which should be equal to (pt[0] + w, pt[1] + h)
import cv2
import numpy as np
#import pyautogui
import time
import win32api
import pydirectinput
def movedamnyou():
pydirectinput.moveTo(100, 500)
#open OCCT
win32api.ShellExecute(0, "open", "D:\AppData\OCCT.exe", None, ".", 0)
#wait for program to open
time.sleep(20)
#classifier
#take screenshot
#im2 = pyautogui.screenshot('scrn.png')
# Read the main image
img_rgb = cv2.imread('scrn.png')
# Convert it to grayscale
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
# Read the template
template = cv2.imread('OCCTico.png', 0)
# Store width and height of template in w and h
w, h = template.shape[::-1]
# Perform match operations.
res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
# Specify a threshold
threshold = 0.8
# Store the coordinates of matched area in a numpy array
loc = np.where(res >= threshold)
# Draw a rectangle around the matched region.
for pt in zip(*loc[::-1]):
cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
# Show the final image with the matched area.
cv2.imshow('Detected', img_rgb)
#print the output position
coords = (pt[0] + w, pt[1] + h)
pydirectinput.moveTo(100, 500)
import pyautogui
pyautogui.moveTo(coords[0], coords[1])
Related
I am currently working on a small project, but I have an unresolved problem. That is I want to draw a shape through the desired objects , The first thing is to determine the coordinates of the starting and ending points but I don't have a specific idea yet but I don't know how to do it,I hope you can give me suggestions, Glad to have your help.
i want the result in like this
Here is an example using opencv you can draw rectangle over an object:
By giving the template image of the source image you can draw the shape over the image
# importing needed libraries
import cv2
import numpy as np
img_rgb = cv2.imread(source image) # opening the source image
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY) # converting to the gray scale
template = cv2.imread(template image,0) # opening the template image
w, h = template.shape[::-1] # giving the sape of template image
res = cv2.matchTemplate(img_gray,template,cv2.TM_CCOEFF_NORMED) # matching both the image using the opencv methods for matching object
threshold = 0.9
loc = np.where( res >= threshold)
for pt in zip(*loc[::-1]):
cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0,0,255), 1)
cv2.imshow('screen',img_rgb)
cv2.waitKey(0)
Source Image
Template Image
Result Image
I'm attempting to create a programĀ in which my code analyses a video from my security camera and locates the cars that have been spotted. I've figured out how to find the cars and draw red rectangles around them, but I'd like to add a condition that only draws boxes if there are more than 5 cars detected. However, I am unable to do so due to the presence of arrays. How would I go about fixing this code?
import cv2
classifier_file = 'cars.xml'
car_tracker = cv2.CascadeClassifier(classifier_file)
video = cv2.VideoCapture("footage.mp4")
while True:
(read_successful, frame) = video.read()
if read_successful:
grayscaled_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
else:
break
cars = car_tracker.detectMultiScale(grayscaled_frame)
if cars > 4:
for (x, y, w, h) in cars:
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
cv2.imshow('Car Detector', frame)
cv2.waitKey(0)
By the way, I am using an anaconda environment along with python 3.8.8
You have an array of detections.
All you need is to call len() on the array to get the number of elements. This is a built-in function of Python.
cars = car_tracker.detectMultiScale(grayscaled_frame)
if len(cars) >= 5:
for (x, y, w, h) in cars:
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
No need for any other libraries.
Here is one example using a different technique, in this case you can use cvlib it has a class object_detection to detect multiple classes. This uses the yolov4 weights, you will be capable of detect any car in the road or in this case from one webcam, only make sure that the car are ahead the camera not in 2d position.
import cv2
import matplotlib.pyplot as plt
# pip install cvlib
import cvlib as cv
from cvlib.object_detection import draw_bbox
im = cv2.imread('cars3.jpg')
#cv2.imshow("cars", im)
#cv2.waitKey(0)
bbox, label, conf = cv.detect_common_objects(im)
#yolov4 weights will be downloaded. Check: C:\Users\USER_NAME\.cvlib\object_detection\yolo
# if the download was not successful delete yolo folder and try again, it will be downloaded again
# after download the weights keep running the script it will say 0 cars, then close Anaconda spyder anda reopen it and try again.
#get bbox
output_image_with_bbox = draw_bbox(im, bbox, label, conf)
number_cars = label.count('car')
if number_cars > 5:
print('Number of cars in the image is: '+ str(number_cars))
plt.imshow(output_image_with_bbox)
plt.show()
else:
print('Number of cars in the image is: '+ str(number_cars))
plt.imshow(im)
plt.show()
output:
Greetings.
I have a image1 which I locate with pyautogui, center and click.
It is ok.
But I have portion of this image to click, once I locate the first and I m not able to get coordinates to click.
I find out CV2 module, and I was able to match template with image, but I m not able to get TEMPLATE coordinates once I GOT the first image.
so basically I have image1, which i locate, and there is a portion of image2, called template, which i need to locate.
I need do this, because, first image can change position on screen. How i get x, y to center the template image?
pyautogui.position ( x, y , 1 )
this is the script which work matching image with template
the code
import cv2
import numpy as np
import os
import pyautogui as p
img_rgb = cv2.imread('big.png')
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template = cv2.imread('portion.png',0)
w, h = template.shape[::-1]
##print (w,h)
res = cv2.matchTemplate(img_gray,template,cv2.TM_SQDIFF)
threshold = 0.8
loc = np.where( res >= threshold)
for pt in zip(*loc[::-1]):
cv2.rectangle(img_gray, pt, (pt[0] + w, pt[1] + h), (0,255,255), 2)
##cv2.imshow('Detected',template)
#( of course before this I will center the x and y with locate / center somehow )
p.moveTo (x of portion , y of portion ,1) #( of course before this I will center the x and y with locate / center somehow )
Ok. I did it.
I just locate the first image. Then I locate the second giving first image coordinates as region ti check.
It was simple.
Sorry guys bother u.
I have been following a tutorial about computer vision and doing a little project to read the time from a game. The game time is formatted h:m. So far I got the h and m figured out using findContours, but I'm having trouble isolating the colon as the character shape is not continuous. Because of this when I try to matchTemplate the code freaks out and starts to use the dot to match to all the other digits.
Are there ways to group the contours by X?
Here are simplified code to get the reference digits, the code to get digits from the screen is basically the same.
refCnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
refCnts = imutils.grab_contours(refCnts)
refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]
digits = {}
# loop over the OCR-A reference contours
for (i, c) in enumerate(refCnts):
# compute the bounding box for the digit, extract it, and resize
# it to a fixed size
(x, y, w, h) = cv2.boundingRect(c)
roi = ref[y:y + h, x:x + w]
roi = cv2.resize(roi, (10, 13))
digits[i] = roi
Im new to python and opencv. Apologies in advance if this is a dumb question.
Here is the reference image I'm using:
Here is the input image I'm trying to read:
Do you have to use findCountours? Because there are better suited methods for such problems. For instance, you can use template matching as shown below:
These are input, template (cut out from your reference image), and output images:
import cv2
import numpy as np
# Read the input image & convert to grayscale
input_rgb = cv2.imread('input.png')
input_gray = cv2.cvtColor(input_rgb, cv2.COLOR_BGR2GRAY)
# Read the template (Using 0 to read image in grayscale mode)
template = cv2.imread('template.png', 0)
# Perform template matching - more on this here: https://docs.opencv.org/4.0.1/df/dfb/group__imgproc__object.html#ga3a7850640f1fe1f58fe91a2d7583695d
res = cv2.matchTemplate(input_gray,template,cv2.TM_CCOEFF_NORMED)
# Store the coordinates of matched area
# found the threshold value of .56 using trial & error using the input image - might be different in your game
lc = np.where( res >= 0.56)
# Draw a rectangle around the matched region
# I used the width and height of the template image but in practice you need to use a better method to accomplish this
w, h = template.shape[::-1]
for pt in zip(*lc[::-1]):
cv2.rectangle(input_rgb, pt, (pt[0] + w, pt[1] + h), (0,255,255), 1)
# display output
cv2.imshow('Detected',input_rgb)
# cv2.imwrite('output.png', input_rgb)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
You may also look into text detection & recognition using openCV.
I want to save in a 3-dimensional matrix, the images obtained when I apply a sliding window algorithm. When using this code when I apply the changes behind the ## I get an error. I think the code is correct, any ideas?
window is the matriz (100,100) that i want save in A
## import the necessary packages
import cv2
import matplotlib.pyplot as plt
import numpy as np
import imutils
import time
img= cv2.imread("2.jpg") # your image path
image = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
def pyramid(image, scale=1.5, minSize=(30, 30)):
# yield the original image
yield image
# keep looping over the pyramid
while True:
# compute the new dimensions of the image and resize it
w = int(image.shape[1] / scale)
image = imutils.resize(image, width=w)
# if the resized image does not meet the supplied minimum
# size, then stop constructing the pyramid
if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
break
# yield the next image in the pyramid
yield image
def sliding_window(image, stepSize, windowSize):
# slide a window across the image
for y in range(0, image.shape[0], stepSize):
for x in range(0, image.shape[1], stepSize):
# yield the current window
yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])
(winW, winH) = (100, 100)
# declarate the matrix
A=np.empty((100,winW,winH))
b=1
window=[]
#%%
# loop over the image pyramid
for resized in pyramid(image, scale=2):
# loop over the sliding window for each layer of the pyramid
for (x, y, window) in sliding_window(resized, stepSize=32, windowSize=(winW, winH)):
# if the window does not meet our desired window size, ignore it
if window.shape[0] != winH or window.shape[1] != winW:
continue
# THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
# MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
# WINDOW
# since we do not have a classifier, we'll just draw the window
clone = resized.copy()
cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
cv2.imshow("Window", clone)
cv2.waitKey(1)
time.sleep(0.25)
#______ ###### the problem #### ______ i want to save the windows obtein in A, but I get the error in the next two lines of code
## A[b,:,:]=window
## b=b+1;
cv2.destroyAllWindows()
i want to save the windows obtein in A, but I get the error in the next two lines of code
## A[b,:,:]=window