Locate an OpenCV detection using pyautogui - Python

So I'm making a bot that detects icons on the screen and moves the mouse to the icon the user chose. This is what the code looks like:
import numpy as np
import pyautogui
import cv2
from PIL import ImageGrab

fourcc = cv2.VideoWriter_fourcc(*'XVID')
face_csc = cv2.CascadeClassifier('improved_cascade.xml')
out = cv2.VideoWriter("output.avi", fourcc, 5.0, (1366, 768))

while True:
    img = ImageGrab.grab(bbox=None)
    # convert image to numpy array
    img_np = np.array(img)
    # convert color space from RGB (PIL) to BGR (OpenCV)
    frame = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    # detect icons in the frame
    faces = face_csc.detectMultiScale(frame, 1.1, 15)
    for (x, y, w, h) in faces:
        detected_icon = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        roi_gray = frame[y:y + h, x:x + w]
        roi_color = img_np[y:y + h, x:x + w]
        cv2.putText(frame, 'icon', (x, y), cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 255), 1)
    # show image in an OpenCV window
    cv2.imshow("stream", frame)
    # write frame to video writer
    out.write(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cv2.destroyAllWindows()
But I'm having trouble making the mouse click on an icon OpenCV detected. For example: let's say I set up my program so that when it detects Chrome on the screen, it automatically hovers the mouse over the icon and clicks it. How would I be able to do that? Thanks.

I don't have a handy Windows box to run ImageGrab on, but assuming it produces a screenshot with the same width and height (in pixels) as the actual screen, given that both Pyautogui and OpenCV put the origin in the top left, the translation to Pyautogui should be straightforward:
for (x, y, w, h) in faces:
    center_x = x + 0.5 * w
    center_y = y + 0.5 * h
    pyautogui.click(center_x, center_y)
That teleports the mouse pointer to the center of the object rectangle and clicks it. In case you want to simulate more human-like mouse movement and clicking, Pyautogui has a number of tools for that purpose.
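For example, here is a minimal sketch of a more human-like version, assuming the same faces list from detectMultiScale as in the question: moveTo with a duration glides the cursor instead of teleporting it, and the tween function shapes the acceleration.

import pyautogui

for (x, y, w, h) in faces:
    center_x = x + w // 2
    center_y = y + h // 2
    # glide the cursor to the icon over 0.4 s with an ease-in/ease-out curve
    pyautogui.moveTo(center_x, center_y, duration=0.4, tween=pyautogui.easeInOutQuad)
    pyautogui.click()
    break  # only click the first detection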

Related

Python Dlib Face detection focus & enlarge detections

With the below sample code, I am using basic dlib face detection.
I was initially drawing a bounding box around the detected face, but I now want to display only what is detected (i.e. the face): img[top:bottom, left:right, :]
import sys
import dlib
import cv2

detector = dlib.get_frontal_face_detector()
cam = cv2.VideoCapture(1)
color_green = (0, 255, 0)
line_width = 3

while True:
    ret_val, img = cam.read()
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    dets = detector(rgb_image)
    for det in dets:
        # cv2.rectangle(img, (det.left(), det.top()), (det.right(), det.bottom()), color_green, line_width)
        top, bottom, left, right = det.top(), det.bottom(), det.left(), det.right()
        new_img = img[top:bottom, left:right, :]
        cv2.imshow('my webcam', new_img)
    if cv2.waitKey(1) == 27:
        break  # esc to quit

cv2.destroyAllWindows()
The issue I am facing is that it successfully shows what is inside the detected region, but the displayed image keeps resizing depending on how close I am to the camera.
What I did is the following steps:
I got the coordinates of the detection: img[top:bottom, left:right, :]
I then resized the image to 480 x 480: focus_face = cv2.resize(img, (480, 480))
And then passed the image to show.
So the issue I'm having is that if I resize the array (img), it does not seem to follow the detected face but focuses on the centre of the screen, especially the more I move back. So if I'm at the centre of the screen it shows my whole face, but if I'm at the sides it only shows part of my face.
I did my best to explain this, but if you have any questions please let me know.
Best.
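Not a definitive fix, but a minimal sketch of one likely cause: if the 480 x 480 resize is applied to the cropped face region rather than to the full frame img, the window follows the detection instead of the centre of the screen (coordinates are clamped to the frame here as an extra precaution):

for det in dets:
    top, bottom = max(det.top(), 0), det.bottom()
    left, right = max(det.left(), 0), det.right()
    face = img[top:bottom, left:right, :]
    if face.size:  # skip empty crops near the frame border
        focus_face = cv2.resize(face, (480, 480))
        cv2.imshow('my webcam', focus_face)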

VideoCapture shows "no camera access" placeholder image

I am trying to display a live video feed from the camera. When I run the program, ret returns True, but cv2.imshow() displays a placeholder image. Any help would be greatly appreciated.
import numpy as np
from cv2 import cv2

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# initialize video from the webcam
video = cv2.VideoCapture(0)
print(cv2.VideoCapture(0).isOpened())  # -> returns True

while True:
    # ret tells if the camera works properly; frame is an actual frame from the video feed
    ret, frame = video.read()
    if ret == True:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # detect the faces
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        # draw a rectangle around each face
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # display
        cv2.imshow('img', frame)
    if cv2.waitKey(30) & 0xff == 27:
        break

video.release()
cv2.destroyAllWindows()
cv2.imshow('img', frame) opens a window showing the "no camera access" placeholder image instead of the camera feed.
So, I checked whether the Camera permission is allowed and it seems it already is. I am using MacOS Big Sur (version 11.6).

How to rotate camera recorded video?

I am trying to detect faces in a camera-recorded video. When I did it with webcam video, it worked fine. But with the camera-recorded video, the video comes out rotated by -90 degrees. Please suggest how I can get the correctly oriented video output for face detection.
import cv2
import sys

cascPath = sys.argv[1]
faceCascade = cv2.CascadeClassifier('C:/Users/HP/Anaconda2/pkgs/opencv-3.2.0-np112py27_204/Library/etc/haarcascades/haarcascade_frontalface_default.xml')
# video_capture = cv2.VideoCapture(0)
video_capture = cv2.VideoCapture('C:/Users/HP/sample1.mp4')
w = int(video_capture.get(3))
h = int(video_capture.get(4))
# output = cv2.VideoWriter('output_1.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 60, frameSize=(w, h))

while True:
    ret, frame = video_capture.read()
    frame = rotateImage(frame, 90)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray, 1.3, 5)
    # draw a rectangle around the faces
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # cv2.imshow('face', i)
    # output.write(frame)
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
# output.release()
cv2.destroyAllWindows()
In cv2 you can use the cv2.rotate function to rotate an image as required:
rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
For a video, apply the same call to each frame as it is read. Note that cv2.flip() is different: it mirrors the image rather than rotating it, and its flip code (0, 1, -1) selects the axis to flip around; you can check this link for more details:
https://www.geeksforgeeks.org/python-opencv-cv2-flip-method/
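A minimal sketch of how that would slot into the question's loop, assuming the recorded video needs a 90 degree counter-clockwise correction (swap in cv2.ROTATE_90_CLOCKWISE if it is the other way) and an opencv-python install that ships the bundled cv2.data.haarcascades path:

import cv2

video_capture = cv2.VideoCapture('C:/Users/HP/sample1.mp4')
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

while True:
    ret, frame = video_capture.read()
    if not ret:
        break
    # undo the -90 degree rotation of the recorded video before detection
    frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()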

Find if drawn rectangle is visible on screen in opencv python

I have some code which draws a rectangle using the cv2.rectangle function based on other parameters. This is part of a face detection program. The rectangle is shown on screen within an image using imshow. However, because the rectangle's dimensions vary, there are situations when the rectangle is not visible. After the line of code which draws the rectangle in the image, how do I detect whether the rectangle is visible or not? I want this so that I can tell when a face is not detected. This is my code:
import numpy as np
import cv2

# multiple cascades: https://github.com/Itseez/opencv/tree/master/data/haarcascades
# https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_eye.xml
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

cap = cv2.VideoCapture(0)
while 1:
    ret, img = cap.read()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        # integer division keeps the corner coordinates valid for cv2.rectangle
        cv2.rectangle(img, (x + w // 4, y + h // 4), (x + 3 * w // 4, y + 3 * h // 4), (255, 0, 0), 2)
    cv2.imshow('img', img)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
Thanks!
Add print(len(faces)) in the while loop. If it's zero, then no face was detected and no rectangle was drawn.
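A minimal sketch of that check, dropped into the question's while loop right after detectMultiScale (the printed message is just an illustration of where you would handle the no-face case):

faces = face_cascade.detectMultiScale(gray, 1.3, 5)
if len(faces) == 0:
    # nothing detected this frame, so no rectangle will be drawn
    print("no face detected")
else:
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x + w // 4, y + h // 4), (x + 3 * w // 4, y + 3 * h // 4), (255, 0, 0), 2)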

How can I get an array from 'imshow()' in OpenCV?

I use OpenCV to open a camera; it shows the cropped face area in another window at the same time, so I have two windows open at once. I would like to show the image and get its array at the same time. Below is my script:
import numpy as np
import cv2

# load classifier from file
face_cascade = cv2.CascadeClassifier('cascades\haarcascade_frontalface_default.xml')
img = cv2.VideoCapture(0)

while(1):
    # read images
    h, f = img.read()
    gray = cv2.cvtColor(f, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        # create rectangle
        cv2.rectangle(f, (x, y), (x + w, y + h), (255, 255, 255))
        # crop face area
        roi_gray = gray[y:y+h, x:x+w]
        # show cropped face area
        cv2.imshow('crop', roi_gray)
    # create camera window named 'img'
    cv2.imshow('img', f)
    key = cv2.waitKey(200)
    if key in [27, ord('Q'), ord('q')]:
        break
How can I get the array from cv2.imshow('crop', roi_gray)? Thank you.
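For what it's worth, cv2.imshow() only displays an image and returns None, so there is nothing to get back from it; roi_gray is already the NumPy array being shown. A minimal sketch of keeping those arrays around while still displaying them (the crops list is just a hypothetical buffer name for illustration):

crops = []  # hypothetical buffer holding the cropped face arrays

for (x, y, w, h) in faces:
    # roi_gray itself is the array you are after
    roi_gray = gray[y:y+h, x:x+w]
    crops.append(roi_gray.copy())  # keep a copy before the next frame overwrites gray
    cv2.imshow('crop', roi_gray)   # imshow only displays it and returns None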
