I'm new to Python and want to learn it bit by bit, so I decided to write a simple program that would, in real time, capture my screen and do object detection. Through a lot of googling and reading I was able to put together this script; however, no matter what I do, it won't detect the object (m1.png).
Can you please help me figure out why?
import time

import cv2
import mss
import numpy

# template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
template_w, template_h = template_gray.shape[::-1]

with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 523, "left": 247, "width": 875, "height": 679}

    while True:
        last_time = time.time()

        # Get raw pixels from the screen, save it to a Numpy array
        img = numpy.array(sct.grab(monitor))

        # Display the picture
        cv2.imshow("Normal", img)

        # Display the picture in grayscale
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)

        res = cv2.matchTemplate(
            image=img_gray,
            templ=template_gray,
            method=cv2.TM_CCOEFF_NORMED
        )
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

        # threshold
        if max_val >= 0.5:
            img = cv2.rectangle(
                img=img,
                pt1=max_loc,
                pt2=(
                    max_loc[0] + template_w,  # = pt2 x
                    max_loc[1] + template_h   # = pt2 y
                ),
                color=(0, 255, 0),
                thickness=3  # border thickness (-1 would fill the rectangle)
            )

        print("fps: {}".format(1 / (time.time() - last_time)))

        # Press "q" to quit
        if cv2.waitKey(25) & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break
I spent three days trying to figure it out. The only similar code that works, though with a poor frame rate, is this one:
# imports
from re import template
import cv2
import pyautogui
from time import sleep

# No cooldown time
pyautogui.PAUSE = 0

# template and dimensions
template = cv2.imread("b1.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_RGB2GRAY)
template_w, template_h = template_gray.shape[::-1]

# game window dimensions
x, y, w, h = 523, 247, 875, 679

# wait
sleep(3)

# main
while True:
    # screenshot = img
    pyautogui.screenshot("image.png", (x, y, w, h))
    image = cv2.imread("image.png")

    while True:
        image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        result = cv2.matchTemplate(
            image=image_gray,
            templ=template_gray,
            method=cv2.TM_CCOEFF_NORMED
        )
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

        # threshold
        if max_val >= 0.1:
            # pyautogui.click(
            #     x=max_loc[0] + x,  # screen x
            #     y=max_loc[1] + y   # screen y
            # )
            image = cv2.rectangle(
                img=image,
                pt1=max_loc,
                pt2=(
                    max_loc[0] + template_w,  # = pt2 x
                    max_loc[1] + template_h   # = pt2 y
                ),
                color=(0, 0, 255),
                thickness=-1  # fill the rectangle
            )
        else:
            break
The structure looks similar; the only difference is that this version uses pyautogui with OpenCV, whereas I'm trying to use mss. So does that mean the issue in my code is that mss provides no physical location of the screen capture? If so, does that mean it's impossible to do object detection with mss? You would make my day if you could clear up this mystery!
UPD: I was able to solve this! The issue was that, firstly, I had misspelled the .png filename, and secondly, to see it detecting an object, the cv2.imshow call has to be placed after the if statement. Although it works, it's not perfect, so I'm trying to use cv2.Canny(), but now I don't get any output, which raises the question of whether a different approach is needed when Canny is used:
import time

import Options.settings as set
import pyautogui as pt
from time import sleep

import cv2
import mss
import numpy

x = 0

offset = set.offset
create_logs = set.create_logs

# template and dimensions
template = cv2.imread("m2.png")
template_gray = cv2.cvtColor(template, cv2.COLOR_BGRA2GRAY)
template_canny = cv2.Canny(template_gray, 79, 100)
template_w, template_h = template_canny.shape[::-1]

with mss.mss() as sct:
    # Part of the screen to capture
    monitor = {"top": 523, "left": 1600, "width": 230, "height": 359}

    while True:
        last_time = time.time()

        # Get raw pixels from the screen, save it to a Numpy array
        img = numpy.array(sct.grab(monitor))

        # Display the picture
        cv2.imshow("Normal", img)

        # Display the picture in grayscale
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
        img_canny = cv2.Canny(img_gray, 100, 115)

        res = cv2.matchTemplate(
            image=img_canny,
            templ=template_canny,
            method=cv2.TM_CCOEFF_NORMED
        )
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

        # threshold
        if max_val >= 0.6:
            x = x + 1
            print(f'{x} is detected')
            img = cv2.rectangle(
                img=img,
                pt1=max_loc,
                pt2=(
                    max_loc[0] + template_w,  # = pt2 x
                    max_loc[1] + template_h   # = pt2 y
                ),
                color=(0, 255, 0),
                thickness=3
            )

        # Display the picture
        cv2.imshow("Normal", img)

        # print("fps: {}".format(1 / (time.time() - last_time)))

        # Press "q" to quit
        if cv2.waitKey(25) & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break
UPD 2:
As @fmw42 suggested, I tried different OpenCV methods, but whichever I try, they constantly report a detection whether or not the object is in the screen capture area, no matter how I change the `if max_val >= ...` threshold. (A small score-printing snippet is below.)
Please find attached:
m2.png = https://ibb.co/Xb5tCPZ
Example of what the screen capture looks like = https://ibb.co/Xb5tCPZ
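A small snippet like this can sanity-check the threshold (assuming img_gray and template_gray from the script above): it prints the best score each method gives, so you can see which ones actually separate "object present" from "object absent". Note that for TM_SQDIFF_NORMED the best match is the minimum, not the maximum:

for method in (cv2.TM_CCOEFF_NORMED, cv2.TM_CCORR_NORMED, cv2.TM_SQDIFF_NORMED):
    res = cv2.matchTemplate(img_gray, template_gray, method)
    min_val, max_val, _, _ = cv2.minMaxLoc(res)
    # for TM_SQDIFF_NORMED lower is better, so report min_val instead
    best = min_val if method == cv2.TM_SQDIFF_NORMED else max_val
    print(method, best)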
Alright, somehow I got the idea behind the issue: I had to move the lines below after the if statement, and now it's working ^^

# Display the picture
cv2.imshow("Normal", img)
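To sum up, a minimal sketch of the fixed loop order (assuming the template variables from the script above and a correctly spelled file name): the rectangle is drawn first, and cv2.imshow comes after the if, so the detection is actually visible:

with mss.mss() as sct:
    monitor = {"top": 523, "left": 1600, "width": 230, "height": 359}
    while True:
        img = numpy.array(sct.grab(monitor))
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
        res = cv2.matchTemplate(img_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        if max_val >= 0.6:
            cv2.rectangle(img, max_loc,
                          (max_loc[0] + template_w, max_loc[1] + template_h),
                          (0, 255, 0), 3)
        # show AFTER drawing, so the rectangle appears in the window
        cv2.imshow("Normal", img)
        if cv2.waitKey(25) & 0xFF == ord("q"):
            cv2.destroyAllWindows()
            break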
Related
I applied a mask to a video feed using OpenCV and want to display the live stream on a website, but the following code stops streaming once it starts. I've been racking my brain over it but couldn't figure out a solution. Any help would be greatly appreciated.
views.py
def gen(frame):
    while True:
        # frame = camera.get_frame()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')

@api_view(['GET'])
def seasoncolor(request):
    while True:
        return StreamingHttpResponse(gen(color_detection.color_detection(0)),
                                     content_type='multipart/x-mixed-replace; boundary=frame')
color_detection.py
import numpy as np
import cv2
import sys

'''
ML object detection algo (haarcascade) used to identify objects.
The XML file consists of trained Haar Cascade models.
'''

def color_detection(season):
    face_cascade = cv2.CascadeClassifier(
        'accounts/personal_color/self_detection/haarcascade_frontalface_default.xml')
    # initialize video from the webcam
    video = cv2.VideoCapture(1)

    # Spring/summer/fall/winter
    while True:
        # ret tells if the camera works properly. Frame is an actual frame from the video feed
        ret, frame = video.read()
        # make sure port is working and read the image
        if frame is not None and video.isOpened():
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            '''
            Detect the faces within the subregions of the image in scales.
            scaleFactor indicates how much the image size is reduced at each image scale.
            minNeighbors: a higher value results in higher quality of the detected face.
            '''
            faces = face_cascade.detectMultiScale(
                gray, scaleFactor=1.1, minNeighbors=6)

            # Draw the rectangle around each face
            for (x, y, w, h) in faces:
                # Use the coordinates to find the center of the face and from that point draw a circle of radius w/2 or h/2.
                center_coordinates = x + w // 2, y + h // 2
                radius = w // 2  # or can be h / 2 or can be anything based on your requirements

                # background color (black)
                mask = np.zeros(frame.shape[:2], dtype="uint8")
                # Draw the desired region to crop out in white
                cv2.circle(mask, center_coordinates, radius, (255, 255, 255), -1)
                masked = cv2.bitwise_and(frame, frame, mask=mask)

                if int(season) == 0:  # Spring
                    # Replace all (0,0,0) channels with Coral pink
                    masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [121, 131, 248]
                elif int(season) == 1:  # Summer
                    # Replace all (0,0,0) channels with Rose Red
                    masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [86, 30, 194]
                elif int(season) == 2:  # Fall
                    # Replace all (0,0,0) channels with Red Brown / Cinnamon
                    masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [30, 105, 210]
                else:  # Winter
                    # Replace all (0,0,0) channels with Burgundy Red
                    masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [31, 2, 141]

                # cv2.imshow('mask applied', masked)
                ret, jpeg = cv2.imencode('.jpg', masked)
                return jpeg.tobytes()

        if cv2.waitKey(30) & 0xff == 27:
            break

    video.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    arg = sys.argv[1]
    color_detection(arg)
Self_color_diagnosis.js
import React, { useState, useEffect } from 'react';
import ReactDOM from 'react-dom';
import CameraScreen from './CameraScreen';
import { StyleSheet, Text, View, Image } from 'react-native';
import { NavigationContainer } from '@react-navigation/native';
import { createStackNavigator } from '@react-navigation/stack';
import axios from 'axios';

function Self_color_diagnosis({navigation, route}) {
    return (
        <View style={styles.title_container}>
            <Image style={styles.video} source={{
                uri: 'http://localhost:8000/seasoncolor/',}}/>
        </View>
    );
}

const styles = StyleSheet.create({
    video: {
        width: 500,
        height: 500
    },
    title_container: {
        flex: 1,
        justifyContent: 'center'
    },
});

export default Self_color_diagnosis;
The above code results in the pic below. The streaming stops and does not change at all.
gen() runs a loop that keeps using the same frame the whole time. You have to get a new frame inside this loop.
def gen():
    while True:
        frame = color_detection.color_detection(0)
        if frame:
            yield b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n'
        #else:
        #    print('no frame')
But color_detection() should run without its own loop. You should also create VideoCapture(1) only once, and you should return a frame even if you didn't detect any face.
path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
face_cascade = cv2.CascadeClassifier(path)

video = cv2.VideoCapture(1)

def color_detection(season):
    # ret tells if the camera works properly. Frame is an actual frame from the video feed
    ret, frame = video.read()
    # make sure port is working and read the image
    if frame is not None and video.isOpened():
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        '''
        Detect the faces within the subregions of the image in scales.
        scaleFactor indicates how much the image size is reduced at each image scale.
        minNeighbors: a higher value results in higher quality of the detected face.
        '''
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=6)

        # Draw circle around each face
        for (x, y, w, h) in faces:
            # Use the coordinates to find the center of the face and from that point draw a circle of radius w/2 or h/2.
            center_coordinates = x + w // 2, y + h // 2
            radius = w // 2  # or can be h / 2 or can be anything based on your requirements

            # background color (black)
            mask = np.zeros(frame.shape[:2], dtype="uint8")
            # Draw the desired region to crop out in white
            cv2.circle(mask, center_coordinates, radius, (255, 255, 255), -1)
            masked = cv2.bitwise_and(frame, frame, mask=mask)

            if season == 0:  # Spring
                # Replace all (0,0,0) channels with Coral pink
                masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [121, 131, 248]
            elif season == 1:  # Summer
                # Replace all (0,0,0) channels with Rose Red
                masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [86, 30, 194]
            elif season == 2:  # Fall
                # Replace all (0,0,0) channels with Red Brown / Cinnamon
                masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [30, 105, 210]
            else:  # Winter
                # Replace all (0,0,0) channels with Burgundy Red
                masked[np.where((masked == [0, 0, 0]).all(axis=2))] = [31, 2, 141]

            ret, jpeg = cv2.imencode('.jpg', masked)
        else:  # it is a `for/else` construction, not `if/else`
            ret, jpeg = cv2.imencode('.jpg', frame)

        return jpeg.tobytes()
        #return None
BTW:
I see another problem. When it detects many faces, it creates a new mask for every face and applies each mask to the original image separately, so every mask discards the previous one; the result shows only the last face and hides the others. You should first create one mask with all the circles and then apply it to the image.
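A minimal, standalone sketch of that idea (the faces list and input.jpg here are hypothetical stand-ins for your detections and frame):

import numpy as np
import cv2

frame = cv2.imread('input.jpg')                    # hypothetical BGR frame
faces = [(50, 60, 100, 100), (300, 80, 120, 120)]  # hypothetical (x, y, w, h) boxes

# one mask for ALL faces, filled before masking
mask = np.zeros(frame.shape[:2], dtype="uint8")
for (x, y, w, h) in faces:
    center = (x + w // 2, y + h // 2)
    cv2.circle(mask, center, w // 2, 255, -1)

# apply the combined mask once, so every face stays visible
masked = cv2.bitwise_and(frame, frame, mask=mask)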
EDIT:
I don't know which web framework you use, so I used Flask to create a minimal working example.
import os
from flask import Flask, Response
import cv2
import numpy as np

app = Flask(__name__)

print('\n'.join(sorted(os.listdir(cv2.data.haarcascades))))

path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
face_cascade = cv2.CascadeClassifier(path)

#video = cv2.VideoCapture(0)  # my webcam
video = cv2.VideoCapture(0)   # your webcam

def color_detection(season):
    ret, frame = video.read()

    if frame is not None and video.isOpened():
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=6)

        # test two circles on image 640x480
        #faces = [[100, 100, 250, 250], [640-100-250, 480-100-250, 250, 250]]

        if len(faces) > 0:  # it can't be `if faces:` because `faces` is a `numpy array`, which needs a different method to check if it is not empty
            # background color (black)
            mask = np.zeros(frame.shape[:2], dtype="uint8")

            # draw all circles on mask
            for (x, y, w, h) in faces:
                #print(x, y, w, h)

                # use the coordinates to find the center of the face and from that point draw a circle of radius w/2 or h/2.
                center_coordinates = x + w // 2, y + h // 2
                radius = max(w, h) // 2  # or can be h / 2 or can be anything based on your requirements

                # draw the desired region to crop out in white
                cv2.circle(mask, center_coordinates, radius, (255, 255, 255), -1)

            # use mask with all circles
            masked = cv2.bitwise_and(frame, frame, mask=mask)

            if season == 0:    # Spring - Coral pink
                color = [121, 131, 248]
            elif season == 1:  # Summer - Rose Red
                color = [86, 30, 194]
            elif season == 2:  # Fall - Red Brown / Cinnamon
                color = [30, 105, 210]
            else:              # Winter - Burgundy Red
                color = [31, 2, 141]

            masked[np.where((masked == [0, 0, 0]).all(axis=2))] = color
        else:  # no faces
            masked = frame

        ret, jpeg = cv2.imencode('.jpg', masked)
        return jpeg.tobytes()

def gen():
    while True:
        frame = color_detection(0)
        if frame:
            yield (b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
        #else:
        #    print('no frame')

@app.route('/')
def index():
    return '<image src="/seasoncolor">'

@app.route('/seasoncolor')
def seasoncolor():
    return Response(gen(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    #app.debug = True
    app.run()
BTW:
To be clear: VideoCapture can only work with a camera local to the machine where you run the web server. It can't work with a remote camera on the user's computer; only the user's browser has access to that camera. To work with a remote camera on the user's computer, you would have to use JavaScript to access the camera in the user's browser and send the frames to the server - like in my examples in my GitHub python-examples: web camera in browser - canvas - take image and upload to server
I'm learning to use OpenCV and I'm doing a basic example of template matching, locating an image within an image, but I'm having an issue with resolution.
It takes a screen grab, finds the target image, and marks it with a dot, but when I ask pyautogui to click that position it ends up clicking near the target but off.
Max Loc: (623, 717)
when the actual coordinates are
339, 550
I think it has something to do with the resolution of the screen: mine is 1920x1200, but the screenshot is 2880x1800, so it's getting the coordinates from that. I would think it's just half, but I tried that, and that's why I'm here.
See the code below (and a scaling sketch after it):
Any help would be greatly appreciated
import keyboard
import mss
import cv2
import numpy
from time import time, sleep
import pyautogui

pyautogui.PAUSE = 0

sct = mss.mss()

screen_grab_dimensions = {
    'left': 70,
    'top': 185,
    'width': 750,
    'height': 500
}

coin = cv2.imread('coin2.png')
w = coin.shape[1]
h = coin.shape[0]

while True:
    #GET SCREENGRAB WITH COORDINATES
    scr = numpy.array(sct.grab(screen_grab_dimensions))
    scr_remove = scr[:, :, :3]

    #MATCH IMAGE WITHIN IMAGE
    result = cv2.matchTemplate(scr_remove, coin, cv2.TM_CCOEFF_NORMED)

    #GET AND PRINT ACCURACY AND LOCATION
    _, max_val, _, max_loc = cv2.minMaxLoc(result)
    print(f"Max Val: {max_val} Max Loc: {max_loc}")

    #???
    src = scr.copy()

    #IF ACCURACY IS ABOVE .90 MARK IT WITH A DOT
    if max_val > .90:
        mx = max_loc
        my = (max_loc[0] + w, max_loc[1] + h)
        #WORK OUT CENTER OF COIN
        centerpoint = (int(max_loc[0] + (w / 2)), int(max_loc[1] + (h / 2)))
        #MARK WITH A DOT
        cv2.circle(scr, centerpoint, 10, (255, 0, 0), -1)
        #CLICK AREA
        pyautogui.click(x=centerpoint)

    #RESIZE FOR ME
    scr = cv2.resize(scr, (0, 0), fx=.5, fy=.5)
    #SHOW ME SCREENSHOT
    cv2.imshow('Screen Shot', scr)
    cv2.waitKey(1)
    sleep(.1)

    if keyboard.is_pressed('q'):
        break
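If the offset comes from HiDPI/Retina scaling (mss returns physical pixels while pyautogui clicks in logical points), here is a minimal sketch of the conversion. The region offsets are assumed to be in points, and the scale is computed rather than hard-coded, since 2880/1920 = 1.5 here, not 2:

import mss
import pyautogui

with mss.mss() as sct:
    mon = sct.monitors[1]                 # primary monitor, in points
    shot = sct.grab(mon)                  # captured image, in physical pixels
scale = shot.width / mon['width']         # e.g. 2880 / 1920 = 1.5 on this setup

region_left, region_top = 70, 185         # from screen_grab_dimensions above
match_x, match_y = 623, 717               # example max_loc from matchTemplate

# divide the pixel-space match position by the scale, then add the region offset
pyautogui.click(x=region_left + match_x / scale,
                y=region_top + match_y / scale)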
I am creating a dataset for U-Net, where the image I want as Y is a binarized image. I have written OpenCV code that takes input from users via sliding trackbars and saves the image after a given key is pressed.
Could someone please help me apply a perspective transformation to different images? I can get the needed values from the trackbars. (A perspective-transform sketch follows the code.)
My images look something like this:
Below is the code.
import numpy as np
import cv2
import skimage.filters as filters
from os import listdir
from os.path import isfile, join

class InteractiveBinarization():
    def __init__(self, path='./images/', out='./out/'):
        '''
        args:
            path: Path of the directory which has all the images
            out: Path of the directory where your binarized images will be saved
        '''
        self.path = path
        self.images = [f for f in listdir(path) if isfile(join(path, f))]
        self.N = len(self.images)
        self.out = out

    def dummy(self, x=None) -> None:
        '''
        Does not do anything. Used to pass to createTrackbar as it needs a function
        '''
        pass

    def binarize(self, window_width: int = 350, window_height: int = 350) -> None:
        '''
        Method to binarize the image based on the sliding values from the bars. It accepts Gauss Kernel, Sharpen Amount, Sharpen Radius, Rotation Angle.
        Press 'esc' or 'q' to quit, 's' to save the binarized image, 't' to print the current bar values on the image, 'p' for previous image and 'n' for next image
        args:
            window_width: Width of the window which has the sliding bars
            window_height: Height of the window for the sliding bars
        '''
        cv2.namedWindow('Tracking Window', cv2.WINDOW_FULLSCREEN)
        cv2.resizeWindow('Tracking Window', window_width, window_height)
        cv2.createTrackbar('kernel', 'Tracking Window', 3, 513, self.dummy)   # gauss kernel size
        cv2.createTrackbar('x_sigma', 'Tracking Window', 0, 100, self.dummy)  # gauss X sigma
        cv2.createTrackbar('y_sigma', 'Tracking Window', 0, 100, self.dummy)  # gauss Y sigma
        cv2.createTrackbar('amount1', 'Tracking Window', 0, 7, self.dummy)    # sharpen amount, integer part
        cv2.createTrackbar('amount2', 'Tracking Window', 1, 100, self.dummy)  # sharpen amount, decimal part
        cv2.createTrackbar('radius1', 'Tracking Window', 0, 7, self.dummy)    # sharpen radius, integer part
        cv2.createTrackbar('radius2', 'Tracking Window', 1, 100, self.dummy)  # sharpen radius, decimal part
        cv2.createTrackbar('angle', 'Tracking Window', 0, 360, self.dummy)    # rotation angle

        QUIT = False
        put_text = False
        read_image = True
        counter = 0

        while not QUIT:
            if read_image:
                img_name = self.images[counter]
                img = cv2.imread(self.path + img_name)
                read_image = False

            g_k = cv2.getTrackbarPos('kernel', 'Tracking Window')
            if g_k % 2 == 0:
                g_k += 1
            g_x_sigma = cv2.getTrackbarPos('x_sigma', 'Tracking Window')
            g_y_sigma = cv2.getTrackbarPos('y_sigma', 'Tracking Window')

            s_a1 = cv2.getTrackbarPos('amount1', 'Tracking Window')  # 1, 2, 3, 4
            s_a2 = cv2.getTrackbarPos('amount2', 'Tracking Window')  # .01, ..., 0.99
            s_r1 = cv2.getTrackbarPos('radius1', 'Tracking Window')  # same as above
            s_r2 = cv2.getTrackbarPos('radius2', 'Tracking Window')
            s_a = round(s_a1 + s_a2 / 100, 2)  # 1.01, ..., 7.99
            s_r = round(s_r1 + s_r2 / 100, 2)  # same as above

            angle = cv2.getTrackbarPos('angle', 'Tracking Window')

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            smooth = cv2.GaussianBlur(gray, (g_k, g_k), g_x_sigma, sigmaY=g_y_sigma)
            division = cv2.divide(gray, smooth, scale=255)
            sharp = filters.unsharp_mask(division, radius=s_r, amount=s_a, multichannel=False, preserve_range=False)
            sharp = (255 * sharp).clip(0, 255).astype(np.uint8)

            kernel = np.ones((5, 5), np.uint8)
            opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
            closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

            thresh = cv2.threshold(sharp, 0, 255, cv2.THRESH_OTSU)[1]

            # rotate
            (h, w) = thresh.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1)
            thresh = cv2.warpAffine(thresh, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=255)

            if put_text:
                text = f"g_k: {g_k} , g_x_sigma: {g_x_sigma} , g_y_sigma: {g_y_sigma} , s_a: {s_a} , s_r: {s_r} , angle: {angle}"
                cv2.putText(thresh, text, org=(30, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 128, 0), thickness=1)

            cv2.imshow('Image', thresh)

            key = cv2.waitKey(1)  # show for 1 millisecond. Because the loop is infinite, the result is shown continuously
            if key == 27 or key == ord('q'):  # press escape / q to close all windows
                QUIT = True
                break
            elif key == ord('s'):  # save binary image
                cv2.imwrite(self.out + 'binary_' + img_name, thresh)
            elif key == ord('t'):  # show or hide text on image
                put_text = not put_text
            elif key == ord('n'):
                if counter < self.N - 1:
                    read_image = True
                    counter += 1
            elif key == ord('p'):
                if counter > 0:
                    read_image = True
                    counter -= 1

        cv2.destroyAllWindows()
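Separately from the class above, a minimal sketch of a perspective transform with OpenCV; the four source corners here are hypothetical placeholders for values you could read off trackbars, and sample.png is an assumed input path:

import numpy as np
import cv2

img = cv2.imread('./images/sample.png')  # assumed input path
h, w = img.shape[:2]

# four source corners, clockwise from top-left (hypothetical trackbar values)
src = np.float32([[40, 30], [w - 20, 50], [w - 60, h - 40], [25, h - 30]])
# where those corners should land: the full output rectangle
dst = np.float32([[0, 0], [w, 0], [w, h], [0, h]])

M = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(img, M, (w, h),
                             flags=cv2.INTER_CUBIC,
                             borderMode=cv2.BORDER_CONSTANT,
                             borderValue=(255, 255, 255))
cv2.imwrite('./out/warped_sample.png', warped)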
I am a beginner with OpenCV, and I am trying to detect faces with the following function:
def faceDetection(test_img):
    gray_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)  # convert color image to grayscale
    face_haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')  # load Haar classifier
    faces = face_haar_cascade.detectMultiScale(gray_img, scaleFactor=1.32, minNeighbors=5)  # detectMultiScale returns rectangles
    return faces, gray_img
However, faces are detected in some of the photos and not in others. For example, it detected the face in this photo:
However, it didn't detect the face in this photo.
I do not know what went wrong in the second picture, as I believe it is of good quality and the face is shown almost the same way as in the first photo. Any idea?
My reference is here.
Here is the code and output:
import cv2
import sys

def detectFaceOpenCVHaar(faceCascade, frame, inHeight=300, inWidth=0):
    frameOpenCVHaar = frame.copy()
    frameHeight = frameOpenCVHaar.shape[0]
    frameWidth = frameOpenCVHaar.shape[1]
    if not inWidth:
        inWidth = int((frameWidth / frameHeight) * inHeight)
    scaleHeight = frameHeight / inHeight
    scaleWidth = frameWidth / inWidth

    frameOpenCVHaarSmall = cv2.resize(frameOpenCVHaar, (inWidth, inHeight))
    frameGray = cv2.cvtColor(frameOpenCVHaarSmall, cv2.COLOR_BGR2GRAY)

    faces = faceCascade.detectMultiScale(frameGray)
    bboxes = []
    for (x, y, w, h) in faces:
        x1 = x
        y1 = y
        x2 = x + w
        y2 = y + h
        cvRect = [int(x1 * scaleWidth), int(y1 * scaleHeight),
                  int(x2 * scaleWidth), int(y2 * scaleHeight)]
        bboxes.append(cvRect)
        cv2.rectangle(frameOpenCVHaar, (cvRect[0], cvRect[1]), (cvRect[2], cvRect[3]), (0, 255, 0),
                      int(round(frameHeight / 150)), 4)
    return frameOpenCVHaar, bboxes

if __name__ == "__main__":
    faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    frame = cv2.imread("/ur/image/directory/to/face.jpg")
    outOpencvHaar, bboxes = detectFaceOpenCVHaar(faceCascade, frame)
    cv2.imshow("Face Detection Comparison", outOpencvHaar)
    key = cv2.waitKey(0)
    cv2.destroyAllWindows()
Output
My original Python script was created to work on images already saved. I now want it to capture the image and crop it. I have a working webcam section and a working crop section, but I am not able to combine them into a single working script. I have included the combined code below. Currently it will still crop a saved image, and the GUI for the webcam displays for a second, but it does not show any content (gray screen). Can anyone help me?
import cv
import cv2
import numpy
import Image
import glob
import os

# Static
faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
padding = -1
inputimg = raw_input('Please enter the entire path to the image folder:')
outputimg = raw_input('Please enter the entire path to the output folder:')
if not os.path.exists(outputimg):
    os.makedirs(outputimg)
while (padding < 0):
    padding = int(raw_input('Enter crop padding:'))
capture = cv2.VideoCapture(0)
cv2.namedWindow("Face Crop")
if capture.isOpened():
    frame = capture.read()

def DetectFace(image, faceCascade, returnImage=False):
    # variables
    min_size = (50, 50)
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0
    DOWNSCALE = 4

    # Equalize the histogram
    cv.EqualizeHist(image, image)

    # Detect the faces
    faces = cv.HaarDetectObjects(image, faceCascade, cv.CreateMemStorage(0), haar_scale, min_neighbors, haar_flags, min_size)

    # If faces are found
    if faces and returnImage:
        for ((x, y, w, h), n) in faces:
            # Convert bounding box to two CvPoints
            pt1 = (int(x), int(y))
            pt2 = (int(x + w), int(y + h))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)

    # Start video frame
    minisize = (frame.shape[1] / DOWNSCALE, frame.shape[0] / DOWNSCALE)
    miniframe = cv2.resize(frame, minisize)
    faceCam = classifier.detectMultiScale(miniframe)
    for f in faceCam:
        x, y, w, h = [v * DOWNSCALE for v in f]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255))
    cv2.putText(frame, "Press ESC to close.", (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255))
    cv2.imshow("preview", frame)

    # get next frame
    frame = capture.read()
    raw_input('Pause for testing')
    key = cv2.waitKey(20)
    if key in [27, ord('Q'), ord('q')]:  # exit on ESC
        break

    if returnImage:
        return image
    else:
        return faces

def pil2cvGrey(pil_im):
    pil_im = pil_im.convert('L')
    cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
    cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0])
    return cv_im

def imgCrop(image, cropBox, boxScale=1):
    # Crop a PIL image with the provided box [x(left), y(upper), w(width), h(height)]
    # Calculate scale factors
    xPadding = max(cropBox[2] * (boxScale - 1), int(padding))
    yPadding = max(cropBox[3] * (boxScale - 1), int(padding))
    # Convert cv box to PIL box [left, upper, right, lower]
    PIL_box = [cropBox[0] - xPadding, cropBox[1] - yPadding, cropBox[0] + cropBox[2] + xPadding, cropBox[1] + cropBox[3] + yPadding]
    return image.crop(PIL_box)

def Crop(imagePattern, boxScale=1):
    imgList = glob.glob(imagePattern)
    if len(imgList) <= 0:
        return
    else:
        for img in imgList:
            pil_im = Image.open(img)
            cv_im = pil2cvGrey(pil_im)
            faces = DetectFace(cv_im, faceCascade)
            if faces:
                n = 1
                for face in faces:
                    croppedImage = imgCrop(pil_im, face[0], boxScale=boxScale)
                    fname, ext = os.path.splitext(img)
                    fname = os.path.basename(fname)
                    croppedImage.save(outputimg + '\\' + fname + ' -c' + ext)
                    n += 1
                    print 'Cropping:', fname
            else:
                print 'No faces found:', img

# Crop all images in a folder
Crop(inputimg + '\*.png', boxScale=1)
Crop(inputimg + '\*.jpg', boxScale=1)
Also, if anyone has any code improvements, please let me know, as I am new to Python.
I was able to fix this by reworking the logic and flow of the code. The updated code is below and on GitHub: https://github.com/aDroidman/EyeonYou
import cv
import cv2
import numpy
import Image
import glob
import os

# Static
faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
padding = -1
boxScale = 1

# Needed for webcam CV2 section
HaarXML = "haarcascade_frontalface_alt.xml"
classifier = cv2.CascadeClassifier(HaarXML)
downScale = 4
webcam = cv2.VideoCapture(0)

def DetectFace(image, faceCascade, returnImage=False):
    # variables
    min_size = (50, 50)
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0
    DOWNSCALE = 4

    # Equalize the histogram
    cv.EqualizeHist(image, image)

    # Detect the faces
    faces = cv.HaarDetectObjects(image, faceCascade, cv.CreateMemStorage(0), haar_scale, min_neighbors, haar_flags, min_size)

    # If faces are found
    if faces and returnImage:
        for ((x, y, w, h), n) in faces:
            # Convert bounding box to two CvPoints
            pt1 = (int(x), int(y))
            pt2 = (int(x + w), int(y + h))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)

    if returnImage:
        return image
    else:
        return faces

def pil2cvGrey(pil_im):
    pil_im = pil_im.convert('L')
    cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
    cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0])
    return cv_im

def imgCrop(image, cropBox, boxScale=1):
    # Crop a PIL image with the provided box [x(left), y(upper), w(width), h(height)]
    # Calculate scale factors
    xPadding = max(cropBox[2] * (boxScale - 1), int(padding))
    yPadding = max(cropBox[3] * (boxScale - 1), int(padding))
    # Convert cv box to PIL box [left, upper, right, lower]
    PIL_box = [cropBox[0] - xPadding, cropBox[1] - yPadding, cropBox[0] + cropBox[2] + xPadding, cropBox[1] + cropBox[3] + yPadding]
    return image.crop(PIL_box)

def Crop(imagePattern, boxScale, outputimg):
    imgList = glob.glob(imagePattern)
    if len(imgList) <= 0:
        return
    else:
        for img in imgList:
            pil_im = Image.open(img)
            cv_im = pil2cvGrey(pil_im)
            faces = DetectFace(cv_im, faceCascade)
            if faces:
                n = 1
                for face in faces:
                    croppedImage = imgCrop(pil_im, face[0], boxScale=boxScale)
                    fname, ext = os.path.splitext(img)
                    fname = os.path.basename(fname)
                    croppedImage.save(outputimg + '\\' + fname + ' -c' + ext)
                    n += 1
                    print 'Cropping:', fname
            else:
                print 'No faces found:', img

def CropSetup(padding, boxScale):
    inputimg = raw_input('Please enter the entire path to the image folder:')
    outputimg = raw_input('Please enter the entire path to the output folder:')

    # Create output folder if missing
    if not os.path.exists(outputimg):
        os.makedirs(outputimg)

    # Get padding for crop
    while (padding < 0):
        padding = int(raw_input('Enter crop padding:'))

    # Crop images
    Crop(inputimg + '\*.png', boxScale, outputimg)
    Crop(inputimg + '\*.jpg', boxScale, outputimg)

print 'Option 1: Detect image from Webcam'
print 'Option 2: Crop saved images'
option = int(raw_input('Please enter 1 or 2: '))

def Webcam(webcam, classifier, downScale):
    if webcam.isOpened():
        rval, frame = webcam.read()
    else:
        rval = False

    while rval:
        # detect faces and draw bounding boxes
        minisize = (frame.shape[1] / downScale, frame.shape[0] / downScale)
        miniframe = cv2.resize(frame, minisize)
        faces = classifier.detectMultiScale(miniframe)
        for f in faces:
            x, y, w, h = [v * downScale for v in f]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255))
        cv2.putText(frame, "Press ESC to close.", (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255))
        cv2.imshow("Face Crop", frame)

        # get next frame
        rval, frame = webcam.read()
        key = cv2.waitKey(10)
        if key in [27, ord('Q'), ord('q')]:  # exit on ESC
            break

if option == 1:
    Webcam(webcam, classifier, downScale)
elif option == 2:
    CropSetup(padding, boxScale)
else:
    print 'Not a valid input'