I have implemented real-time multi-scale template matching by following this article. When the template appears in the frame, it is detected and a bounding box is drawn around it, so that part works fine. But even when the template is not in the frame, it still "detects" something and draws a bounding box. Below are the code and the problem as I understand it.
import cv2
import numpy as np
import imutils
def main():
template1 = cv2.imread("C:\\Users\\Manthika\\Desktop\\opencvtest\\template.jpg")
template1 = cv2.cvtColor(template1, cv2.COLOR_BGR2GRAY)
template1 = cv2.Canny(template1, 50, 200)
template = imutils.resize(template1, width=60)
(tH, tW) = template.shape[:2]
cv2.imshow("Template", template)
windowName = "Something"
cv2.namedWindow(windowName)
cap = cv2.VideoCapture(0)
if cap.isOpened():
ret, frame = cap.read()
else:
ret = False
# loop over the frames to find the template
while ret:
# load the image, convert it to grayscale, and initialize the
# bookkeeping variable to keep track of the matched region
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
found = None
# loop over the scales of the image
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
# resize the image according to the scale, and keep track
# of the ratio of the resizing
resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
r = gray.shape[1] / float(resized.shape[1])
# if the resized image is smaller than the template, then break
# from the loop
if resized.shape[0] < tH or resized.shape[1] < tW:
print("frame is smaller than the template")
break
# detect edges in the resized, grayscale image and apply template
# matching to find the template in the image
edged = cv2.Canny(resized, 50, 200)
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
# if we have found a new maximum correlation value, then update
# the bookkeeping variable
if found is None or maxVal > found[0]:
found = (maxVal, maxLoc, r)
# unpack the bookkeeping variable and compute the (x, y) coordinates
# of the bounding box based on the resized ratio
# print(found)
if found is None:
# just show only the frames if the template is not detected
cv2.imshow(windowName, frame)
print("No template is found")
else:
(_, maxLoc, r) = found
(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
(endX, endY) = (int((maxLoc[0] + tW) * r), int((maxLoc[1] + tH) * r))
print(startX, startY, endX, endY)
# draw a bounding box around the detected result and display the image
cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)
cv2.imshow(windowName, frame)
if cv2.waitKey(1) == 27:
break
cv2.destroyAllWindows()
cap.release()
if __name__ == "__main__":
main()
I think the problem is in these two lines:
if found is None or maxVal > found[0]:
found = (maxVal, maxLoc, r)
The found variable is always updated with a value, even when the template is not in the frame. I'm new to computer vision, so please be kind and help me solve this problem. Also, kindly let me know if I need to mention anything else. Thank you.
Refer to How do I use OpenCV MatchTemplate?:
In your code you have (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result), where it should be minVal, maxVal, minLoc, maxLoc = cv2.minMaxLoc(result), and you need to set a threshold on the match value to filter out unmatched results.
Example:
# loop over the scales of the image
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
# resize the image according to the scale, and keep track
# of the ratio of the resizing
resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
r = gray.shape[1] / float(resized.shape[1])
# if the resized image is smaller than the template, then break
# from the loop
if resized.shape[0] < tH or resized.shape[1] < tW:
break
# detect edges in the resized, grayscale image and apply template
# matching to find the template in the image
edged = cv2.Canny(resized, 50, 200)
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
(minVal, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
# if we have found a new maximum correlation value, then update
# the bookkeeping variable
if found is None or maxVal > found[0]:
found = (maxVal, maxLoc, r)
# unpack the bookkeeping variable and compute the (x, y) coordinates
# of the bounding box based on the resized ratio
(maxVal, maxLoc, r) = found
# Threshold setting; this 11195548 value was calibrated empirically on a few test images
threshold = 11195548
if maxVal > threshold:
print("match found")
(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
(endX, endY) = (int((maxLoc[0] + tW) * r), int((maxLoc[1] + tH) * r))
# draw a bounding box around the detected result and display the image
cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
cv2.imshow("Image", image)
cv2.waitKey(0)
else:
print("no match found")
Good evening! I need a global variable from one function to be used in another function. However, when I try to declare the variable as global, it throws the error "Statement expected, found Py:EQ" on the line global id, confidence = recognizer.predict(faceimage), specifically at the = sign (line 53 in my file). How do I fix this error?
# install opencv "pip install opencv-python"
import cv2
# distance from camera to object(face) measured
# centimeter
Known_distance = 76.2
# width of face in the real world or Object Plane
# centimeter
Known_width = 14.3
# Colors
GREEN = (0, 255, 0)
RED = (0, 0, 255)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
# defining the fonts
fonts = cv2.FONT_HERSHEY_COMPLEX
# face detector object
face_detector = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
# focal length finder function
def Focal_Length_Finder(measured_distance, real_width, width_in_rf_image):
# finding the focal length
focal_length = (width_in_rf_image * measured_distance) / real_width
return focal_length
# distance estimation function
def Distance_finder(Focal_Length, real_face_width, face_width_in_frame):
distance = (real_face_width * Focal_Length) / face_width_in_frame
# return the distance
return distance
def microFacialExpressions(recognizer, width, height):
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
detectorFace = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
camera = cv2.VideoCapture(0)
recognizer = cv2.face.EigenFaceRecognizer_create()
recognizer.read("classifierEigen.yml")
width, height = 220, 220
while(True):
connected, image = camera.read()
# Grayscale conversion
grayimage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
facesDetected = detectorFace.detectMultiScale(grayimage, scaleFactor=1.5, minSize=(100, 100))
for (x, y, l, a) in facesDetected:
faceimage = cv2.resize(grayimage[y:y + a, x:x + l], (width, height))
cv2.rectangle(image, (x, y), (x + l, y + a), (0,0,255), 2)
global id, confidence = recognizer.predict(faceimage)
#If ID is equal to 1, issue the message "Safe to exit" if not, issue the message "Hostile area"
if id == 1:
warning="Safe to exit"
else:
warning = "Hostile area"
cv2.putText(image, warning, (x,y +(a+30)), font, 2, (0,0,255))
return warning
def face_data(image):
face_width = 0 # making face width to zero
# converting color image to gray scale image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# detecting face in the image
faces = face_detector.detectMultiScale(gray_image, 1.3, 5)
# looping through the faces detect in the image
# getting coordinates x, y , width and height
for (x, y, h, w) in faces:
# draw the rectangle on the face
cv2.rectangle(image, (x, y), (x + w, y + h), GREEN, 2)
# getting face width in the pixels
face_width = w
# return the face width in pixel
return face_width
# reading reference_image from directory
ref_image = cv2.imread("Ref_image.jpg")
# find the face width(pixels) in the reference_image
ref_image_face_width = face_data(ref_image)
# get the focal by calling "Focal_Length_Finder"
# face width in reference(pixels),
# Known_distance(centimeters),
# known_width(centimeters)
Focal_length_found = Focal_Length_Finder(
Known_distance, Known_width, ref_image_face_width)
print(Focal_length_found)
# show the reference image
cv2.imshow("ref_image", ref_image)
# initialize the camera object so that we
# can get frame from it
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# looping through frame, incoming from
# camera/video
while True:
# reading the frame from camera
_, frame = cap.read()
# calling face_data function to find
# the width of face(pixels) in the frame
face_width_in_frame = face_data(frame)
# check if the face is zero then not
# find the distance
if face_width_in_frame != 0:
# finding the distance by calling function
# Distance finder function need
# these arguments the Focal_Length,
# known_width(centimeters),
# and Known_distance(centimeters)
Distance = Distance_finder(
Focal_length_found, Known_width, face_width_in_frame)
if Distance <= 50 and id:
print("Level S Alert!")
# draw line as background of text
cv2.line(frame, (30, 30), (230, 30), RED, 32)
cv2.line(frame, (30, 30), (230, 30), BLACK, 28)
# Drawing Text on the screen
cv2.putText(
frame, f"Distance: {round(Distance, 2)} CM", (30, 35),
fonts, 0.6, GREEN, 2)
# show the frame on the screen
cv2.imshow("frame", frame)
# quit the program if you press 'q' on keyboard
if cv2.waitKey(1) == ord("q"):
break
# closing the camera
cap.release()
# closing the windows that are opened
cv2.destroyAllWindows()
The global statement does not support assigning to a name; it only declares the name to be a global variable rather than a local one. While global statements are legal almost anywhere in a function, it is strongly recommended to put such declarations at the top of the function.
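For example, a minimal sketch of the rule (hypothetical names):
counter = 0
def bump():
    global counter  # declaration only; no assignment allowed on this line
    counter += 1    # the assignment happens as a separate, ordinary statement
Applied to your function, the declaration moves to the top: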
def microFacialExpressions(recognizer, width, height):
global id, confidence
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
detectorFace = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
camera = cv2.VideoCapture(0)
recognizer = cv2.face.EigenFaceRecognizer_create()
recognizer.read("classifierEigen.yml")
width, height = 220, 220
while(True):
connected, image = camera.read()
# Grayscale conversion
grayimage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
facesDetected = detectorFace.detectMultiScale(grayimage, scaleFactor=1.5, minSize=(100, 100))
for (x, y, l, a) in facesDetected:
faceimage = cv2.resize(grayimage[y:y + a, x:x + l], (width, height))
cv2.rectangle(image, (x, y), (x + l, y + a), (0,0,255), 2)
id, confidence = recognizer.predict(faceimage)
#If ID is equal to 1, issue the message "Safe to exit" if not, issue the message "Hostile area"
if id == 1:
warning="Safe to exit"
else:
warning = "Hostile area"
cv2.putText(image, warning, (x,y +(a+30)), font, 2, (0,0,255))
return warning
Given that both variables are repeatedly reassigned in the loop, it's not clear why the last value of either is special enough to be needed in the global scope. I suspect neither variable needs to be declared global at all.
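If a caller genuinely does need the final values, returning them is usually cleaner than globals; a sketch of that alternative, adapting the function's existing return warning:
# inside the loop, a plain local assignment (no global declaration needed):
id, confidence = recognizer.predict(faceimage)
# at the end of the function, hand everything back to the caller:
return warning, id, confidence
# caller side:
warning, id, confidence = microFacialExpressions(recognizer, width, height)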
I'm trying to display my Raspberry Pi's camera feed inside a label that sits inside a tkinter frame. I have a method for recognizing faces that opens the video capture, but the camera feed is not displayed in the label.
Here is my code for creating the GUI:
def create_ui(self):
self.window = tk.Tk()
self.window.title("App Faces")
self.window.geometry('860x720')
self.window.bind('<Escape>', lambda e: self.window.quit())
self.tab_control = ttk.Notebook(self.window)
self.tab2 = ttk.Frame(self.tab_control)
self.tab_control.add(self.tab2, text="Real Time Face Recognition")
self.label2 = ttk.Label(self.tab2, text="Real Time Face Recognition")
self.label2.place(x=12, y=50)
self.lmain2 = ttk.Label(self.tab2)
self.lmain2.place(x=12, y=70)
self.boton2 = ttk.Button(self.tab2, text="START", command=self.recognizing_faces_video)
self.boton2.pack()
self.boton2.place(x=50, y=20)
self.tab_control.pack(expand=1, fill='both')
self.window.mainloop()
The START button calls the method self.recognizing_faces_video, but it is not working:
def recognizing_faces_video(self):
self.recognizer = pickle.loads(open(self.recognizer, "rb").read())
self.le = pickle.loads(open(self.labelEncoder, "rb").read())
self.cap = cv2.VideoCapture("http://192.168.1.71:8000/stream.mjpg")
# print(self.cap)
# cap = cv2.VideoCapture("http://raspberrypi.mshome.net:8000/stream.mjpg")
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.width)
print(self.cap.isOpened())
# grab the frame from the threaded video stream
_, frame = self.cap.read()
#cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
# resize the frame to have a width of 600 pixels (while
# maintaining the aspect ratio), and then grab the image
# dimensions
frame = imutils.resize(frame, width=600)
(h, w) = frame.shape[:2]
# construct a blob from the image
imageBlob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0),
swapRB=False, crop=False)
# apply OpenCV's deep learning-based face detector to localize
# faces in the input image
self.model.setInput(imageBlob)
detections = self.model.forward()
# loop over the detections
for i in range(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections
if confidence > 0.5:
# compute the (x, y)-coordinates of the bounding box for
# the face
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# extract the face ROI
face = frame[startY:endY, startX:endX]
(fH, fW) = face.shape[:2]
# ensure the face width and height are sufficiently large
if fW < 20 or fH < 20:
continue
# construct a blob for the face ROI, then pass the blob
# through our face embedding model to obtain the 128-d
# quantification of the face
faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255,
(96, 96), (0, 0, 0), swapRB=True, crop=False)
self.embedder.setInput(faceBlob)
vec = self.embedder.forward()
# perform classification to recognize the face
preds = self.recognizer.predict_proba(vec)[0]
j = np.argmax(preds)
proba = preds[j]
name = self.le.classes_[j]
# draw the bounding box of the face along with the
# associated probability
text = "{}: {:.2f}%".format(name, proba * 100)
y = startY - 10 if startY - 10 > 10 else startY + 10
cv2.rectangle(frame, (startX, startY), (endX, endY),
(0, 0, 255), 2)
cv2.putText(frame, text, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
self.boton2['state'] = "disabled"
img = Image.fromarray(frame)
imgTk = ImageTk.PhotoImage(image = img)
self.lmain2.imgtk = imgTk
self.lmain2.configure(image=imgTk)
self.lmain2.after(10, self.recognizing_faces_video)
Can anyone help me out please?
I am trying to use OpenCV to draw a rectangle around a cone. So far, my code outlines the cone, which results in a triangle shape. How can I use OpenCV to draw a rectangle around that triangle?
My code so far:
import cv2
import numpy as np
img = cv2.imread('image.jpg')
ret, mask = cv2.threshold(img[:, :,2], 235, 255, cv2.THRESH_BINARY)
mask3 = np.zeros_like(img)
mask3[:, :, 0] = mask
mask3[:, :, 1] = mask
mask3[:, :, 2] = mask
orange = cv2.bitwise_and(img, mask3)
cv2.imwrite("output.jpg", orange)
im = cv2.imread('output.jpg')
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(im, contours, -1, (0,255,0), 3)
cv2.imshow('img',im)
cv2.waitKey(0)
cv2.destroyAllWindows()
Jpeg File:
One approach is multi-scale template matching:
You crop the object you want to find:
Apply Canny edge-detection to find the edges
edged = cv2.Canny(resized, 50, 200)
Find the matched template using matchTemplate
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
Result:
Code:
import numpy as np
import imutils
import glob
import cv2
template = cv2.imread("template.jpg")
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
template = cv2.Canny(template, 50, 200)
(h, w) = template.shape[:2]
for imagePath in glob.glob("img2" + "/pXobJ.jpg"):
image = cv2.imread(imagePath)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
found = None
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
r = gray.shape[1] / float(resized.shape[1])
if resized.shape[0] < h or resized.shape[1] < w:
break
edged = cv2.Canny(resized, 50, 200)
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
if found is None or maxVal > found[0]:
found = (maxVal, maxLoc, r)
(_, maxLoc, r) = found
(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
(endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
cv2.imwrite("img2/out.jpg", image)
print("Table coordinates: ({}, {}, {}, {})".format(startX, startY, endX, endY))
You can also use deep learning object detection with trained networks.
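That said, since your threshold and contour code already isolates the cone, a simpler route for this particular image is the bounding rectangle of the largest contour. A sketch reusing the im and contours variables from your own code:
# pick the largest contour by area (assumed to be the cone)
c = max(contours, key=cv2.contourArea)
# compute and draw its upright bounding rectangle
x, y, w, h = cv2.boundingRect(c)
cv2.rectangle(im, (x, y), (x + w, y + h), (0, 0, 255), 2)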
I'm new to Python but want to learn it, so I decided to create a program that does template matching on input captured from the desktop.
Can anyone help with this? How do I write template matching against a stream captured from the desktop?
import time
import cv2
import mss
import numpy
template = cv2.imread('template.jpg', 0)
w, h = template.shape[::-1]
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 40, "left": 0, "width": 800, "height": 640}
while "Screen capturing":
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
img = numpy.array(sct.grab(monitor))
# Display the picture
# cv2.imshow("OpenCV/Numpy normal", img)
# Display the picture in grayscale
cv2.imshow('OpenCV/Numpy grayscale', cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY))
# Print fps
print("fps: {}".format(1 / (time.time() - last_time)))
# Search template in stream
# Press "q" to quit
if cv2.waitKey(25) & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
The first thing I noticed is that you did not apply any edge detection to your template image. Edge detection is not strictly necessary, but it is useful for isolating the features of the template image.
Assume I have the following image:
To detect the above template image precisely, I apply an edge-detection algorithm:
template = cv2.imread("three.png")
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
template = cv2.Canny(template, 50, 200)
I also apply edge detection to the stream captured from the desktop:
img = sct.grab(mon)
gray = cv2.cvtColor(np.array(img), cv2.COLOR_BGR2GRAY)
edged = cv2.Canny(gray, 50, 200)
Check whether the template matches the captured image:
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
If the template is matched in the desktop stream, get its coordinates:
(_, maxLoc, r) = found
(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
(endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
Finally, draw a rectangle to mark the location:
cv2.rectangle(img, (startX, startY), (endX, endY), (180, 105, 255), 2)
Result:
Above, we can see that our template (the digit 3) is matched in the stream captured from the desktop.
Code:
import time
import cv2
import numpy as np
import imutils
from mss import mss
template = cv2.imread("three.png")
template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
template = cv2.Canny(template, 50, 200)
(h, w) = template.shape[:2]
start_time = time.time()
mon = {'top': 200, 'left': 200, 'width': 200, 'height': 200}
with mss() as sct:
while True:
last_time = time.time()
img = sct.grab(mon)
img = np.array(img)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edged = cv2.Canny(gray, 50, 200)
found = None
for scale in np.linspace(0.2, 1.0, 20)[::-1]:
resized = imutils.resize(gray, width=int(gray.shape[1] * scale))
r = gray.shape[1] / float(resized.shape[1])
if resized.shape[0] < h or resized.shape[1] < w:
break
edged = cv2.Canny(resized, 50, 200)
cv2.imwrite("canny_image.png", edged)
result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF)
(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
if found is None or maxVal > found[0]:
found = (maxVal, maxLoc, r)
(_, maxLoc, r) = found
(startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
(endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
cv2.rectangle(img, (startX, startY), (endX, endY), (180, 105, 255), 2)
print('The loop took: {0}'.format(time.time()-last_time))
cv2.imshow('test', np.array(img))
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
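As with the webcam example earlier on this page, found always ends up holding the best score of the current frame, so when the template can be absent you likely want a threshold on maxVal before drawing. A sketch, where the cutoff is an assumed starting value to recalibrate for your own captures:
(maxVal, maxLoc, r) = found
threshold = 11195548  # assumed; TM_CCOEFF scores are unnormalized, so calibrate this
if maxVal > threshold:
    (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
    (endX, endY) = (int((maxLoc[0] + w) * r), int((maxLoc[1] + h) * r))
    cv2.rectangle(img, (startX, startY), (endX, endY), (180, 105, 255), 2)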
Let untitled.png be a file storing the image to be matched.
Here is a working program. I used the following references to put it together:
OpenCV Org: Template Matching
Taking screenshots with OpenCV and Python
OpenCV python: ValueError: too many values to unpack
import os
import cv2 as cv
import numpy as np
import pyautogui
import time
import winsound # for sound
from matplotlib import pyplot as plt
os.chdir("C:\\Users\\Mike\\\Desktop")
img = cv.imread('untitled.png',0)
img_piece = cv.cvtColor(img, cv.COLOR_RGB2BGR)
c, w, h = img_piece.shape[::-1]
while 1:
pic = pyautogui.screenshot()
template = cv.cvtColor(np.array(pic), cv.COLOR_RGB2BGR)
meth = 'cv.TM_CCOEFF'
method = eval(meth)
# Apply template Matching
res = cv.matchTemplate(img_piece,template,method)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)
top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
cv.rectangle(img,top_left, bottom_right, 255, 2)
if max_val > 66000000.0:
print(max_val, top_left, bottom_right)
winsound.Beep(888, 111)
if 1:
plt.subplot(121),plt.imshow(res,cmap = 'gray')
plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(img,cmap = 'gray')
plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
plt.suptitle(meth)
plt.show()
break
time.sleep(1)
Below is the basic code to perform template matching. Place it below img = numpy.array(sct.grab(monitor)) and it will run on every frame.
# create grayscale of image - because template is also grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# perform match
res = cv2.matchTemplate(gray,template ,cv2.TM_CCOEFF)
# get coordinates of best match
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = max_loc  # TM_CCOEFF measures similarity, so the best match is at the maximum
bottom_right = (top_left[0] + w, top_left[1] + h)
# draw red rectangle over original screen capture
cv2.rectangle(img,top_left, bottom_right,(0,0,255),3)
# display image
cv2.imshow('Result',img)
You can find some more info on matchTemplate here
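Which corner minMaxLoc gives you depends on the matching method; only the SQDIFF methods measure difference, so only for them is the minimum the best match. A sketch of the rule:
method = cv2.TM_CCOEFF
res = cv2.matchTemplate(gray, template, method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
    top_left = min_loc  # difference score: smaller is better
else:
    top_left = max_loc  # similarity score: larger is better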
My original Python script was created to work on images already saved to disk. I now want it to capture the image from the webcam and then crop it. I have a working webcam section and a working crop section, but I am not able to combine them so they work together. I have included the combined code below. Currently it will still crop a saved image, and the GUI for the webcam displays for a second but does not show any content (a gray screen). Can anyone help me?
import cv
import cv2
import numpy
import Image
import glob
import os
# Static
faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
padding = -1
inputimg = raw_input('Please enter the entire path to the image folder:')
outputimg = raw_input('Please enter the entire path to the output folder:')
if not os.path.exists(outputimg):
os.makedirs(outputimg)
while (padding < 0):
padding = int(raw_input('Enter crop padding:'))
capture = cv2.VideoCapture(0)
cv2.namedWindow("Face Crop")
if capture.isOpened():
frame = capture.read()
def DetectFace(image, faceCascade, returnImage=False):
#variables
min_size = (50,50)
haar_scale = 1.1
min_neighbors = 3
haar_flags = 0
DOWNSCALE = 4
# Equalize the histogram
cv.EqualizeHist(image, image)
# Detect the faces
faces = cv.HaarDetectObjects(image, faceCascade, cv.CreateMemStorage(0),haar_scale, min_neighbors, haar_flags, min_size)
# If faces are found
if faces and returnImage:
for ((x, y, w, h), n) in faces:
# Convert bounding box to two CvPoints
pt1 = (int(x), int(y))
pt2 = (int(x + w), int(y + h))
cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)
# Start video frame
minisize = (frame.shape[1]/DOWNSCALE,frame.shape[0]/DOWNSCALE)
miniframe = cv2.resize(frame, minisize)
faceCam = classifier.detectMultiScale(miniframe)
for f in faceCam:
x, y, w, h = [ v*DOWNSCALE for v in f ]
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,0,255))
cv2.putText(frame, "Press ESC to close.", (5, 25),
cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255,255,255))
cv2.imshow("preview", frame)
# get next frame
frame = capture.read()
raw_input('Pause for testing')
key = cv2.waitKey(20)
if key in [27, ord('Q'), ord('q')]: # exit on ESC
break
if returnImage:
return image
else:
return faces
def pil2cvGrey(pil_im):
pil_im = pil_im.convert('L')
cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0] )
return cv_im
def imgCrop(image, cropBox, boxScale=1):
# Crop a PIL image with the provided box [x(left), y(upper), w(width), h(height)]
# Calculate scale factors
xPadding=max(cropBox[2]*(boxScale-1),int(padding))
yPadding=max(cropBox[3]*(boxScale-1),int(padding))
# Convert cv box to PIL box [left, upper, right, lower]
PIL_box=[cropBox[0]-xPadding, cropBox[1]-yPadding, cropBox[0]+cropBox[2]+xPadding, cropBox[1]+cropBox[3]+yPadding]
return image.crop(PIL_box)
def Crop(imagePattern,boxScale=1):
imgList=glob.glob(imagePattern)
if len(imgList)<=0:
return
else:
for img in imgList:
pil_im=Image.open(img)
cv_im=pil2cvGrey(pil_im)
faces=DetectFace(cv_im,faceCascade)
if faces:
n=1
for face in faces:
croppedImage=imgCrop(pil_im, face[0],boxScale=boxScale)
fname,ext=os.path.splitext(img)
fname = os.path.basename(fname)
croppedImage.save(outputimg + '\\' + fname + ' -c' + ext)
n+=1
print 'Cropping:', fname
else:
print 'No faces found:', img
# Crop all images in a folder
Crop(inputimg + '\*.png', boxScale=1)
Crop(inputimg + '\*.jpg', boxScale=1)
Also, if anyone has any code improvements, please let me know, as I am new to Python.
I was able to fix this by reworking the logic and flow of the code. The updated code is below and on GitHub: https://github.com/aDroidman/EyeonYou
import cv
import cv2
import numpy
import Image
import glob
import os
# Static
faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
padding = -1
boxScale = 1
# Needed for webcam CV2 section
HaarXML = "haarcascade_frontalface_alt.xml"
classifier = cv2.CascadeClassifier(HaarXML)
downScale = 4
webcam = cv2.VideoCapture(0)
def DetectFace(image, faceCascade, returnImage=False):
#variables
min_size = (50,50)
haar_scale = 1.1
min_neighbors = 3
haar_flags = 0
DOWNSCALE = 4
# Equalize the histogram
cv.EqualizeHist(image, image)
# Detect the faces
faces = cv.HaarDetectObjects(image, faceCascade, cv.CreateMemStorage(0),haar_scale, min_neighbors, haar_flags, min_size)
# If faces are found
if faces and returnImage:
for ((x, y, w, h), n) in faces:
# Convert bounding box to two CvPoints
pt1 = (int(x), int(y))
pt2 = (int(x + w), int(y + h))
cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)
if returnImage:
return image
else:
return faces
def pil2cvGrey(pil_im):
pil_im = pil_im.convert('L')
cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0] )
return cv_im
def imgCrop(image, cropBox, boxScale=1):
# Crop a PIL image with the provided box [x(left), y(upper), w(width), h(height)]
# Calculate scale factors
xPadding=max(cropBox[2]*(boxScale-1),int(padding))
yPadding=max(cropBox[3]*(boxScale-1),int(padding))
# Convert cv box to PIL box [left, upper, right, lower]
PIL_box=[cropBox[0]-xPadding, cropBox[1]-yPadding, cropBox[0]+cropBox[2]+xPadding, cropBox[1]+cropBox[3]+yPadding]
return image.crop(PIL_box)
def Crop(imagePattern,boxScale,outputimg):
imgList=glob.glob(imagePattern)
if len(imgList)<=0:
return
else:
for img in imgList:
pil_im=Image.open(img)
cv_im=pil2cvGrey(pil_im)
faces=DetectFace(cv_im,faceCascade)
if faces:
n=1
for face in faces:
croppedImage=imgCrop(pil_im, face[0],boxScale=boxScale)
fname,ext=os.path.splitext(img)
fname = os.path.basename(fname)
croppedImage.save(outputimg + '\\' + fname + ' -c' + ext)
n+=1
print 'Cropping:', fname
else:
print 'No faces found:', img
def CropSetup(padding, boxScale):
inputimg = raw_input('Please enter the entire path to the image folder:')
outputimg = raw_input('Please enter the entire path to the output folder:')
# Create output folder if missing
if not os.path.exists(outputimg):
os.makedirs(outputimg)
# Get padding for crop
while (padding < 0):
padding = int(raw_input('Enter crop padding:'))
# Crop images
Crop(inputimg + '\*.png', boxScale, outputimg)
Crop(inputimg + '\*.jpg', boxScale, outputimg)
print 'Option 1: Detect image from Webcam'
print 'Option 2: Crop saved images'
option = int(raw_input('Please enter 1 or 2: '))
def Webcam(webcam, classifier, downScale):
if webcam.isOpened():
rval, frame = webcam.read()
else:
rval = False
while rval:
# detect faces and draw bounding boxes
minisize = (frame.shape[1]/downScale,frame.shape[0]/downScale)
miniframe = cv2.resize(frame, minisize)
faces = classifier.detectMultiScale(miniframe)
for f in faces:
x, y, w, h = [ v*downScale for v in f ]
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,0,255))
cv2.putText(frame, "Press ESC to close.", (5, 25),
cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255,255,255))
cv2.imshow("Face Crop", frame)
# get next frame
rval, frame = webcam.read()
key = cv2.waitKey(10)
if key in [27, ord('Q'), ord('q')]: # exit on ESC
break
if option == 1:
Webcam(webcam, classifier, downScale)
elif option == 2:
CropSetup(padding, boxScale)
else:
print 'Not a valid input'