Related
I have an image that was converted from PDF to PNG. The converted image contains several keywords that I want to extract using Tesseract OCR.
Right now, I have to determine each ROI manually in order to crop it. Since I have more than 5 ROIs to apply, what would be the most efficient way to define them instead of finding the exact locations by trial and error?
Below is the code:
def cropped(self, event):
    """Crop the fixed regions of interest out of "Output.png" for OCR.

    Each region is saved to its own PNG file ("Cropped.png", "Cropped1.png")
    and kept in memory as ``im`` / ``im1`` for the text-extraction step.

    event: not used by this method.

    Raises IOError when "Output.png" cannot be read.
    """
    # NumPy images are indexed [row, column], so each ROI is stored as
    # (first_row, last_row, first_col, last_col, output file).
    regions = (
        (405, 425, 20, 230, "Cropped.png"),    # 1st ROI
        (305, 525, 30, 330, "Cropped1.png"),   # 2nd ROI
    )
    # open the converted image
    image = cv2.imread("Output.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read "Output.png"')
    crops = []
    for first_row, last_row, first_col, last_col, out_name in regions:
        crop_image = image[first_row:last_row, first_col:last_col]
        # save the cropped image
        cv2.imwrite(out_name, crop_image)
        crops.append(crop_image)
    # PNG is lossless, so the in-memory crops can be passed to the OCR engine
    # directly instead of being read back from disk.
    im, im1 = crops
    ## Do the text extraction here
You can use mouse events to select multiple ROIs interactively and then crop the image based on the selected locations.
#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os
drawing = False  # True while the left mouse button is held down
ix, iy = -1, -1  # anchor corner (where the button was pressed)
refPt = []       # corner points of the ROI currently being drawn
img = ""         # image shown in the window; replaced once the file is loaded
clone = ""       # untouched copy of the loaded image
ROIRegion = []   # one [corner, corner] list per selected ROI


# mouse callback function
def draw_rectangle(event, x, y, flags, param):
    """Mouse callback that lets the user drag out rectangular ROIs.

    Pressing the left button starts a new ROI and registers it in
    ``ROIRegion``; moving the mouse previews the rectangle on a fresh copy of
    ``clone``; releasing the button stores the final corners.

    event, x, y: event code and cursor position supplied by OpenCV.
    flags, param: unused.
    """
    global ix, iy, drawing, img, clone, refPt, ROIRegion
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        refPt = [(x, y)]
        # The same list object is completed on button release.
        ROIRegion.append(refPt)
    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            # Redraw from the clean copy so only the current preview shows.
            # (The former `a != x | b != y` check parsed as a chained
            # comparison on `x | b` and painted the selection solid black.)
            img = clone.copy()
            cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 3)
    elif event == cv2.EVENT_LBUTTONUP:
        if not drawing:
            # Release without a matching press: nothing to finish.
            return
        drawing = False
        # Store the corners as (top-left, bottom-right) whatever the drag
        # direction, so slicing with them never yields an empty crop.
        # Slice assignment keeps the list object held by ROIRegion.
        refPt[:] = [(min(ix, x), min(iy, y)), (max(ix, x), max(iy, y))]
        img = clone.copy()
        cv2.rectangle(img, refPt[0], refPt[1], (0, 255, 0), 2)
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())
# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
if img is None:
    # cv2.imread returns None when the file is missing or unreadable.
    raise SystemExit("Could not read image: " + args["image"])
clone = img.copy()
cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_rectangle)
# Event loop: "r" discards the most recent ROI, ESC ends the selection.
while True:
    cv2.imshow('image', img)
    k = cv2.waitKey(1) & 0xFF
    if k == ord("r"):
        if ROIRegion:  # nothing to undo before the first selection
            del ROIRegion[-1]
        img = clone.copy()
    elif k == 27:
        break
#Do your cropping here
rois = []  # one RGB crop per completed ROI
for region in ROIRegion:
    if len(region) < 2:
        # Button pressed but never released: no second corner recorded.
        continue
    (x0, y0), (x1, y1) = region[0], region[1]
    # Order the corners and clamp at 0 so a reversed drag or a drag that
    # left the window does not produce an empty or wrapped-around slice.
    left, right = max(min(x0, x1), 0), max(x0, x1, 0)
    top, bottom = max(min(y0, y1), 0), max(y0, y1, 0)
    if left == right or top == bottom:
        continue  # zero-area selection (a plain click)
    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
    roi = clone[top:bottom, left:right]
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    rois.append(roi)
Here is one way in Python/OpenCV.
Read the input
Threshold on box outline color
Apply morphology to ensure closed
Get the external contours
Loop over each contour, get its bounding box, crop the region in the input and write the output
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('text_boxes.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise SystemExit("Could not read 'text_boxes.jpg'")
# threshold on box outline color
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)
# apply morphology to ensure regions are filled and remove extraneous noise
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the contour list in either case.
contours = contours[0] if len(contours) == 2 else contours[1]
# crop the bounding box of every contour and write it to a numbered file
for i, cntr in enumerate(contours, start=1):
    x,y,w,h = cv2.boundingRect(cntr)
    crop = img[y:y+h, x:x+w]
    cv2.imwrite("text_boxes_crop_{0}.png".format(i), crop)
# save threshold
cv2.imwrite("text_boxes_thresh.png",thresh)
# show thresh and result
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Cropped Images:
I am developing a facial recognition system, and I have selected the LBPH algorithm for the task. I have collected sample images of the user and trained the model on them. The problem is that, while recognizing a face, predict() of the LBPH recognizer always returns the same value for the label but a different value for the confidence. Even if the face is unknown, it returns 1.
Technologies I have been using : Python 3.7.4, OpenCV 4.1.2
Code to collect sample image
import cv2
import numpy as np
import os
import requests
import time
from PIL import Image
class CollectFaceWebCam():
    """Collect sharp, single-face grayscale crops from the webcam as samples.

    Samples are written to ``sampleImgFolder/<sid>/<sid>_<counter>.jpg``.
    """

    def __init__(self, sid):
        """Open webcam 0, load the face cascade and create the sample folder.

        sid: student id string; used as folder name and file-name prefix.
        """
        self.studentId = sid
        #capture webcam
        self.LiveWebCamera = cv2.VideoCapture(0)
        #pre-trained dataset (haar-cascade classifier)
        self.faceDataSet = cv2.CascadeClassifier('resources/haarcascade_frontalface_default.xml')
        #sample image capture counter
        self.imgCounter = 0
        self.directoryName = 'sampleImgFolder'
        #check path (makedirs also creates the parent folder when missing)
        os.makedirs(os.path.join(self.directoryName, self.studentId), exist_ok=True)

    def gen(self):
        """Show the live feed and save face crops until ESC or 110 samples.

        A crop is saved only when exactly one face is detected and the
        variance of its Laplacian exceeds 60 (blurry frames are ignored).
        The camera and all windows are released when the loop ends, even if
        an error occurs.
        """
        try:
            while True:
                condition, frame = self.LiveWebCamera.read() #capture frame
                if not condition:
                    # No frame delivered (camera missing or disconnected).
                    break
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)#conversion to gray scale
                #face detection
                faces = self.faceDataSet.detectMultiScale(
                    img,
                    scaleFactor=1.3,
                    minNeighbors=5,
                    minSize=(100, 100),
                    flags=cv2.CASCADE_SCALE_IMAGE
                )
                for (x, y, w, h) in faces:
                    #draw rectangle around every detected face
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 33), 1)
                #face must be single in frame
                if len(faces) == 1:
                    x, y, w, h = faces[0]
                    detectedImg = img[y:y + h, x:x + w]
                    #checking blurness of image
                    blurValue = cv2.Laplacian(detectedImg, cv2.CV_64F).var()
                    #ignoring the blury images
                    if blurValue > 60:
                        #saving the detected face; os.path.join keeps the
                        #path valid on every platform
                        filename = os.path.join(
                            os.getcwd(), self.directoryName, self.studentId,
                            '{}_{}'.format(self.studentId, self.imgCounter) + '.jpg')
                        cv2.imwrite(filename, detectedImg)
                        self.imgCounter += 1
                elif len(faces) > 1:
                    # Warn only when several faces are visible, not when
                    # no face was found at all.
                    cv2.putText(frame,"Multiple Face not allowed", (50,150), cv2.FONT_HERSHEY_SIMPLEX, 1, (237, 20, 5), thickness=2)
                cv2.putText(frame,"Collecting Sample", (50,100), cv2.FONT_HERSHEY_SIMPLEX, 1, (250, 250, 250), thickness=3)
                cv2.putText(frame,"Image Count " + str(self.imgCounter), (50,200), cv2.FONT_HERSHEY_SIMPLEX, 2, (237, 20, 5), thickness=2)
                cv2.imshow('Collecting Sample', frame) # display frames
                k = cv2.waitKey(100) & 0xff # capture when user press 'esc'
                if k == 27 or self.imgCounter == 110:
                    break
        finally:
            self.LiveWebCamera.release() #stop video capture
            cv2.destroyAllWindows() #close all windows
class CleanSampleImages():
    """Delete collected sample images that do not show one usable face."""

    def __init__(self):
        """Load the face and eye cascades and set the folder to clean."""
        self.faceDataset = cv2.CascadeClassifier('resources/haarcascade_frontalface_default.xml')
        self.eyeDataset = cv2.CascadeClassifier('resources/haarcascade_eye.xml')
        self.targetFolder = 'sampleImgFolder'

    def checkFace(self):
        """Remove every sample that fails the face or eye check.

        An image is deleted when the face cascade does not find exactly one
        face, or when the eye cascade finds no eye or more than two eyes.
        Paths are built explicitly, so the working directory is not changed.
        """
        for directory in os.listdir(self.targetFolder):
            dirPath = os.path.join(self.targetFolder, directory)
            if not os.path.isdir(dirPath):
                continue
            for fileName in os.listdir(dirPath):
                imagePath = os.path.join(dirPath, fileName)
                # Close the file before a possible os.remove (an open handle
                # blocks deletion on Windows).
                with Image.open(imagePath) as imagePil:
                    #conversion of normal image to numpy array
                    imageNumpy = np.array(imagePil.convert('L'))
                #detect face
                faces = self.faceDataset.detectMultiScale(imageNumpy)
                #deleting image file if exactly one face is not found
                if len(faces) != 1:
                    os.remove(imagePath)
                    # continue (not break): the remaining files in this
                    # folder still have to be checked.
                    continue
                eyes = self.eyeDataset.detectMultiScale(imageNumpy)
                #deleting image file if eye count is 0 or more than 2
                if not 0 < len(eyes) <= 2:
                    os.remove(imagePath)
#id must be in X-X-ID eg. a-b-342
t = CollectFaceWebCam('sa-t-1')
t.gen()
# Instantiate the cleaner (the class itself was assigned before, and the
# call used an undefined name `c`).
clean = CleanSampleImages()
clean.checkFace()
The code above consists of two classes, CollectFaceWebCam and CleanSampleImages. CollectFaceWebCam collects the sample images, and CleanSampleImages cleans the collected data: if an image does not contain a face, the file is deleted.
Code to Train images
import os
import cv2
import numpy as np
from PIL import Image
class Trainer():
    """Train an LBPH face recognizer from the collected sample images."""

    def __init__(self):
        """Create the recognizer and face cascade and set the sample folder."""
        self.recognizer = cv2.face.LBPHFaceRecognizer_create()
        self.targetImagesDirectory="sampleImgFolder"
        self.dataset = cv2.CascadeClassifier('resources/haarcascade_frontalface_default.xml')

    def getImgwithId(self):
        """Load the samples and the numeric label encoded in each file name.

        File names are expected to look like ``ta-s-ID_Filename.jpg``; the
        third dash-separated field before the first underscore is the label.
        Files whose name does not yield a numeric label are skipped.

        Returns (images, labels): a list of grayscale uint8 arrays and an
        int NumPy array; an image is added once per face detected in it.
        """
        sampleImage, sampleImageId = [], []
        root = os.path.join(os.getcwd(), self.targetImagesDirectory)
        if os.path.exists(root):
            print('current path is ' + root)
            for folder in os.listdir(root):
                imgPath = os.path.join(root, folder)
                if not os.path.isdir(imgPath):
                    continue
                for file in os.listdir(imgPath):
                    #retrieving id from filename (filename format : ta-s-ID_Filename.jpg)
                    parts = file.split('_')[0].split('-')
                    if len(parts) < 3 or not parts[2].isdigit():
                        print('skipping file with unexpected name: ' + file)
                        continue
                    sampleId = parts[2]
                    with Image.open(os.path.join(imgPath, file)) as imagePil:
                        #conversion to numpy array
                        imageNp = np.array(imagePil.convert('L'), 'uint8')
                    faces = self.dataset.detectMultiScale(imageNp)
                    for _ in faces:
                        sampleImage.append(imageNp)
                        sampleImageId.append(sampleId)
        return sampleImage, np.array(sampleImageId, dtype = int)

    def train(self, data, label):
        """Train the recognizer and save the model to 'date.yml'.

        data: list of grayscale images; label: matching integer labels.
        ``self.msg`` records the outcome; on failure the error is printed
        instead of being silently discarded.
        """
        try:
            self.recognizer.train(data, label)
            print('writting')
            self.recognizer.write('date.yml')
            print('writing finished')
        except Exception as exc:
            self.msg = 'Core: Training Error'
            print('except')
            print(exc)
        else:
            self.msg = 'Training Successful'
# Load the labelled samples from disk, then fit and persist the model.
tr = Trainer()
trainingSet = tr.getImgwithId()
sampleFaces, sampleFaceId = trainingSet
tr.train(sampleFaces, sampleFaceId)
Code to recognize face
import os
import cv2
import numpy as np
from PIL import Image
class Recognizer():
    """Recognize faces from the webcam with the LBPH model in 'date.yml'."""

    def __init__(self, maxDistance=None):
        """Load the trained model and the face cascade, and open webcam 0.

        maxDistance: optional upper bound on the value ``predict`` returns
        next to the label (for LBPH a distance, lower meaning a closer
        match). When set, faces above the bound are recorded as -1
        (unknown). The default None records every predicted label.
        """
        self.recognizer = cv2.face.LBPHFaceRecognizer_create()
        self.recognizer.read('date.yml')
        self.targetImagesDirectory="sampleImgFolder"
        self.dataset = cv2.CascadeClassifier('resources/haarcascade_frontalface_default.xml')
        self.captureVideo = cv2.VideoCapture(0)
        # Read the constant only; the former chained assignment
        # `... = cv2.FONT_HERSHEY_SIMPLEX = 2` overwrote it inside cv2.
        self.font = cv2.FONT_HERSHEY_SIMPLEX # Font
        self.maxDistance = maxDistance
        self.predictedUser = []

    def gen(self):
        """Show the live feed and record a label for every detected face.

        Labels are appended to ``self.predictedUser``. The loop ends on ESC
        or when no frame can be read; the camera and windows are always
        released.
        """
        try:
            while True:
                condition, frame = self.captureVideo.read() #capture frame
                if not condition:
                    break
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)#conversion to gray scale
                #face detection
                faces = self.dataset.detectMultiScale(
                    img,
                    scaleFactor=1.3,
                    minNeighbors=5,
                    minSize=(100, 100),
                    flags=cv2.CASCADE_SCALE_IMAGE
                )
                for (x, y, w, h) in faces:
                    #draw rectangle
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 33), 1)
                    predictUser, confidence = self.recognizer.predict(img[y:y+h,x:x+w])
                    # predict always returns the nearest trained label, so
                    # unknown faces need an explicit distance cut-off.
                    if self.maxDistance is not None and confidence > self.maxDistance:
                        predictUser = -1
                    self.predictedUser.append(predictUser)
                cv2.imshow('test', frame)
                k = cv2.waitKey(100) & 0xff # capture when user press 'esc'
                if k == 27:
                    break
        finally:
            self.captureVideo.release()
            cv2.destroyAllWindows()
# Run the recognizer until ESC, then show every label it predicted.
r = Recognizer()
r.gen()
predictedLabels = r.predictedUser
print(predictedLabels)
"predictUser, confidence = self.recognizer.predict(img[y:y+h,x:x+w])" line of code in Recognizer class always return same value for label. The output of code to recognize face is attached below:
I would love to know why this happens and where the problem is, as my own skills and research have not led me to identify it.
It might be because of the data-collection process. I see you are using a cascade classifier multiple times; you could limit that. While checking for a face on the webcam, you could run the classifiers at that point and store only the extracted/cropped faces. Also, during prediction, use the confidence as a threshold to limit false predictions.
I have successfully detected faces and saved the cropped faces in a folder. I then display the total count of detected faces on the video using cv2.putText.
Now I want to show each cropped face on the video, just as I am showing the total count.
The code is as follows:
import numpy as np
import cv2
import time
from time import strftime
num = 0      # running index for the saved crop files
total = 0    # number of faces counted so far
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
previous_millis = 0
interval = 2000  # milliseconds between two detection passes
font = cv2.FONT_HERSHEY_SIMPLEX
while True:
    ret, img = cap.read()
    if not ret:
        # No frame delivered (camera missing or disconnected).
        break
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.putText(img,'Person Count Algorithm',(10,50), font, 1,(255,0,0),2,cv2.LINE_AA)
    # Wall-clock time in milliseconds (a factor of 5000 made the
    # 2000 "ms" interval elapse every 0.4 s).
    millis = int(round(time.time() * 1000))
    if millis - previous_millis >= interval:
        previous_millis = millis
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x,y,w,h) in faces:
            # Copy the face before drawing so the saved crop does not
            # contain the rectangle outline.
            roi_color = img[y:y+h, x:x+w].copy()
            cv2.imwrite('crop_faces/crop'+str(num)+'.jpg',roi_color)
            num = num + 1
            cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
            cv2.putText(img, 'person', (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255,0,0), 2)
        print ("FOUND", len(faces), 'PERSON')
        total += len(faces)
        print ('Total Count:', (total))
    img = cv2.circle(img, (470, 63), 63, (255,0,0), 3)
    cv2.putText(img, 'Total Count:', (420,40), font, 0.5,(255,0,0),1,cv2.LINE_AA)
    cv2.putText(img, str(total), (436,100), font, 2,(255,0,0),2,cv2.LINE_AA)
    cv2.imshow('image',img)
    k = cv2.waitKey(1) & 0xff
    if k == 27:
        break
cap.release()
cv2.destroyAllWindows()
Make an empty list of faces. faces = []
Make a copy of the cropped face. face = numpy.copy(img[y:y+h,x:x+w])
Resize face to a fixed thumbnail size. Let's call the fixed size face_width, face_height.
Append face in the faces list. faces.append(face). Make sure the cropped face does not already exist in the list.
Select a point on the image, i.e px=0, py=0. Copy all the faces from the list to the image starting from px,py. img[py:py+face_height,px:px+face_width] = faces[0] ...
Hi everyone, I'm working on a face recognition program with OpenCV (Python). I have two files: one captures a new user's face and stores it under the name supplied by the user, and the second recognizes the user using the webcam. My concern is that the user is recognized correctly, but the name is only shown and not saved. How can I save the name of the recognized person so that it can be passed on or used in further operations?
#__author__ = 'ADMIN'
import cv2, sys, numpy, os
size = 4  # detection runs on a frame shrunk by this factor
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'att_faces'
fn_name = "aditya"
path = os.path.join(fn_dir, fn_name)
if not os.path.isdir(path):
    # makedirs also creates fn_dir when it does not exist yet.
    os.makedirs(path)
(im_width, im_height) = (112, 92)
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)
# The program loops until it has 20 images of the face.
count = 0
while count < 20:
    (rval, im) = webcam.read()
    if not rval:
        # No frame delivered (camera missing or disconnected).
        break
    im = cv2.flip(im, 1, 0)
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Floor division keeps the target size integral on Python 2 and 3.
    mini = cv2.resize(gray, (gray.shape[1] // size, gray.shape[0] // size))
    faces = haar_cascade.detectMultiScale(mini)
    faces = sorted(faces, key=lambda x: x[3])
    if faces:
        face_i = faces[0]
        # Scale the detection back to full-frame coordinates.
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))
        # Next free number: highest numeric file name in the folder plus 1.
        # split('.') also copes with names that contain no dot.
        pin = max([int(n.split('.')[0]) for n in os.listdir(path)
                   if n[0] != '.'] + [0]) + 1
        cv2.imwrite('%s/%s.png' % (path, pin), face_resize)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(im, fn_name, (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN,
                    1,(0, 255, 0))
        count += 1
    cv2.imshow('OpenCV', im)
    key = cv2.waitKey(10)
    if key == 27:
        break
webcam.release()
cv2.destroyAllWindows()
Code for face recognition from the dataset
__author__ = 'ADMIN'
import cv2, sys, numpy, os
size = 4  # detection runs on a frame shrunk by this factor
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'att_faces'
# Part 1: Create fisherRecognizer
print('Training...')
# Create a list of images and a list of corresponding names
(images, lables, names, label_id) = ([], [], {}, 0)
# Only the direct sub-folders of fn_dir are subjects; sorting makes the
# label numbering reproducible between runs.
for subdir in sorted(os.listdir(fn_dir)):
    subjectpath = os.path.join(fn_dir, subdir)
    if not os.path.isdir(subjectpath):
        continue
    names[label_id] = subdir
    for filename in os.listdir(subjectpath):
        image = cv2.imread(os.path.join(subjectpath, filename), 0)
        if image is None:
            continue  # not a readable image
        images.append(image)
        lables.append(label_id)
    label_id += 1
(im_width, im_height) = (112, 92)
# Create a Numpy array from the two lists above
(images, lables) = [numpy.array(lis) for lis in [images, lables]]
# OpenCV trains a model from the images
# NOTE: this is the OpenCV 2.4 factory name; OpenCV 3+ moved the recognizers
# to the contrib `cv2.face` module (e.g. cv2.face.FisherFaceRecognizer_create()).
model = cv2.createFisherFaceRecognizer()
model.train(images, lables)
# Part 2: Use fisherRecognizer on camera stream
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)
recognized_names = set()  # every name recognised so far, for later use
while True:
    (rval, frame) = webcam.read()
    if not rval:
        # No frame delivered (camera missing or disconnected).
        break
    frame=cv2.flip(frame,1,0)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Floor division keeps the target size integral on Python 2 and 3.
    mini = cv2.resize(gray, (gray.shape[1] // size, gray.shape[0] // size))
    faces = haar_cascade.detectMultiScale(mini)
    for face_i in faces:
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))
        # Try to recognize the face
        prediction = model.predict(face_resize)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
        # Keep the recognised name in a variable so it can be stored or
        # passed on, not only drawn on the frame.
        recognized_name = names[prediction[0]]
        recognized_names.add(recognized_name)
        # Write the name of recognized face
        cv2.putText(frame,
                    '%s - %.0f' % (recognized_name, prediction[1]),
                    (x-10, y-10), cv2.FONT_HERSHEY_PLAIN,1,(0, 255, 0))
    cv2.imshow('OpenCV', frame)
    key = cv2.waitKey(10)
    if key == 27:
        break
webcam.release()
cv2.destroyAllWindows()
This is my code, in which I am not using any SQL server.
I encode the images from a folder, and each recognized face is shown with the name of the saved image file. For example, if the image is saved as abc.jpg, the face is detected during live streaming and labelled abc.jpg.
here is my code :
from PIL import Image
import face_recognition
import cv2
import os
# Get a reference to webcam #0 (the default one)
video_capture = cv2.VideoCapture(0)
known_face_encodings=[]
known_face_names = []
user_appeared = []
root = "/home/erp-next/open cv/dataset/"
# Encode the first face found in every .jpg/.png file of the dataset folder.
for filename in os.listdir(root):
    # endswith needs a tuple to test several suffixes; the expression
    # ('.jpg' or '.png') evaluates to just '.jpg'.
    if not filename.endswith(('.jpg', '.png')):
        continue
    print(filename)
    path = os.path.join(root, filename)
    try:
        filter_image = face_recognition.load_image_file(path)
        filter_face_encoding = face_recognition.face_encodings(filter_image)
        # IndexError here means no face was found in the image.
        known_face_encodings.append(filter_face_encoding[0])
        known_face_names.append(filename)
    except Exception as exc:
        print("An exception occurred : " + filename )
        print(exc)
#print(known_face_encodings)
print(known_face_names)
# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
def face():
    """Recognize known faces in the webcam stream until ESC or 'q'.

    Every other frame is analysed; detected faces are boxed and labelled
    with the matching file name from ``known_face_names`` or "Unknown".
    The webcam and all windows are released when the loop ends.
    """
    # State kept across frames: frames that skip detection reuse the
    # results of the previous one.
    process_this_frame = True
    face_locations = []
    face_names = []
    try:
        while True:
            # Grab a single frame of video
            ret, frame = video_capture.read()
            if not ret:
                break
            # Resize frame of video to 1/4 size for faster face recognition processing
            small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
            # Convert BGR (OpenCV) to RGB (face_recognition). cvtColor returns
            # a contiguous array, unlike the negative-stride view [:, :, ::-1].
            rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
            # Only process every other frame of video to save time
            if process_this_frame:
                # Find all the faces and face encodings in the current frame of video
                face_locations = face_recognition.face_locations(rgb_small_frame)
                face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
                face_names = []
                for face_encoding in face_encodings:
                    # See if the face is a match for the known face(s)
                    matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
                    name = "Unknown"
                    # If a match was found in known_face_encodings, just use the first one.
                    if True in matches:
                        first_match_index = matches.index(True)
                        name = known_face_names[first_match_index]
                        print(name)
                    face_names.append(name)
            # Toggle here; resetting the flag at the top of the loop meant
            # no frame was ever skipped.
            process_this_frame = not process_this_frame
            # Display the results
            for (top, right, bottom, left), name in zip(face_locations, face_names):
                # Scale back up face locations since the frame we detected in was scaled to 1/4 size
                top *= 4
                right *= 4
                bottom *= 4
                left *= 4
                # Draw a box around the face
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
                # Draw a label with a name below the face
                cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
                font = cv2.FONT_HERSHEY_DUPLEX
                cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
            # Display the resulting image
            cv2.imshow('Video', frame)
            # Poll the keyboard once per frame so no key press is lost
            # between two separate waitKey calls.
            k = cv2.waitKey(1) & 0xFF
            if k == 27:
                # ESC pressed
                print("Escape hit, closing...")
                break
            # Hit 'q' on the keyboard to quit!
            if k == ord('q'):
                break
    finally:
        # Release handle to the webcam
        video_capture.release()
        cv2.destroyAllWindows()


face()
I am also using the face_recognition library to encode and detect faces.
Thanks.
My original Python script was created to work on images that were already saved. I now want it to capture the image from the webcam and crop it. I have a working webcam section and a working crop section, but I am not able to combine them and make them work together. I have included the combined code. Currently it still crops a saved image, and the GUI for the webcam appears for a second but does not display any content (gray screen). Can anyone help me?
import cv
import cv2
import numpy
import Image
import glob
import os
# Static
faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
padding = -1
inputimg = raw_input('Please enter the entire path to the image folder:')
outputimg = raw_input('Please enter the entire path to the output folder:')
if not os.path.exists(outputimg):
    os.makedirs(outputimg)
# Ask until a non-negative integer is entered; non-numeric input is asked
# again instead of ending the script with a ValueError.
while (padding < 0):
    try:
        padding = int(raw_input('Enter crop padding:'))
    except ValueError:
        padding = -1
capture = cv2.VideoCapture(0)
cv2.namedWindow("Face Crop")
# VideoCapture.read() returns a (success, frame) pair; unpack it so `frame`
# holds the image itself (or None when no frame is available).
frame = None
if capture.isOpened():
    rval, frame = capture.read()
def _previewWebcam(capture, classifier, downscale=4):
    """Show the live feed with detected faces boxed until ESC or Q is pressed.

    This is the webcam code that used to sit inside DetectFace, where it
    could not run (`break` outside a loop, undefined `classifier`, and the
    (success, frame) tuple used as an image). Nothing calls it
    automatically; invoke it from the script's top level.

    capture: a cv2.VideoCapture.
    classifier: object exposing detectMultiScale, e.g. cv2.CascadeClassifier.
    downscale: integer factor by which frames are shrunk before detection.
    """
    rval, frame = capture.read()
    while rval:
        # Floor division keeps the target size integral.
        minisize = (frame.shape[1] // downscale, frame.shape[0] // downscale)
        miniframe = cv2.resize(frame, minisize)
        for f in classifier.detectMultiScale(miniframe):
            # Scale the detection back to full-frame coordinates.
            x, y, w, h = [v * downscale for v in f]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255))
        cv2.putText(frame, "Press ESC to close.", (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255))
        cv2.imshow("preview", frame)
        key = cv2.waitKey(20)
        if key in [27, ord('Q'), ord('q')]:  # exit on ESC
            break
        # get next frame
        rval, frame = capture.read()


def DetectFace(image, faceCascade, returnImage=False):
    """Detect faces in a legacy ``cv`` grayscale image.

    image: single-channel cv image; its histogram is equalized in place.
    faceCascade: Haar cascade loaded with cv.Load.
    returnImage: when True, draw a rectangle around every face and return
        the image; otherwise return the detections, which are unpacked here
        as ((x, y, w, h), n) entries.
    """
    #variables
    min_size = (50,50)
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0
    # Equalize the histogram
    cv.EqualizeHist(image, image)
    # Detect the faces
    faces = cv.HaarDetectObjects(image, faceCascade, cv.CreateMemStorage(0),
                                 haar_scale, min_neighbors, haar_flags, min_size)
    # If faces are found
    if faces and returnImage:
        for ((x, y, w, h), n) in faces:
            # Convert bounding box to two CvPoints
            pt1 = (int(x), int(y))
            pt2 = (int(x + w), int(y + h))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)
    if returnImage:
        return image
    else:
        return faces
def pil2cvGrey(pil_im):
    """Return a single-channel 8-bit legacy ``cv`` image built from a PIL image."""
    grey = pil_im.convert('L')
    header = cv.CreateImageHeader(grey.size, cv.IPL_DEPTH_8U, 1)
    # The third argument is taken from the image width (size[0]).
    row_width = grey.size[0]
    cv.SetData(header, grey.tostring(), row_width)
    return header
def imgCrop(image, cropBox, boxScale=1):
    """Crop a PIL image to a face box enlarged by a padding on every side.

    cropBox is [x(left), y(upper), w(width), h(height)]. The padding per
    axis is the larger of the module-level ``padding`` and the box size
    times (boxScale - 1).
    """
    left, upper, width, height = cropBox[0], cropBox[1], cropBox[2], cropBox[3]
    minimumPad = int(padding)
    xPadding = max(width * (boxScale - 1), minimumPad)
    yPadding = max(height * (boxScale - 1), minimumPad)
    # PIL expects [left, upper, right, lower]
    PIL_box = [
        left - xPadding,
        upper - yPadding,
        left + width + xPadding,
        upper + height + yPadding,
    ]
    return image.crop(PIL_box)
def Crop(imagePattern,boxScale=1):
    """Crop every detected face out of the images matching a glob pattern.

    imagePattern: glob pattern of the images to process.
    boxScale: passed on to imgCrop.

    Crops are saved in the module-level ``outputimg`` folder. The first face
    of an image keeps the name '<name> -c<ext>'; further faces get
    '<name> -c2<ext>', '<name> -c3<ext>', ... so they no longer overwrite
    one another.
    """
    for img in glob.glob(imagePattern):
        pil_im=Image.open(img)
        cv_im=pil2cvGrey(pil_im)
        faces=DetectFace(cv_im,faceCascade)
        if not faces:
            print('No faces found: %s' % img)
            continue
        fname,ext=os.path.splitext(img)
        fname = os.path.basename(fname)
        for n, face in enumerate(faces, 1):
            croppedImage=imgCrop(pil_im, face[0],boxScale=boxScale)
            suffix = ' -c' if n == 1 else ' -c%d' % n
            # os.path.join instead of a hard-coded backslash separator.
            croppedImage.save(os.path.join(outputimg, fname + suffix + ext))
            print('Cropping: %s' % fname)
# Crop all images in a folder (os.path.join builds the pattern with the
# separator of the current platform instead of a hard-coded backslash)
Crop(os.path.join(inputimg, '*.png'), boxScale=1)
Crop(os.path.join(inputimg, '*.jpg'), boxScale=1)
Also, if anyone has any code improvements please let me know as I am new to Python.
I was able to fix this by reworking the logic and flow of code. Updated code below and on github, https://github.com/aDroidman/EyeonYou
import cv
import cv2
import numpy
import Image
import glob
import os
# Cascade file shared by the legacy cv detector and the cv2 classifier
HaarXML = "haarcascade_frontalface_alt.xml"
# Static
faceCascade = cv.Load(HaarXML)
padding = -1
boxScale = 1
# Needed for webcam CV2 section
classifier = cv2.CascadeClassifier(HaarXML)
downScale = 4
webcam = cv2.VideoCapture(0)
def DetectFace(image, faceCascade, returnImage=False):
    """Run the Haar face detector on a legacy ``cv`` image.

    The image histogram is equalized in place first. With returnImage the
    image is returned with a rectangle drawn around each face; otherwise
    the detections themselves are returned.
    """
    # Detector settings
    min_size = (50,50)
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0
    cv.EqualizeHist(image, image)
    faces = cv.HaarDetectObjects(
        image, faceCascade, cv.CreateMemStorage(0),
        haar_scale, min_neighbors, haar_flags, min_size)
    if not returnImage:
        return faces
    # Outline every detection; an empty result draws nothing.
    for ((x, y, w, h), n) in faces or ():
        top_left = (int(x), int(y))
        bottom_right = (int(x + w), int(y + h))
        cv.Rectangle(image, top_left, bottom_right, cv.RGB(255, 0, 0), 5, 8, 0)
    return image
def pil2cvGrey(pil_im):
    """Convert a PIL image to a one-channel, 8-bit legacy ``cv`` image."""
    grey_pil = pil_im.convert('L')
    dimensions = grey_pil.size
    cv_grey = cv.CreateImageHeader(dimensions, cv.IPL_DEPTH_8U, 1)
    # Hand the raw pixel bytes to the header; the last argument is the width.
    cv.SetData(cv_grey, grey_pil.tostring(), dimensions[0])
    return cv_grey
def imgCrop(image, cropBox, boxScale=1):
    """Return the part of a PIL image covered by a padded face box.

    cropBox holds [x(left), y(upper), w(width), h(height)]. Each axis is
    padded by the larger of the module-level ``padding`` and the box size
    multiplied by (boxScale - 1).
    """
    x, y, w, h = cropBox[0], cropBox[1], cropBox[2], cropBox[3]
    growth = boxScale - 1
    floor = int(padding)
    xPadding = max(w * growth, floor)
    yPadding = max(h * growth, floor)
    # PIL box order: left, upper, right, lower
    PIL_box = [x - xPadding, y - yPadding,
               x + w + xPadding, y + h + yPadding]
    return image.crop(PIL_box)
def Crop(imagePattern,boxScale,outputimg):
    """Crop every detected face out of the images matching a glob pattern.

    imagePattern: glob pattern of the images to process.
    boxScale: passed on to imgCrop.
    outputimg: folder the crops are written to.

    The first face of an image keeps the name '<name> -c<ext>'; further
    faces get '<name> -c2<ext>', '<name> -c3<ext>', ... so they no longer
    overwrite one another.
    """
    for img in glob.glob(imagePattern):
        pil_im=Image.open(img)
        cv_im=pil2cvGrey(pil_im)
        faces=DetectFace(cv_im,faceCascade)
        if not faces:
            print('No faces found: %s' % img)
            continue
        fname,ext=os.path.splitext(img)
        fname = os.path.basename(fname)
        for n, face in enumerate(faces, 1):
            croppedImage=imgCrop(pil_im, face[0],boxScale=boxScale)
            suffix = ' -c' if n == 1 else ' -c%d' % n
            # os.path.join instead of a hard-coded backslash separator.
            croppedImage.save(os.path.join(outputimg, fname + suffix + ext))
            print('Cropping: %s' % fname)
def CropSetup(padding, boxScale):
    """Ask for the folders and padding, then crop all PNG and JPG images.

    padding: initial padding; the user is asked while it is negative.
    boxScale: passed on to Crop.
    """
    inputimg = raw_input('Please enter the entire path to the image folder:')
    outputimg = raw_input('Please enter the entire path to the output folder:')
    # Create output folder if missing
    if not os.path.exists(outputimg):
        os.makedirs(outputimg)
    # Get padding for crop; non-numeric input is asked again
    while (padding < 0):
        try:
            padding = int(raw_input('Enter crop padding:'))
        except ValueError:
            padding = -1
    # imgCrop reads the module-level `padding`, not this parameter, so the
    # chosen value has to be published there to take effect.
    globals()['padding'] = padding
    # Crop images
    Crop(os.path.join(inputimg, '*.png'), boxScale, outputimg)
    Crop(os.path.join(inputimg, '*.jpg'), boxScale, outputimg)
# Menu: the chosen option decides which mode is started further below.
print('Option 1: Detect image from Webcam')
print('Option 2: Crop saved images')
try:
    option = int(raw_input('Please enter 1 or 2: '))
except ValueError:
    # Non-numeric input: leave an option that matches neither mode so the
    # invalid-input branch reports it instead of a traceback.
    option = None
def Webcam(webcam, classifier, downScale):
    """Show the live feed with detected faces boxed until ESC or Q is pressed.

    webcam: cv2.VideoCapture; nothing is shown when it is not opened.
    classifier: object exposing detectMultiScale (a cv2.CascadeClassifier
        in this script).
    downScale: integer factor by which frames are shrunk before detection.
    """
    if webcam.isOpened():
        rval, frame = webcam.read()
    else:
        rval = False
    while rval:
        # detect faces and draw bounding boxes
        # Floor division keeps the target size integral on Python 2 and 3.
        minisize = (frame.shape[1] // downScale, frame.shape[0] // downScale)
        miniframe = cv2.resize(frame, minisize)
        faces = classifier.detectMultiScale(miniframe)
        for f in faces:
            # Scale the detection back to full-frame coordinates.
            x, y, w, h = [ v*downScale for v in f ]
            cv2.rectangle(frame, (x,y), (x+w,y+h), (0,0,255))
        cv2.putText(frame, "Press ESC to close.", (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255,255,255))
        cv2.imshow("Face Crop", frame)
        # get next frame
        rval, frame = webcam.read()
        key = cv2.waitKey(10)
        if key in [27, ord('Q'), ord('q')]: # exit on ESC
            break
    # Close the preview window once the loop has ended.
    cv2.destroyAllWindows()
# Start the mode the user picked; anything but 1 or 2 is rejected.
if option not in (1, 2):
    print('Not a valid input')
elif option == 1:
    Webcam(webcam, classifier, downScale)
else:
    CropSetup(padding, boxScale)