I am trying to find an app that can detect faces in my pictures, center each detected face, and crop the picture to 720 x 720 pixels. Editing the hundreds of pictures I plan to do this to by hand is very time-consuming and meticulous.
I have tried doing this using the python opencv approach mentioned here, but I think it is outdated. I've also tried using this, but it gives me an error on my system. I also tried the face detection plugin for GIMP, but it is designed for GIMP 2.6 and I use 2.8 on a regular basis. I also tried doing what was posted at the ultrahigh blog, but it is very outdated (I'm using a Precise derivative of Ubuntu, while the blog post was written back when Hardy was current). I also tried Phatch, but it has no face detection, so some cropped pictures have the face cut right off.
I have tried all of the above and wasted half a day trying to make any of them do what I need.
Do you have any suggestions for accomplishing this with the roughly 800 pictures I have?
My operating system is Linux Mint 13 MATE.
Note: I was going to add two more links, but Stack Exchange prevented me from posting them since I don't have much reputation yet.
I have managed to grab bits of code from various sources and stitch this together. It is still a work in progress. Also, do you have any example images?
'''
Sources:
http://pythonpath.wordpress.com/2012/05/08/pil-to-opencv-image/
http://www.lucaamore.com/?p=638
'''

# Python 2.7.2
# OpenCV 2.4.2
# PIL 1.1.7

import cv
import Image

def DetectFace(image, faceCascade):
    # modified from: http://www.lucaamore.com/?p=638

    min_size = (20, 20)
    image_scale = 1
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0

    # Allocate the temporary images
    smallImage = cv.CreateImage(
        (
            cv.Round(image.width / image_scale),
            cv.Round(image.height / image_scale)
        ), 8, 1)

    # Scale input image for faster processing
    cv.Resize(image, smallImage, cv.CV_INTER_LINEAR)

    # Equalize the histogram
    cv.EqualizeHist(smallImage, smallImage)

    # Detect the faces
    faces = cv.HaarDetectObjects(
        smallImage, faceCascade, cv.CreateMemStorage(0),
        haar_scale, min_neighbors, haar_flags, min_size
    )

    # If faces are found
    if faces:
        for ((x, y, w, h), n) in faces:
            # the input to cv.HaarDetectObjects was resized, so scale the
            # bounding box of each face and convert it to two CvPoints
            pt1 = (int(x * image_scale), int(y * image_scale))
            pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)

    return image

def pil2cvGrey(pil_im):
    # from: http://pythonpath.wordpress.com/2012/05/08/pil-to-opencv-image/
    pil_im = pil_im.convert('L')
    cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
    cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0])
    return cv_im

def cv2pil(cv_im):
    return Image.fromstring("L", cv.GetSize(cv_im), cv_im.tostring())

pil_im = Image.open('testPics/faces.jpg')
cv_im = pil2cvGrey(pil_im)  # fixed: the original called pil2cv, which is not defined
# the haarcascade file tells OpenCV what to look for.
faceCascade = cv.Load('C:/Python27/Lib/site-packages/opencv/haarcascade_frontalface_default.xml')
face = DetectFace(cv_im, faceCascade)
img = cv2pil(face)
img.show()
Testing on the first page of Google (Googled "faces"):
Update
This code should do exactly what you want. Let me know if you have questions. I tried to include lots of comments in the code:
'''
Sources:
http://opencv.willowgarage.com/documentation/python/cookbook.html
http://www.lucaamore.com/?p=638
'''

# Python 2.7.2
# OpenCV 2.4.2
# PIL 1.1.7

import cv      # OpenCV
import Image   # Image from PIL
import glob
import os

def DetectFace(image, faceCascade, returnImage=False):
    # This function takes a grey scale cv image and finds
    # the patterns defined in the haarcascade function
    # modified from: http://www.lucaamore.com/?p=638

    # variables
    min_size = (20, 20)
    haar_scale = 1.1
    min_neighbors = 3
    haar_flags = 0

    # Equalize the histogram
    cv.EqualizeHist(image, image)

    # Detect the faces
    faces = cv.HaarDetectObjects(
        image, faceCascade, cv.CreateMemStorage(0),
        haar_scale, min_neighbors, haar_flags, min_size
    )

    # If faces are found
    if faces and returnImage:
        for ((x, y, w, h), n) in faces:
            # Convert bounding box to two CvPoints
            pt1 = (int(x), int(y))
            pt2 = (int(x + w), int(y + h))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 5, 8, 0)

    if returnImage:
        return image
    else:
        return faces

def pil2cvGrey(pil_im):
    # Convert a PIL image to a greyscale cv image
    # from: http://pythonpath.wordpress.com/2012/05/08/pil-to-opencv-image/
    pil_im = pil_im.convert('L')
    cv_im = cv.CreateImageHeader(pil_im.size, cv.IPL_DEPTH_8U, 1)
    cv.SetData(cv_im, pil_im.tostring(), pil_im.size[0])
    return cv_im

def cv2pil(cv_im):
    # Convert the cv image to a PIL image
    return Image.fromstring("L", cv.GetSize(cv_im), cv_im.tostring())

def imgCrop(image, cropBox, boxScale=1):
    # Crop a PIL image with the provided box [x(left), y(upper), w(width), h(height)]

    # Calculate scale factors
    xDelta = max(cropBox[2] * (boxScale - 1), 0)
    yDelta = max(cropBox[3] * (boxScale - 1), 0)

    # Convert cv box to PIL box [left, upper, right, lower]
    PIL_box = [cropBox[0] - xDelta, cropBox[1] - yDelta,
               cropBox[0] + cropBox[2] + xDelta, cropBox[1] + cropBox[3] + yDelta]

    return image.crop(PIL_box)

def faceCrop(imagePattern, boxScale=1):
    # Select one of the haarcascade files:
    #   haarcascade_frontalface_alt.xml  <-- Best one?
    #   haarcascade_frontalface_alt2.xml
    #   haarcascade_frontalface_alt_tree.xml
    #   haarcascade_frontalface_default.xml
    #   haarcascade_profileface.xml
    faceCascade = cv.Load('haarcascade_frontalface_alt.xml')

    imgList = glob.glob(imagePattern)
    if len(imgList) <= 0:
        print 'No Images Found'
        return

    for img in imgList:
        pil_im = Image.open(img)
        cv_im = pil2cvGrey(pil_im)
        faces = DetectFace(cv_im, faceCascade)
        if faces:
            n = 1
            for face in faces:
                croppedImage = imgCrop(pil_im, face[0], boxScale=boxScale)
                fname, ext = os.path.splitext(img)
                croppedImage.save(fname + '_crop' + str(n) + ext)
                n += 1
        else:
            print 'No faces found:', img

def test(imageFilePath):
    pil_im = Image.open(imageFilePath)
    cv_im = pil2cvGrey(pil_im)
    # Select one of the haarcascade files:
    #   haarcascade_frontalface_alt.xml  <-- Best one?
    #   haarcascade_frontalface_alt2.xml
    #   haarcascade_frontalface_alt_tree.xml
    #   haarcascade_frontalface_default.xml
    #   haarcascade_profileface.xml
    faceCascade = cv.Load('haarcascade_frontalface_alt.xml')
    face_im = DetectFace(cv_im, faceCascade, returnImage=True)
    img = cv2pil(face_im)
    img.show()
    img.save('test.png')

# Test the algorithm on an image
#test('testPics/faces.jpg')

# Crop all jpegs in a folder. Note: the code uses glob, which follows unix shell rules.
# Use boxScale to scale the cropping area. 1=opencv box, 2=2x the width and height
faceCrop('testPics/*.jpg', boxScale=1)
Using the image above, this code extracts 52 out of the 59 faces, producing cropped files such as:
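Since the question asks for the face centred in a 720 x 720 crop, a small helper along these lines could replace the imgCrop call inside faceCrop. This is only a sketch; the square padding and the fixed output size are assumptions layered on top of the code above:

def cropTo720(image, cropBox):
    # cropBox is the OpenCV box [x(left), y(upper), w(width), h(height)]
    x, y, w, h = cropBox
    cx, cy = x + w / 2, y + h / 2   # centre of the detected face
    half = max(w, h)                # half-side of the square crop, leaving headroom around the face
    # PIL pads with black where the box extends past the image edge
    square = image.crop((cx - half, cy - half, cx + half, cy + half))
    return square.resize((720, 720), Image.ANTIALIAS)

In faceCrop you would then call croppedImage = cropTo720(pil_im, face[0]) instead of imgCrop.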
Another available option is dlib, which is based on machine learning approaches.
import dlib
from PIL import Image
from skimage import io
import matplotlib.pyplot as plt

def detect_faces(image):
    # Create a face detector
    face_detector = dlib.get_frontal_face_detector()

    # Run the detector and get the bounding boxes of the faces on the image.
    detected_faces = face_detector(image, 1)
    face_frames = [(x.left(), x.top(),
                    x.right(), x.bottom()) for x in detected_faces]

    return face_frames

# Load image
img_path = 'test.jpg'
image = io.imread(img_path)

# Detect faces
detected_faces = detect_faces(image)

# Crop faces and plot
for n, face_rect in enumerate(detected_faces):
    face = Image.fromarray(image).crop(face_rect)
    plt.subplot(1, len(detected_faces), n + 1)
    plt.axis('off')
    plt.imshow(face)
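For batch processing, the same loop can write the crops to disk instead of plotting them; a minimal sketch (the file-naming scheme is an assumption):

# Save each detected face to disk instead of displaying it
for n, face_rect in enumerate(detected_faces):
    face = Image.fromarray(image).crop(face_rect)
    face.save('face_{}.jpg'.format(n))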
facedetect OpenCV CLI wrapper written in Python
https://github.com/wavexx/facedetect is a nice Python OpenCV CLI wrapper, and I have added the following example to their README.
Installation:
sudo apt install python3-opencv opencv-data imagemagick
git clone https://gitlab.com/wavexx/facedetect
git -C facedetect checkout 5f9b9121001bce20f7d87537ff506fcc90df48ca
Get my test image:
mkdir -p pictures
wget -O pictures/test.jpg https://raw.githubusercontent.com/cirosantilli/media/master/Ciro_Santilli_with_a_stone_carved_Budai_in_the_Feilai_Feng_caves_near_the_Lingyin_Temple_in_Hangzhou_in_2012.jpg
Usage:
mkdir -p faces
for file in pictures/*.jpg; do
    name=$(basename "$file")
    i=0
    facedetect/facedetect --data-dir /usr/share/opencv4 "$file" |
    while read x y w h; do
        convert "$file" -crop ${w}x${h}+${x}+${y} "faces/${name%.*}_${i}.${name##*.}"
        i=$(($i+1))
    done
done
If you don't pass --data-dir on this system, it fails with:
facedetect: error: cannot load HAAR_FRONTALFACE_ALT2 from /usr/share/opencv/haarcascades/haarcascade_frontalface_alt2.xml
but the file it is looking for is actually located at /usr/share/opencv4/haarcascades on this system.
After running it, the file:
faces/test_0.jpg
contains:
which was extracted from the original image pictures/test.jpg:
Budai was not recognized :-( If he had been, he would appear under faces/test_1.jpg, but that file does not exist.
Let's try another one with the faces partially turned: https://raw.githubusercontent.com/cirosantilli/media/master/Ciro_Santilli_with_his_mother_in_law_during_his_wedding_in_2017.jpg
Hmm, no hits; the faces are not clear enough for the software.
Tested on Ubuntu 20.10, OpenCV 4.2.0.
Autocrop worked out pretty well for me.
It is as easy as autocrop -i pics -o crop -w 400 -H 400; for the question's 720 x 720 target, that would be autocrop -i pics -o crop -w 720 -H 720.
You can get the usage from their readme file:
usage: autocrop [-h] [-i INPUT] [-o OUTPUT] [-r REJECT] [-w WIDTH] [-H HEIGHT]
                [-v] [--no-confirm] [--facePercent FACEPERCENT] [-e EXTENSION]

Automatically crops faces from batches of pictures

optional arguments:
  -h, --help            show this help message and exit
  -i INPUT, --input INPUT
                        Folder where images to crop are located. Default:
                        current working directory
  -o OUTPUT, --output OUTPUT, -p OUTPUT, --path OUTPUT
                        Folder where cropped images will be moved to. Default:
                        current working directory, meaning images are cropped
                        in place.
  -r REJECT, --reject REJECT
                        Folder where images that could not be cropped will be
                        moved to. Default: current working directory, meaning
                        images that are not cropped will be left in place.
  -w WIDTH, --width WIDTH
                        Width of cropped files in px. Default=500
  -H HEIGHT, --height HEIGHT
                        Height of cropped files in px. Default=500
  -v, --version         show program's version number and exit
  --no-confirm          Bypass any confirmation prompts
  --facePercent FACEPERCENT
                        Percentage of face to image height
  -e EXTENSION, --extension EXTENSION
                        Enter the image extension which to save at output
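autocrop also exposes a Python API. A minimal sketch based on its README (the exact Cropper parameters may differ between versions, so treat this as an assumption to verify):

from PIL import Image
from autocrop import Cropper

# Crop one portrait to 720 x 720 around the detected face
cropper = Cropper(width=720, height=720)
cropped = cropper.crop('portrait.jpg')   # numpy array, or None if no face was found
if cropped is not None:
    Image.fromarray(cropped).save('portrait_cropped.jpg')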
This sounds like it might be a better question for one of the more (computer) technology-focused exchanges.
That said, have you looked into something like this jQuery face detection script? I don't know how savvy you are, but it is one option that is OS-independent.
This solution also looks promising, but would require Windows.
The above code works, but here is a more recent implementation using OpenCV. I was unable to run the above with the latest version, so I found something that works (pieced together from various places).
import cv2
import os

def facecrop(image):
    facedata = "haarcascade_frontalface_alt.xml"
    cascade = cv2.CascadeClassifier(facedata)

    img = cv2.imread(image)

    minisize = (img.shape[1], img.shape[0])
    miniframe = cv2.resize(img, minisize)

    faces = cascade.detectMultiScale(miniframe)

    for f in faces:
        x, y, w, h = [v for v in f]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 255))

        sub_face = img[y:y + h, x:x + w]
        fname, ext = os.path.splitext(image)
        cv2.imwrite(fname + "_cropped_" + ext, sub_face)

    return

facecrop("1.jpg")
Just adding to @Israel Abebe's version: if you add a counter before the image extension, the algorithm will save all the faces detected. Attaching the code, same as Israel Abebe's, just adding a counter and accepting the cascade file as an argument. The algorithm works beautifully! Thanks @Israel Abebe for this!
import cv2
import os
import sys

def facecrop(image):
    facedata = sys.argv[1]
    cascade = cv2.CascadeClassifier(facedata)

    img = cv2.imread(image)

    minisize = (img.shape[1], img.shape[0])
    miniframe = cv2.resize(img, minisize)

    faces = cascade.detectMultiScale(miniframe)

    counter = 0
    for f in faces:
        x, y, w, h = [v for v in f]
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 255))

        sub_face = img[y:y + h, x:x + w]
        fname, ext = os.path.splitext(image)
        cv2.imwrite(fname + "_cropped_" + str(counter) + ext, sub_face)
        counter += 1

    return

facecrop("Face_detect_1.jpg")
PS: Adding this as an answer; I was not able to add a comment because of the reputation requirement.
Detect the face, then crop and save the cropped image into a folder:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt   # needed for plt.imsave / plt.imshow below

face_cascade = cv.CascadeClassifier('./haarcascade_frontalface_default.xml')
#eye_cascade = cv.CascadeClassifier('haarcascade_eye.xml')

img = cv.imread('./face/nancy-Copy1.jpg')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)

for (x, y, w, h) in faces:
    cv.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    roi_gray = gray[y:y + h, x:x + w]
    roi_color = img[y:y + h, x:x + w]
    #eyes = eye_cascade.detectMultiScale(roi_gray)
    #for (ex, ey, ew, eh) in eyes:
    #    cv.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
    sub_face = img[y:y + h, x:x + w]
    face_file_name = "face/" + str(y) + ".jpg"
    plt.imsave(face_file_name, sub_face)

plt.imshow(sub_face)
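Two caveats worth noting about the snippet above: naming files by the y coordinate overwrites crops when two faces share the same row, and plt.imsave treats the array as RGB while cv.imread returns BGR, so the saved colours come out swapped. A sketch that avoids both (the index-based naming is an assumption, not part of the original):

# Variant: index-based file names, and cv.imwrite keeps OpenCV's BGR channel order
for i, (x, y, w, h) in enumerate(faces):
    sub_face = img[y:y + h, x:x + w]
    cv.imwrite("face/face_{}.jpg".format(i), sub_face)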
I have developed an application, "Face-Recognition-with-Own-Data-Set", using the Python packages face_recognition and opencv-python.
The source code and installation guide are on GitHub: Face-Recognition-with-Own-Data-Set.
Or run the source:
import face_recognition
import cv2
import numpy as np
import os

'''
Get the current working directory and create a Data directory to store the faces
'''
currentDirectory = os.getcwd()
dirName = os.path.join(currentDirectory, 'Data')
print(dirName)
if not os.path.exists(dirName):
    try:
        os.makedirs(dirName)
    except:
        raise OSError("Can't create destination directory (%s)!" % (dirName))

'''
For the given path, get the list of all files in the directory tree
'''
def getListOfFiles(dirName):
    # create a list of file and subdirectory names in the given directory
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        # Create the full path
        fullPath = os.path.join(dirName, entry)
        # If the entry is a directory then get the list of files in this directory
        if os.path.isdir(fullPath):
            allFiles = allFiles + getListOfFiles(fullPath)
        else:
            allFiles.append(fullPath)
    return allFiles

def knownFaceEncoding(listOfFiles):
    known_face_encodings = list()
    known_face_names = list()
    for file_name in listOfFiles:
        if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            known_image = face_recognition.load_image_file(file_name)
            face_encods = face_recognition.face_encodings(known_image)
            if face_encods:
                known_face_encoding = face_encods[0]
                known_face_encodings.append(known_face_encoding)
                known_face_names.append(os.path.basename(file_name[0:-4]))
    return known_face_encodings, known_face_names

# Get the list of all files in the directory tree at the given path
listOfFiles = getListOfFiles(dirName)
known_face_encodings, known_face_names = knownFaceEncoding(listOfFiles)

video_capture = cv2.VideoCapture(0)
cv2.namedWindow("Video", flags=cv2.WINDOW_NORMAL)
cv2.resizeWindow('Video', 1024, 640)
cv2.moveWindow('Video', 20, 20)

# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()

    # Resize the frame to 1/4 size for faster face recognition processing
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    rgb_small_frame = small_frame[:, :, ::-1]

    k = cv2.waitKey(1)
    # Hit 'c' to capture the image!
    # Hit 'q' on the keyboard to quit!
    if k == ord('q'):
        break
    elif k == ord('c'):
        face_loc = face_recognition.face_locations(rgb_small_frame)
        if face_loc:
            print("Enter Name -")
            name = input()
            img_name = "{}/{}.png".format(dirName, name)
            (top, right, bottom, left) = face_loc[0]
            top *= 4
            right *= 4
            bottom *= 4
            left *= 4
            cv2.imwrite(img_name, frame[top - 5:bottom + 5, left - 5:right + 5])
            listOfFiles = getListOfFiles(dirName)
            known_face_encodings, known_face_names = knownFaceEncoding(listOfFiles)

    # Only process every other frame of video to save time
    if process_this_frame:
        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        face_names = []
        for face_encoding, face_location in zip(face_encodings, face_locations):
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding, tolerance=0.55)
            name = "Unknown"

            # Use the known face with the smallest distance to the new face
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            if len(face_distances) > 0:
                best_match_index = np.argmin(face_distances)
                if matches[best_match_index]:
                    name = known_face_names[best_match_index]

            face_names.append(name)

    process_this_frame = not process_this_frame

    # Display the results
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale face locations back up since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

        # Draw a label with a name below the face
        cv2.rectangle(frame, (left, bottom + 46), (right, bottom + 11), (0, 0, 155), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom + 40), font, 1.0, (255, 255, 255), 1)

    # Display the resulting image
    cv2.imshow('Video', frame)

# Release the handle to the webcam
video_capture.release()
cv2.destroyAllWindows()
It will create a 'Data' directory in the current location if one does not already exist.
When a face is marked with a rectangle, press 'c' to capture the image; the command prompt will then ask for the name of the face. Enter the name, and you will find the captured image in the 'Data' directory.
I used this shell command:
for f in *.jpg; do
    PYTHONPATH=/usr/local/lib/python2.7/site-packages python -c 'import cv2;import sys;rects=cv2.CascadeClassifier("/usr/local/opt/opencv/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml").detectMultiScale(cv2.cvtColor(cv2.imread(sys.argv[1]),cv2.COLOR_BGR2GRAY),1.3,5);print("\n".join([" ".join([str(item) for item in row]) for row in rects]))' "$f" |
    while read x y w h; do
        convert "$f" -gravity NorthWest -crop "${w}x${h}+${x}+${y}" "${f%.jpg}-$x-$y.png"
    done
done
You can install opencv and imagemagick on OS X with brew install opencv imagemagick.
I think the best option is the Google Vision API.
It is kept up to date, it uses machine learning, and it improves over time.
You can check the documentation for examples:
https://cloud.google.com/vision/docs/other-features
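A minimal sketch of face detection with the google-cloud-vision Python client (recent versions of the library; credentials must be configured, and the file name is a placeholder):

from google.cloud import vision

client = vision.ImageAnnotatorClient()

# Send the image bytes to the API and request face annotations
with open('test.jpg', 'rb') as f:
    image = vision.Image(content=f.read())
response = client.face_detection(image=image)

# Each annotation carries a bounding polygon that could drive a crop
for face in response.face_annotations:
    print([(v.x, v.y) for v in face.bounding_poly.vertices])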
Related
I am currently working on a small project, but I have an unresolved problem: I want to draw a shape around desired objects. The first thing is to determine the coordinates of the starting and ending points, but I don't have a specific idea of how to do it yet. I hope you can give me suggestions; glad to have your help.
I want the result to look like this:
Here is an example using OpenCV in which you can draw a rectangle over an object: by giving a template image of the source image, you can draw the shape over the matched region.
# importing needed libraries
import cv2
import numpy as np

img_rgb = cv2.imread('source_image.jpg')              # opening the source image (path is a placeholder)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)  # converting to grayscale
template = cv2.imread('template_image.jpg', 0)        # opening the template image as grayscale (path is a placeholder)
w, h = template.shape[::-1]                           # getting the shape of the template image

# matching the template against the source using OpenCV's template matching
res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
threshold = 0.9
loc = np.where(res >= threshold)
for pt in zip(*loc[::-1]):
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 1)

cv2.imshow('screen', img_rgb)
cv2.waitKey(0)
Source Image
Template Image
Result Image
I'm trying to use Tesseract to identify the characters 0-9. After receiving very poor results, I built a small test file to check what Tesseract is actually running against.
I took 39 screenshots of images and combined them with Paint into a jpg file. I have also run the same code/approach with png and bmp versions, with the same outcome.
Below are the jpg test data and my output from trying to draw the bounding boxes. I expected 39 small green bounding boxes, one around each of the numbers in question. They appear (mostly) very clear to my eye.
I tried the baseline Tesseract data I got with the pip install and got incredibly poor results. I then downloaded additional English data from the official GitHub, and matching improved dramatically without any further changes; it wasn't great, but it was much better.
In one script I take the screenshots, drop the blue and green channels, convert to grayscale, and apply binary thresholding followed by Otsu's method to try to smooth out some of the gaps in the screenshots.
import numpy as np
import cv2 as cv
from PIL import Image
from mss import mss                        # assumed: sct below comes from the mss screenshot library
from pytesseract import pytesseract

sct = mss()

def getTemplate():
    w, h = 1920, 1080
    monitor = {'top': 0, 'left': 0, 'width': w, 'height': h}
    img = Image.frombytes('RGB', (w, h), sct.grab(monitor).rgb)
    haystack = np.array(img)
    return haystack

path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.tesseract_cmd = path_to_tesseract

# get an updated image of the screen & crop it
template = getTemplate()
haystack = template[180:215, 55:125]
output = haystack   # 'output' was not defined in the original snippet; assumed to be the cropped screenshot

r = output.copy()
# set blue and green channels to 0
r[:, :, 0] = 0
r[:, :, 1] = 0

gray_image = cv.cvtColor(output, cv.COLOR_BGR2GRAY)
thresh = cv.threshold(gray_image, 40, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
img = cv.imwrite('testData.jpg', thresh)   # note: the original had 'testData.jng', assumed to be a typo
Then I read the image, apply the custom config below, draw the boxes, and print the results. The results are:

jpg:
222419230
222101064272787315267
from pytesseract import pytesseract
from pytesseract import Output
import cv2 as cv
import os

path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.tesseract_cmd = path_to_tesseract

# Read Images
jpg = cv.imread('testData.jpg')

# Define a configuration that whitelists only number characters, set the oem and specify the language
custom_config = r'-l eng --psm 6 --oem 3 -c tessedit_char_whitelist=0123456789'

# Find the numbers in the image
text_jpg = pytesseract.image_to_string(jpg, config=custom_config)

# Print Results
print('jpg:' + '\n' + str(text_jpg))

d = pytesseract.image_to_data(jpg, output_type=Output.DICT)
n_boxes = len(d['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    cv.rectangle(jpg, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv.imshow('jpg', jpg)
cv.waitKey(0)
cv.imwrite('results.jpg', jpg)
Why are the bounding boxes as overlapping and incorrect as they are, and how can I clean them up to improve my image recognition?
I have a problem with MediaPipe coordinates. What I want to do is crop the box of the detected face.
https://google.github.io/mediapipe/solutions/face_detection.html
EXAMPLE OF PROCEDURE
And I use the code below:
mp_face_detection = mp.solutions.face_detection

# Setup the face detection function.
face_detection = mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)

# Initialize the mediapipe drawing class.
mp_drawing = mp.solutions.drawing_utils

# Read an image from the specified path.
sample_img = cv2.imread('12345.jpg')

# Specify a size of the figure.
plt.figure(figsize=[10, 10])

# Display the sample image, also convert BGR to RGB for display.
plt.title("Sample Image"); plt.axis('off'); plt.imshow(sample_img[:, :, ::-1]); plt.show()

face_detection_results = face_detection.process(sample_img[:, :, ::-1])

# Check if the face(s) in the image are found.
if face_detection_results.detections:
    # Iterate over the found faces.
    for face_no, face in enumerate(face_detection_results.detections):
        # Display the face number we are iterating upon.
        print(f'FACE NUMBER: {face_no + 1}')
        print('---------------------------------')

        # Display the face confidence.
        print(f'FACE CONFIDENCE: {round(face.score[0], 2)}')

        # Get the face bounding box and face key points coordinates.
        face_data = face.location_data

        # Display the face bounding box coordinates.
        print(f'\nFACE BOUNDING BOX:\n{face_data.relative_bounding_box}')

        # Iterate twice, as we only want to display the first two key points of each detected face.
        for i in range(2):
            # Display the found normalized key points.
            print(f'{mp_face_detection.FaceKeyPoint(i).name}:')
            print(f'{face_data.relative_keypoints[mp_face_detection.FaceKeyPoint(i).value]}')
So the results are in this form:
FACE NUMBER: 1
FACE CONFIDENCE: 0.89
FACE BOUNDING BOX:
xmin: 0.2784463167190552
ymin: 0.3503175973892212
width: 0.1538110375404358
height: 0.23071599006652832
RIGHT_EYE:
x: 0.3447018265724182
y: 0.4222590923309326
LEFT_EYE:
x: 0.39114508032798767
y: 0.3888365626335144
And I want to crop the image to the coordinates of the box, like:

face = Image.fromarray(image).crop(face_rect)

or any other crop procedure.
My problem is that I can't get the coordinates of the detected item from MediaPipe. Any ideas?
Got the solution, guys:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# sample_img comes from the question's code above; 'data' is the detection's
# relative bounding box (face.location_data.relative_bounding_box)
h, w, c = sample_img.shape
print('width: ', w)
print('height: ', h)

xleft = int(data.xmin * w)
xtop = int(data.ymin * h)
xright = int(data.width * w) + xleft
xbottom = int(data.height * h) + xtop

detected_faces = [(xleft, xtop, xright, xbottom)]

for n, face_rect in enumerate(detected_faces):
    # 'image_c' in the original was undefined; assumed to be the RGB version of sample_img
    face = Image.fromarray(sample_img[:, :, ::-1]).crop(face_rect)
    face_np = np.asarray(face)
    plt.imshow(face_np)
Assume the objective is to crop a single face detected by MediaPipe; note the [0], which indicates that we are only interested in a single face:

results = mp_face.process(image_input)
detection = results.detections[0]

By default MediaPipe returns detection data in normalized form, and we have to convert it to the original size by multiplying x values by the width and y values by the height of the input image.
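As a quick illustration of that conversion, here is a manual sketch equivalent to what the helper below does (variable names follow the complete code further down):

# Manual normalized-to-pixel conversion for the bounding box origin
box = detection.location_data.relative_bounding_box
x_px = int(box.xmin * image_cols)   # image_cols: input image width in pixels
y_px = int(box.ymin * image_rows)   # image_rows: input image height in pixels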
We can employ the _normalized_to_pixel_coordinates helper available with MediaPipe:
relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
    image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin + relative_bounding_box.width,
    relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
    image_rows)
This essentially produces:

xleft, ytop = rect_start_point
xright, ybot = rect_end_point

In other words, ytop, ybot, xleft, and xright represent face_top, face_bottom, face_left, and face_right, respectively.
Since the image is simply a 3D numpy array, we can crop it as below:

crop_img = image_input[ytop:ybot, xleft:xright]

The complete code is below:
import cv2
import mediapipe as mp
from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates

# load face detection model
mp_face = mp.solutions.face_detection.FaceDetection(
    model_selection=1,            # model selection
    min_detection_confidence=0.5  # confidence threshold
)

dframe = cv2.imread('xx.png')     # read in color (the original passed the flag 0, which loads
                                  # grayscale and breaks the 3-value shape unpacking below)
image_rows, image_cols, _ = dframe.shape
image_input = cv2.cvtColor(dframe, cv2.COLOR_BGR2RGB)

results = mp_face.process(image_input)
detection = results.detections[0]
location = detection.location_data

relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
    image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin + relative_bounding_box.width,
    relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
    image_rows)

## Let's draw a bounding box
color = (255, 0, 0)
thickness = 2
cv2.rectangle(image_input, rect_start_point, rect_end_point, color, thickness)

xleft, ytop = rect_start_point
xright, ybot = rect_end_point

crop_img = image_input[ytop:ybot, xleft:xright]
cv2.imwrite('crop_image0.jpg', crop_img)
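One caveat about the last line: cv2.imwrite expects BGR channel order, and crop_img here comes from the RGB-converted image, so the saved file will have red and blue swapped. Converting back before saving avoids that:

# Convert RGB back to BGR so the saved colours are correct
cv2.imwrite('crop_image0.jpg', cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR))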
Can anyone give an example of the Edge Boxes algorithm for generating object proposals for object detection using OpenCV?
The details are at https://docs.opencv.org/3.4.0/d4/d0d/group__ximgproc__edgeboxes.html
Yes. First you will need to download the model file that is used for Edge Boxes here. Once you do that, the following code (taken from their GitHub) can be used as an example for running the Edge Boxes algorithm. In short, put the code below into a separate file called edgeboxes_demo.py, then in the terminal type:

python edgeboxes_demo.py model.yml.gz image_file

model.yml.gz is the model that you saved from the link above, which I assume is in the same directory as the code. image_file is the path to the image you want to use for testing the algorithm. The code runs the Edge Boxes algorithm, then draws the detected boxes on the image in green:
import cv2 as cv
import numpy as np
import sys

if __name__ == '__main__':
    model = sys.argv[1]
    im = cv.imread(sys.argv[2])

    edge_detection = cv.ximgproc.createStructuredEdgeDetection(model)
    rgb_im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
    edges = edge_detection.detectEdges(np.float32(rgb_im) / 255.0)

    orimap = edge_detection.computeOrientation(edges)
    edges = edge_detection.edgesNms(edges, orimap)

    edge_boxes = cv.ximgproc.createEdgeBoxes()
    edge_boxes.setMaxBoxes(30)
    boxes = edge_boxes.getBoundingBoxes(edges, orimap)

    for b in boxes:
        x, y, w, h = b
        cv.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 1, cv.LINE_AA)

    cv.imshow("edges", edges)
    cv.imshow("edgeboxes", im)
    cv.waitKey(0)
    cv.destroyAllWindows()
Test Image
Result
What I'm trying to do, using OpenCV, dlib, and Python, is to identify facial landmarks on a set of images using dlib, then crop the mouths from those same images and save them as separate images with ".jpg" extensions.
Here is the code:
import numpy as np
import cv2
import dlib
import sys
import skimage
from PIL import Image
import os
import glob
#Everything is imported here

folderpath = sys.argv[1]
cascPath = sys.argv[2]
PREDICTOR_PATH = "/home/victor/facial-landmarks/shape_predictor_68_face_landmarks.dat"
#user supplies the folderpath and cascpath in a terminal/command prompt
#predictor_path is already set

imageformat = ".tif"
path = folderpath
imfilelist = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(imageformat)]
#only images with ".tif" extensions in the folder interest us, we create a
#list with paths to those images

data = np.array([])
for IMG in imfilelist:
    image = cv2.imread(IMG)  #this for-loop iterates through the images we need,
    np.append(data, image)   #reads them, and appends them to the data numpy array

gray = np.array([])
for j in range(0, len(data)):
    cvtimg = cv2.cvtColor(np.array(data[j]), cv2.COLOR_BGR2GRAY)
    np.append(gray, cvtimg)  #empty numpy array called gray is declared;
                             #the for-loop goes through all RGB pictures
                             #stored in data, converts them to grayscale
                             #and stores them in gray

MOUTH_OUTLINE_POINTS = list(range(48, 61))
MOUTH_INNER_POINTS = list(range(61, 68))
#defines the landmarks for the Mouth Outline and the inner mouth points

faceCascade = cv2.CascadeClassifier(cascPath)
#faceCascade is defined here; cascPath, which is user supplied, is the param

predictor = dlib.shape_predictor(PREDICTOR_PATH)

faces = np.array([])
for i in gray:
    face = faceCascade.detectMultiScale(gray[i], scaleFactor=1.05, minNeighbors=5, minSize=(100, 100))
    np.append(faces, face)  #this for-loop tries to detect faces and append
                            #them to the empty numpy array called faces

print("Found {0} faces!".format(len(faces)))
# nothing is displayed beyond this print statement

for (x, y, w, h) in faces:
    dlib_rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))

    landmarks = np.matrix([[p.x, p.y]
                           for p in predictor(IMAGES, dlib_rect).parts()])

    landmarks_display = landmarks[MOUTH_OUTLINE_POINTS + MOUTH_INNER_POINTS]

    highX = 0
    lowX = 1000
    highY = 0
    lowY = 1000

    for idx, point in enumerate(landmarks_display):
        pos = (point[0, 0], point[0, 1])
        cv2.circle(image, pos, 2, color=(0, 0, 255), thickness=-1)
        if (pos[0] > highX):
            highX = pos[0]
        if (pos[0] < lowX):
            lowX = pos[0]
        if (pos[1] > highY):
            highY = pos[1]
        if (pos[1] < lowY):
            lowY = pos[1]

    print (lowX, lowY, highX, highY)

    CONSTANT_FACTOR = 0.325
    delta_x = highX - lowX
    delta_y = highY - lowY
    low_x_adj = lowX - int(delta_x * CONSTANT_FACTOR)
    high_x_adj = highX + int(delta_x * CONSTANT_FACTOR)
    low_y_adj = lowY - int(delta_y * 0.2)
    high_y_adj = highY + int(delta_y * CONSTANT_FACTOR)

    crop_img = image[low_y_adj:high_y_adj, low_x_adj:high_x_adj]
    cv2.imwrite("Cropped_Mouth.jpg", crop_img)
    cv2.imshow("Cropped_Mouth.jpg", crop_img)
    cv2.waitKey(0)
Now, I've checked the paths and they are correct. I don't get any syntax errors, runtime errors, nothing. The script runs, but no output is produced other than the following print statement: print("Found {0} faces!".format(len(faces))).
I assume it runs what comes after it, but there is no output on the screen and nothing is saved in my home folder (which is where the output pictures of cropped mouths are normally stored). The original script, which was meant to work with only one image, works perfectly, but this one doesn't seem to do the trick.
Any ideas and suggestions would be highly appreciated. Thank you.
P.S. If the problem is with the code after the line that gets printed: I haven't started working on that part for this script yet, because I believe it is the code above the print statement that is faulty in some way.
Why not use the dlib face detector for detecting faces? Below is code to detect faces using the dlib face detector and to save the mouth from each face with a .jpg extension. I just modified the dlib face-landmarks example given in the python examples folder of dlib.
import sys
import os
import dlib
import glob
import cv2

predictor_path = "shape_predictor_68_face_landmarks.dat"
faces_folder_path = "path/to/faces/folder"

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
win = dlib.image_window()

i = 0
for f in glob.glob(os.path.join(faces_folder_path, "*.tiff")):
    print("Processing file: {}".format(f))
    img = cv2.imread(f)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # clear the previous overlay. Useful when there are multiple faces in the same photo
    win.clear_overlay()

    # show the image
    win.set_image(img)

    # Ask the detector to find the bounding boxes of each face. The 1 in the
    # second argument indicates that we should upsample the image 1 time. This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))

    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))

        # Get the landmarks/parts for the face in box d.
        shape = predictor(img, d)
        i += 1

        # The next lines of code just get the coordinates for the mouth
        # and crop the mouth from the image. This part can probably be optimised
        # by taking only the outermost points.
        xmouthpoints = [shape.part(x).x for x in range(48, 67)]
        ymouthpoints = [shape.part(x).y for x in range(48, 67)]
        maxx = max(xmouthpoints)
        minx = min(xmouthpoints)
        maxy = max(ymouthpoints)
        miny = min(ymouthpoints)

        # to show the mouth properly, pad both sides
        pad = 10

        # basename gets the name of the file with its extension;
        # splitext splits the extension and the filename.
        # This does not consider the condition when there are multiple faces in an image;
        # if there are, it just overwrites each crop and keeps only the last one.
        filename = os.path.splitext(os.path.basename(f))[0]

        crop_image = img[miny - pad:maxy + pad, minx - pad:maxx + pad]
        cv2.imshow('mouth', crop_image)

        # The mouth images are saved using the source file's name with a .jpg extension.
        # Change the folder if you want to; they are stored in the current directory.
        cv2.imwrite(filename + '.jpg', crop_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        win.add_overlay(shape)

    win.add_overlay(dets)