How to crop face detected via Mediapipe in Python - python

I have a problem with MediaPipe coordinates: what I want to do is crop the bounding box of the detected face.
https://google.github.io/mediapipe/solutions/face_detection.html
And I use the code below:
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt

mp_face_detection = mp.solutions.face_detection
# Set up the face detection function.
face_detection = mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)
# Initialize the mediapipe drawing class.
mp_drawing = mp.solutions.drawing_utils
# Read an image from the specified path.
sample_img = cv2.imread('12345.jpg')
# Specify a size of the figure.
plt.figure(figsize=[10, 10])
# Display the sample image; convert BGR to RGB for display.
plt.title("Sample Image"); plt.axis('off'); plt.imshow(sample_img[:, :, ::-1]); plt.show()

face_detection_results = face_detection.process(sample_img[:, :, ::-1])
# Check if the face(s) in the image are found.
if face_detection_results.detections:
    # Iterate over the found faces.
    for face_no, face in enumerate(face_detection_results.detections):
        # Display the face number upon which we are iterating.
        print(f'FACE NUMBER: {face_no+1}')
        print('---------------------------------')
        # Display the face confidence.
        print(f'FACE CONFIDENCE: {round(face.score[0], 2)}')
        # Get the face bounding box and face key points coordinates.
        face_data = face.location_data
        # Display the face bounding box coordinates.
        print(f'\nFACE BOUNDING BOX:\n{face_data.relative_bounding_box}')
        # Iterate twice, as we only want to display the first two key points of each detected face.
        for i in range(2):
            # Display the found normalized key points.
            print(f'{mp_face_detection.FaceKeyPoint(i).name}:')
            print(f'{face_data.relative_keypoints[mp_face_detection.FaceKeyPoint(i).value]}')
So the results are in this form:
FACE NUMBER: 1
FACE CONFIDENCE: 0.89
FACE BOUNDING BOX:
xmin: 0.2784463167190552
ymin: 0.3503175973892212
width: 0.1538110375404358
height: 0.23071599006652832
RIGHT_EYE:
x: 0.3447018265724182
y: 0.4222590923309326
LEFT_EYE:
x: 0.39114508032798767
y: 0.3888365626335144
And I want to CROP the image to the coordinates of the BOX, like
face = Image.fromarray(image).crop(face_rect)
or any other crop procedure.
My problem is that I can't get the coordinates of the detected face from MediaPipe.
Any ideas?

Got the solution, guys:
import numpy as np
from PIL import Image

h, w, c = sample_img.shape
print('width: ', w)
print('height: ', h)

# `data` is the relative bounding box of one detection, i.e.
# face.location_data.relative_bounding_box from the code above.
xleft = int(data.xmin * w)
xtop = int(data.ymin * h)
xright = int(data.width * w) + xleft
xbottom = int(data.height * h) + xtop

detected_faces = [(xleft, xtop, xright, xbottom)]
for n, face_rect in enumerate(detected_faces):
    # image_c is assumed to be the RGB version of the loaded image
    face = Image.fromarray(image_c).crop(face_rect)
    face_np = np.asarray(face)
    plt.imshow(face_np)

Assume the objective is to crop a single face detected by MediaPipe. Note the [0], indicating that we are only interested in a single face:
results = mp_face.process(image_input)
detection = results.detections[0]
By default MediaPipe returns detection data in normalized form, and we have to convert it to the original size by multiplying the x values by the width and the y values by the height of the input image.
We can employ the _normalized_to_pixel_coordinates helper that ships with MediaPipe:
relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
    image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin + relative_bounding_box.width,
    relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
    image_rows)
This essentially produces
xleft, ytop = rect_start_point
xright, ybot = rect_end_point
In other words, ytop, ybot, xleft, and xright represent face_top, face_bottom, face_left, and face_right, respectively.
Since the image is simply a 3D NumPy array, we can crop it as below:
crop_img = image_input[ytop:ybot, xleft:xright]
The complete code is as below:
import cv2
import mediapipe as mp
from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates

# load face detection model
mp_face = mp.solutions.face_detection.FaceDetection(
    model_selection=1,            # model selection
    min_detection_confidence=0.5  # confidence threshold
)

# Read in color: the original cv2.imread('xx.png', 0) loads grayscale, which
# would break both the 3-value shape unpacking and the BGR2RGB conversion.
dframe = cv2.imread('xx.png')
image_rows, image_cols, _ = dframe.shape
image_input = cv2.cvtColor(dframe, cv2.COLOR_BGR2RGB)

results = mp_face.process(image_input)
detection = results.detections[0]
location = detection.location_data

relative_bounding_box = location.relative_bounding_box
rect_start_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin, relative_bounding_box.ymin, image_cols,
    image_rows)
rect_end_point = _normalized_to_pixel_coordinates(
    relative_bounding_box.xmin + relative_bounding_box.width,
    relative_bounding_box.ymin + relative_bounding_box.height, image_cols,
    image_rows)

## Let's draw a bounding box
color = (255, 0, 0)
thickness = 2
cv2.rectangle(image_input, rect_start_point, rect_end_point, color, thickness)

xleft, ytop = rect_start_point
xright, ybot = rect_end_point
crop_img = image_input[ytop:ybot, xleft:xright]
# image_input is RGB, so convert back to BGR before writing with OpenCV
cv2.imwrite('crop_image0.jpg', cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR))
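One caveat worth noting: _normalized_to_pixel_coordinates returns None when a coordinate falls outside [0, 1], so it can pay to check both points before cropping. If you also want some margin around the face, a minimal sketch building on the variables above (the 10% padding factor is an arbitrary choice of mine) could be:
pad = 0.1  # hypothetical padding fraction per side
bw, bh = xright - xleft, ybot - ytop
x0 = max(0, xleft - int(bw * pad))
y0 = max(0, ytop - int(bh * pad))
x1 = min(image_cols, xright + int(bw * pad))
y1 = min(image_rows, ybot + int(bh * pad))
padded_crop = image_input[y0:y1, x0:x1]  # clamped, so it never goes out of bounds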

Related

Resizing an object in an image

What I want to do is to increase the size of these boxes in place: if a box has a size of 100x200, I want it to be 120x240, i.e. a 20 percent increase in size.
Resizing and cropping the image will not work for all images, as I am using it as a mask for other images, and if the position changes the next step will not work.
I have been searching for a way to do this but was unable to find one.
I am using Python 3.9.4.
You can use the connected-components function from OpenCV to detect the white boxes. Once you have the center, height, and width of all the boxes, you can simply increase their size and redraw them on a black image.
Official documentation of the function : Structural Analysis and Shape Descriptors
import cv2

# args comes from an argparse setup that is not shown here
image = cv2.imread(args["image"])
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255,
                       cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
output = cv2.connectedComponentsWithStats(thresh, args["connectivity"], cv2.CV_32S)
(numLabels, labels, stats, centroids) = output
The above code loads the image and finds all the connected components in the image. Now you can iterate over them to find the required component.
# loop over the number of unique connected component labels
for i in range(0, numLabels):
    # if this is the first component then we examine the
    # *background* (typically we would just ignore this
    # component in our loop)
    if i == 0:
        text = "examining component {}/{} (background)".format(i + 1, numLabels)
    # otherwise, we are examining an actual connected component
    else:
        text = "examining component {}/{}".format(i + 1, numLabels)
    # print a status message update for the current connected component
    print("[INFO] {}".format(text))
    # extract the connected component statistics and centroid for
    # the current label
    x = stats[i, cv2.CC_STAT_LEFT]
    y = stats[i, cv2.CC_STAT_TOP]
    w = stats[i, cv2.CC_STAT_WIDTH]
    h = stats[i, cv2.CC_STAT_HEIGHT]
    area = stats[i, cv2.CC_STAT_AREA]
    (cX, cY) = centroids[i]
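The answer stops at extracting the statistics, so here is a minimal sketch of the remaining step it describes: scale each box by 20 percent around its centroid and redraw it, filled, on a black image (the names scale and output_img are mine; label 0, the background, is skipped):
import numpy as np

scale = 1.2  # 20 percent increase, as in the question
output_img = np.zeros(thresh.shape, dtype="uint8")
for i in range(1, numLabels):  # label 0 is the background
    w = stats[i, cv2.CC_STAT_WIDTH]
    h = stats[i, cv2.CC_STAT_HEIGHT]
    (cX, cY) = centroids[i]
    new_w, new_h = int(w * scale), int(h * scale)
    x0 = max(0, int(cX - new_w / 2))
    y0 = max(0, int(cY - new_h / 2))
    x1 = min(thresh.shape[1] - 1, x0 + new_w)
    y1 = min(thresh.shape[0] - 1, y0 + new_h)
    cv2.rectangle(output_img, (x0, y0), (x1, y1), 255, -1)  # -1 = filled
Because each box grows around its own centroid, its position is preserved, which is what the mask use case requires.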

Python: How to get Face Mesh landmarks coordinates in MediaPipe?

I'm trying to get a list with landmark coordinates with MediaPipe's Face Mesh. For example: Landmark[6]: (0.36116672, 0.93204623, 0.0019629495)
I can't find a way to do that and would appreciate the help.
MediaPipe has a more complex interface than most of the models you see publicly.
But what you're looking for is easily achievable anyway.
import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh

file_list = ['test.png']
# For static images:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        min_detection_confidence=0.5) as face_mesh:
    for idx, file in enumerate(file_list):
        image = cv2.imread(file)
        # Convert the BGR image to RGB before processing.
        results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Print and draw face mesh landmarks on the image.
        if not results.multi_face_landmarks:
            continue
        annotated_image = image.copy()
        for face_landmarks in results.multi_face_landmarks:
            print('face_landmarks:', face_landmarks)
            mp_drawing.draw_landmarks(
                image=annotated_image,
                landmark_list=face_landmarks,
                # note: FACE_CONNECTIONS was replaced by constants such as
                # FACEMESH_TESSELATION in newer MediaPipe releases
                connections=mp_face_mesh.FACE_CONNECTIONS,
                landmark_drawing_spec=drawing_spec,
                connection_drawing_spec=drawing_spec)
In this example, which is taken from here, you can see that they're iterating through results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
Each element here holds the information for one face detected in the image, and the length of results.multi_face_landmarks is the number of faces detected in the image.
When you print the attributes of, let's say, the first face, you'll see 'landmark' as the last attribute.
dir(results.multi_face_landmarks[0])
>> ..., 'landmark']
We need the landmark attribute to acquire the pixel coordinates one step further on.
The length of the landmark attribute is 468, which is the number of predicted [x, y, z] keypoints.
If we take the first keypoint:
results.multi_face_landmarks[0].landmark[0]
it will give us normalized [x,y,z] values:
x: 0.25341567397117615
y: 0.71121746301651
z: -0.03244325891137123
Finally, x, y and z here are attributes of each keypoint. We can check that by calling dir() on a keypoint.
Now you can easily reach the normalized coordinates:
results.multi_face_landmarks[0].landmark[0].x -> X coordinate
results.multi_face_landmarks[0].landmark[0].y -> Y coordinate
results.multi_face_landmarks[0].landmark[0].z -> Z coordinate
For denormalization, we should multiply the x coordinate by the image width and the y coordinate by the image height.
Sample code:
for face in results.multi_face_landmarks:
    for landmark in face.landmark:
        x = landmark.x
        y = landmark.y
        shape = image.shape
        relative_x = int(x * shape[1])
        relative_y = int(y * shape[0])
        cv2.circle(image, (relative_x, relative_y), radius=1, color=(225, 0, 100), thickness=1)
# cv2_imshow is Colab's replacement for cv2.imshow
# (from google.colab.patches import cv2_imshow)
cv2_imshow(image)
Which would give us:
(result image linked in the original answer)
Here is a full explanation -
Face Mesh MediaPipe
import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh

# For static images:
file_list = ['test.png']
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        min_detection_confidence=0.5) as face_mesh:
    for idx, file in enumerate(file_list):
        image = cv2.imread(file)
        # Convert the BGR image to RGB before processing.
        results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Print and draw face mesh landmarks on the image.
        if not results.multi_face_landmarks:
            continue
        annotated_image = image.copy()
        for face_landmarks in results.multi_face_landmarks:
            print('face_landmarks:', face_landmarks)
Let's work with this particular image.
Once the image is loaded, we first instantiate the MediaPipe solution
face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=2,
                                            min_detection_confidence=0.5)
and detect all faces via process, as below:
results = face_mesh.process(cv2.cvtColor(image_input, cv2.COLOR_BGR2RGB))
To access all the landmarks of this particular face, we can iterate through them via
ls_single_face = results.multi_face_landmarks[0].landmark
for lm in ls_single_face:
    print(lm.x, lm.y, lm.z)
which will output the x, y, and z coordinates:
0.6062703132629395 0.34374159574508667 -0.02611529268324375
0.6024502515792847 0.3223230540752411 -0.05503281578421593
0.6047719717025757 0.32883960008621216 -0.029224306344985962
0.5947933793067932 0.29429933428764343 -0.04156317934393883
0.6020699143409729 0.31391528248786926 -0.058685336261987686
0.6023058295249939 0.3025013208389282 -0.054952703416347504
The full code is as below:
import cv2
import mediapipe as mp

dframe = cv2.imread("detect_face/person.png")
image_input = cv2.cvtColor(dframe, cv2.COLOR_BGR2RGB)

face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=2,
                                            min_detection_confidence=0.5)

image_rows, image_cols, _ = dframe.shape
# image_input is already RGB; converting it a second time (as the original
# snippet did) would swap the channels back to BGR
results = face_mesh.process(image_input)

ls_single_face = results.multi_face_landmarks[0].landmark
for lm in ls_single_face:
    print(lm.x, lm.y, lm.z)
Using a similar strategy, we can plot a marker for each face landmark by iterating over the coordinates.
from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates

ls_single_face = results.multi_face_landmarks[0].landmark
for lm in ls_single_face:
    cord = _normalized_to_pixel_coordinates(lm.x, lm.y, image_cols, image_rows)
    # cord is None for out-of-range values, so guard before drawing
    if cord is not None:
        cv2.putText(image_input, '.', cord, cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
Which will output the image with a dot drawn at each landmark.
The original image was retrieved from this link.
MediaPipe also has a built-in approach to detect key face regions, as discussed here.
MediaPipe's landmark values are normalized by the width and height of the image. After getting a landmark value, simply multiply the landmark's x by the width of your image and its y by the height of your image.
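For example (a minimal sketch; landmark is assumed to be a single face-mesh landmark and image the array returned by cv2.imread):
h, w = image.shape[:2]
pixel_x = int(landmark.x * w)  # normalized x -> pixel column
pixel_y = int(landmark.y * h)  # normalized y -> pixel row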
You may check this link for a complete tutorial on MediaPipe. It's still being written but is going to be completed very soon.
To print the coordinates of the landmarks you have to check that they exist, and after that you can access the x, y and z coordinates. The code for landmark 0 is:
# in the capture loop
if results.multi_face_landmarks:
    # multi_face_landmarks is a list, so pick a face first ([0] = first face);
    # the original snippet indexed .landmark on the list itself, which fails
    coord = results.multi_face_landmarks[0].landmark[0]
    print(''.join(['(', str(coord.x), ',', str(coord.y), ',', str(coord.z), ')']))
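To get exactly what the question asks for, a list of (x, y, z) tuples per face, you could collect the values like this (a sketch reusing the results object from above):
all_faces = []
if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
        # 468 normalized (x, y, z) tuples for this face
        all_faces.append([(lm.x, lm.y, lm.z) for lm in face_landmarks.landmark])
    print(all_faces[0][6])  # e.g. Landmark[6] of the first face, as in the question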

Opencv, how to overcrop an image?

I have a set of arbitrary images. Half the images are pictures, half are masks defining ROIS.
In the current version of my program I use the ROI to crop the image (i.e I extract the rectangle in the image matching the bounding box of the ROI mask). The problem is, the ROI mask isn't perfect and it's better to over predict than under predict in my case.
So I want to crop more than the ROI rectangle, but if I do this, I may end up cropping outside of the image, i.e.:
x, y, w, h = cv2.boundingRect(mask_contour)
img = img[int(y-h*0.05):int(y + h * 1.05), int(x-w*0.05):int(x + w * 1.05)]
can fail because it tries to access out-of-bounds pixels (with NumPy slicing, a negative start index silently wraps around to the other side of the image). I could just clamp the values, but I wanted to know if there is a better approach.
You can add a border using OpenCV:
import cv2 as cv
import random

src = cv.imread('/home/stephen/lenna.png')
borderType = cv.BORDER_REPLICATE
borderSize = .5
top = int(borderSize * src.shape[0])   # shape[0] = rows
bottom = top
left = int(borderSize * src.shape[1])  # shape[1] = cols
right = left
# the color is only used by cv.BORDER_CONSTANT; BORDER_REPLICATE ignores it
value = [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
dst = cv.copyMakeBorder(src, top, bottom, left, right, borderType, None, value)
cv.imshow('img', dst)
c = cv.waitKey(0)
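If you take this route, remember that every pixel coordinate shifts by the border size, so the over-crop from the question becomes (a sketch using top and left from above together with x, y, w, h from the question's boundingRect):
# coordinates move by (left, top) once the border is added, and with a border
# this large the enlarged crop can no longer run out of bounds
x2, y2 = x + left, y + top
crop = dst[int(y2 - h * 0.05):int(y2 + h * 1.05),
           int(x2 - w * 0.05):int(x2 + w * 1.05)]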
Maybe you could try to limit the coordinates beforehand. Please see the code below:
# clamp to the image size (not the box size) so the expanded crop stays in bounds
img_h, img_w = img.shape[:2]
[ymin, ymax] = [max(0, int(y - h * 0.05)), min(img_h, int(y + h * 1.05))]
[xmin, xmax] = [max(0, int(x - w * 0.05)), min(img_w, int(x + w * 1.05))]
img = img[ymin:ymax, xmin:xmax]

Script for identifying landmarks and cropping mouth from images using OpenCV doesn't see faces

So what I'm trying to do, using OpenCV, dlib, and Python, is to identify facial landmarks on a set of images using dlib, then crop the mouths from those very same images and save them as separate images with ".jpg" extensions.
This here is the code:
import numpy as np
import cv2
import dlib
import sys
import skimage
from PIL import Image
import os
import glob
#Everything is imported here

folderpath = sys.argv[1]
cascPath = sys.argv[2]
PREDICTOR_PATH = "/home/victor/facial-landmarks/shape_predictor_68_face_landmarks.dat"
#user supplies the folderpath and cascpath in a terminal/command prompt
#predictor_path is already set

imageformat = ".tif"
path = folderpath
imfilelist = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(imageformat)]
#only images with ".tif" extensions in the folder interest us, we create a
#list with paths to those images

data = np.array([])
for IMG in imfilelist:
    image = cv2.imread(IMG)  # this for-loop iterates through images we need
    np.append(data, image)   # reads them, and appends them to the data
                             # numpy array

gray = np.array([])
for j in range(0, len(data)):
    cvtimg = cv2.cvtColor(np.array(data[j]), cv2.COLOR_BGR2GRAY)
    np.append(gray, cvtimg)  # empty numpy array called gray is declared
                             # for-loop goes through all RGB pictures
                             # stored in data, converts them to grayscale
                             # and stores them in gray

MOUTH_OUTLINE_POINTS = list(range(48, 61))
MOUTH_INNER_POINTS = list(range(61, 68))
#defines the landmarks for the Mouth Outline and the inner mouth points

faceCascade = cv2.CascadeClassifier(cascPath)
#faceCascade is defined here, cascPath which is user supplied is the param

predictor = dlib.shape_predictor(PREDICTOR_PATH)

faces = np.array([])
for i in gray:
    face = faceCascade.detectMultiScale(gray[i], scaleFactor=1.05, minNeighbors=5, minSize=(100, 100))
    np.append(faces, face)  # this for-loop tries to detect faces and append
                            # them to the empty numpy array called faces

print("Found {0} faces!".format(len(faces)))
# nothing is displayed beyond this print statement

for (x, y, w, h) in faces:
    dlib_rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))

    landmarks = np.matrix([[p.x, p.y]
                           for p in predictor(IMAGES, dlib_rect).parts()])

    landmarks_display = landmarks[MOUTH_OUTLINE_POINTS + MOUTH_INNER_POINTS]

    highX = 0
    lowX = 1000
    highY = 0
    lowY = 1000

    for idx, point in enumerate(landmarks_display):
        pos = (point[0, 0], point[0, 1])
        cv2.circle(image, pos, 2, color=(0, 0, 255), thickness=-1)
        if (pos[0] > highX):
            highX = pos[0]
        if (pos[0] < lowX):
            lowX = pos[0]
        if (pos[1] > highY):
            highY = pos[1]
        if (pos[1] < lowY):
            lowY = pos[1]

    print(lowX, lowY, highX, highY)

    CONSTANT_FACTOR = 0.325
    delta_x = highX - lowX
    delta_y = highY - lowY
    low_x_adj = lowX - int(delta_x * CONSTANT_FACTOR)
    high_x_adj = highX + int(delta_x * CONSTANT_FACTOR)
    low_y_adj = lowY - int(delta_y * 0.2)
    high_y_adj = highY + int(delta_y * CONSTANT_FACTOR)

    crop_img = image[low_y_adj:high_y_adj, low_x_adj:high_x_adj]
    cv2.imwrite("Cropped_Mouth.jpg", crop_img)
    cv2.imshow("Cropped_Mouth.jpg", crop_img)
    cv2.waitKey(0)
Now, I've checked the paths and they are correct. I don't get any syntax errors, runtime errors, nothing. The script runs, but no output is produced other than the following print statement: print("Found {0} faces!".format(len(faces))).
I assume it runs what comes after it, but there is no output on the screen and nothing is saved in my home folder (which is where the output pictures of cropped mouths are normally stored). The original script, which was meant to work with one image only, works perfectly, but this one doesn't seem to do the trick.
Any ideas and suggestions would be highly appreciated. Thank you.
P.S. If the problem is with the code after the line that gets printed: I haven't started adapting that part for this script yet, because I believe it is the code above the print statement that is faulty in some way.
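For what it's worth, one likely fault in the accumulation loops above is that np.append does not modify its argument in place; it returns a new array, which is discarded here, so data, gray, and faces all stay empty and len(faces) is 0. A plain Python list avoids this (a sketch of the accumulation step only):
data = []
for IMG in imfilelist:
    image = cv2.imread(IMG)
    data.append(image)  # list.append mutates in place, unlike np.append

gray = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in data]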
Why not use dlib's face detector for detecting faces? Below is the code to detect faces using the dlib face detector and save the mouth from each face with a .jpg extension. I just modified the face-landmarks example given in the Python examples folder of dlib.
import sys
import os
import dlib
import glob
import cv2

predictor_path = "shape_predictor_68_face_landmarks.dat"
faces_folder_path = "path/to/faces/folder"

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
win = dlib.image_window()

i = 0
for f in glob.glob(os.path.join(faces_folder_path, "*.tiff")):
    print("Processing file: {}".format(f))
    img = cv2.imread(f)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # to clear the previous overlay. Useful when multiple faces in the same photo
    win.clear_overlay()
    # to show the image
    win.set_image(img)

    # Ask the detector to find the bounding boxes of each face. The 1 in the
    # second argument indicates that we should upsample the image 1 time. This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
        # Get the landmarks/parts for the face in box d.
        shape = predictor(img, d)
        i += 1

        # The next lines of code just get the coordinates for the mouth
        # and crop the mouth from the image. This part can probably be optimised
        # by taking only the outermost points.
        # Note: the mouth landmarks are points 48-67, so the range must end at
        # 68 (the original range(48, 67) dropped the last inner-lip point).
        xmouthpoints = [shape.part(x).x for x in range(48, 68)]
        ymouthpoints = [shape.part(x).y for x in range(48, 68)]
        maxx = max(xmouthpoints)
        minx = min(xmouthpoints)
        maxy = max(ymouthpoints)
        miny = min(ymouthpoints)

        # to show the mouth properly pad both sides
        pad = 10

        # basename gets the name of the file with its extension
        # splitext splits the extension and the filename
        # This does not consider the condition when there are multiple faces
        # in an image; if there are, each crop overwrites the previous one.
        filename = os.path.splitext(os.path.basename(f))[0]

        crop_image = img[miny - pad:maxy + pad, minx - pad:maxx + pad]
        cv2.imshow('mouth', crop_image)
        # The mouth images are saved in the current directory using the
        # original file name with a .jpg extension. Change the folder if you want to.
        cv2.imwrite(filename + '.jpg', crop_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        win.add_overlay(shape)
    win.add_overlay(dets)

Python PIL Outline image increase thicknesses

I know how to find the edges in a picture, but I would like the outline edges to be thicker, for example width 9.
from PIL import Image, ImageFilter
image = Image.open('your_image.png')
image = image.filter(ImageFilter.FIND_EDGES, width=9)
image.save('new_name.png')
is that possible?
You can find edges and fatten them like this:
#!/usr/bin/env python3
from PIL import Image, ImageMorph, ImageFilter
# Open star image and ensure greyscale
im = Image.open('star.png').convert('L')
# Detect edges and save
edges = im.filter(ImageFilter.FIND_EDGES)
edges.save('DEBUG-edges.png')
# Make fatter edges and save
fatEdges = edges.filter(ImageFilter.MaxFilter)
fatEdges.save('DEBUG-fatEdges.png')
# Make very fat edges and save
veryFatEdges = edges.filter(ImageFilter.MaxFilter(7))
veryFatEdges.save('DEBUG-veryFatEdges.png')
Top-left=original, top-right=edges, bottom-left=fat edges, bottom-right=very fat edges.
You could use the ImageMorph module for more controlled morphology (see the sketch after this paragraph), but the maximum filter is very effective as it is.
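For reference, the ImageMorph route could look roughly like this (a sketch under the assumption that the greyscale edges image from above is available, and that repeated 8-connected dilation is an acceptable way to fatten the outline):
from PIL import ImageMorph

# MorphOp works on binary 'L' images, so threshold the edge map first
binary = edges.point(lambda p: 255 if p > 0 else 0)
mop = ImageMorph.MorphOp(op_name="dilation8")  # 8-connected dilation
for _ in range(3):  # each pass grows the edges by one pixel in all directions
    _count, binary = mop.apply(binary)
binary.save('DEBUG-morphEdges.png')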
It's never too late to share a solution. Try this ...
from PIL import Image, ImageFilter

def change_img_edge(image, thickness, edge_color=(0, 255, 0, 200)):
    # Iterate thickness-times.
    # Each cycle filters the updated image, so the detected edge moves outwards
    for t in range(thickness):
        msk = image.filter(ImageFilter.FIND_EDGES)
        msk_data, img_data = msk.getdata(), image.getdata()
        # Get image size
        w, h = image.size
        output = []
        for y in range(0, h):
            for x in range(0, w):
                idx = x + w * y
                if msk_data[idx][3] > 0:
                    curr_pxl = edge_color
                else:
                    curr_pxl = img_data[idx]
                output.append(curr_pxl)
        image.putdata(output)
    return image

# Example usage
image = Image.open('your_image.png')
image = image.convert("RGBA")
image = change_img_edge(image, 5, (0, 255, 255, 200))
image.save('new_name.png')
