OpenCV and Tesseract on door label detection - python

I'm fairly new to OpenCV and Tesseract. I'm currently building a project that uses computer vision to detect door labels, which will hopefully be beneficial for the visually impaired.
The idea of the program is to preprocess the input image by converting it to binary, use Canny edge detection to find the outlines of the door label, and then dilate the Canny result. After that, the image is fed to Tesseract while the detected text is shown with boxes.
The expected result is green rectangles drawn around the text, with the text itself printed out.
The issue is that the rectangles are missing and the text detection fails.
I have tried going through these:
Recognize Text in images using Canny Edge detection in Opencv
OpenCv pytesseract for OCR
Image preprocessing with OpenCV before doing character recognition (tesseract)
The questions and solutions there are either too simple or not relevant enough, and some are not in Python.
Attached below is my attempt at the code:
import pytesseract as pytess
import cv2 as cv
import numpy as np
from PIL import Image
from pytesseract import Output
img = cv.imread(r"C:\Users\User\Desktop\dataset\p\Image_31.jpg", 0)
# edges store the canny version of img
edges = cv.Canny(img, 100, 200)
# ker as in kernel
# (3, 3) is the kernel size while uint8 is the datatype
ker = np.ones((3, 3), np.uint8)
# dil as in dilation
# edges as the src, ker is the kernel we set above, number of dilation
dil = cv.dilate(edges, ker, iterations=1)
# setup pytesseract parameters
configs = r'--oem 3 --psm 6'
# feed image to tesseract
result = pytess.image_to_data(dil, output_type=Output.DICT, config=configs, lang='eng')
print(result.keys())
boxes = len(result['text'])
# make a new copy of the dilated image
new_item = dil.copy()
for sequence_number in range(boxes):
    if int(result['conf'][sequence_number]) > 30:  # confidence threshold
        (x, y, w, h) = (result['left'][sequence_number], result['top'][sequence_number],
                        result['width'][sequence_number], result['height'][sequence_number])
        new_item = cv.rectangle(new_item, (x, y), (x + w, y + h), (0, 255, 0), 2)
# detect sentence with tesseract
# pending as rectangle not achieved
cv.imshow("original", img)
cv.imshow("canny", edges)
cv.imshow("dilation", dil)
cv.imshow("capturedText", new_item)
#ignore below this line, it is only for testing
#testobj = Image.fromarray(dil)
#testtext = pytess.image_to_string(testobj, lang='eng')
#print(testtext)
cv.waitKey(0)
cv.destroyAllWindows()
Resultant image:
The testing part of the code returns the results shown below:
a)
Meets
which obviously does not satisfy the objective.
EDIT
After posting the question, I realized I may have approached this the wrong way from the beginning. I should use OpenCV to detect the contour of the door label and isolate the part containing the text before sending whatever is inside that rectangle for OCR recognition, as sketched below.
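A rough sketch of that idea, assuming the largest bright contour is the label (the threshold values are placeholders and the OpenCV 4 findContours signature is assumed):
import cv2 as cv
import pytesseract as pytess

# isolate the label by its largest contour, then OCR only that region
img = cv.imread(r"C:\Users\User\Desktop\dataset\p\Image_31.jpg")
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Otsu binarisation to separate the bright label from the background
_, thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
# OpenCV 4 returns (contours, hierarchy); OpenCV 3 returns three values
contours, _ = cv.findContours(thresh, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
if contours:
    # assume the largest contour is the door label
    label = max(contours, key=cv.contourArea)
    x, y, w, h = cv.boundingRect(label)
    roi = gray[y:y + h, x:x + w]
    print(pytess.image_to_string(roi, config='--psm 6'))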
EDIT2
Now that I have identified the issue thanks to the Stack Overflow members, I'm attempting to add an image rectification/image warping technique to retrieve a straight front view and get better accuracy for the system. Update soon.
EDIT3
After some bug fixing, reducing the confidence constraint, and letting the function draw on the original image, I have achieved the results below. The updated code is attached as well.
import cv2 as cv
import numpy as np
import pytesseract as pytess
from pytesseract import Output
# input of img source
img = cv.imread(r"C:\Users\User\Desktop\dataset\p\Image_31.jpg")
# necessary image color conversion
img2 = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# edges store the canny version of img
edges = cv.Canny(img2, 100, 200)
# ker as in kernel
# (3, 3) is the kernel size while uint8 is the datatype
ker = np.ones((3, 3), np.uint8)
# dil as in dilation
# edges as the src, ker is the kernel we set above, number of dilation
dil = cv.dilate(edges, ker, iterations=1)
# setup pytesseract parameters
configs = r'--oem 3 --psm 6'
# feed image to tesseract
result = pytess.image_to_data(dil, output_type=Output.DICT, config=configs, lang='eng')
# number of detected text boxes
boxes = len(result['text'])
# make a new copy of the dilated image
new_item = dil.copy()
for sequence_number in range(boxes):
    if int(result['conf'][sequence_number]) > 0:  # reduced confidence constraint
        (x, y, w, h) = (result['left'][sequence_number], result['top'][sequence_number],
                        result['width'][sequence_number], result['height'][sequence_number])
        # draw rectangle boxes on the original img
        cv.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
        # Crop the image
        crp = new_item[y:y + h, x:x + w]
        # OCR
        txt = pytess.image_to_string(crp, config=configs)
        # print the recognised text
        print(txt)
        cv.imshow("capturedText", crp)
        cv.waitKey(0)
# cv.imshow("original", img)
# cv.imshow("canny", edges)
# cv.imshow("dilation", dil)
cv.imshow("results", img)
cv.waitKey(0)
cv.destroyAllWindows()

You have found all the detected text in the image:
for sequence_number in range(boxes):
if int(result['conf'][sequence_number]) > 30:
(x, y, w, h) = (result['left'][sequence_number], result['top'][sequence_number],
result['width'][sequence_number], result['height'][sequence_number])
new_item = cv.rectangle(new_item, (x, y), (x + w, y + h), (0, 255, 0), 2)
But you also require the confidence of the current detection to be more than 70%.
If we remove that constraint and OCR each new item, the result will be:
Now if you read:
txt = pytesseract.image_to_string(new_item, config="--psm 6")
print(txt)
OCR will be:
Meeting Room §
This is the output with the current pytesseract version, 0.3.7.
Code:
# Load the libraries
import cv2
import pytesseract
# Load the image
img = cv2.imread("fsUSw.png")
# Convert it to the gray-scale
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# OCR detection
d = pytesseract.image_to_data(gry, config="--psm 6", output_type=pytesseract.Output.DICT)
# Get ROI part from the detection
n_boxes = len(d['level'])
# For each detected part
for i in range(1, 2):
    # Get the localized region
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    # Draw rectangle to the detected region
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 5)
    # Crop the image
    crp = gry[y:y + h, x:x + w]
    # OCR
    txt = pytesseract.image_to_string(crp, config="--psm 6")
    print(txt)
    # Display the cropped image
    cv2.imshow("crp", crp)
    cv2.waitKey(0)
# Display
cv2.imshow("img", img)
cv2.waitKey(0)

I think what you are looking for here is image rectification (warping the image so it looks as if it were taken from a different point of view), and there seem to be tools for this in Python. However, the problem gets more complicated because in your case you need to detect how you want to rectify it. I am not sure how you should go about that.
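For reference, once the four corners of the label are known (for example from an approximated contour), the warp itself is a standard perspective transform. A minimal sketch, where corners and the output size are assumptions:
import cv2
import numpy as np

def rectify(img, corners, width=400, height=200):
    # corners: four (x, y) points of the label in
    # top-left, top-right, bottom-right, bottom-left order
    src = np.float32(corners)
    dst = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
    # map the skewed label onto an upright width x height rectangle
    M = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(img, M, (width, height))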

Related

How to detect colored text on gradient background with pytesseract

I'm currently working on a small OCR bot. I got pretty much everything to work and am now trying to improve the OCR. Specifically, it has problems with two things: the orange/red-ish text on the same-colored gradient and, for some reason, the first 1 of "1/1". Sadly I haven't found anything that worked in my case yet. I've made a small test image, consisting of multiple images, below:
Source Image
Results
Adaptive Threshold
As you can see, the gradient results in a blob that is sometimes big enough to overlap with the first word (see "apprentice"), resulting in garbage.
I've tried many variations and played around with thresholds, blurs, erosion, dilation, box detection with the dilation method, etc., but nothing worked well. The only way I got rid of the blob was using an adaptive threshold, but sadly I wasn't able to get good results using the output image.
If anyone knows how to make the OCR more robust, increase accuracy, and get rid of the blob, I'd appreciate your help. Thanks.
The following code is my 'playground' to figure out a better way:
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = YOUR_PATH
def resize(img, scale_percent=300):
    # use this instead?
    # resize = image = imutils.resize(image, width=300)
    # automatically resizes it about 300% by default
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)
    resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    return resized

def preprocessImage(img, scale=300, threshhold=127):
    """ input RGB colour space """
    # makes results more accurate - inspired from https://stackoverflow.com/questions/58103337/how-to-ocr-image-with-tesseract
    # another resource to improve accuracy - https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html
    # converts from rgb to grayscale then enlarges it
    # applies gaussian blur
    # convert to b&w
    # invert black and white colours (white background, black text)
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    cv2.imshow('grayscale', grayscale)
    resized = resize(grayscale, scale)
    cv2.imshow('resized', resized)
    blurred = cv2.medianBlur(resized, 5)
    #cv2.imshow('median', blurred)
    blurred = cv2.GaussianBlur(resized, (5, 5), 5)
    cv2.imshow('1', blurred)
    cv2.waitKey()
    blackAndWhite = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    cv2.imshow('blackAndWhite', blackAndWhite)
    th3 = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    cv2.floodFill(th3, None, (0, 0), 255)
    cv2.imshow('th3', th3)
    #kernel = np.ones((3, 3), np.uint8)
    #erode = cv2.erode(th3, kernel)
    kernel = np.ones((5, 5), np.uint8)
    #opening = cv2.morphologyEx(blackAndWhite, cv2.MORPH_OPEN, kernel)
    invertedColours = cv2.bitwise_not(blackAndWhite)
    return invertedColours

# excerpt from https://www.youtube.com/watch?v=6DjFscX4I_c
def imageToText(img):
    # returns item name from image, preprocess if needed
    boxes = pytesseract.image_to_data(img)
    num = []
    for count, box in enumerate(boxes.splitlines()):
        if count != 0:
            box = box.split()
            if len(box) == 12:
                text = box[11].strip('#®')
                if text != '':
                    num.append(text)
    text = ' '.join(num)
    ## Alternate method
    # text = pytesseract.image_to_string(img)
    # print("Name:", text)
    return text

if __name__ == "__main__":
    img = cv2.imread("test.png")
    img = preprocessImage(img, scale=300)
    print(imageToText(img))

    ##############################################
    #####          Detecting Words          #####
    ##############################################
    # [  0        1          2            3          4           5          6       7       8        9        10      11  ]
    # ['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text']
    boxes = pytesseract.image_to_data(img)
    # convert back to colored image
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    # draw boxes and text
    for a, b in enumerate(boxes.splitlines()):
        print(b)
        if a != 0:
            b = b.split()
            if len(b) == 12:
                x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
                cv2.putText(img, b[11], (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 255), 2)
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    cv2.imshow('img', img)
    cv2.waitKey(0)
I couldn't get it perfect but almost...
I got a lot of benefit from CLAHE equalization (see the tutorial here), but that wasn't enough; thresholding was still needed. Adaptive techniques didn't work well, but cv2.THRESH_TOZERO gives OK results (see the thresholding tutorial here).
import cv2
from pytesseract import image_to_string, image_to_data
img = cv2.imread('gradient.png', cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (0,0), fx=2.0, fy=2.0)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
img = clahe.apply(img)
img = 255-img # invert image. tesseract prefers black text on white background
ret, img = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO)
cv2.imwrite('output.png', img)
ocr = image_to_string(img, config='--psm 6')
print(ocr)
which gives ocr output
Tool Crafting Part
Apprentice Craft Kit
Adept Craft Kit
Expert Craft Kit
=
Master Craft Kit
1/1

How to find table like structure in image by Open CV

I have an unbordered table like in the picture.
I tried to use the example from this post.
I got this result.
But I need something like this:
How could I tune OpenCV to get the needed result?
You can easily achieve this by using the image_to_data method of pytesseract.
You also need to know:
Pre-processing methods
Page-segmentation-modes
Steps:
Load the image in BGR mode and convert it to gray-scale.
Get the region-of-interest areas.
From each area, get the coordinates and draw a rectangle.
Result:
Code:
# Load the library
import cv2
import pytesseract
# Load the image
img = cv2.imread("1Tksb.jpg")
# Convert to gry-scale
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# OCR detection
d = pytesseract.image_to_data(gry, config="--psm 6", output_type=pytesseract.Output.DICT)
# Get ROI part from the detection
n_boxes = len(d['level'])
# For each detected part
for i in range(n_boxes):
    # Get the localized region
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    # Draw rectangle to the detected region
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 1)
# Display
cv2.imshow("img", img)
cv2.waitKey(0)
If you want to read the text, you can use the image_to_string method.
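For example, a minimal call on the same gray-scale image as above:
# OCR the whole table region as a single block of text
txt = pytesseract.image_to_string(gry, config="--psm 6")
print(txt)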

Get lines above and below each line of text on a landscape image, or boxes over text in an image, without losing resolution, in Python using OpenCV

Hi, I have been trying to develop a tool to put my menu cards through OCR to digitize them. Menu cards can be of various types, where a portrait-oriented page is divided into two, or a landscape-oriented page is divided into multiple columns of menu items. I have managed to gather snippets from around here to process portrait-oriented menu pages, but that code fails for landscape orientation. If I remove the if condition for rotating the image, then instead of giving me a result where the text in the menu card is between two lines, it just processes the image to remove noise, that's all. Let me explain my problem with a few examples. Please guide me through the process of processing menus in the form of images to put them through OCR for digitization. I am using pytesseract for OCR and OpenCV for image processing.
This is what I am using to draw underlines and overlines on the text inside an image:
import cv2
import numpy as np
## (1) read
img = cv2.imread("out-1.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
## (2) threshold
th, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)
## (3) minAreaRect on the nozeros
pts = cv2.findNonZero(threshed)
ret = cv2.minAreaRect(pts)
(cx,cy), (w,h), ang = ret
if w > h:
    w, h = h, w
    ang += 90
## (4) Find rotated matrix, do rotation
M = cv2.getRotationMatrix2D((cx,cy), ang, 1.0)
rotated = cv2.warpAffine(threshed, M, (img.shape[1], img.shape[0]))
## (5) find and draw the upper and lower boundary of each lines
hist = cv2.reduce(rotated,1, cv2.REDUCE_AVG).reshape(-1)
th = 2
H,W = img.shape[:2]
uppers = [y for y in range(H-1) if hist[y]<=th and hist[y+1]>th]
lowers = [y for y in range(H-1) if hist[y]>th and hist[y+1]<=th]
rotated = cv2.cvtColor(rotated, cv2.COLOR_GRAY2BGR)
for y in uppers:
    cv2.line(rotated, (0, y), (W, y), (255, 0, 0), 1)
for y in lowers:
    cv2.line(rotated, (0, y), (W, y), (0, 255, 0), 1)
cv2.imwrite("processed1.png", rotated)
This is what I am using to draw boxes around the text in an image (this code runs fine but needs improvement: it reduces the resolution of the image while drawing boxes over the text, and the box outlines are so thick that the text is sometimes misread by the OCR):
import cv2
import numpy as np
large = cv2.imread('out-1.jpg')
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
# kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
kernel = np.ones((5, 5), np.uint8)
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# using RETR_EXTERNAL instead of RETR_CCOMP
#contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#For opencv 3+ comment the previous line and uncomment the following line
_, contours, hierarchy = cv2.findContours((connected.copy()), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
mask = np.zeros(bw.shape, dtype=np.uint8)
for idx in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[idx])
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)
cv2.imwrite('rec_output.jpg', rgb)
This is how the lines should look for a landscape image as well, but they do not work out.
This is a landscape image example. Here there should be lines as in the image above, clarity should not be compromised, and the partitions inside the menu should be read correctly by the OCR.
When I add boxes over the text for better readability by the OCR, the resolution is compromised by the second piece of code, resulting in rather poor readability; if I don't add boxes, the menu is read horizontally, resulting in a mix-up of menu items and prices.
cv2.pyrDown(large)
is reducing the resolution. I'm not sure why that is being used. Just removing that single line of code gave me the same output without compromising the quality.
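In other words, a minimal change to the start of the second script (everything after these lines stays the same):
import cv2
import numpy as np

large = cv2.imread('out-1.jpg')
# keep the full resolution instead of downsampling with cv2.pyrDown
rgb = large.copy()
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)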

How to detect subscript numbers in an image using OCR?

I am using tesseract for OCR, via the pytesseract bindings. Unfortunately, I encounter difficulties when trying to extract text including subscript-style numbers - the subscript number is interpreted as a letter instead.
For example, in the basic image:
I want to extract the text as "CH3", i.e. I am not concerned about knowing that the number 3 was a subscript in the image.
My attempt at this using tesseract is:
import cv2
import pytesseract
img = cv2.imread('test.jpeg')
# Note that I have reduced the region of interest to the known
# text portion of the image
text = pytesseract.image_to_string(
img[200:300, 200:320], config='-l eng --oem 1 --psm 13'
)
print(text)
Unfortunately, this will incorrectly output
'CHs'
It's also possible to get 'CHa', depending on the psm parameter.
I suspect that this issue is related to the "baseline" of the text being inconsistent across the line, but I'm not certain.
How can I accurately extract the text from this type of image?
Update - 19th May 2020
After seeing Achintha Ihalage's answer, which doesn't provide any configuration options to tesseract, I explored the psm options.
Since the region of interest is known (in this case, I am using EAST detection to locate the bounding box of the text), the psm config option for tesseract, which in my original code treats the text as a single line, may not be necessary. Running image_to_string against the region of interest given by the bounding box above gives the output
CH
3
which can, of course, be easily processed to get CH3.
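A minimal post-processing sketch for that two-line output (the example string stands in for the actual image_to_string result):
# example output from image_to_string on the region of interest
text = "CH\n3\n"
# drop all whitespace, including the line break, and rejoin
joined = "".join(text.split())
print(joined)  # CH3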
This is because the font of the subscript is too small. You could resize the image using a Python package such as cv2 or PIL and use the resized image for OCR, as coded below.
import pytesseract
import cv2
img = cv2.imread('test.jpg')
img = cv2.resize(img, None, fx=2, fy=2) # scaling factor = 2
data = pytesseract.image_to_string(img)
print(data)
OUTPUT:
CH3
You want to apply pre-processing to your image before feeding it into tesseract to increase the accuracy of the OCR. I use a combination of PIL and cv2 to do this here, because cv2 has good filters for blur/noise removal (dilation, erosion, threshold) and PIL makes it easy to enhance the contrast (distinguish the text from the background), and I wanted to show how pre-processing could be done using either... (using both together is not 100% necessary, as shown below). You can write this more elegantly; it's just the general idea.
import cv2
import pytesseract
import numpy as np
from PIL import Image, ImageEnhance
img = cv2.imread('test.jpg')
def cv2_preprocess(image_path):
    img = cv2.imread(image_path)
    # convert to black and white if not already
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # remove noise
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # apply a blur
    # gaussian noise
    img = cv2.threshold(cv2.GaussianBlur(img, (9, 9), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # this can be used for salt and pepper noise (not necessary here)
    #img = cv2.adaptiveThreshold(cv2.medianBlur(img, 7), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    cv2.imwrite('new.jpg', img)
    return 'new.jpg'

def pil_enhance(image_path):
    image = Image.open(image_path)
    contrast = ImageEnhance.Contrast(image)
    contrast.enhance(2).save('new2.jpg')
    return 'new2.jpg'
img = cv2.imread(pil_enhance(cv2_preprocess('test.jpg')))
text = pytesseract.image_to_string(img)
print(text)
Output:
CH3
The cv2 pre-process produces an image that looks like this:
The enhancement with PIL gives you:
In this specific example, you can actually stop after the cv2_preprocess step because that is clear enough for the reader:
img = cv2.imread(cv2_preprocess('test.jpg'))
text = pytesseract.image_to_string(img)
print(text)
output:
CH3
But if you are working with things that don't necessarily start with a white background (i.e. grayscaling converts to light grey instead of white), I have found the PIL step really helps there.
The main point is that the methods to increase Tesseract's accuracy are typically:
fix DPI (rescaling)
fix brightness/noise of the image
fix text size/lines (skewing/warping text)
Doing one of these, or all three of them, will help... but the brightness/noise fix can be more generalizable than the other two (at least in my experience). A minimal deskew sketch is shown below.
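For the skew/warp point, a rough deskew sketch, assuming dark text on a light background (the angle handling may need adjusting for your OpenCV version):
import cv2
import numpy as np

def deskew(gray):
    # estimate the dominant text angle from the minimum-area rectangle of the ink pixels
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    coords = cv2.findNonZero(thresh)
    angle = cv2.minAreaRect(coords)[-1]
    # minAreaRect angle conventions differ between OpenCV versions;
    # this mapping assumes the older [-90, 0) convention
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    h, w = gray.shape[:2]
    M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    return cv2.warpAffine(gray, M, (w, h), flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)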
I think this approach can be more suitable for the general situation.
import cv2
import pytesseract
from pathlib import Path
image = cv2.imread('test.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]  # (suitable for sharper black and white pictures)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1] # is OpenCV2.4 or OpenCV3
result_list = []
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 200:
        detect_area = image[y:y + h, x:x + w]
        # detect_area = cv2.GaussianBlur(detect_area, (3, 3), 0)
        predict_char = pytesseract.image_to_string(detect_area, lang='eng', config='--oem 0 --psm 10')
        result_list.append((x, predict_char))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)
result = ''.join([char for _, char in sorted(result_list, key=lambda _x: _x[0])])
print(result) # CH3
output_dir = Path('./temp')
output_dir.mkdir(parents=True, exist_ok=True)
cv2.imwrite(f"{output_dir/Path('image.png')}", image)
cv2.imwrite(f"{output_dir/Path('clean.png')}", thresh)
MORE REFERENCES
I strongly suggest you refer to the following examples, which are useful references for OCR:
Get the location of all text present in image using opencv
Using YOLO or other image recognition techniques to identify all alphanumeric text present in images

How to use openCV and HAAR Cascades to blur faces?

I would like to know if there is a way to blur the faces that have been automatically identified by the Haar cascade face classifier.
Using the code below, I'm able to detect the faces, crop the image around a face, or draw a rectangle on it.
image = cv2.imread(imagepath)
# Specify the trained cascade classifier
face_cascade_name = "./haarcascade_frontalface_alt.xml"
# Create a cascade classifier
face_cascade = cv2.CascadeClassifier()
# Load the specified classifier
face_cascade.load(face_cascade_name)
#Preprocess the image
grayimg = cv2.cvtColor(image, cv2.cv.CV_BGR2GRAY)
grayimg = cv2.equalizeHist(grayimg)
#Run the classifiers
faces = face_cascade.detectMultiScale(grayimg, 1.1, 2, 0|cv2.cv.CV_HAAR_SCALE_IMAGE, (30, 30))
print "Faces detected"
if len(faces) != 0:  # If there are faces in the images
    for f in faces:  # For each face in the image
        # Get the origin co-ordinates and the length and width till where the face extends
        x, y, w, h = [v for v in f]
        # Draw rectangles around all the faces
        cv2.rectangle(image, (x, y), (x+w, y+h), (255, 255, 255))
        sub_face = image[y:y+h, x:x+w]
        for i in xrange(1, 31, 2):
            cv2.blur(sub_face, (i, i))
        face_file_name = "./face_" + str(y) + ".jpg"
        cv2.imwrite(face_file_name, sub_face)
But I would like to blur the faces of the people so they can't be recognized.
Do you have an idea of how to do that?
Thanks for your help
Arnaud
I finally succeeded in doing what I wanted.
To do that, I applied a Gaussian blur as Hammer suggested.
The code is:
image = cv2.imread(imagepath)
result_image = image.copy()
# Specify the trained cascade classifier
face_cascade_name = "./haarcascade_frontalface_alt.xml"
# Create a cascade classifier
face_cascade = cv2.CascadeClassifier()
# Load the specified classifier
face_cascade.load(face_cascade_name)
#Preprocess the image
grayimg = cv2.cvtColor(image, cv2.cv.CV_BGR2GRAY)
grayimg = cv2.equalizeHist(grayimg)
#Run the classifiers
faces = face_cascade.detectMultiScale(grayimg, 1.1, 2, 0|cv2.cv.CV_HAAR_SCALE_IMAGE, (30, 30))
print "Faces detected"
if len(faces) != 0:  # If there are faces in the images
    for f in faces:  # For each face in the image
        # Get the origin co-ordinates and the length and width till where the face extends
        x, y, w, h = [v for v in f]
        # get the rectangle img around all the faces
        cv2.rectangle(image, (x, y), (x+w, y+h), (255, 255, 0), 5)
        sub_face = image[y:y+h, x:x+w]
        # apply a gaussian blur on this new rectangle image
        sub_face = cv2.GaussianBlur(sub_face, (23, 23), 30)
        # merge this blurry rectangle into our final image
        result_image[y:y+sub_face.shape[0], x:x+sub_face.shape[1]] = sub_face
        face_file_name = "./face_" + str(y) + ".jpg"
        cv2.imwrite(face_file_name, sub_face)
# cv2.imshow("Detected face", result_image)
cv2.imwrite("./result.png", result_image)
Arnaud
The whole end of your code can be replaced by :
image[y:y+h, x:x+w] = cv2.blur(image[y:y+h, x:x+w], (23, 23))
instead of :
# Get the origin co-ordinates and the length and width till where the face extends
x, y, w, h = [ v for v in f ]
# get the rectangle img around all the faces
cv2.rectangle(image, (x,y), (x+w,y+h), (255,255,0), 5)
sub_face = image[y:y+h, x:x+w]
# apply a gaussian blur on this new recangle image
sub_face = cv2.GaussianBlur(sub_face,(23, 23), 30)
# merge this blurry rectangle to our final image
result_image[y:y+sub_face.shape[0], x:x+sub_face.shape[1]] = sub_face
Especially since you don't require a circular mask, it's (to me) much easier to read.
PS: Sorry for not commenting, I don't have enough reputation to do it. Even though the post is 5 years old, I guess this may be worth it, as I found it while looking into this particular question.
Note: neural networks (like ResNet) are now more accurate than Haar cascades at detecting faces, and they are also integrated into OpenCV now. It might be better than using the solutions mentioned in this question.
However, the code to blur / pixelate a face is still applicable.
You can also pixelate the region of the face by adding squares that contain the average of the RGB values of the zones in the face.
A function performing this could look like this:
def pixelate_image(image: np.ndarray, nb_blocks=5, in_place=False) -> np.ndarray:
    """Return a pixelated version of a picture (need to be fed with a face to pixelate)"""
    # To pixelate, we will split into a given number of blocks
    # For each block, we will compute the average of RGB values of the block
    # And then we can just replace with a rectangle of this color
    # divide the input image into NxN blocks
    if not in_place:
        image = np.copy(image)
    h, w = image.shape[:2]
    blocks = tuple(
        np.linspace(0, d, nb_blocks + 1, dtype="int") for d in (w, h)
    )
    for i, j in product(*[range(1, len(s)) for s in blocks]):
        # compute the starting and ending (x, y)-coordinates
        # for the current block
        start = blocks[0][i - 1], blocks[1][j - 1]
        end = blocks[0][i], blocks[1][j]
        # extract the ROI using NumPy array slicing, compute the
        # mean of the ROI, and then draw a rectangle with the
        # mean RGB values over the ROI in the original image
        roi = image[start[1]:end[1], start[0]:end[0]]
        bgr = [int(x) for x in cv2.mean(roi)[:3]]
        cv2.rectangle(image, start, end, bgr, -1)
    return image
You then just need to use it in a function like this (updated to Python 3 with pathlib and type hints):
from itertools import product
from pathlib import Path
from typing import Union
import cv2
import numpy as np

PathLike = Union[Path, str]
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")

def pixelate_faces_haar(img_path: PathLike, dest: Path):
    """Pixelate faces of people with OpenCV and save to a destination file"""
    img = cv2.imread(str(img_path))
    # To use cascade, we need to use Grayscale images
    # We can then detect faces
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    for (x, y, width, height) in faces:
        roi = img[y:y+height, x:x+width]
        pixelate_image(roi, 15, in_place=True)
    dest.parent.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(dest), img)
    print(f"Saved pixelated version of {img_path} to {dest}")
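Example call (the file names are placeholders):
pixelate_faces_haar("people.jpg", Path("output/people_pixelated.jpg"))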
