How to add padding to "cv2.getStructuringElement" margins - python

I am using OpenCV to dynamically detect barcodes on licenses. The cv2 method getStructingElement creates a rectangle around the barcode. How can I add padding to all sides of the barcode borders? The contour is fit too tightly on the barcode, such that I am losing data from the edges. The barcode is in pdf417 format, which is a 2D barcode.
Tightly fit Detected Barcode:
# import the necessary packages
import numpy as np
import imutils
import cv2
# load the image and convert it to grayscale
image = cv2.imread("image.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the Scharr gradient magnitude representation of the images
# in both the x and y direction using OpenCV 2.4
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
gradY = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
gradX = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)
# subtract the y-gradient from the x-gradient
gradient = cv2.subtract(gradX, gradY)
gradient = cv2.convertScaleAbs(gradient)
# blur and threshold the image
blurred = cv2.blur(gradient, (9, 9))
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
# construct a closing kernel and apply it to the thresholded image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(27, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# perform a series of erosions and dilations
closed = cv2.erode(closed, None, iterations = 4)
closed = cv2.dilate(closed, None, iterations = 4)
# find the contours in the thresholded image, then sort the contours
# by their area, keeping only the largest one
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key = cv2.contourArea, reverse = True)[0]
# print(c)
# compute the rotated bounding box of the largest contour
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
box = np.int0(box)
# draw a bounding box arounded the detected barcode and display the
# image
cv2.drawContours(image, [box], -1, (0, 255, 0), 3)
# draw a bounding box
min_y = int(np.min(box[:,-1]))
max_y = int(np.max(box[:,-1]))
min_x = int(np.min(box[:,0]))
max_x = int(np.max(box[:,0]))
image = image[min_y:max_y, min_x:max_x]
cv2.imshow("Image", image)
cv2.waitKey(0)
Test Image:

Related

Python - cv2 find Contours

I would like to find all the big elements in the document, but I do not know how to control the size (the document is downloaded from the Internet :))
I have a document
And I wrote a simple code
import cv2
import pytesseract
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7, 7), 0)
thresh = cv2.threshold(
blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernal = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 50))
dilate = cv2.dilate(thresh, kernal, iterations=1)
cv2.imwrite('1_dilated.png', dilate)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=lambda x: cv2.boundingRect(x)[1])
for c in cnts:
x, y, w, h = cv2.boundingRect(c)
if h > 100 and w > 100:
roi = image[y:y+h, x:x+w]
cv2.rectangle(image, (x, y), (x+w, y+h), (36, 255, 12), 2)
# ocr = pytesseract.image_to_string(roi)
# print(ocr)
cv2.imwrite('1_boxes4.png', image)
But only detects it
And I would like this
How to control the size of the detected area ?
Thank you very much for all your comments
You are close, but you need to increase the number of iterations of the dilate operation. Also, a rectangular structuring element might help better forming the blobs of text. Let's check out some possible improvements of your code:
# imports:
import cv2
import numpy as np
# Set image path
imagePath = "D://opencvImages//"
imageName = "F74Yq.png"
# Read image:
inputImage = cv2.imread(imagePath + imageName)
# Store a deeep copy for results:
inputCopy = inputImage.copy()
# Convert BGR to grayscale:
grayInput = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Threshold via Otsu
_, binaryImage = cv2.threshold(grayInput, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
The first part produces the binary image of the input image, there's nothing fancy going on here - just a direct thresholding via Otsu's method. This is the binary image obtained:
Now, let's apply the dilate operation. Let's use a 9 x 9 rectangular kernel and set the number of iterations to 5. Gotta be careful you don't dilate too much, because blobs of text from different portions of the document could end up joined:
# Set kernel (structuring element) size:
kernelSize = (9, 9)
# Set operation iterations:
opIterations = 5
# Get the structuring element:
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernelSize)
# Perform Dilate:
dilateImage = cv2.morphologyEx(binaryImage, cv2.MORPH_DILATE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
This is the result:
Ok, now let's just detect external contours and get their bounding boxes so we can draw rectangles around the target areas. Note that I'm drawing the rectangles on a deep copy of the input:
# Find the contours on the binary image:
contours, hierarchy = cv2.findContours(dilateImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Look for the outer bounding boxes (no children):
for _, c in enumerate(contours):
# Get the contours bounding rectangle:
boundRect = cv2.boundingRect(c)
# Get the dimensions of the bounding rectangle:
rectX = boundRect[0]
rectY = boundRect[1]
rectWidth = boundRect[2]
rectHeight = boundRect[3]
# Set bounding rectangle:
color = (0, 0, 255)
cv2.rectangle( inputCopy, (int(rectX), int(rectY)),
(int(rectX + rectWidth), int(rectY + rectHeight)), color, 5 )
cv2.imshow("Bounding Rectangles", inputCopy)
cv2.waitKey()
This is the final result:

detect an initial/a sketch drawing on a text page

I would like to get the coordinates of the box around the initial ("H") on the following page (and similar ones with other initials, so opencv template matching is not an option):
Following this tutorial, I tried to solve the problem with opencv contours:
import cv2
import matplotlib.pyplot as plt
page = "image.jpg"
# read the image
image = cv2.imread(page)
# convert to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# create a binary thresholded image
_, binary = cv2.threshold(gray, 0,150,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# find the contours from the thresholded image
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# draw all contours
image = cv2.drawContours(image, contours, 3, (0, 255, 0), 2)
plt.savefig("result.png")
The result is of course not exactly what I wanted:
Does anyone know of an viable algorithm (and possibly an implementation thereof) that could provide an easy solution to my task?
You can find the target area by filtering your contours. Now, there's at least two filtering criteria that you can use. One is filter by area - that is, discard too small and too large contours until you get the contour you are looking for. The other one is by computing the extent of every contour. The extent is the ratio of the contour's area to its bounding rectangle area. You are looking for a square-like contour, so its extent should be close to 1.0.
Let's see the code:
# imports:
import cv2
import numpy as np
# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)
# Deep copy for results:
inputImageCopy = inputImage.copy()
# Convert RGB to grayscale:
grayscaleImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Get binary image via Otsu:
_, binaryImage = cv2.threshold(grayscaleImage, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
The first portion of the code gets you a binary image that you can use as a mask to compute contours:
Now, let's filter contours. Let's use the area approach first. You need to define a range of minimum area and maximum area to filter everything that does not fall in this range. I've heuristically determined a range of areas from 30000 px to 150000 px:
# Find the contours on the binary image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Look for the outer bounding boxes (no children):
for _, c in enumerate(contours):
# Get blob area:
currentArea = cv2.contourArea(c)
print("Contour Area: "+str(currentArea))
# Set an area range:
minArea = 30000
maxArea = 150000
if minArea < currentArea < maxArea:
# Get the contour's bounding rectangle:
boundRect = cv2.boundingRect(c)
# Get the dimensions of the bounding rect:
rectX = boundRect[0]
rectY = boundRect[1]
rectWidth = boundRect[2]
rectHeight = boundRect[3]
# Set bounding rect:
color = (0, 0, 255)
cv2.rectangle( inputImageCopy, (int(rectX), int(rectY)),
(int(rectX + rectWidth), int(rectY + rectHeight)), color, 2 )
cv2.imshow("Rectangles", inputImageCopy)
cv2.waitKey(0)
Once you successfully filter the area, you can then compute the bounding rectangle of the contour with cv2.boundingRect. You can retrieve the bounding rectangle's x, y (top left) coordinates as well as its width and height. After that just draw the rectangle on a deep copy of the original input.
Now, let's see the second option, using the contour's extent. The for loop gets modified as follows:
# Look for the outer bounding boxes (no children):
for _, c in enumerate(contours):
# Get blob area:
currentArea = cv2.contourArea(c)
# Get the contour's bounding rectangle:
boundRect = cv2.boundingRect(c)
# Get the dimensions of the bounding rect:
rectX = boundRect[0]
rectY = boundRect[1]
rectWidth = boundRect[2]
rectHeight = boundRect[3]
# Calculate extent:
extent = float(currentArea)/(rectWidth *rectHeight)
print("Extent: " + str(extent))
# Set the extent filter, look for an extent close to 1.0:
delta = abs(1.0 - extent)
epsilon = 0.1
if delta < epsilon:
# Set bounding rect:
color = (0, 0, 255)
cv2.rectangle( inputImageCopy, (int(rectX), int(rectY)),
(int(rectX + rectWidth), int(rectY + rectHeight)), color, 2 )
cv2.imshow("Rectangles", inputImageCopy)
cv2.waitKey(0)
Both approaches yield this result:
You almost have it. You just need to filter contours on area and aspect ratio. Here is my approach in Python/OpenCV.
Input:
import cv2
import numpy as np
# read image as grayscale
img = cv2.imread('syriados.jpg')
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold to binary
#thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# invert threshold
thresh = 255 - thresh
# apply morphology to remove small white regions and to close the rectangle boundary
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# find contours
result = img.copy()
cntrs = cv2.findContours(morph, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
# filter on area and aspect ratio
for c in cntrs:
area = cv2.contourArea(c)
x,y,w,h = cv2.boundingRect(c)
if area > 10000 and abs(w-h) < 100:
cv2.drawContours(result, [c], 0, (0,0,255), 2)
# write results
cv2.imwrite("syriados_thresh.jpg", thresh)
cv2.imwrite("syriados_morph.jpg", morph)
cv2.imwrite("syriados_box.jpg", result)
# show results
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.imshow("result", result)
cv2.waitKey(0)
Threshold image:
Morphology image:
Resulting contour image:
To get a result like this:
You'll need to detect the contour in the image with the second to the greatest area, as the one possessing the greatest area would be the border of the image.
So with the list of contours, we can get the one with the second greatest area via the built-in sorted method, using the cv2.contourArea method as the custom key:
import cv2
import numpy as np
def process(img):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(img_gray, (7, 7), 2)
img_canny = cv2.Canny(img_blur, 50, 50)
kernel = np.ones((6, 6))
img_dilate = cv2.dilate(img_canny, kernel, iterations=1)
img_erode = cv2.erode(img_dilate, kernel, iterations=2)
return img_erode
def get_contours(img):
contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnt = sorted(contours, key=cv2.contourArea)[-2]
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
cv2.drawContours(img, [approx], -1, (0, 255, 0), 2)
page = "image.jpg"
image = cv2.imread(page)
get_contours(image)
cv2.imshow("Image", image)
cv2.waitKey(0)
The above only puts the area of the contours into consideration; if you want more reliable results, you can make it so that it will only detect contours that are 4-sided.

Using OpenCV to find a rotated rectangle of a certain aspect ratio?

I am not having difficulty transforming a found box, it is the fact that I am not able to detect the box in the first place when it is at an angle.
Here is a sample image I want the largest ~1230:123 rectangle in the image the problem is the rectangle can be rotated.
Here is a picture of a rotated barcode that I am unable to detect:
The function I have been using to process uses contour area just looks for the largest rectangle.
What methods should I use to look for a rotated rectangle so that even when rotated I can detect it?
#PYTHON 3.6 Snippet for Image Processing
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the Scharr gradient magnitude representation of the images
# in both the x and y direction using OpenCV 2.4
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
gradX = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)
gradY = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
# subtract the y-gradient from the x-gradient
gradient = cv2.subtract(gradX, gradY)
gradient = cv2.convertScaleAbs(gradient)
# blur and threshold the image
blurred = cv2.blur(gradient, (8, 8))
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
# construct a closing kernel and apply it to the thresholded image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# perform a series of erosions and dilations
closed = cv2.erode(closed, None, iterations = 4)
closed = cv2.dilate(closed, None, iterations = 4)
# find the contours in the thresholded image, then sort the contours
# by their area, keeping only the largest one
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key = cv2.contourArea, reverse = True)[0]
# compute the rotated bounding box of the largest contour
rect = cv2.minAreaRect(c)
You don't need all the preprocessing (like Sobel, erode, dilate) for finding before executing findContours.
findContours works better when contours are full (filled with white color) instead of having just the edges.
I suppose you can keep the code from cv2.findContours to the end, and get the result you are looking for.
You may use the following stages:
Apply binary threshold using Otsu's thresholding (just in case image is not a binary image).
Execute cv2.findContours, and Find the contour with the maximum area.
Use cv2.minAreaRect for finding the minimum area bounding rectangle.
Here is a code sample:
import numpy as np
import cv2
img = cv2.imread('img.png', cv2.IMREAD_GRAYSCALE) # Read input image as gray-scale
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) # Apply threshold using Otsu's thresholding (just in case image is not a binary image).
# Find contours in img.
cnts = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2] # [-2] indexing takes return value before last (due to OpenCV compatibility issues).
# Find the contour with the maximum area.
c = max(cnts, key=cv2.contourArea)
# Find the minimum area bounding rectangle
# https://stackoverflow.com/questions/18207181/opencv-python-draw-minarearect-rotatedrect-not-implemented
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
# Convert image to BGR (just for drawing a green rectangle on it).
bgr_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.drawContours(bgr_img, [box], 0, (0, 255, 0), 2)
# Show images for debugging
cv2.imshow('bgr_img', bgr_img)
cv2.waitKey()
cv2.destroyAllWindows()
Result:
Note: The largest contour seems to be a parallelogram and not a perfect rectangle.

How to rotate an image to align the text for extraction?

I am using pytessearct to extract the text from images. But it doesn't work on images which are inclined. Consider the image given below:
Here is the code to extract text, which is working fine on images which are not inclined.
img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
def findSignificantContours (img, edgeImg):
contours, heirarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Find level 1 contours
level1 = []
for i, tupl in enumerate(heirarchy[0]):
# Each array is in format (Next, Prev, First child, Parent)
# Filter the ones without parent
if tupl[3] == -1:
tupl = np.insert(tupl, 0, [i])
level1.append(tupl)
significant = []
tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small
for tupl in level1:
contour = contours[tupl[0]];
area = cv2.contourArea(contour)
if area > tooSmall:
significant.append([contour, area])
# Draw the contour on the original image
cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1)
significant.sort(key=lambda x: x[1])
#print ([x[1] for x in significant]);
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi = img[y:y+h,x:x+w]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
text = pytesseract.image_to_string(roi);
print(text); print("\n"); print(pytesseract.image_to_string(thresh));
print("\n")
return [x[0] for x in significant];
edgeImg_8u = np.asarray(thresh, np.uint8)
# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)
#Finally remove the background
img[mask] = 0;
Tesseract can't extract the text from this image. Is there a way I can rotate it to align the text perfectly and then feed it to pytesseract? Please let me know if my question require any more clarity.
Here's a simple approach:
Obtain binary image. Load image, convert to grayscale,
Gaussian blur, then Otsu's threshold.
Find contours and sort for largest contour. We find contours then filter using contour area with cv2.contourArea() to isolate the rectangular contour.
Perform perspective transform. Next we perform contour approximation with cv2.contourArea() to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.
Binary image
Result
To actually extract the text, take a look at
Use pytesseract OCR to recognize text from an image
Cleaning image for OCR
Detect text area in an image using python and opencv
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
To Solve this problem you can also use minAreaRect api in opencv which will give you a minimum area rotated rectangle with an angle of rotation. You can then get the rotation matrix and apply warpAffine for the image to straighten it. I have also attached a colab notebook which you can play around on.
Colab notebook : https://colab.research.google.com/drive/1SKxrWJBOHhGjEgbR2ALKxl-dD1sXIf4h?usp=sharing
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
def rotate_image(image, angle):
image_center = tuple(np.array(image.shape[1::-1]) / 2)
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
return result
img = cv2.imread("/content/sxJzw.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
mask = np.zeros((img.shape[0], img.shape[1]))
blur = cv2.GaussianBlur(gray, (5,5),0)
ret, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
largest_countour = max(contours, key = cv2.contourArea)
binary_mask = cv2.drawContours(mask, [largest_countour], 0, 1, -1)
new_img = img * np.dstack((binary_mask, binary_mask, binary_mask))
minRect = cv2.minAreaRect(largest_countour)
rotate_angle = minRect[-1] if minRect[-1] < 0 else -minRect[-1]
new_img = rotate_image(new_img, rotate_angle)
cv2_imshow(new_img)

Finding the corners of a rectangle

I'm trying to get the corners of this rectangle:
.
I tried using cv2.cornerHarris(rectangle, 2, 3, 0.04), but the left edges are not showed due to image brightness, I guess. So I tried applying a threshold before using cornerHarris, but the image produced showed a lot of vertices along the edges, not being possible to filter the corners.
I know that I need to filter it before using cornerHarris, but I don't know how. Could someone help me with this problem?
Ps. I've already tried to use blur, but it also doesn't work.
import cv2
import numpy as np
import matplotlib.pyplot as plt
rectangle = cv2.imread('rectangle.png', cv2.IMREAD_GRAYSCALE)
rectangle = np.where(rectangle > np.mean(rectangle), 255, 0).astype(np.uint8)
dst_rectangle = cv2.cornerHarris(rectangle, 2, 3, 0.04)
dst_rectangle = cv2.dilate(dst_rectangle, None)
mask = np.where(dst_rectangle > 0.01*np.max(dst_rectangle), 255, 0).astype(np.uint8)
points = np.nonzero(mask)
plt.imshow(dst_rectangle, cmap='gray')
plt.plot(points[1], points[0], 'or')
plt.show()
I would approach it differently by getting the corners of the rotated bounding box of the contour after adaptive thresholding. Here is my code in Python/OpenCV.
Input:
import cv2
import numpy as np
# read image
img = cv2.imread("rectangle.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255-gray
# do adaptive threshold on gray image
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 17, 1)
thresh = 255-thresh
# apply morphology
kernel = np.ones((3,3), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# separate horizontal and vertical lines to filter out spots outside the rectangle
kernel = np.ones((7,3), np.uint8)
vert = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
kernel = np.ones((3,7), np.uint8)
horiz = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
# combine
rect = cv2.add(horiz,vert)
# thin
kernel = np.ones((3,3), np.uint8)
rect = cv2.morphologyEx(rect, cv2.MORPH_ERODE, kernel)
# get largest contour
contours = cv2.findContours(rect, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for c in contours:
area_thresh = 0
area = cv2.contourArea(c)
if area > area_thresh:
area = area_thresh
big_contour = c
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(big_contour)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
print(box)
# draw rotated rectangle on copy of img
rot_bbox = img.copy()
cv2.drawContours(rot_bbox,[box],0,(0,0,255),2)
# write img with red rotated bounding box to disk
cv2.imwrite("rectangle_thresh.png", thresh)
cv2.imwrite("rectangle_outline.png", rect)
cv2.imwrite("rectangle_bounds.png", rot_bbox)
# display it
cv2.imshow("IMAGE", img)
cv2.imshow("THRESHOLD", thresh)
cv2.imshow("MORPH", morph)
cv2.imshow("VERT", vert)
cv2.imshow("HORIZ", horiz)
cv2.imshow("RECT", rect)
cv2.imshow("BBOX", rot_bbox)
cv2.waitKey(0)
Thresholded Image:
Rectangle Region Extracted:
Rotated Bounding Box on Image:
Rotated Bounding Box Corners:
[[446 335]
[163 328]
[168 117]
[451 124]]
ADDITION:
Here is a slightly shorter version of the code, which is achievable by adding some gaussian blurring before thresholding.
import cv2
import numpy as np
# read image
img = cv2.imread("rectangle.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255-gray
# blur image
blur = cv2.GaussianBlur(gray, (3,3), 0)
# do adaptive threshold on gray image
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 75, 2)
thresh = 255-thresh
# apply morphology
kernel = np.ones((5,5), np.uint8)
rect = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
rect = cv2.morphologyEx(rect, cv2.MORPH_CLOSE, kernel)
# thin
kernel = np.ones((5,5), np.uint8)
rect = cv2.morphologyEx(rect, cv2.MORPH_ERODE, kernel)
# get largest contour
contours = cv2.findContours(rect, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for c in contours:
area_thresh = 0
area = cv2.contourArea(c)
if area > area_thresh:
area = area_thresh
big_contour = c
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(big_contour)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
for p in box:
pt = (p[0],p[1])
print(pt)
# draw rotated rectangle on copy of img
rot_bbox = img.copy()
cv2.drawContours(rot_bbox,[box],0,(0,0,255),2)
# write img with red rotated bounding box to disk
cv2.imwrite("rectangle_thresh.png", thresh)
cv2.imwrite("rectangle_outline.png", rect)
cv2.imwrite("rectangle_bounds.png", rot_bbox)
# display it
cv2.imshow("IMAGE", img)
cv2.imshow("THRESHOLD", thresh)
cv2.imshow("RECT", rect)
cv2.imshow("BBOX", rot_bbox)
cv2.waitKey(0)
Thresholded Image:
Rectangle Region Extracted:
Rotated Bounding Box on Image:
Rotated Bounding Box Corners:
(444, 335)
(167, 330)
(170, 120)
(448, 125)
Here's a simple approach:
Obtain binary image. We load the image, grayscale, Gaussian blur, then adaptive threshold.
Morphological operations. We create a rectangular kernel and morph open to remove the small noise
Find distorted rectangle contour and draw onto a mask. Find contours, determine rotated bounding box, and draw onto a blank mask
Find corners. We use the Shi-Tomasi Corner Detector already implemented as cv2.goodFeaturesToTrack which is supposedly shows better results compared to the Harris Corner Detector
Here's a visualization of each step:
Binary image
Morph open
Find rotated rectangle contour and draw/fill onto a blank mask
Draw rotated rectangle and corners to get result
Corner coordinates
(448.0, 337.0)
(164.0, 332.0)
(452.0, 123.0)
(168.0, 118.0)
Code
import cv2
import numpy as np
# Load image, grayscale, Gaussian blur, adaptive threshold
image = cv2.imread("1.png")
mask = np.zeros(image.shape, dtype=np.uint8)
gray = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 51, 3)
# Morph open
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Find distorted rectangle contour and draw onto a mask
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
cv2.fillPoly(mask, [box], (255,255,255))
# Find corners on the mask
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
corners = cv2.goodFeaturesToTrack(mask, maxCorners=4, qualityLevel=0.5, minDistance=150)
for corner in corners:
x,y = corner.ravel()
cv2.circle(image,(x,y),8,(255,120,255),-1)
print("({}, {})".format(x,y))
cv2.imshow("thresh", thresh)
cv2.imshow("opening", opening)
cv2.imshow("mask", mask)
cv2.imshow("image", image)
cv2.waitKey(0)
You can try with an adaptive threshold. Then you may either use cornerHarris if you only need corners, or depending on what you need to do next, you could also find useful findContours, which returns a list of bounding boxes
I was able to locate 3 out of the 4 points, the 4th point can be found easily given the other three points since it's rectangle. Here is my solution:
import cv2
import numpy as np
img = cv2.imread('6dUIr.png',1)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
#smooth the image
kernel = np.ones((5,5),np.float32)/25
gray = cv2.filter2D(gray,-1,kernel)
#histogram equalization
clahe = cv2.createCLAHE(clipLimit=1.45, tileGridSize=(4,4))
cl1 = clahe.apply(gray)
#find edges
edges = cv2.Canny(cl1,4,100)
#find corners
dst = cv2.cornerHarris(edges,2,3,0.04)
#result is dilated for marking the corners, not important
dst = cv2.dilate(dst,None)
# Threshold for an optimal value, it may vary depending on the image.
img[dst>0.25*dst.max()]=[0,0,255]
cv2.imshow('edges', edges)
cv2.imshow('output', img)
# cv2.imshow('Histogram equalized', img_output)
cv2.waitKey(0)
The code has many hard coded thresholds but it's a good start.

Categories