I'm working on a personal project using OpenCV in Python. I want to detect a Sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
Instead, I got a black image as the result:
The code is:
import cv2
import numpy as np
def find_biggest_blob(outerBox):
max = -1
maxPt = (0, 0)
h, w = outerBox.shape[:2]
mask = np.zeros((h + 2, w + 2), np.uint8)
for y in range(0, h):
for x in range(0, w):
if outerBox[y, x] >= 128:
area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))
#cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))
image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
cv2.imwrite(image_path, outerBox)
cv2.imshow(image_path, outerBox)
def main():
image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
find_biggest_blob(image)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we apply an adaptive threshold to obtain a binary image
Next we find contours and filter using contour area. Here's the detected board
Now to get a top-down view of the image, we perform a perspective transform. Here's the result
import cv2
import numpy as np
def perspective_transform(image, corners):
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,3)
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
transformed = perspective_transform(original, approx)
break
cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution that will generalize to any image whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting in different areas; a short comparison follows this list)
Identify the Corners of the large square
Perspective transform of the image to the final square image
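As a quick illustration of the thresholding step, here is a minimal sketch comparing a normal (global) threshold with an adaptive one; it assumes `gray` already holds the grayscale puzzle image, and the block size and constant are just example values:
import cv2
# Global thresholding: one cutoff for the whole image, so a shadowed corner
# of the page can disappear entirely.
_, global_binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
# Adaptive thresholding: the cutoff is computed per neighbourhood, so uneven
# lighting across the page is tolerated.
adaptive_binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)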
Depending on the amount of skew in the original image, the corners identified may be out of order, so we need to arrange them in the correct order. The method used here is to find the centroid of the large square and work out the order of the corners relative to it.
Here is the code:
import cv2
import numpy as np
# Helper functions for getting square image
def euclidian_distance(point1, point2):
# Calculates the Euclidean distance between point1 and point2;
# used to measure the length of the four sides of the square
distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
return distance
def order_corner_points(corners):
# The points obtained from contours may not be in order because of the skewness of the image, or
# because of the camera angle. This function returns a list of corners in the right order
sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
sort_corners = [list(ele) for ele in sort_corners]
x, y = [], []
for i in range(len(sort_corners[:])):
x.append(sort_corners[i][0])
y.append(sort_corners[i][1])
centroid = [sum(x) / len(x), sum(y) / len(y)]
for _, item in enumerate(sort_corners):
if item[0] < centroid[0]:
if item[1] < centroid[1]:
top_left = item
else:
bottom_left = item
elif item[0] > centroid[0]:
if item[1] < centroid[1]:
top_right = item
else:
bottom_right = item
ordered_corners = [top_left, top_right, bottom_right, bottom_left]
return np.array(ordered_corners, dtype="float32")
def image_preprocessing(image, corners):
# This function undertakes all the preprocessing of the image and returns the transformed (top-down) image
ordered_corners = order_corner_points(corners)
print("ordered corners: ", ordered_corners)
top_left, top_right, bottom_right, bottom_left = ordered_corners
# Determine the widths and heights ( Top and bottom ) of the image and find the max of them for transform
width1 = euclidian_distance(bottom_right, bottom_left)
width2 = euclidian_distance(top_right, top_left)
height1 = euclidian_distance(top_right, bottom_right)
height2 = euclidian_distance(top_left, bottom_left)
width = max(int(width1), int(width2))
height = max(int(height1), int(height2))
# To find the matrix for warp perspective function we need dimensions and matrix parameters
dimensions = np.array([[0, 0], [width, 0], [width, width],
[0, width]], dtype="float32")
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
transformed_image = cv2.warpPerspective(image, matrix, (width, width))
# Chances are you may want to resize the result to a specific size. If not, you may ignore the following line
transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)
return transformed_image
# main function
def get_square_box_from_image(image):
# This function returns the top-down view of the puzzle.
#
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)
corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
corners = corners[0] if len(corners) == 2 else corners[1]
corners = sorted(corners, key=cv2.contourArea, reverse=True)
for corner in corners:
length = cv2.arcLength(corner, True)
approx = cv2.approxPolyDP(corner, 0.015 * length, True)
print(approx)
puzzle_image = image_preprocessing(image, approx)
break
return puzzle_image
# Call the get_square_box_from_image method on any sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
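The snippet above only computes the result; to actually look at it or save it, something like the following works (the window title and output file name here are arbitrary):
cv2.imshow("top-down puzzle", sudoku)       # window title is arbitrary
cv2.imwrite("puzzle_top_down.png", sudoku)  # output file name is arbitrary
cv2.waitKey(0)
cv2.destroyAllWindows()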
Here are the results from the given image and a custom example
I am trying to unshear the image, just like CamScanner does. It works for some images, but if I give it an arbitrary image it does not work. The image named new_image.jpeg is not working and the image named 1111.jpeg is working, although the pictures are essentially the same.
Code:
import numpy as np
import cv2
import re
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# ## **Use Gaussian Blurring combined with Adaptive Threshold**
def blur_and_threshold(gray):
gray = cv2.GaussianBlur(gray,(3,3),2)
threshold = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2)
threshold = cv2.fastNlMeansDenoising(threshold, 11, 31, 9)
return threshold
# ## **Find the Biggest Contour**
# **Note: We made sure the minimum contour is bigger than 1/10 size of the whole picture. This helps in removing very small contours (noise) from our dataset**
def biggest_contour(contours,min_area):
biggest = None
max_area = 0
biggest_n=0
approx_contour=None
for n,i in enumerate(contours):
area = cv2.contourArea(i)
if area > min_area/10:
peri = cv2.arcLength(i,True)
approx = cv2.approxPolyDP(i,0.02*peri,True)
if area > max_area and len(approx)==4:
biggest = approx
max_area = area
biggest_n=n
approx_contour=approx
return biggest_n,approx_contour
def order_points(pts):
# initialize a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
pts=pts.reshape(4,2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# return the ordered coordinates
return rect
### Find the exact (x,y) coordinates of the biggest contour and crop it out
def four_point_transform(image, pts):
# obtain a consistent order of the points and unpack them
# individually
rect = order_points(pts)
(tl, tr, br, bl) = rect
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordinates or the top-right and top-left x-coordinates
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
# now that we have the dimensions of the new image, construct
# the set of destination points to obtain a "birds eye view",
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# compute the perspective transform matrix and then apply it
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# return the warped image
return warped
# # Transforming the image
# **1. Convert the image to grayscale**
# **2. Remove noise and smoothen out the image by applying blurring and thresholding techniques**
# **3. Use Canny Edge Detection to find the edges**
# **4. Find the biggest contour and crop it out**
def transformation(image):
image=image.copy()
height, width, channels = image.shape
gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
image_size=gray.size
threshold=blur_and_threshold(gray)
# We need two threshold values, minVal and maxVal. Any edges with intensity gradient more than maxVal
# are sure to be edges and those below minVal are sure to be non-edges, so discarded.
# Those that lie between these two thresholds are classified as edges or non-edges based on their connectivity.
# If they are connected to "sure-edge" pixels, they are considered to be part of edges.
# Otherwise, they are also discarded
edges = cv2.Canny(threshold,50,150,apertureSize = 7)
contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
simplified_contours = []
for cnt in contours:
hull = cv2.convexHull(cnt)
simplified_contours.append(cv2.approxPolyDP(hull,
0.001*cv2.arcLength(hull,True),True))
simplified_contours = np.array(simplified_contours)
biggest_n,approx_contour = biggest_contour(simplified_contours,image_size)
threshold = cv2.drawContours(image, simplified_contours ,biggest_n, (0,255,0), 1)
dst = 0
if approx_contour is not None and len(approx_contour)==4:
approx_contour=np.float32(approx_contour)
dst=four_point_transform(threshold,approx_contour)
croppedImage = dst
return croppedImage
# **Increase the brightness of the image by playing with the "V" value (from HSV)**
def increase_brightness(img, value=30):
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
lim = 255 - value
v[v > lim] = 255
v[v <= lim] += value
final_hsv = cv2.merge((h, s, v))
img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
return img
# **Sharpen the image using Kernel Sharpening Technique**
def final_image(rotated):
# Create our sharpening kernel; its entries must sum to one
kernel_sharpening = np.array([[0,-1,0],
[-1, 5,-1],
[0,-1,0]])
# apply the sharpening kernel to the input image
sharpened = cv2.filter2D(rotated, -1, kernel_sharpening)
sharpened=increase_brightness(sharpened,30)
return sharpened
# ## 1. Pass the image through the transformation function to crop out the biggest contour
# ## 2. Brighten & Sharpen the image to get a final cleaned image
path = "/home/hamza/Desktop/"
image = cv2.imread("path of image")
blurred_threshold = transformation(image)
cleaned_image = final_image(blurred_threshold)
cv2.imwrite(path + "Final_Image4.jpg", cleaned_image)
Pictures
first pic
second image
Edit 1:
Picture 1 test image 1
Picture 2 test image 2
Picture 3 test image 3
Picture 4 test image 4
Edit 2:
If I pass only a black-and-white image, can it remove the shear? You may try it; if it works, please share it with me.
Pic:
black and white image
Note: If an image is already unsheared, then that particular image should be output exactly as it is, meaning the code should not touch it. I hope you get the point.
Nothing is wrong with your images. I tested your code, and indeed the first image works with the code as you wrote it. For the second image, some quick debugging shows that no contour was found in your transformation function - you have the block:
if approx_contour is not None and len(approx_contour)==4:
approx_contour=np.float32(approx_contour)
dst=four_point_transform(threshold,approx_contour)
Just add:
else:
print("no contour found")
to see for yourself.
The problem is with your Canny filter. With apertureSize = 7 your first image works but not the second one; with apertureSize = 3 your second image works but not the first one.
So both of your images work, but not with the same parameters. If processing time is not an issue for your task, you could iterate over several values of the parameters (see the sketch below), or else steer away from the Canny method. On both of your images the paper is a lot brighter than the background, so a convex hull on the thresholded image would work.
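That iteration could look roughly like the sketch below; it is only meant to be dropped into the question's script (it reuses blur_and_threshold and biggest_contour from there), and the helper name find_document_contour is made up for illustration:
def find_document_contour(gray):
    # Try each valid Canny aperture size until one yields a clean 4-corner contour.
    threshold = blur_and_threshold(gray)
    for aperture in (3, 5, 7):  # the only aperture sizes cv2.Canny accepts
        edges = cv2.Canny(threshold, 50, 150, apertureSize=aperture)
        contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        simplified = []
        for cnt in contours:
            hull = cv2.convexHull(cnt)
            simplified.append(cv2.approxPolyDP(hull, 0.001 * cv2.arcLength(hull, True), True))
        biggest_n, approx = biggest_contour(simplified, gray.size)
        if approx is not None and len(approx) == 4:
            return approx
    return None  # no aperture size produced a usable contour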
Since your initial image is in color, another approach is to use the H (hue) plane to detect the white color of the paper. I have made some adjustments to the code and it now works for both of your images.
import numpy as np
import cv2
import imutils
def order_points(pts):
""" Return sorted list of corners, from top-left then clockwise """
pts = np.reshape(pts, (6,2))
rect = np.zeros((4, 2), dtype = "float32")
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
return rect
def four_point_transform(image, pts):
(tl, tr, br, bl) = pts  # unpack the ordered corner points passed in
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
M = cv2.getPerspectiveTransform(pts, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
return warped
image1 = cv2.imread('im1.jpg')
mask = np.zeros(image1.shape, np.uint8)
gray = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
# Hue thresholding for the white paper
HSV = cv2.cvtColor(image1, cv2.COLOR_BGR2HSV)
lo_H = 100
hi_H = 200
thresh = cv2.inRange(HSV, (lo_H, 0, 0), (hi_H, 255, 255))
# Find the contour of the paper sheet - use convexhull
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnt = sorted(contours, key=cv2.contourArea, reverse=True)
hull = []
for i in range(len(cnt)):
hull.append(cv2.convexHull(cnt[i], False))
cv2.drawContours(mask, hull, 0, (255, 255, 255), thickness=cv2.FILLED)
mask = cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_and(gray, gray, mask=mask)
# Corner detection - keep 6 best corners
corners = cv2.goodFeaturesToTrack(mask, 6, 0.01, 50)
corners = np.int0(corners)
# Order the corners and keep 4
rect = order_points(corners)
wrap = four_point_transform(gray, rect)
# Display results
cv2.drawContours(image1, hull, 0, (0,255,125), 3)
cv2.imshow("image", image1)
#cv2.imshow('thresh', thresh)
#cv2.imshow("mask", mask)
cv2.imshow("gray", gray)
cv2.imshow("wrap", wrap)
I am new to OpenCV and Python and am trying to identify the three largest rectangles, as marked in the sample image, and extract them into three separate images. I am able to identify contours in the image, but all of them show up (as in the second image) and I am not able to separate out the three largest ones.
Code I have written so far:
import cv2
image = cv2.imread('imgpath')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 130, 255, 1)
cnts = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
#largest_contours = sorted(cnts, key=cv2.contourArea)[-3:]
#print(len(largest_contours))
for c in cnts:
cv2.drawContours(image,[c], 0, (0,255,0), 3)
#cv2.imshow("result", image)
#cv2.drawContours(image, largest_contours, -1, (0,255,0), 3)
cv2.imshow('contours', image)
cv2.waitKey(0)
Here's an approach:
Convert image to grayscale
Adaptive threshold to get a binary image
Find contours and sort for largest three
Perform contour approximation to ensure we have a square contour
Perform perspective transform to get top-down view
Rotate image to get correct orientation
The extracted rectangles after performing perspective transform and rotating
import cv2
import numpy as np
def rotate_image(image, angle):
# Grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# Adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
def perspective_transform(image, corners):
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,3)
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:3]
ROI_number = 0
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
cv2.drawContours(image,[c], 0, (36,255,12), 3)
transformed = perspective_transform(original, approx)
rotated = rotate_image(transformed, -90)
cv2.imwrite('ROI_{}.png'.format(ROI_number), rotated)
cv2.imshow('ROI_{}'.format(ROI_number), rotated)
ROI_number += 1
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
Sort the contours according to their area and then pick the top three.
cnts = sorted(cnts, key=lambda c: cv2.contourArea(c), reverse=True)
for c in cnts[:3]:
cv2.drawContours(image,[c], 0, (0,255,0), 3)
(x,y,w,h) = cv2.boundingRect(c)
(x, y, w, h) are the coordinates (x, y), width, and height of the contour's bounding rectangle. These values can be used to crop out the rectangle, as sketched below.
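For example, cropping each of the three regions out of the untouched image could look like this; original is assumed to be a copy of the image taken before cv2.drawContours marked it up, and the file names are arbitrary:
for i, c in enumerate(cnts[:3]):
    x, y, w, h = cv2.boundingRect(c)
    roi = original[y:y + h, x:x + w]  # plain NumPy slicing does the crop
    cv2.imwrite('rectangle_{}.png'.format(i), roi)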
I need to find a Go game board and detect the chips in the photo with OpenCV in Python, but right now I have problems with detecting the board: there are strange dots inside the same contour and I don't understand how I can remove them. That's what I have now:
from skimage import exposure
import numpy as np
import argparse
import imutils
import cv2
ap = argparse.ArgumentParser()
ap.add_argument("-r", required = True,
help = "ratio", type=int, default = 800)
args = vars(ap.parse_args())
img = cv2.imread('3.jpg') #upload image and change resolution
ratio = img.shape[0] / args["r"]
orig = img.copy()
img = imutils.resize(img, height = args["r"])
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts= cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) #search contours and sorting them
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
for cnt in cnts:
rect = cv2.minAreaRect(cnt) # try to fit each contour in rectangle
box = cv2.boxPoints(rect)
box = np.int0(box)
area = int(rect[1][0]*rect[1][1]) # calculating contour area
if (area > 300000):
print(area)
cv2.drawContours(img, cnt, -1, (255, 0, 0), 4) #dots in contour
hull = cv2.convexHull(cnt) # calculating convex hull
cv2.drawContours(img, [hull], -1, (0, 0, 255), 3)
cv2.imshow("death", img)
cv2.waitKey(0)
Source
Result
Here's an approach to detect the board
Convert image to grayscale and blur with bilateral filter
Threshold to obtain binary image
Find contours
Filter using contour area and contour shape approximation
Perform perspective transform to extract board ROI
Threshold
Find contours and then filter using cv2.contourArea() and a minimum threshold area. In addition, use contour approximation as a second filter with cv2.approxPolyDP(). Essentially, if a contour has four vertices, then it must be a square or a rectangle (the board).
We can also extract the bounding box of the board and put it onto a mask
Finally, if we want to obtain a top-down view of the board, we can perform a perspective transform
import cv2
import numpy as np
def perspective_transform(image, corners):
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
image = cv2.imread('1.jpg')
original = image.copy()
blur = cv2.bilateralFilter(image,9,75,75)
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray,40,255, cv2.THRESH_BINARY_INV)[1]
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
mask = np.zeros(image.shape, dtype=np.uint8)
for c in cnts:
area = cv2.contourArea(c)
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if area > 150000 and len(approx) == 4:
cv2.drawContours(image,[c], 0, (36,255,12), 3)
cv2.drawContours(mask,[c], 0, (255,255,255), -1)
transformed = perspective_transform(original, approx)
mask = cv2.bitwise_and(mask, original)
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('mask', mask)
cv2.imshow('transformed', transformed)
cv2.waitKey()
I also worked on a similar task with chessboard detection. I used two different methods. The first one is similar to nathancy's answer, so I don't think I need to post that one; the second one is a template-based method (I used SIFT). Here is an example:
Template image:
Example query image:
Result:
Code:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
MIN_MATCH_COUNT = 5
template_image = cv2.imread('go_board_template.png')
template_image_gray = cv2.cvtColor(template_image, cv2.COLOR_BGR2GRAY)
# Initiate SIFT detector
#sift = cv2.SIFT()
sift = cv2.xfeatures2d.SIFT_create()
# find the keypoints and descriptors with SIFT in template image
kp_template, des_template = sift.detectAndCompute(template_image_gray, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
img = cv2.imread("1.jpg") # use second parameter 0 for auto gray conversion?
# convert image to gray
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# find the keypoints and descriptors with SIFT in query image
kp_img, des_img = sift.detectAndCompute(img, None)
# get image dimension info
img_height, img_width = img_gray.shape
print("Image height:{}, image width:{}".format(img_height, img_width))
matches = flann.knnMatch(des_template,des_img,k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m,n in matches:
if m.distance < 0.7*n.distance:
good.append(m)
if len(good)>MIN_MATCH_COUNT:
src_pts = np.float32([ kp_template[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
dst_pts = np.float32([ kp_img[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
matchesMask = mask.ravel().tolist()
h,w = template_image_gray.shape
pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
dst = cv2.perspectiveTransform(pts,M)
img_board = img.copy()
cv2.polylines(img_board,[np.int32(dst)],True,255,10, cv2.LINE_AA)
"""
draw_params = dict(matchColor = (0,255,0), # draw matches in green color
singlePointColor = None,
matchesMask = matchesMask, # draw only inliers
flags = 2)
img3 = cv2.drawMatches(template_image,kp_template,img,kp_img,good,None,**draw_params)
"""
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()
# get axis aligned bounding box for chessboard in input image
x,y,w,h = cv2.boundingRect(dst)
img_crop = img.copy()
cv2.rectangle(img_crop,(x,y),(x+w,y+h),(0,0,255),5)
# draw OBB and AABB
fig = plt.figure()
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
ax1.axis("off")
ax2.axis("off")
ax1.set_title('OBB')
ax2.set_title('AABB')
ax1.imshow(cv2.cvtColor(img_board, cv2.COLOR_BGR2RGB))
ax2.imshow(cv2.cvtColor(img_crop, cv2.COLOR_BGR2RGB))
plt.show()
# crop board
cropped_img = img[y:y+h, x:x+w].copy()
plt.imshow(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
plt.show()
# convert cropped area to gray
cropped_img_gray = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
plt.imshow(cropped_img_gray, cmap="gray")
plt.show()
else:
print("Not enough match")
I built a Python script to detect and extract documents/citizen cards from an image. It was working as I wanted until I tried a Spanish citizen card (DNI 3.0).
When I run the Canny edge algorithm on this card, there is a lot of noise/lines/things that merge into the edges of the card, and that is what I think is causing the issue.
Canny edge applied:
Code:
def attemptPerspectiveTransform(openCVImage, debug_mode):
image = openCVImage
ratio = image.shape[0] / 300.0
orig = image.copy()
image = imutils.resize(image, height = 300)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 0, 200)
if (debug_mode == True):
cv2.imshow("gray", gray)
cv2.imshow("edged", edged)
cv2.waitKey(0)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
print('APPROX')
print(len(approx))
# if our approximated contour has four points, should be the document we are looking for
if len(approx) == 4:
screenCnt = approx
break
# now that we have our screen contour, we need to determine
# the top-left, top-right, bottom-right, and bottom-left
# points so that we can later warp the image -- we'll start
# by reshaping our contour to be our finals and initializing
# our output rectangle in top-left, top-right, bottom-right,
# and bottom-left order
pts = screenCnt.reshape(4, 2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point has the smallest sum whereas the
# bottom-right has the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# compute the difference between the points -- the top-right
# will have the minumum difference and the bottom-left will
# have the maximum difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# multiply the rectangle by the original ratio
rect *= ratio
# now that we have our rectangle of points, let's compute
# the width of our new image
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
# ...and now for the height of our new image
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
# take the maximum of the width and height values to reach
# our final dimensions
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
# construct our destination points which will be used to
# map the screen to a top-down, "birds eye" view
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# calculate the perspective transform matrix and warp
# the perspective to grab the screen
M = cv2.getPerspectiveTransform(rect, dst)
warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
# convert the warped image to grayscale and then adjust
# the intensity of the pixels to have minimum and maximum
# values of 0 and 255, respectively
warp = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp = exposure.rescale_intensity(warp, out_range = (0, 255))
height, width = warp.shape
if (height > width):
warp = rotate_image(warp, 90)
if (debug_mode == True):
# show images
cv2.imshow("image", image)
cv2.imshow("edge", edged)
cv2.imshow("warp", imutils.resize(warp, height = 300))
cv2.imshow("warp pure", imutils.resize(warp, height = 300))
cv2.imwrite('cropped_final.jpg', warp)
cv2.waitKey(0)
return warp
Results in error:
Traceback (most recent call last):
File "main.py", line 60, in <module>
cv_front_cropped_path = image_preprocessing.processImage(front_img, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/image_preprocessing.py", line 179, in processImage
image = perspective_transform.attemptPerspectiveTransform(image, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/perspective_transform.py", line 84, in attemptPerspectiveTransform
pts = screenCnt.reshape(4, 2)
AttributeError: 'NoneType' object has no attribute 'reshape'
It works well with the Portuguese citizen card, even with worse images, I guess because it doesn't have those lines at the top of the card and all the noise that can be seen in the Spanish card.
How can I fix this?
**Original image:**
I've written some code to detect the computer screen in an image. I need to do some work on pixels that are in the center of that selected rectangle. How can I extract the selected rectangle as a rectangular image?
import imutils
import cv2
image = cv2.imread('test-img/imgRec3.jpg')
ratio = image.shape[0] / 300.0
image = imutils.resize(image, height=300)
realImage = image.copy()
# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
screenCnt = approx
break
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("image", realImage)
cv2.imshow("Screen Rec", image)
cv2.waitKey(0)
Thanks for your help in advance.
Using skimage you could do it like that:
def transform(intersections, image):
w,h = get_orientation()
a = np.array([0,h])
b = np.array([w,h])
c = np.array([w,0])
d = np.array([0,0])
tf = skimage.transform.estimate_transform("projective",
dst=np.vstack((a,b,c,d)),
src=intersections)
invtf = tf.inverse
transformedImage = skimage.transform.warp(image=image,inverse_map=invtf, output_shape=(h,w))
return transformedImage
fig,(ax0,ax1) = plt.subplots(ncols=2, figsize=(15,8))
transformed_image = transform(sorted_intersec, img[index] )
ax0.imshow(transformed_image,cmap="gray")
ax1.imshow(img[index])
intersections are your four corner points. Keep in mind that you have to put them in the correct order; a sketch of one way to do that follows.
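If the intersections come out in an arbitrary order, one way to sort them borrows the sum/difference trick used in the order_points functions earlier on this page. The helper name order_intersections is made up for illustration, and it assumes the points are (x, y) pairs so that the result lines up with the a, b, c, d destination points above (bottom-left, bottom-right, top-right, top-left):
import numpy as np

def order_intersections(points):
    # x + y is smallest at the top-left and largest at the bottom-right;
    # y - x is smallest at the top-right and largest at the bottom-left.
    pts = np.asarray(points, dtype=float).reshape(4, 2)
    s = pts.sum(axis=1)
    d = np.diff(pts, axis=1).ravel()
    top_left, bottom_right = pts[np.argmin(s)], pts[np.argmax(s)]
    top_right, bottom_left = pts[np.argmin(d)], pts[np.argmax(d)]
    return np.vstack((bottom_left, bottom_right, top_right, top_left))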
To extract the selected rectangle as a rectangular image, we can use a perspective transformation to obtain a top-down view of the image. Since you were able to find the bounding box of the rectangle, we can use those coordinates as the corners of the new image. To begin, we separate the four corners into individual points given to us by cv2.approxPolyDP(). We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
Now with the isolated corner points, we find the new width and length dimensions for the top-down image. We can obtain the transformation matrix using cv2.getPerspectiveTransform() and actually obtain the transformed image using cv2.warpPerspective().
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
Obtained bounding box coordinates
Extracted rectangle
Full code
import imutils
import cv2
import numpy as np
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height))
image = cv2.imread('1.jpg')
ratio = image.shape[0] / 300.0
image = imutils.resize(image, height=300)
realImage = image.copy()
# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
screenCnt = approx
transformed = perspective_transform(realImage, screenCnt)
break
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("image", realImage)
cv2.imshow("Screen Rec", image)
cv2.imshow("transformed", transformed)
cv2.waitKey(0)