I need to find a Go board and detect the stones in a photo with OpenCV in Python, but right now I have problems with detecting the board: there are strange dots along the same contour and I don't understand how I can remove them. This is what I have so far:
from skimage import exposure
import numpy as np
import argparse
import imutils
import cv2
ap = argparse.ArgumentParser()
ap.add_argument("-r", required = True,
help = "ratio", type=int, default = 800)
args = vars(ap.parse_args())
img = cv2.imread('3.jpg') # load image; resolution is changed below
ratio = img.shape[0] / args["r"]
orig = img.copy()
img = imutils.resize(img, height = args["r"])
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) # find contours, then sort them
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
for cnt in cnts:
    rect = cv2.minAreaRect(cnt)  # try to fit each contour in a rectangle
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    area = int(rect[1][0] * rect[1][1])  # calculating contour area
    if area > 300000:
        print(area)
        cv2.drawContours(img, cnt, -1, (255, 0, 0), 4)  # dots in contour
        hull = cv2.convexHull(cnt)  # calculating convex hull
        cv2.drawContours(img, [hull], -1, (0, 0, 255), 3)

cv2.imshow("death", img)
cv2.waitKey(0)
Source
Result
Here's an approach to detect the board
Convert image to grayscale and blur with bilateral filter
Threshold to obtain binary image
Find contours
Filter using contour area and contour shape approximation
Perform perspective transform to extract board ROI
After thresholding, find contours and filter using cv2.contourArea() with a minimum threshold area. In addition, use contour approximation as a second filter with cv2.approxPolyDP(). Essentially, if a contour has four vertices, it is most likely a square or a rectangle (the board).
We can also extract the bounding box of the board and put it onto a mask
Finally, if we want to obtain a top-down view of the board, we can perform a perspective transform
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()
blur = cv2.bilateralFilter(image, 9, 75, 75)
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 40, 255, cv2.THRESH_BINARY_INV)[1]

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

mask = np.zeros(image.shape, dtype=np.uint8)
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if area > 150000 and len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        cv2.drawContours(mask, [c], 0, (255, 255, 255), -1)
        transformed = perspective_transform(original, approx)

mask = cv2.bitwise_and(mask, original)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('mask', mask)
cv2.imshow('transformed', transformed)
cv2.waitKey()
I also worked on a similar task with chessboard detection. I used two different methods. The first is similar to nathancy's answer, so I don't think I need to post it; the second is a template-based method (I used SIFT). Here is an example:
Template image:
Example query image:
Result:
Code:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os

MIN_MATCH_COUNT = 5

template_image = cv2.imread('go_board_template.png')
template_image_gray = cv2.cvtColor(template_image, cv2.COLOR_BGR2GRAY)

# Initiate SIFT detector
#sift = cv2.SIFT()
sift = cv2.xfeatures2d.SIFT_create()

# find the keypoints and descriptors with SIFT in template image
kp_template, des_template = sift.detectAndCompute(template_image_gray, None)

FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)

img = cv2.imread("1.jpg")  # use second parameter 0 for auto gray conversion?

# convert image to gray
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# find the keypoints and descriptors with SIFT in query image
kp_img, des_img = sift.detectAndCompute(img, None)

# get image dimension info
img_height, img_width = img_gray.shape
print("Image height:{}, image width:{}".format(img_height, img_width))

matches = flann.knnMatch(des_template, des_img, k=2)

# store all the good matches as per Lowe's ratio test.
good = []
for m, n in matches:
    if m.distance < 0.7 * n.distance:
        good.append(m)

if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([ kp_template[m.queryIdx].pt for m in good ]).reshape(-1, 1, 2)
    dst_pts = np.float32([ kp_img[m.trainIdx].pt for m in good ]).reshape(-1, 1, 2)

    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    matchesMask = mask.ravel().tolist()

    h, w = template_image_gray.shape
    pts = np.float32([ [0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0] ]).reshape(-1, 1, 2)
    dst = cv2.perspectiveTransform(pts, M)

    img_board = img.copy()
    cv2.polylines(img_board, [np.int32(dst)], True, 255, 10, cv2.LINE_AA)

    """
    draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                       singlePointColor = None,
                       matchesMask = matchesMask, # draw only inliers
                       flags = 2)
    img3 = cv2.drawMatches(template_image,kp_template,img,kp_img,good,None,**draw_params)
    """

    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

    # get axis aligned bounding box for chessboard in input image
    x, y, w, h = cv2.boundingRect(dst)
    img_crop = img.copy()
    cv2.rectangle(img_crop, (x, y), (x + w, y + h), (0, 0, 255), 5)

    # draw OBB and AABB
    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    ax1.axis("off")
    ax2.axis("off")
    ax1.set_title('OBB')
    ax2.set_title('AABB')
    ax1.imshow(cv2.cvtColor(img_board, cv2.COLOR_BGR2RGB))
    ax2.imshow(cv2.cvtColor(img_crop, cv2.COLOR_BGR2RGB))
    plt.show()

    # crop board
    cropped_img = img[y:y + h, x:x + w].copy()
    plt.imshow(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
    plt.show()

    # convert cropped area to gray
    cropped_img_gray = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
    plt.imshow(cropped_img_gray, cmap="gray")
    plt.show()
else:
    print("Not enough match")
Related
I am trying to filter the background of images presenting electric cables. I tried to do the following:
Transform from color to gray
Apply cv2.Laplacian, or two cv2.Sobel passes, to find edges in both directions.
Apply thresholding with cv2.THRESH_BINARY(_INV) and cv2.THRESH_OTSU.
Lastly, I tried to find lines in the 'filtered' images using cv2.Canny together with cv2.HoughLinesP.
Overall, the results aren't satisfying at all. I will give an example of 2 images:
And the output of my script:
I also played with the values in the config, but the results weren't much different.
Here's the little script I managed to do:
import cv2
import matplotlib.pyplot as plt
import numpy as np

def img_show(images, cmap=None):
    fig = plt.figure(figsize=(17, 10))
    root = 3  # len(images) ** 0.5
    for i, img in enumerate(images):
        ax = fig.add_subplot(root, root, i + 1)
        ax.imshow(img, cmap=cmap[i])
    plt.show()

class Config:
    scale = 0.4
    min_threshold = 120
    max_threshold = 200
    canny_min_threshold = 100
    canny_max_threshold = 200

config = Config()

def find_lines(img, rgb_img):
    dst = cv2.Canny(img, config.canny_min_threshold, config.canny_max_threshold)
    cdstP = np.copy(rgb_img)
    lines = cv2.HoughLinesP(dst, 1, np.pi / 180, 150, None, 0, 0)
    lines1 = lines[:, 0, :]
    for x1, y1, x2, y2 in lines1[:]:
        cv2.line(cdstP, (x1, y1), (x2, y2), (255, 0, 0), 5)
    return cdstP

if __name__ == "__main__":
    bgr_img = cv2.imread('DJI_0009.JPG')
    bgr_img = cv2.resize(bgr_img, (0, 0), bgr_img, config.scale, config.scale)
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    gray_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)

    # _, threshold = cv2.threshold(gray_img, config.min_threshold, config.max_threshold, cv2.THRESH_BINARY)
    # laplacian = cv2.Laplacian(rgb_img, cv2.CV_8UC1)
    sobelx = cv2.Sobel(gray_img, cv2.CV_8UC1, 1, 0)
    sobely = cv2.Sobel(gray_img, cv2.CV_8UC1, 0, 1)
    blended = cv2.addWeighted(src1=sobelx, alpha=0.5, src2=sobely, beta=0.5, gamma=0)
    _, threshold = cv2.threshold(blended, config.min_threshold, config.max_threshold,
                                 cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    p1 = find_lines(threshold, rgb_img)
    p2 = find_lines(blended, rgb_img)
    p3 = find_lines(gray_img, rgb_img)

    plots = [rgb_img, p1, p2, p3]
    cmaps = [None] + ['gray'] * (len(plots) - 1)
    img_show(plots, cmaps)
I am assuming I need to do much better filtering. However, I also tried image segmentation, but the results weren't promising at all.
Any ideas on how to improve this?
Thanks
Here is one way to do that in Python/OpenCV. Threshold, then optionally clean with morphology. Then get the contours and, for each contour, compute its rotated rectangle. From the rotated rectangle's dimensions, compute the aspect ratio (largest dimension / smallest dimension) and optionally the area. Finally, threshold on the aspect ratio (and optionally the area) and keep only the contours that pass.
Input:
import cv2
import numpy as np
image = cv2.imread("DCIM-100-MEDIA-DJI-0009-JPG.jpg")
hh, ww = image.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# create a binary thresholded image
thresh = cv2.threshold(gray, 64, 255, cv2.THRESH_BINARY)[1]
# invert so line is white on black background
thresh = 255 - thresh
# apply morphology
kernel = np.ones((11,11), np.uint8)
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
area_thresh = ww / 2
aspect_thresh = ww / 30
print(area_thresh,aspect_thresh)
print('')
result = image.copy()
for c in contours:
    # get rotated rectangle from contour
    # get its dimensions
    rotrect = cv2.minAreaRect(c)
    (center), (dim1, dim2), angle = rotrect
    maxdim = max(dim1, dim2)
    mindim = min(dim1, dim2)
    area = dim1 * dim2
    if mindim != 0:
        aspect = maxdim / mindim
        #print(area, aspect)
        #if area > area_thresh and aspect > aspect_thresh:
        if aspect > aspect_thresh:
            # draw contour on input
            cv2.drawContours(result, [c], 0, (0, 0, 255), 3)
            print(area, aspect)
# save result
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_thresh.jpg",thresh)
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_clean.jpg",clean)
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_result.jpg",result)
# display result
cv2.imshow("thresh", thresh)
cv2.imshow("clean", clean)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded image:
Morphology cleaned image:
Result image:
I want to crop the image only inside the box or rectangle. I tried so many approaches but nothing worked.
import cv2
import numpy as np
img = cv2.imread("C:/Users/hp/Desktop/segmentation/add.jpeg", 0);
h, w = img.shape[:2]
# print(img.shape)
kernel = np.ones((3,3),np.uint8)
img2 = img.copy()
img2 = cv2.medianBlur(img2,5)
img2 = cv2.adaptiveThreshold(img2,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY,11,2)
img2 = 255 - img2
img2 = cv2.dilate(img2, kernel)
img2 = cv2.medianBlur(img2, 9)
img2 = cv2.medianBlur(img2, 9)
cv2.imshow('anything', img2)
cv2.waitKey(0)
cv2.destroyAllWindows()
position = np.where(img2 !=0)
x0 = position[0].min()
x1 = position[0].max()
y0 = position[1].min()
y1 = position[1].max()
print(x0,x1,y0,y1)
result = img[x0:x1,y0:y1]
cv2.imshow('anything', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
The output should be the image inside the square.
You can use contour detection for this. If your image has basically only a hand-drawn rectangle in it, I think it's good enough to assume it's the largest closed contour in the image. From that contour, we can compute a polygon/quadrilateral approximation and then finally get an approximate rectangle. I'll define some utilities at the beginning which I generally use to make my life easier when messing around with images:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def load_image(filename):
    return cv2.imread(filename)

def bnw(image):
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

def col(image):
    return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

def fixrgb(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def show_image(image, figsize=(7, 7), cmap=None):
    cmap = cmap if len(image.shape) == 3 else 'gray'
    plt.figure(figsize=figsize)
    plt.imshow(image, cmap=cmap)
    plt.show()

def AdaptiveThresh(gray):
    blur = cv2.medianBlur(gray, 5)
    adapt_type = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
    thresh_type = cv2.THRESH_BINARY_INV
    return cv2.adaptiveThreshold(blur, 255, adapt_type, thresh_type, 11, 2)

def get_rect(pts):
    xmin = pts[:, 0, 1].min()
    ymin = pts[:, 0, 0].min()
    xmax = pts[:, 0, 1].max()
    ymax = pts[:, 0, 0].max()
    return (ymin, xmin), (ymax, xmax)
Let's load the image and convert it to grayscale:
image_name = 'test.jpg'
image_original = fixrgb(load_image(image_name))
image_gray = 255-bnw(image_original)
show_image(image_gray)
Use some morph ops to enhance the image:
kernel = np.ones((3,3),np.uint8)
d = 255-cv2.dilate(image_gray,kernel,iterations = 1)
show_image(d)
Find the edges and enhance/denoise:
e = AdaptiveThresh(d)
show_image(e)
m = cv2.dilate(e,kernel,iterations = 1)
m = cv2.medianBlur(m,11)
m = cv2.dilate(m,kernel,iterations = 1)
show_image(m)
Contour detection:
contours, hierarchy = cv2.findContours(m, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
total_area = np.prod(image_gray.shape)
max_area = 0
for cnt in contours:
    # Simplify contour
    perimeter = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)
    area = cv2.contourArea(approx)
    # Shape is rectangular, so approximately 4 points, and it's convex
    if len(approx) == 4 and cv2.isContourConvex(approx) and max_area < area < total_area:
        max_area = cv2.contourArea(approx)
        quad_polygon = approx
img1 = image_original.copy()
img2 = image_original.copy()
cv2.polylines(img1,[quad_polygon],True,(0,255,0),10)
show_image(img1)
tl, br = get_rect(quad_polygon)
cv2.rectangle(img2, tl, br, (0,255,0), 10)
show_image(img2)
So you can see the approximate polygon and the corresponding rectangle, which you can use to get your crop. I suggest you play around with median blur and morphological ops like erosion, dilation, opening, and closing, and see which set of operations suits your images best; I can't really say what works from just one image (there is a short sketch for experimenting with these after the crop line below). You can crop using the top-left and bottom-right coordinates:
show_image(image_original[tl[1]:br[1],tl[0]:br[0],:])
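As a side note on experimenting with morphological operations, here is a minimal sketch (not part of the original answer) that reuses the thresholded image e, the 3x3 kernel, and the show_image helper defined above so a few variants can be compared visually:

# same 3x3 kernel as above
kernel = np.ones((3, 3), np.uint8)

# a few alternatives to the dilate + median-blur combination used above
opened = cv2.morphologyEx(e, cv2.MORPH_OPEN, kernel, iterations=1)   # opening removes small specks
closed = cv2.morphologyEx(e, cv2.MORPH_CLOSE, kernel, iterations=2)  # closing fills small gaps in the outline
eroded = cv2.erode(e, kernel, iterations=1)                          # erosion thins strokes

for name, variant in [('opened', opened), ('closed', closed), ('eroded', eroded)]:
    print(name)
    show_image(variant)

Whichever variant gives the cleanest single rectangular blob is the one worth feeding into the contour detection step.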
Draw the square with a different color (e.g. red) so it can be distinguished from the other writing and the background. Then threshold the image so you get a black-and-white image in which the red line is white. Get the coordinates of the white pixels; from this set, select only the two pairs (minX, minY) and (maxX, maxY). They are the top-left and bottom-right points of the box (remember that in an image the (0, 0) point is at the top left), and you can use them to crop the image.
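A minimal sketch of this idea, assuming the box was drawn in red and using a hypothetical file name (both are assumptions, not from the question):

import cv2
import numpy as np

img = cv2.imread('add_red.jpeg')  # hypothetical input with a red hand-drawn box

# isolate "red" pixels in HSV; the exact bounds depend on the pen and the lighting
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.bitwise_or(cv2.inRange(hsv, (0, 70, 50), (10, 255, 255)),
                      cv2.inRange(hsv, (170, 70, 50), (180, 255, 255)))

# coordinates of the white (red-line) pixels
ys, xs = np.where(mask > 0)
x0, y0 = xs.min(), ys.min()   # top-left of the box
x1, y1 = xs.max(), ys.max()   # bottom-right of the box

crop = img[y0:y1, x0:x1]      # image inside the box
cv2.imwrite('crop.png', crop)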
I am new to Opencv and python and trying to identify the largest three rectangles as marked in the sample image and extract them into three separate images. I am able to identify contours in the image but all of them are showing up (as shown in second image) and I am not able to separate out the three largest ones.
Code I have written so far:
import cv2
image = cv2.imread('imgpath')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 130, 255, 1)
cnts = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
#largest_contours = sorted(cnts, key=cv2.contourArea)[-3:]
#print(len(largest_contours))
for c in cnts:
    cv2.drawContours(image, [c], 0, (0, 255, 0), 3)
    #cv2.imshow("result", image)

#cv2.drawContours(image, largest_contours, -1, (0,255,0), 3)
cv2.imshow('contours', image)
cv2.waitKey(0)
Here's an approach:
Convert image to grayscale
Adaptive threshold to get a binary image
Find contours and sort for largest three
Perform contour approximation to ensure we have a square contour
Perform perspective transform to get top-down view
Rotate image to get correct orientation
The extracted rectangles after performing perspective transform and rotating
import cv2
import numpy as np

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:3]

ROI_number = 0
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        transformed = perspective_transform(original, approx)
        rotated = rotate_image(transformed, -90)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), rotated)
        cv2.imshow('ROI_{}'.format(ROI_number), rotated)
        ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
Sort the contours according to their area and then pick the top three.
cnts = sorted(cnts, key=lambda c: cv2.contourArea(c), reverse=True)
for c in cnts[:3]:
    cv2.drawContours(image, [c], 0, (0, 255, 0), 3)
    (x, y, w, h) = cv2.boundingRect(c)
(x, y, w, h) are the top-left coordinates (x, y) and the width and height of the contour's bounding box. These values can be used to crop out the rectangle.
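For example, a minimal sketch of the cropping step (the rect_{}.png file names are just an illustration, not from the answer):

for i, c in enumerate(cnts[:3]):
    x, y, w, h = cv2.boundingRect(c)
    # crop the rectangle; crop from a copy of the original if you don't want the drawn outline included
    roi = image[y:y + h, x:x + w]
    cv2.imwrite('rect_{}.png'.format(i), roi)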
I have a reference image and I am finding that reference image within a larger test image using ORB detection. Once found, I want to save ONLY the area within the crop as a new image. I'd like to transform it into the same dimensions as the reference image and save it in the same orientation.
So far I have matched the reference image in the larger image and masked it out. But I can't figure out how to display ONLY the cropped area as its own picture, in the proper orientation and dimensions. I want to save what's in the crop and get rid of the rest.
Any help would be greatly appreciated. Thank you.
import cv2
import numpy as np

# minimum ORB matches required to make a match
MIN_MATCH_COUNT = 10

img1 = cv2.imread("reference.jpg")
img2 = cv2.imread("1.jpg")

orb = cv2.ORB_create()

kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2, None)

# sorts matches
good = []
for i, m in enumerate(matches):
    if i < len(matches) - 1 and m.distance < 0.7 * matches[i + 1].distance:
        good.append(m)

if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1, 1, 2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    matchesMask = mask.ravel().tolist()
    h, w, d = img1.shape
    pts = np.float32([ [0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0] ]).reshape(-1, 1, 2)
    dst = cv2.perspectiveTransform(pts, M)

    mask = np.ones(img2.shape[:2], dtype="uint8") * 255
    rect = cv2.minAreaRect(dst)
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    new = cv2.drawContours(mask, [box], -1, 0, -1)

    # remove the contours from the image and show the resulting images
    img = cv2.bitwise_and(img2, img2, mask=mask)
    cv2.imshow("Mask", mask)
    cv2.imshow("After", img)
else:
    print( "Not enough matches are found - {}/{}".format(len(good), MIN_MATCH_COUNT) )
    matchesMask = None

# This is for drawing the match lines inbetween the ref and 1.jpg images
draw_params = dict(matchColor = (0, 255, 0),  # draw matches in green color
                   singlePointColor = None,
                   matchesMask = matchesMask,  # draw only inliers
                   flags = 2)

img3 = cv2.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params)
cv2.imshow("Matches", img3)
You're on the right track and already did most of the work. Since you have found the ROI mask, you can perform a perspective transform to get the correct orientation. Afterwards, you can resize the image to match your reference/template image.
First we invert the mask to get the desired ROI in white and then find contours on this mask. From here we find the corners using cv2.arcLength() and cv2.approxPolyDP(). Next we perform a perspective transform to get this:
template shape: (210, 236, 3)
transformed shape: (288, 279, 3)
Now that we have the correct orientation, we just need to simply resize to match the template image. Here's the result (left) and template image (right)
template shape: (210, 236, 3)
resized shape: (210, 236, 3)
Code
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

# minimum ORB matches required to make a match
MIN_MATCH_COUNT = 10

img1 = cv2.imread("reference.jpg")
img2 = cv2.imread("1.jpg")

orb = cv2.ORB_create()

kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2, None)

# sorts matches
good = []
for i, m in enumerate(matches):
    if i < len(matches) - 1 and m.distance < 0.7 * matches[i + 1].distance:
        good.append(m)

if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1, 1, 2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    matchesMask = mask.ravel().tolist()
    h, w, d = img1.shape
    pts = np.float32([ [0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0] ]).reshape(-1, 1, 2)
    dst = cv2.perspectiveTransform(pts, M)

    mask = np.ones(img2.shape[:2], dtype="uint8") * 255
    rect = cv2.minAreaRect(dst)
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    new = cv2.drawContours(mask, [box], -1, 0, -1)

    # remove the contours from the image and show the resulting images
    img = cv2.bitwise_and(img2, img2, mask=mask)
    mask = 255 - mask
    cv2.imshow("After", img)
else:
    print( "Not enough matches are found - {}/{}".format(len(good), MIN_MATCH_COUNT) )
    matchesMask = None

# This is for drawing the match lines inbetween the ref and 1.jpg images
draw_params = dict(matchColor = (0, 255, 0),  # draw matches in green color
                   singlePointColor = None,
                   matchesMask = matchesMask,  # draw only inliers
                   flags = 2)

img3 = cv2.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params)
cv2.imshow("Matches", img3)
cv2.imshow("Mask", mask)

# Find contour on mask and perform perspective transform
cnts = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        transformed = perspective_transform(img2, approx)
        cv2.imshow("transformed", transformed)
        print('template shape:', img1.shape)
        print('transformed shape:', transformed.shape)

        resized = cv2.resize(transformed, (img1.shape[1], img1.shape[0]))
        cv2.imshow("resized", resized)
        print('template shape:', img1.shape)
        print('resized shape:', resized.shape)

cv2.waitKey()
I'm working on a personal project using OpenCV in Python. I want to detect a sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
Instead, I got a black image as the result:
The code is:
import cv2
import numpy as np

def find_biggest_blob(outerBox):
    max = -1
    maxPt = (0, 0)

    h, w = outerBox.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)

    for y in range(0, h):
        for x in range(0, w):
            if outerBox[y, x] >= 128:
                area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))

    #cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))

    image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
    cv2.imwrite(image_path, outerBox)
    cv2.imshow(image_path, outerBox)

def main():
    image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
    find_biggest_blob(image)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we apply an adaptive threshold to obtain a binary image.
Next we find contours and filter using contour area. Here's the detected board
Now to get a top-down view of the image, we perform a perspective transform. Here's the result
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    transformed = perspective_transform(original, approx)
    break

cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution that will generalize to any image whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting at different areas)
Identify the Corners of the large square
Perspective transform of the image to the final square image
Depending on the amount of skew in the original image, the corners identified may be out of order, so we need to arrange them in the correct order. The method used here is to find the centroid of the large square and determine the order of the corners relative to it.
Here is the code:
import cv2
import numpy as np

# Helper functions for getting the square image

def euclidian_distance(point1, point2):
    # Calculates the euclidian distance between point1 and point2;
    # used to calculate the length of the four sides of the square
    distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
    return distance

def order_corner_points(corners):
    # The points obtained from contours may not be in order because of the skewness of the image, or
    # because of the camera angle. This function returns a list of corners in the right order
    sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
    sort_corners = [list(ele) for ele in sort_corners]

    x, y = [], []
    for i in range(len(sort_corners[:])):
        x.append(sort_corners[i][0])
        y.append(sort_corners[i][1])
    centroid = [sum(x) / len(x), sum(y) / len(y)]

    for _, item in enumerate(sort_corners):
        if item[0] < centroid[0]:
            if item[1] < centroid[1]:
                top_left = item
            else:
                bottom_left = item
        elif item[0] > centroid[0]:
            if item[1] < centroid[1]:
                top_right = item
            else:
                bottom_right = item

    ordered_corners = [top_left, top_right, bottom_right, bottom_left]
    return np.array(ordered_corners, dtype="float32")

def image_preprocessing(image, corners):
    # This function undertakes all the preprocessing of the image and returns the warped (top-down) image
    ordered_corners = order_corner_points(corners)
    print("ordered corners: ", ordered_corners)
    top_left, top_right, bottom_right, bottom_left = ordered_corners

    # Determine the widths and heights (top and bottom) of the image and find the max of them for the transform
    width1 = euclidian_distance(bottom_right, bottom_left)
    width2 = euclidian_distance(top_right, top_left)

    height1 = euclidian_distance(top_right, bottom_right)
    height2 = euclidian_distance(top_left, bottom_right)

    width = max(int(width1), int(width2))
    height = max(int(height1), int(height2))

    # To find the matrix for the warp perspective function we need dimensions and matrix parameters
    dimensions = np.array([[0, 0], [width, 0], [width, width],
                           [0, width]], dtype="float32")

    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    transformed_image = cv2.warpPerspective(image, matrix, (width, width))

    # Now, chances are, you may want to return your image in a specific size. If not, you may ignore the following line
    transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)

    return transformed_image

# main function
def get_square_box_from_image(image):
    # This function returns the top-down view of the puzzle
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)

    corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    corners = corners[0] if len(corners) == 2 else corners[1]
    corners = sorted(corners, key=cv2.contourArea, reverse=True)

    for corner in corners:
        length = cv2.arcLength(corner, True)
        approx = cv2.approxPolyDP(corner, 0.015 * length, True)
        print(approx)
        puzzle_image = image_preprocessing(image, approx)
        break

    return puzzle_image

# Call the get_square_box_from_image method on any sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
Here are the results from the given image and a custom example