How to fix/optimize document detection with a lot of noise - Python

I built a Python script to detect and extract documents/citizen cards from an image. It was working as I wanted until I tried a Spanish citizen card (DNI 3.0).
When I run the Canny edge algorithm on this card, there is a lot of noise (lines and patterns) that merges into the edges of the card, and I think that's what is causing the issue.
Canny edge applied:
Code:
def attemptPerspectiveTransform(openCVImage, debug_mode):
    image = openCVImage
    ratio = image.shape[0] / 300.0
    orig = image.copy()
    image = imutils.resize(image, height=300)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    edged = cv2.Canny(gray, 0, 200)

    if debug_mode:
        cv2.imshow("gray", gray)
        cv2.imshow("edged", edged)
        cv2.waitKey(0)

    cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
    screenCnt = None

    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.015 * peri, True)
        print('APPROX')
        print(len(approx))
        # if our approximated contour has four points, it should be the document we are looking for
        if len(approx) == 4:
            screenCnt = approx
            break

    # now that we have our screen contour, we need to determine
    # the top-left, top-right, bottom-right, and bottom-left
    # points so that we can later warp the image -- we'll start
    # by reshaping our contour and initializing our output
    # rectangle in top-left, top-right, bottom-right,
    # and bottom-left order
    pts = screenCnt.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")

    # the top-left point has the smallest sum whereas the
    # bottom-right has the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # compute the difference between the points -- the top-right
    # will have the minimum difference and the bottom-left will
    # have the maximum difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # multiply the rectangle by the original ratio
    rect *= ratio

    # now that we have our rectangle of points, let's compute
    # the width of our new image...
    (tl, tr, br, bl) = rect
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))

    # ...and now the height of our new image
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))

    # take the maximum of the width and height values to reach
    # our final dimensions
    maxWidth = max(int(widthA), int(widthB))
    maxHeight = max(int(heightA), int(heightB))

    # construct our destination points which will be used to
    # map the screen to a top-down, "bird's eye" view
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # calculate the perspective transform matrix and warp
    # the perspective to grab the screen
    M = cv2.getPerspectiveTransform(rect, dst)
    warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))

    # convert the warped image to grayscale and then rescale
    # the intensity of the pixels to the full 0-255 range
    warp = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
    warp = exposure.rescale_intensity(warp, out_range=(0, 255))

    height, width = warp.shape
    if height > width:
        warp = rotate_image(warp, 90)

    if debug_mode:
        # show images
        cv2.imshow("image", image)
        cv2.imshow("edge", edged)
        cv2.imshow("warp", imutils.resize(warp, height=300))
        cv2.imshow("warp pure", imutils.resize(warp, height=300))
        cv2.imwrite('cropped_final.jpg', warp)
        cv2.waitKey(0)

    return warp
This results in the following error:
Traceback (most recent call last):
File "main.py", line 60, in <module>
cv_front_cropped_path = image_preprocessing.processImage(front_img, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/image_preprocessing.py", line 179, in processImage
image = perspective_transform.attemptPerspectiveTransform(image, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/perspective_transform.py", line 84, in attemptPerspectiveTransform
pts = screenCnt.reshape(4, 2)
AttributeError: 'NoneType' object has no attribute 'reshape'
It works well with the Portuguese citizen card, even with worse images; I guess that's because it doesn't have those lines at the top of the card and all the noise that can be seen in the Spanish card.
How can I fix this?
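For reference, the crash itself happens because screenCnt stays None when no 4-point contour is found; a minimal guard (it does not fix the detection, only the AttributeError) would be:

# minimal sketch: bail out instead of crashing when no 4-point contour is found
if screenCnt is None:
    if debug_mode:
        print("No 4-point document contour found")
    return None  # the caller must handle the None result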
**Original image:**

Related

Error: Invalid number of channels in input image: 'VScn::contains(scn)' using OpenCV?

I am trying to unshear the image, just like CamScanner does. It works for some images, but if I give it a random image it does not work. The image named new_image.jpeg is not working and the image named 1111.jpeg is working, although the pictures are essentially the same.
Code:
import numpy as np
import cv2
import re
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# ## **Use Gaussian Blurring combined with Adaptive Threshold**
def blur_and_threshold(gray):
    gray = cv2.GaussianBlur(gray, (3, 3), 2)
    threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    threshold = cv2.fastNlMeansDenoising(threshold, 11, 31, 9)
    return threshold

# ## **Find the Biggest Contour**
# **Note: We made sure the minimum contour is bigger than 1/10 the size of the whole picture. This helps in removing very small contours (noise) from our dataset**
def biggest_contour(contours, min_area):
    biggest = None
    max_area = 0
    biggest_n = 0
    approx_contour = None
    for n, i in enumerate(contours):
        area = cv2.contourArea(i)
        if area > min_area / 10:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.02 * peri, True)
            if area > max_area and len(approx) == 4:
                biggest = approx
                max_area = area
                biggest_n = n
                approx_contour = approx
    return biggest_n, approx_contour

def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    pts = pts.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    # return the ordered coordinates
    return rect

# ## Find the exact (x, y) coordinates of the biggest contour and crop it out
def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect
    # compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "bird's eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    # return the warped image
    return warped

# # Transformation of the image
# **1. Convert the image to grayscale**
# **2. Remove noise and smoothen out the image by applying blurring and thresholding techniques**
# **3. Use Canny Edge Detection to find the edges**
# **4. Find the biggest contour and crop it out**
def transformation(image):
    image = image.copy()
    height, width, channels = image.shape
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_size = gray.size
    threshold = blur_and_threshold(gray)
    # We need two threshold values, minVal and maxVal. Any edges with intensity gradient more than maxVal
    # are sure to be edges, and those below minVal are sure to be non-edges, so discarded.
    # Those that lie between these two thresholds are classified as edges or non-edges based on their connectivity.
    # If they are connected to "sure-edge" pixels, they are considered to be part of edges.
    # Otherwise, they are also discarded.
    edges = cv2.Canny(threshold, 50, 150, apertureSize=7)
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    simplified_contours = []
    for cnt in contours:
        hull = cv2.convexHull(cnt)
        simplified_contours.append(cv2.approxPolyDP(hull,
                                   0.001 * cv2.arcLength(hull, True), True))
    simplified_contours = np.array(simplified_contours)
    biggest_n, approx_contour = biggest_contour(simplified_contours, image_size)
    threshold = cv2.drawContours(image, simplified_contours, biggest_n, (0, 255, 0), 1)
    dst = 0
    if approx_contour is not None and len(approx_contour) == 4:
        approx_contour = np.float32(approx_contour)
        dst = four_point_transform(threshold, approx_contour)
    croppedImage = dst
    return croppedImage

# **Increase the brightness of the image by playing with the "V" value (from HSV)**
def increase_brightness(img, value=30):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    lim = 255 - value
    v[v > lim] = 255
    v[v <= lim] += value
    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

# **Sharpen the image using a kernel sharpening technique**
def final_image(rotated):
    # Create our sharpening kernel; its entries must sum to one
    kernel_sharpening = np.array([[0, -1, 0],
                                  [-1, 5, -1],
                                  [0, -1, 0]])
    # apply the sharpening kernel to the input image
    sharpened = cv2.filter2D(rotated, -1, kernel_sharpening)
    sharpened = increase_brightness(sharpened, 30)
    return sharpened

# ## 1. Pass the image through the transformation function to crop out the biggest contour
# ## 2. Brighten & sharpen the image to get a final cleaned image
path = "/home/hamza/Desktop/"
image = cv2.imread("path of image")
blurred_threshold = transformation(image)
cleaned_image = final_image(blurred_threshold)
cv2.imwrite(path + "Final_Image4.jpg", cleaned_image)
Pictures
first pic
second image
Edit 1:
Picture 1 test image 1
Picture 2 test image 2
Picture 3 test image 3
Picture 4 test image 4
Edit 2:
If I pass only a black-and-white image, can the code remove the shear? You may try it; if it works, please share the result with me.
Pic:
black and white image
Note: if an image is already unsheared, then that particular image should be output exactly as it is; the code should not touch it. I hope you get the point.
Nothing is wrong with your images. I tested your code, and indeed the first image works with the code as you wrote it; for the second image, some quick debugging shows that no contour was found in your transformation function. You have the block:
if approx_contour is not None and len(approx_contour) == 4:
    approx_contour = np.float32(approx_contour)
    dst = four_point_transform(threshold, approx_contour)
Just add:
else:
    print("no contour found")
to see for yourself.
The problem is with your Canny filter. With apertureSize = 7 your first image works but not the second one; with apertureSize = 3 your second image works but not the first one.
So both of your images work, but not with the same parameters. If processing time is not an issue for your task, you could iterate over several parameter values, or else steer away from the Canny method. In both of your images the paper is a lot brighter than the background, so a convex hull on the thresholded image would also work.
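For illustration, a minimal sketch of such a parameter sweep; find_document_contour is a hypothetical helper wrapping the findContours/approxPolyDP logic from the transformation function above:

# Hypothetical sketch: retry Canny with each valid aperture size until a
# quadrilateral contour is found. find_document_contour is an assumed helper.
def detect_with_parameter_sweep(threshold):
    for aperture in (3, 5, 7):  # the valid Canny aperture sizes
        edges = cv2.Canny(threshold, 50, 150, apertureSize=aperture)
        approx_contour = find_document_contour(edges)
        if approx_contour is not None and len(approx_contour) == 4:
            return approx_contour
    return None  # no aperture size produced a quadrilateral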
Since your initial image is in color, another approach is to use the H (hue) plane to detect the white color of the paper. I have made some adjustments to the code and it now works for both of your images.
import numpy as np
import cv2
import imutils

def order_points(pts):
    """ Return sorted list of corners, from top-left then clockwise """
    pts = np.reshape(pts, (6, 2))
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect

def four_point_transform(image, pts):
    (tl, tr, br, bl) = pts
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    M = cv2.getPerspectiveTransform(pts, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped

image1 = cv2.imread('im1.jpg')
mask = np.zeros(image1.shape, np.uint8)
gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)

# Hue thresholding for the white paper
HSV = cv2.cvtColor(image1, cv2.COLOR_BGR2HSV)
lo_H = 100
hi_H = 200
thresh = cv2.inRange(HSV, (lo_H, 0, 0), (hi_H, 255, 255))

# Find the contour of the paper sheet - use convexHull
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnt = sorted(contours, key=cv2.contourArea, reverse=True)
hull = []
for i in range(len(cnt)):
    hull.append(cv2.convexHull(cnt[i], False))
cv2.drawContours(mask, hull, 0, (255, 255, 255), thickness=cv2.FILLED)
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_and(gray, gray, mask=mask)

# Corner detection - keep the 6 best corners
corners = cv2.goodFeaturesToTrack(mask, 6, 0.01, 50)
corners = np.int0(corners)

# Order the corners and keep 4
rect = order_points(corners)
wrap = four_point_transform(gray, rect)

# Display results
cv2.drawContours(image1, hull, 0, (0, 255, 125), 3)
cv2.imshow("image", image1)
#cv2.imshow('thresh', thresh)
#cv2.imshow("mask", mask)
cv2.imshow("gray", gray)
cv2.imshow("wrap", wrap)
cv2.waitKey(0)  # keep the windows open

Labyrinth recognition is slow and not always accurate

I'm making a Python script that:
gets the image from a file
looks for a paper sheet (done with OpenCV)
warps the image
crops the margins of the paper sheet
looks for the lowest number of white pixels in a single row of pixels, which gives the width/height of a single field of the grid (goes pixel by pixel)
recreates the labyrinth based on the average pixel RGB values in each field (goes pixel by pixel) and saves it to a file
shows me the grid of 0s and 1s in the CLI
When I run it on the original image (5.2 MB, 4000x3000), it takes up to 20 seconds (although I have a pretty snappy 8-core Ryzen CPU), and I actually only need the output (the 0/1 grid). How can I speed up the process? It will run on a Raspberry Pi and needs to finish in only a few seconds. I know the code is quite long, but it's split into sections that are described, so it should be fairly easy to read.
# LIBRARY IMPORT
import math
import turtle
import time
import sys
from collections import deque
import numpy as np
from skimage import exposure
import argparse
import imutils
import cv2
from PIL import Image, ImageDraw

# LOAD THE FIRST IMAGE
image = cv2.imread("./image3.jpg")
ratio = image.shape[0] / 600.0
orig = image.copy()
image = imutils.resize(image, height=600)

# Range of the colors of paper
lower = [160, 160, 160]
upper = [255, 255, 255]

# create arrays from ranges
lower = np.array(lower, dtype="uint8")
upper = np.array(upper, dtype="uint8")

# finding contours
mask = cv2.inRange(image, lower, upper)
output = cv2.bitwise_and(image, image, mask=mask)
ret, thresh = cv2.threshold(mask, 40, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = max(contours, key=cv2.contourArea)
cnt = contours
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)

for c in [contours]:
    # contour approximation
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.01 * peri, True)
    # crop of contour with 4 points
    if len(approx) == 4:
        screenCnt = approx
        break

img = cv2.drawContours(image, [screenCnt], 0, (0, 0, 255), 2)
print(screenCnt)
pts = screenCnt.reshape(4, 2)
rect = np.zeros((4, 2), dtype="float32")
s = pts.sum(axis=1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
diff = np.diff(pts, axis=1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
rect *= ratio
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
dst = np.array([
    [0, 0],
    [maxWidth - 1, 0],
    [maxWidth - 1, maxHeight - 1],
    [0, maxHeight - 1]], dtype="float32")
M = cv2.getPerspectiveTransform(rect, dst)
warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
warp = imutils.resize(warp)
cv2.imshow("image", image)
cv2.moveWindow("image", 0, 450)
cv2.imwrite("warp.jpg", warp)

# warping of found labyrinth
warp = cv2.imread("warp.jpg", 0)
warp = cv2.medianBlur(warp, 5)
ret, warp = cv2.threshold(warp, 170, 255, cv2.THRESH_BINARY)
cv2.imwrite("warp.jpg", warp)
image_file = "warp.jpg"
im = Image.open(image_file)
width, height = im.size
T = height * (5 / 210)
B = height - (height * (5 / 210))
L = width * (5 / 210)
R = width - (width * (5 / 210))
im_crop = im.crop((L, T, R, B))
im = im_crop
im_crop.save('warp.png', quality=100)
white = 0

# finding field size
im = Image.open("warp.png")
width, height = im.size
minimalGridWidth2 = width
minimalGridWidth = width
MaximumGridWidth = width
for y in range(0, height):
    for x in range(0, width):
        if im.getpixel((x, y)) > 200:
            white = white + 1
        if white <= minimalGridWidth:
            minimalGridWidth = white

# checks how many fields the labyrinth has in each direction, checks the average
# value (more black or more white) of each field and recreates the new "ideal"
# labyrinth from this data
gridWidth = int(round(width / minimalGridWidth))
gridHeight = int(round(height / minimalGridWidth))
print(gridWidth)
print(gridHeight)
newHeight = 0
newWidth = 0
newHeight = (minimalGridWidth * gridHeight)
newWidth = (minimalGridWidth * gridWidth)
print(minimalGridWidth)
print(newWidth)
print(newHeight)
im = im.resize((newWidth, newHeight), Image.ANTIALIAS)
i = 0
x, y = gridWidth, gridHeight
pixelcount = [[0 for x in range(0, gridWidth)] for y in range(0, gridHeight)]
pixelavg = [[0 for x in range(0, gridWidth)] for y in range(0, gridHeight)]
print(pixelcount)
for y in range(0, gridHeight):
    for x in range(0, gridWidth):
        i = 0
        pixel = 0
        for v in range(0, minimalGridWidth):
            for w in range(0, minimalGridWidth):
                pixel = pixel + im.getpixel((((x * minimalGridWidth) + w), ((y * minimalGridWidth) + v)))
                i = i + 1
        if (pixel / i) < 127:
            pixelavg[y][x] = 1
        elif (pixel / i) > 127:
            pixelavg[y][x] = 0
print(np.array(pixelavg))
y, x, v, w, i = 0, 0, 0, 0, 0
im2 = Image.new('RGB', (newWidth, newHeight), 'white')
for y in range(0, gridHeight):
    for x in range(0, gridWidth):
        for v in range(0, minimalGridWidth):
            for w in range(0, minimalGridWidth):
                pixelx = pixelavg[y][x]
                if pixelx == 0:
                    pixelDoc = (255, 255, 255)
                if pixelx == 1:
                    pixelDoc = (0, 0, 0)
                Xw = ((x * minimalGridWidth) + w)
                Yh = ((y * minimalGridWidth) + v)
                im2.putpixel((Xw, Yh), pixelDoc)
im2.save('warp3.png', quality=100)
imx = cv2.imread('warp3.png', 0)
cv2.imshow('finito', imx)
cv2.imwrite('koniec.png', imx)
cv2.moveWindow("finito", 750, 450)
warp = cv2.imread("warp.png", 0)
cv2.imshow("warp", warp)
cv2.moveWindow("warp", 450, 450)
When you are concerned about performance or looking to optimize code, it helps to profile your program.
You could use Python's built-in profiler or an IDE with a profiler, such as PyCharm.
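For example, a minimal run of the standard-library profiler might look like this (process_labyrinth is a placeholder for the code being measured):

import cProfile
import pstats

# Profile a callable and print the ten most expensive calls by cumulative time.
# process_labyrinth is a placeholder for the code under test.
cProfile.run('process_labyrinth()', 'profile.stats')
pstats.Stats('profile.stats').sort_stats('cumulative').print_stats(10)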
When profiling your code on a 4000x2665 image, I found the following:
As you can see, the getpixel and putpixel functions take ~60% of the total execution time.
This makes sense, as they are called for every image pixel in a nested loop:
for y in range(0, height):
    white = 0
    for x in range(0, width):
        if im.getpixel((x, y)) > 200:
            white = white + 1
    if white <= minimalGridWidth:
        minimalGridWidth = white
The above code can be fixed by replacing the nested loops with image-wide NumPy operations:
np_im = np.array(im)                         # PIL image -> NumPy array
white_per_row = np.sum(np_im > 200, axis=1)  # white-pixel count of each row
minimalGridWidth = np.min(white_per_row)     # row with the fewest white pixels
Replacing just this one loop with the vectorized version cut the total execution time by 5644 ms, or ~32%.
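The same idea extends to the field-averaging loops further down: instead of calling getpixel for every pixel of every field, the warped image can be reshaped into blocks and averaged in one shot. A sketch, assuming (as the resize step guarantees) that the image dimensions are exact multiples of minimalGridWidth:

# Vectorized block averaging, replacing the getpixel/putpixel loops.
# Assumes np_im.shape == (gridHeight * s, gridWidth * s) with s = minimalGridWidth.
s = minimalGridWidth
np_im = np.array(im)
blocks = np_im.reshape(gridHeight, s, gridWidth, s)
field_means = blocks.mean(axis=(1, 3))      # average intensity of each field
pixelavg = (field_means < 127).astype(int)  # 1 = dark field (wall), 0 = light field (path)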

How to identify the largest bounding rectangles in an image and extract them into separate images using OpenCV and Python

I am new to OpenCV and Python, and I am trying to identify the three largest rectangles, as marked in the sample image, and extract them into three separate images. I am able to identify contours in the image, but all of them are drawn (as shown in the second image) and I am not able to separate out the three largest ones.
Code I have written so far:
import cv2

image = cv2.imread('imgpath')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 130, 255, 1)

cnts = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
#largest_contours = sorted(cnts, key=cv2.contourArea)[-3:]
#print(len(largest_contours))
for c in cnts:
    cv2.drawContours(image, [c], 0, (0, 255, 0), 3)
    #cv2.imshow("result", image)

#cv2.drawContours(image, largest_contours, -1, (0,255,0), 3)
cv2.imshow('contours', image)
cv2.waitKey(0)
Here's an approach:
Convert image to grayscale
Adaptive threshold to get a binary image
Find contours and sort for largest three
Perform contour approximation to ensure we have a square contour
Perform perspective transform to get top-down view
Rotate image to get correct orientation
The extracted rectangles after performing perspective transform and rotating
import cv2
import numpy as np

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take the translation into account
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:3]

ROI_number = 0
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        transformed = perspective_transform(original, approx)
        rotated = rotate_image(transformed, -90)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), rotated)
        cv2.imshow('ROI_{}'.format(ROI_number), rotated)
        ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
Sort the contours according to their area and then pick the top three.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts[:3]:
    cv2.drawContours(image, [c], 0, (0, 255, 0), 3)
    (x, y, w, h) = cv2.boundingRect(c)
(x,y,w,h) = cv2.boundingRect(c)
(x, y, w, h) are the top-left coordinates (x, y) and the width and height of the contour's bounding rectangle. These values can be used to crop out the rectangle.
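For instance, a minimal cropping sketch using NumPy slicing (the output filename pattern is an assumption):

for i, c in enumerate(cnts[:3]):
    (x, y, w, h) = cv2.boundingRect(c)
    roi = image[y:y + h, x:x + w]              # slicing crops the region
    cv2.imwrite('rect_{}.png'.format(i), roi)  # assumed output name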

How can I extract a selected rectangle from a screen as a top-view image?

I've written some code to detect the computer screen in an image. I need to do some work on pixels that are in the center of that selected rectangle. How can I extract the selected rectangle as a rectangular image?
import imutils
import cv2

image = cv2.imread('test-img/imgRec3.jpg')
ratio = image.shape[0] / 300.0
image = imutils.resize(image, height=300)
realImage = image.copy()

# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screenCnt = None

# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("image", realImage)
cv2.imshow("Screen Rec", image)
cv2.waitKey(0)
Thanks for your help in advance.
Using skimage, you could do it like this:
import numpy as np
import skimage.transform
from matplotlib import pyplot as plt

def transform(intersections, image):
    # get_orientation() is defined elsewhere in the answerer's code and
    # returns the target width and height of the output image
    w, h = get_orientation()
    a = np.array([0, h])
    b = np.array([w, h])
    c = np.array([w, 0])
    d = np.array([0, 0])
    tf = skimage.transform.estimate_transform("projective",
                                              dst=np.vstack((a, b, c, d)),
                                              src=intersections)
    invtf = tf.inverse
    transformedImage = skimage.transform.warp(image=image, inverse_map=invtf, output_shape=(h, w))
    return transformedImage

fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(15, 8))
transformed_image = transform(sorted_intersec, img[index])
ax0.imshow(transformed_image, cmap="gray")
ax1.imshow(img[index])
intersections are your 4 corner points. Keep in mind that you have to pass them in the correct order.
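To make "the correct order" concrete, here is a minimal hypothetical helper using the sum/difference trick from the other answers; the resulting order must then be matched against the dst corners (a, b, c, d) built inside transform:

import numpy as np

def order_intersections(pts):
    # Order a (4, 2) array of corner points as top-left, top-right,
    # bottom-right, bottom-left: top-left has the smallest x+y sum,
    # bottom-right the largest; top-right has the smallest y-x
    # difference, bottom-left the largest.
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect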
To extract the selected rectangle as a rectangular image, we can use a perspective transform to obtain a top-down view of the image. Since you were able to find the bounding box of the rectangle, we can use those coordinates as the corners of the new image. To begin, we separate the four corners into the individual points given to us by cv2.approxPolyDP(). We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)
Now with the isolated corner points, we find the new width and length dimensions for the top-down image. We can obtain the transformation matrix using cv2.getPerspectiveTransform() and actually obtain the transformed image using cv2.warpPerspective().
def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))
Obtained bounding box coordinates
Extracted rectangle
Full code
import imutils
import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
ratio = image.shape[0] / 300.0
image = imutils.resize(image, height=300)
realImage = image.copy()

# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screenCnt = None

# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        transformed = perspective_transform(realImage, screenCnt)
        break

cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("image", realImage)
cv2.imshow("Screen Rec", image)
cv2.imshow("transformed", transformed)
cv2.waitKey(0)

OpenCV Sudoku Grabber [duplicate]

I'm working on a personal project using OpenCV in Python. I want to detect a Sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
But I got a black image as the result:
The code is:
import cv2
import numpy as np

def find_biggest_blob(outerBox):
    max = -1
    maxPt = (0, 0)
    h, w = outerBox.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)
    for y in range(0, h):
        for x in range(0, w):
            if outerBox[y, x] >= 128:
                area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))
    #cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))
    image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
    cv2.imwrite(image_path, outerBox)
    cv2.imshow(image_path, outerBox)

def main():
    image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
    find_biggest_blob(image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
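One note on the snippet above: in Python, cv2.floodFill returns a tuple (area, image, mask, rect), where area is the number of repainted pixels, so the fill area has to be read from the return value. A minimal sketch of the classic biggest-blob search under that reading, reusing the question's outerBox, h and w:

# Sketch: dim every white blob to gray (64), remembering the seed of the
# largest fill, then repaint only that blob white.
max_area = -1
max_pt = (0, 0)
for y in range(h):
    for x in range(w):
        if outerBox[y, x] >= 128:
            area = cv2.floodFill(outerBox, None, (x, y), 64)[0]
            if area > max_area:
                max_area = area
                max_pt = (x, y)
cv2.floodFill(outerBox, None, max_pt, 255)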
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we apply an adaptive threshold to obtain a binary image
Next we find contours and filter using contour area. Here's the detected board
Now to get a top-down view of the image, we perform a perspective transform. Here's the result
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    transformed = perspective_transform(original, approx)
    break

cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution, which generalizes to any image, whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting in different areas)
Identify the corners of the large square
Perspective transform of the image to the final square image
Depending on the amount of skew in the original image, the identified corners may be out of order, so we need to arrange them correctly. The method used here is to find the centroid of the large square and determine the order of the corners from their position relative to it.
Here is the code:
import cv2
import numpy as np

# Helper functions for getting the square image
def euclidian_distance(point1, point2):
    # Calculates the euclidian distance between point1 and point2;
    # used to calculate the length of the four sides of the square
    distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
    return distance

def order_corner_points(corners):
    # The points obtained from contours may not be in order because of the skewness of the image, or
    # because of the camera angle. This function returns a list of corners in the right order
    sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
    sort_corners = [list(ele) for ele in sort_corners]
    x, y = [], []
    for i in range(len(sort_corners[:])):
        x.append(sort_corners[i][0])
        y.append(sort_corners[i][1])
    centroid = [sum(x) / len(x), sum(y) / len(y)]
    for _, item in enumerate(sort_corners):
        if item[0] < centroid[0]:
            if item[1] < centroid[1]:
                top_left = item
            else:
                bottom_left = item
        elif item[0] > centroid[0]:
            if item[1] < centroid[1]:
                top_right = item
            else:
                bottom_right = item
    ordered_corners = [top_left, top_right, bottom_right, bottom_left]
    return np.array(ordered_corners, dtype="float32")

def image_preprocessing(image, corners):
    # This function undertakes all the preprocessing of the image and returns the warped square
    ordered_corners = order_corner_points(corners)
    print("ordered corners: ", ordered_corners)
    top_left, top_right, bottom_right, bottom_left = ordered_corners

    # Determine the widths and heights (top and bottom) of the image and find the max of them for the transform
    width1 = euclidian_distance(bottom_right, bottom_left)
    width2 = euclidian_distance(top_right, top_left)
    height1 = euclidian_distance(top_right, bottom_right)
    height2 = euclidian_distance(top_left, bottom_left)
    width = max(int(width1), int(width2))
    height = max(int(height1), int(height2))

    # To find the matrix for the warp perspective function we need dimensions and matrix parameters
    dimensions = np.array([[0, 0], [width, 0], [width, width],
                           [0, width]], dtype="float32")
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    transformed_image = cv2.warpPerspective(image, matrix, (width, width))
    # Now, chances are, you may want to return your image in a specific size. If not, you may ignore the following line
    transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)
    return transformed_image

# main function
def get_square_box_from_image(image):
    # This function returns the top-down view of the puzzle in grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)
    corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    corners = corners[0] if len(corners) == 2 else corners[1]
    corners = sorted(corners, key=cv2.contourArea, reverse=True)
    for corner in corners:
        length = cv2.arcLength(corner, True)
        approx = cv2.approxPolyDP(corner, 0.015 * length, True)
        print(approx)
        puzzle_image = image_preprocessing(image, approx)
        break
    return puzzle_image

# Call the get_square_box_from_image method on any Sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
Here are the results from the given image and a custom example
