I would like to detect the contour of the completed form in this scan.
Ideally I would want to find the corners of the table painted with red.
My final goal is to detect that the whole document was scanned and that the four corners are within the boundaries of the scan.
I used OpenCV from python - but it was not able to find the contour of the big container.
Any ideas?
With the observation that the form can be identified using the table grid, here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, then Otsu's threshold to get a binary image
Find horizontal sections. We create a horizontal shaped kernel and find horizontal table lines and draw onto a mask
Find vertical sections. We create a vertical shaped kernel and find vertical table lines and draw onto a mask
Fill text document body and morph open. We perform morph operations to close the table then find contours and fill the mask to obtain a contour of the shape. This step fulfills your needs since you can just find contours on the mask but we can go further and extract only the desired sections.
Perform four-point perspective transform. We find contours, sort for the largest contour, sort using contour approximation then perform a four-point perspective transform to obtain a birds eye view of the image.
Here's the results:
Input image
Detected contour to extract highlighted in green
Output after 4-point perspective transform
Code
import cv2
import numpy as np
from imutils.perspective import four_point_transform
# Load image, create mask, grayscale, and Otsu's threshold
image = cv2.imread('1.jpg')
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,3)
# Find horizontal sections and draw on mask
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (80,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
# Find vertical sections and draw on mask
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
# Fill text document body
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, close_kernel, iterations=3)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(mask, [c], -1, 255, -1)
# Perform morph operations to remove noise
# Find contours and sort for largest contour
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, close_kernel, iterations=5)
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imwrite('mask.png', mask)
cv2.imwrite('thresh.png', thresh)
cv2.imwrite('warped.png', warped)
cv2.imwrite('opening.png', opening)
What about using the Hough transform with a narrow direction range, to find the verticals and horizontals ? If you are lucky, those that you need will be the longest, and after selecting them you can reconstruct the rectangle.
Related
I have an input signature for which I want to remove the gridlines of same color.
So far I am using this python code to do that
import cv2
import numpy as np
## Load image and make B&W version
image = cv2.imread('Downloads/image.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Remove horizontal
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(thresh, [c], -1, (0,0,0), 2)
cv2.imwrite('Downloads/out.png', thresh)
Which takes me to the following image.
Now I am trying to infill the gaps introduced, I tried using a vertical kernel and morph close but that infills the other spaces in images a lot.
Any ideas on how I can achieve the said infilling using a more sophisticated operation and get the infill done without tampering with the existing signature much.
I have searched and found the following python code but it doesn't return the result as expected. I need to use ocr to convert the numbers on the sudoku image and read it as a grid
import cv2
from imutils import contours
import numpy as np
# Load image, grayscale, and adaptive threshold
image = cv2.imread('Sample.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,57,5)
# Filter out all numbers and noise to isolate only boxes
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
if area < 1000:
cv2.drawContours(thresh, [c], -1, (0,0,0), -1)
# Fix horizontal and vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=9)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,1))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, horizontal_kernel, iterations=4)
# Sort by top to bottom and each row by left to right
invert = 255 - thresh
cnts = cv2.findContours(invert, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")
sudoku_rows = []
row = []
for (i, c) in enumerate(cnts, 1):
area = cv2.contourArea(c)
if area < 50000:
row.append(c)
if i % 9 == 0:
(cnts, _) = contours.sort_contours(row, method="left-to-right")
sudoku_rows.append(cnts)
row = []
# Iterate through each box
for row in sudoku_rows:
for c in row:
mask = np.zeros(image.shape, dtype=np.uint8)
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
result = cv2.bitwise_and(image, mask)
result[mask==0] = 255
cv2.imshow('result', result)
cv2.waitKey(175)
cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.waitKey()
I have no great idea how to solve such a problem and forgive me if I was a beginner.
Here's sample of the image.
The best I could do CLI wise was run the image via any converter into PNM format which is preferred for most OCR apps, however most OCR apps will convert to Plain Text and those 7 may sometimes be seen as T (easy enough in this simplified case to Find and Replace).
The BIGGER hurdle is OCR just like PDF has no concept of Indents or margins so now we get this output. And no amount of correction of char spacing would help.
Thus your solution may rely on convert Image to vector placement by convert to PDF XY positions then with PDF OCR attempt to get the Character Layout from the pdf extraction result.
Python libs have data frame solutions that attempt to maintain tabular positions, however I don't do python to suggest which one can do this well.
I have the picture below.
I want to find the the black colored rectangles on the left using opencv. Thanks for help=)
Here's a simple approach using thresholding + morphological operations.
Obtain binary image. Load image, convert to grayscale, then adaptive threshold
Fill rectangular contours. Find contours and fill the contours to create filled rectangular blocks.
Perform morph open. We create a rectangular structuring element and morph open to remove the lines
Draw rectangles around largest rectangles Find contours and draw bounding rectangles around rectangles with an area above a certain treshold.
Here's each step visualized:
Obtain binary image
Adaptive treshold
Filled rectangular contours
Perform morph open
Draw rectangles around largest rectangles
In code:
import numpy as np
import cv2
#load the image
image = cv2.imread("mtF6y.jpg")
# grayscale
result = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# adaptive threshold
thresh = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,51,9)
# Fill rectangular contours
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(thresh, [c], -1, (255,255,255), -1)
# Morph open
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=4)
# Draw rectangles, the 'area_treshold' value was determined empirically
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
area_treshold = 4000
for c in cnts:
if cv2.contourArea(c) > area_treshold :
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('image', image)
cv2.waitKey()
I am trying to extract handwritten characters from field boxes
My desired output would be the character segments with the boxes removed. So far, I've tried defining contours and filtering by area but that hasn't yielded any good results.
# Reading image and binarization
im = cv2.imread('test.png')
char_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
char_bw = cv2.adaptiveThreshold(char_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 75, 10)
# Applying erosion and dilation
kernel = np.ones((5,5), np.uint8)
img_erosion = cv2.erode(char_bw, kernel, iterations=1)
img_dilation = cv2.dilate(img_erosion, kernel, iterations=1)
# Find Canny edges
edged = cv2.Canny(img_dilation, 100, 200)
# Finding Contours
edged_copy = edged.copy()
im2, cnts, hierarchy = cv2.findContours(edged_copy, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
print("Number of Contours found = " + str(len(cnts)))
# Draw all contours
cv2.drawContours(im, cnts, -1, (0, 255, 0), 3)
# Filter using area and save
for no, c in enumerate(cnts):
area = cv2.contourArea(c)
if area > 100:
contour = c
(x, y, w, h) = cv2.boundingRect(contour)
img = im[y:y+h, x:x+w]
cv2.imwrite(f'./cnts/cnt-{no}.png', img_dilation)
Here's a simple approach:
Obtain binary image. We load the image, enlarge using imutils.resize(), convert to grayscale, and perform Otsu's thresholding to obtain a binary image
Remove horizontal lines. We create a horizontal kernel then perform morphological opening and remove the horizontal lines using cv2.drawContours
Remove vertical lines. We create a vertical kernel then perform morphological opening and remove the vertical lines using cv2.drawContours
Here's a visualization of each step:
Binary image
Detected lines/boxes to remove highlighted in green
Result
Code
import cv2
import numpy as np
import imutils
# Load image, enlarge, convert to grayscale, Otsu's threshold
image = cv2.imread('1.png')
image = imutils.resize(image, width=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Remove horizontal
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(image, [c], -1, (255,255,255), 5)
# Remove vertical
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,25))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
cv2.drawContours(image, [c], -1, (255,255,255), 5)
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
My input image
To extract highlighted part
My desired output
Please someone help and give me a suggestion. My images looks like this. This is just sample one. I need to crop the bottom template part and do OCR. I have attached my desire output picture. Please have a look. How to implement it using python?
PS: The sheet size will differ and there may the chance of template to dislocate. but mostly it will be in lower left corner
Here's a potential approach:
Obtain binary image. We convert to grayscale, Gaussian blur, then Otsu's threshold
Fill in potential contours. We iterate through contours and filter using contour approximation to determine if they are rectangular.
Perform morphological operations. We morph open to remove non-rectangular contours using a rectangular kernel.
Filter and extract desired contour. Find contours and filter using contour approximation, aspect ratio, and contour area to isolate the desired contour. Then extract using Numpy slicing.
Binary image
Filled in contours
Morphological operation to remove non-rectangular contours
Desired contour highlighted in green
Extracted ROI
Code
import cv2
# Grayscale, blur, and threshold
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Fill in potential contours
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.05 * peri, True)
if len(approx) == 4:
cv2.drawContours(thresh, [c], -1, (255,255,255), -1)
# Remove non rectangular contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,10))
close = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Filtered for desired contour
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.05 * peri, True)
x,y,w,h = cv2.boundingRect(approx)
aspect_ratio = w / float(h)
area = cv2.contourArea(approx)
if len(approx) == 4 and w > h and aspect_ratio > 2.75 and area > 45000:
cv2.drawContours(image, [c], -1, (36,255,12), -1)
ROI = original[y:y+h, x:x+w]
cv2.imwrite('image.png', image)
cv2.imwrite('ROI.png', ROI)
cv2.waitKey()