Related
I am trying to detect the cells in an image of a bill:
I have this image
Removed the stamp with this code:
import cv2
import numpy as np
# Remove the yellow stamp by painting its largest connected region white.

# read image (cv2.imread returns None instead of raising on a bad path)
img = cv2.imread('dummy1.PNG')
if img is None:
    raise FileNotFoundError("could not read 'dummy1.PNG'")

# threshold on yellow (bounds are given in BGR channel order)
lower = (0, 200, 200)
upper = (100, 255, 255)
thresh = cv2.inRange(img, lower, upper)

# apply dilate morphology so neighbouring yellow fragments merge
kernel = np.ones((9, 9), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)

# get contours; findContours returns 2 or 3 values depending on the
# OpenCV version, so select the contour list by position
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# draw the largest contour filled with white on a copy of the input;
# when nothing matched the colour range the copy is left untouched
result = img.copy()
if contours:
    big_contour = max(contours, key=cv2.contourArea)
    cv2.drawContours(result, [big_contour], 0, (255, 255, 255), -1)
cv2.imwrite('removed.png', result)

# show the images
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
And obtained this image:
Then I converted it to grayscale, inverted it, extracted the vertical and horizontal lines with morphological kernels, and merged the two results with this main.py:
# Imports
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
try:
from PIL import Image
except ImportError:
import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
#################################################################################################
# Read your file as grayscale (flag 0); cv2.imread returns None on failure
file = 'removed.png'
img = cv2.imread(file, 0)
if img is None:
    raise FileNotFoundError("could not read " + file)

# thresholding the image to a binary image
thresh, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# inverting the image
img_bin = 255 - img_bin
cv2.imwrite(r'C:\Users\marou\Desktop\cv_inverted.png', img_bin)
# Plotting the image to see the output
plt.imshow(img_bin, cmap='gray')
plt.show()

# Define a kernel to detect rectangular boxes
# Length(width) of kernel as 100th of total width; clamped to at least 1
# so images narrower than 100 px do not produce a zero-length kernel
kernel_len = max(1, img.shape[1] // 100)
# Defining a vertical kernel to detect all vertical lines of image
ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))
# Defining a horizontal kernel to detect all horizontal lines of image
hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
# A kernel of 2x2
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

#### Vertical LINES ####
# Use vertical kernel to detect and save the vertical lines in a jpg
image_1 = cv2.erode(img_bin, ver_kernel, iterations=5)
vertical_lines = cv2.dilate(image_1, ver_kernel, iterations=5)
cv2.imwrite(r'C:\Users\marou\Desktop\vertical.jpg', vertical_lines)
# Plot the generated image (the eroded stage, before dilation)
plt.imshow(image_1, cmap='gray')
plt.show()

#### HORIZONTAL LINES ####
# Use horizontal kernel to detect and save the horizontal lines in a jpg
image_2 = cv2.erode(img_bin, hor_kernel, iterations=5)
horizontal_lines = cv2.dilate(image_2, hor_kernel, iterations=5)
cv2.imwrite(r'C:\Users\marou\Desktop\horizontal.jpg', horizontal_lines)
# Plot the generated image (the eroded stage, before dilation)
plt.imshow(image_2, cmap='gray')
plt.show()

# Combining both H and V
# Combine horizontal and vertical lines in a new third image, with both having same weight.
img_vh = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
# Eroding and thresholding the image
img_vh = cv2.erode(~img_vh, kernel, iterations=2)
thresh, img_vh = cv2.threshold(img_vh, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imwrite(r'C:\Users\marou\Desktop\img_vh.jpg', img_vh)
plt.imshow(img_vh, cmap='gray')
plt.show()
To get this :
Now I am trying to fill the voids in my lines that happened due to the watermark removal, to be able to apply correct OCR.
I tried following the steps in this thread but I can't seem to get it right.
When I try to fill the grid holes :
# Fill individual grid holes: paint the bounding box of every outer
# contour of `result` solid white (255), then display the image.
found = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# the contour list is the first item of a 2-tuple, otherwise the second item
cnts = found[0] if len(found) == 2 else found[1]
for outline in cnts:
    left, top, width, height = cv2.boundingRect(outline)
    cv2.rectangle(result, (left, top), (left + width, top + height), 255, -1)
cv2.imshow('result', result)
cv2.waitKey()
I get a blank image:
I have outlined an approach to fill the missing lines in the table using the second image as input.
# Load the input image; `image_path` is not defined in this snippet and
# must be set beforehand.
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Inverted binary threshold with the level chosen by Otsu's method; [1]
# selects the thresholded image from the (level, image) return pair.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
Now to create a separate mask for the horizontal lines:
# Rectangular structuring element of size (50, 1): 50 wide, 1 tall.
h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
# contains only the horizontal lines
h_mask = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, h_kernel, iterations=1)
# performing repeated iterations to join lines
h_mask = cv2.dilate(h_mask, h_kernel, iterations=7)
And a separate mask for the vertical lines:
# Rectangular structuring element of size (1, 50): 1 wide, 50 tall.
v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
# Morphological opening with the tall kernel; unlike the horizontal mask,
# no extra dilation is applied here.
v_mask = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, v_kernel, iterations=1)
Upon combining the above results we get the following:
# Union of the two masks: a pixel is set if it is set in either one.
joined_lines = cv2.bitwise_or(v_mask, h_mask)
The result above is not quite what is wanted: the lines now extend beyond the boundaries of the table. To avoid this, I created a separate mask that bounds the table region.
# 5x5 elliptical structuring element.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
# One dilation pass over the thresholded image.
dilate = cv2.dilate(thresh, kernel, iterations=1)
Now find the largest contour in the above image and draw it on another binary image to create the mask.
# findContours returns (contours, hierarchy) in OpenCV 4 but
# (image, contours, hierarchy) in OpenCV 3, so pick the list by position.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = found[0] if len(found) == 2 else found[1]
if not contours:
    raise ValueError("no contours found; cannot build the table mask")
c = max(contours, key=cv2.contourArea)  # contour with largest area
# single-channel black canvas with the same height and width as the input
black = np.zeros(image.shape[:2], np.uint8)
mask = cv2.drawContours(black, [c], 0, 255, -1)  # --> -1 to fill the contour
Using the above image as mask over the joined_lines created further above
# AND-ing the image with itself under `mask` keeps joined_lines only where
# the mask is non-zero.
fin = cv2.bitwise_and(joined_lines, joined_lines, mask = mask)
Note:
You can perform more iterations over the morphological operations to better join the discontinuous lines
I'm trying to detect and draw a rectangular contour on every painting on for example this image:
I followed some guides and did the following:
Grayscale conversion
Applied median blur
Sharpen image
Applied adaptive Threshold
Applied Morphological Gradient
Find contours
Draw contours
And got the following result:
I know it's messy but is there a way to somehow detect and draw a contour around the paintings better?
Here is the code I used:
def _preview(title, picture):
    """Show *picture* in a window named *title* until a key is pressed."""
    cv2.imshow(title, picture)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


path = '<PATH TO THE PICTURE>'

# read the original image and resize it (necessary because the images are large)
image = cv2.resize(cv2.imread(path), (880, 600))
_preview("original", image)

# grayscale conversion
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_preview("painting_gray", gray)

# we need to find a way to detect the edges better so we implement a couple of things
# A little help was found on stackoverflow: https://stackoverflow.com/questions/55169645/square-detection-in-image
# median blur smooths the image
median = cv2.medianBlur(gray, 5)
_preview("painting_median_blur", median)

# sharpen the image, following: https://www.analyticsvidhya.com/blog/2021/08/sharpening-an-image-using-opencv-library-in-python/
kernel = np.array([
    [0, -1, 0],
    [-1, 5, -1],
    [0, -1, 0],
])
image_sharp = cv2.filter2D(src=median, ddepth=-1, kernel=kernel)
_preview('painting_sharpend', image_sharp)

# adaptive thresholding
# thresholding: https://opencv24-python-tutorials.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html#adaptive-thresholding
thresh = cv2.adaptiveThreshold(
    src=image_sharp,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=61,
    C=20,
)
_preview('thresholded image', thresh)

# morphological gradient with a 7x7 kernel
kernel = np.ones((7, 7), np.uint8)
gradient = cv2.morphologyEx(thresh, cv2.MORPH_GRADIENT, kernel)
_preview('dilated image', gradient)

# find the contours of the image
# find contours: https://docs.opencv.org/4.x/dd/d49/tutorial_py_contour_features.html
contours, hierarchy = cv2.findContours(gradient, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
print("contours: ", len(contours))
print("hierachy: ", len(hierarchy))
print(hierarchy)

# draw every contour (index -1) in green, 3 px thick, on the resized image
cv2.drawContours(image, contours, -1, (0, 255, 0), 3)
_preview("contour image", image)
Tips, help or code is appreciated!
Here's a simple approach:
Obtain binary image. We load the image, grayscale, Gaussian blur, then Otsu's threshold to obtain a binary image.
Two pass dilation to merge contours. At this point, we have a binary image but individual separated contours. Since we can assume that a painting is a single large square contour, we can merge small individual adjacent contours together to form a single contour. To do this, we create a vertical and horizontal kernel using cv2.getStructuringElement then dilate to merge them together. Depending on the image, you may need to adjust the kernel sizes or number of dilation iterations.
Detect paintings. Now we find contours and filter using contour area using a minimum threshold area to filter out small contours. Finally we obtain the bounding rectangle coordinates and draw the rectangle with cv2.rectangle.
Code
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpeg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (13, 13), 0)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Two pass dilate with horizontal and vertical kernel
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 5))
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 9))
dilate = cv2.dilate(thresh, horizontal_kernel, iterations=2)
dilate = cv2.dilate(dilate, vertical_kernel, iterations=2)

# Find contours (the list is item 0 of a 2-tuple, otherwise item 1)
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]

# Skip contours at or below the area threshold; box the remaining ones
for c in cnts:
    if cv2.contourArea(c) <= 20000:
        continue
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 3)

for title, picture in (('thresh', thresh), ('dilate', dilate), ('image', image)):
    cv2.imshow(title, picture)
cv2.waitKey()
So here is the actual size of the portrait frame.
Here is a short script that does this.
#!/usr/bin/python 37
#OpenCV 4.3.0, Raspberry Pi 3/B/4B-w/4/8GB RAM, Buster,v10.
#Date: 3rd, June, 2020
import cv2
# Load the image (cv2.imread returns None when the file cannot be read)
img = cv2.imread('portrait.jpeg')
if img is None:
    raise FileNotFoundError("could not read 'portrait.jpeg'")

# Edge detection runs on the colour image, exactly as before; the
# grayscale copy that was computed here was never used and is removed.
edged = cv2.Canny(img, 120, 890)

# Apply adaptive threshold. The named flags replace the bare 1, 1 of the
# original call (both constants have the value 1).
thresh = cv2.adaptiveThreshold(
    edged,
    255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY_INV,
    11,
    2,
)

# apply some dilation and erosion to join the gaps - change iterations to
# detect more or fewer areas
thresh = cv2.dilate(thresh, None, iterations=50)
thresh = cv2.erode(thresh, None, iterations=50)

# Find the contours; the return tuple has 2 or 3 items depending on the
# OpenCV version, and the contour list is always second from the end
found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours, hierarchy = found[-2], found[-1]

# For each sufficiently large contour, find the bounding rectangle and draw it
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area > 20000:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Here is output:
I am using pytesseract to extract the text from images, but it doesn't work on images that are inclined. Consider the image given below:
Here is the code to extract text, which is working fine on images which are not inclined.
# Placeholder: replace with the path of the image to process.
image_path = '<path_to_image>'
img = cv2.imread(image_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# Otsu chooses the threshold level; ret3 receives the chosen level
ret3, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
def findSignificantContours(img, edgeImg):
    """Return the large top-level contours of *edgeImg*, smallest first.

    Side effects: each returned contour is drawn in green on *img* (in
    place), the Otsu-thresholded version of *img* is displayed with
    ``cv2_imshow``, and the OCR text of both the largest bounding-box
    region and the thresholded image is printed.

    Args:
        img: BGR image; it is drawn on and used as the OCR source.
        edgeImg: single-channel image handed to ``cv2.findContours``.

    Returns:
        List of contours that have no parent and whose area exceeds 5% of
        ``edgeImg.size``, sorted by ascending area. Empty when no contour
        is found (no OCR is attempted in that case).
    """
    # The return tuple has 2 or 3 items depending on the OpenCV version;
    # contours and hierarchy are always the last two.
    found = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    contours, hierarchy = found[-2], found[-1]
    if hierarchy is None:
        # No contours at all: indexing hierarchy[0] would raise TypeError.
        return []

    # Each hierarchy row is (Next, Prev, First child, Parent); keep the
    # indices of the contours without a parent.
    top_level = [i for i, row in enumerate(hierarchy[0]) if row[3] == -1]

    # If a contour isn't covering 5% of the total image area it is
    # probably too small.
    too_small = edgeImg.size * 5 / 100
    significant = []
    for i in top_level:
        contour = contours[i]
        area = cv2.contourArea(contour)
        if area > too_small:
            significant.append([contour, area])
            # Draw the contour on the original image
            cv2.drawContours(img, [contour], 0, (0, 255, 0), 2, cv2.LINE_AA, maxLevel=1)
    significant.sort(key=lambda item: item[1])

    # Biggest bounding box (by w*h) over all contours, not only the
    # significant ones.
    x, y, w, h = max(
        (cv2.boundingRect(cont) for cont in contours),
        key=lambda rect: rect[2] * rect[3],
    )
    roi = img[y:y + h, x:x + w]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2_imshow(thresh)

    print(pytesseract.image_to_string(roi))
    print("\n")
    print(pytesseract.image_to_string(thresh))
    print("\n")
    return [contour for contour, _ in significant]
# Find the significant contours on an 8-bit view of the thresholded image
edgeImg_8u = np.asarray(thresh, np.uint8)
significant = findSignificantContours(img, edgeImg_8u)

# Start from an all-zero mask with thresh's shape and dtype, then fill the
# significant contours with 255
mask = np.zeros_like(thresh)
cv2.fillPoly(mask, significant, 255)

# Invert mask: True wherever no significant contour was filled
mask = np.logical_not(mask)

# Finally remove the background
img[mask] = 0
Tesseract can't extract the text from this image. Is there a way I can rotate it to align the text perfectly and then feed it to pytesseract? Please let me know if my question requires any more clarification.
Here's a simple approach:
Obtain binary image. Load image, convert to grayscale,
Gaussian blur, then Otsu's threshold.
Find contours and sort for largest contour. We find contours then filter using contour area with cv2.contourArea() to isolate the rectangular contour.
Perform perspective transform. Next we perform contour approximation with cv2.arcLength() and cv2.approxPolyDP() to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.
Binary image
Result
To actually extract the text, take a look at
Use pytesseract OCR to recognize text from an image
Cleaning image for OCR
Detect text area in an image using python and opencv
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7, 7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

# Take the largest contour whose polygonal approximation has four corners
displayCnt = None
for c in cnts:
    # Perform contour approximation
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        displayCnt = approx
        break

# Without this guard a missing quadrilateral surfaces as an opaque
# AttributeError on None.reshape below
if displayCnt is None:
    raise ValueError("no four-cornered contour found; cannot compute the perspective transform")

# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
To solve this problem you can also use OpenCV's minAreaRect API, which gives you the minimum-area rotated rectangle together with its angle of rotation. You can then build the rotation matrix and apply warpAffine to the image to straighten it. I have also attached a Colab notebook that you can experiment with.
Colab notebook : https://colab.research.google.com/drive/1SKxrWJBOHhGjEgbR2ALKxl-dD1sXIf4h?usp=sharing
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
def rotate_image(image, angle):
    """Rotate *image* by *angle* about its centre, keeping the original size.

    The rotation matrix comes from ``cv2.getRotationMatrix2D`` with scale
    1.0 and is applied with ``cv2.warpAffine`` using linear interpolation.
    The output has the same width and height as the input.
    """
    # shape[1::-1] yields (width, height), the order OpenCV expects
    width, height = image.shape[1::-1]
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height), flags=cv2.INTER_LINEAR)
img = cv2.imread("/content/sxJzw.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# zero-filled mask with the image's height and width (NumPy's default dtype)
mask = np.zeros(img.shape[:2])
blur = cv2.GaussianBlur(gray, (5, 5), 0)
ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2_imshow(thresh)

# Keep only the largest outer contour
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
largest_countour = max(contours, key=cv2.contourArea)

# Fill that contour with 1 and multiply, zeroing every pixel outside it
binary_mask = cv2.drawContours(mask, [largest_countour], 0, 1, -1)
new_img = img * np.dstack([binary_mask] * 3)

# The last item of minAreaRect's result is used as the angle; rotate by
# its negated magnitude
minRect = cv2.minAreaRect(largest_countour)
rotate_angle = -abs(minRect[-1])
new_img = rotate_image(new_img, rotate_angle)
cv2_imshow(new_img)
I am working on OCRing a document image. I want to detect all the pictures and remove them from the document image, while retaining the tables. Once the pictures are removed, I want to run OCR. I tried finding contours and picking out the bigger areas, but unfortunately that detects the tables as well. Also, how do I remove the objects while keeping the other data in the document image? I am using OpenCV and Python.
Here's my code
import os
from PIL import Image
import pytesseract
# Read as grayscale and start from an all-white (255) mask
img = cv2.imread('block2.jpg', 0)
mask = np.ones(img.shape[:2], dtype="uint8") * 255
ret, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# The contour list is second from the end of the return tuple in both
# the 2-item and 3-item forms of findContours
found = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[-2]

areacontainer = [cv2.contourArea(cnt) for cnt in contours]
# Guard the mean: with no contours the division would raise ZeroDivisionError
avgArea = sum(areacontainer) / len(areacontainer) if areacontainer else 0.0

# average area heuristics: black out contours larger than 6x the mean area
for c, area in zip(contours, areacontainer):
    if area > 6 * avgArea:
        cv2.drawContours(mask, [c], -1, 0, -1)

binary = cv2.bitwise_and(img, img, mask=mask)  # subtracting
cv2.imwrite("bin.jpg", binary)
cv2.imwrite("mask.jpg", mask)
Here's an approach:
Convert image to grayscale and Gaussian blur
Perform canny edge detection
Perform morphological operations to smooth image
Find contours and filter using a minimum/maximum threshold area
Remove portrait images
Here's the detected portraits highlighted in green
Now that we have the bounding box ROIs, we can effectively remove the pictures by filling them in with white. Here's the result
import cv2
# Grayscale -> Gaussian blur -> Canny edges
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
canny = cv2.Canny(blur, 120, 255, 1)

# Morphological close (two iterations, 3x3 rectangle) to smooth the edge map
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
close = cv2.morphologyEx(canny, cv2.MORPH_CLOSE, kernel, iterations=2)

found = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]

# Fill the bounding box of every contour whose area lies strictly between
# 15000 and 35000 with white
for c in cnts:
    if not 15000 < cv2.contourArea(c) < 35000:
        continue
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1)

cv2.imshow('image', image)
cv2.waitKey()
Using the following code I can remove horizontal lines in images. See result below.
import cv2
from matplotlib import pyplot as plt
# Read as grayscale and compute the Laplacian and the x-direction Sobel
# derivative (kernel size 5), both as 64-bit floats
img = cv2.imread('image.png', 0)
laplacian = cv2.Laplacian(img, cv2.CV_64F)
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)

# Three panels of a 2x2 grid, each shown in gray with ticks hidden
panels = [(img, 'Original'), (laplacian, 'Laplacian'), (sobelx, 'Sobel X')]
for slot, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.xticks([])
    plt.yticks([])
plt.show()
The result is pretty good, not perfect but good. What I want to achieve is the one showed here.
I am using this code.
Source image..
One of my questions is: how to save the Sobel X without that grey effect applied ? As original but processed..
Also, is there a better way to do it ?
EDIT
Using the following code for the source image is good. Works pretty well.
import cv2
import numpy as np
def _show_and_save(title, filename, picture):
    """Display *picture* in window *title*, write it to *filename*, wait for a key."""
    cv2.imshow(title, picture)
    cv2.imwrite(filename, picture)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Load, convert to grayscale and invert
img = cv2.imread("image.png")
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.bitwise_not(img)
th2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2)
_show_and_save("th2", "th2.jpg", th2)

horizontal = th2
vertical = th2
rows, cols = horizontal.shape

# inverse the image, so that lines are black for masking
horizontal_inv = cv2.bitwise_not(horizontal)
# perform bitwise_and to mask the lines with provided mask
masked_img = cv2.bitwise_and(img, img, mask=horizontal_inv)
# reverse the image back to normal
masked_img_inv = cv2.bitwise_not(masked_img)
_show_and_save("masked img", "result2.jpg", masked_img_inv)

# Horizontal lines: erode then dilate with a kernel 1/30 of the width.
# The anchor is passed by keyword because the third positional parameter
# of cv2.erode / cv2.dilate is the output array (dst), not the anchor.
horizontalsize = int(cols / 30)
horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontalsize, 1))
horizontal = cv2.erode(horizontal, horizontalStructure, anchor=(-1, -1))
horizontal = cv2.dilate(horizontal, horizontalStructure, anchor=(-1, -1))
_show_and_save("horizontal", "horizontal.jpg", horizontal)

# Vertical structures: same procedure with a kernel 1/30 of the height
verticalsize = int(rows / 30)
verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, verticalsize))
vertical = cv2.erode(vertical, verticalStructure, anchor=(-1, -1))
vertical = cv2.dilate(vertical, verticalStructure, anchor=(-1, -1))
_show_and_save("vertical", "vertical.jpg", vertical)

vertical = cv2.bitwise_not(vertical)
_show_and_save("vertical_bitwise_not", "vertical_bitwise_not.jpg", vertical)

# step1
edges = cv2.adaptiveThreshold(vertical, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 3, -2)
_show_and_save("edges", "edges.jpg", edges)

# step2
kernel = np.ones((2, 2), dtype="uint8")
dilated = cv2.dilate(edges, kernel)
_show_and_save("dilated", "dilated.jpg", dilated)

# step3
smooth = vertical.copy()

# step 4
smooth = cv2.blur(smooth, (4, 4))
_show_and_save("smooth", "smooth.jpg", smooth)

# step 5: copy the blurred values into `vertical` wherever the inverted
# grayscale input is exactly 0. Separate names are used for the index
# arrays so the image dimensions in rows/cols are not overwritten.
zero_rows, zero_cols = np.where(img == 0)
vertical[zero_rows, zero_cols] = smooth[zero_rows, zero_cols]
_show_and_save("vertical_final", "vertical_final.jpg", vertical)
But if I have this image ?
I tried to execute the code above and the result is really poor...
Other images which I am working on are these...
Obtain binary image. Load the image, convert to grayscale, then Otsu's threshold to obtain a binary black/white image.
Detect and remove horizontal lines. To detect horizontal lines, we create a special horizontal kernel and morph open to detect horizontal contours. From here we find contours on the mask and "fill in"
the detected horizontal contours with white to effectively remove the lines
Repair image. At this point the image may have gaps if the horizontal lines intersected through characters. To repair the text, we create a vertical kernel and morph close to reverse the damage
After converting to grayscale, we Otsu's threshold to obtain a binary image
# Load the image and convert it to grayscale.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# Inverted binary threshold with Otsu's method; [1] selects the
# thresholded image from the returned pair.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
Next we create a special horizontal kernel to detect horizontal lines. We draw these lines onto a mask and then find contours on the mask. To remove the lines, we fill in the contours with white
Detected lines
Mask
Filled in contours
# Remove horizontal
# Morph-open with a (25, 1) kernel (two iterations) to get the line mask
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
found = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]
# Draw each detected contour in white, 2 px thick, on the colour image
for line_contour in cnts:
    cv2.drawContours(image, [line_contour], -1, (255, 255, 255), 2)
The image currently has gaps. To fix this, we construct a vertical kernel to repair the image
# Repair image
# (1, 6) rectangular kernel: 1 wide, 6 tall.
repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,6))
# Invert, morph-close with the tall kernel, then invert back.
result = 255 - cv2.morphologyEx(255 - image, cv2.MORPH_CLOSE, repair_kernel, iterations=1)
Note depending on the image, the size of the kernel will change. You can think of the kernel as (horizontal, vertical). For instance, to detect longer lines, we could use a (50,1) kernel instead. If we wanted thicker lines, we could increase the 2nd parameter to say (50,2).
Here's the results with the other images
Detected lines
Original -> Removed
Detected lines
Original -> Removed
Full code
import cv2
# Load, grayscale, inverted Otsu threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Remove horizontal
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
found = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]
# Draw each detected contour in white, 2 px thick, on the colour image
for line_contour in cnts:
    cv2.drawContours(image, [line_contour], -1, (255, 255, 255), 2)

# Repair image: close on the inverted image with a (1, 6) kernel, invert back
repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 6))
inverted = 255 - image
result = 255 - cv2.morphologyEx(inverted, cv2.MORPH_CLOSE, repair_kernel, iterations=1)

windows = (
    ('thresh', thresh),
    ('detected_lines', detected_lines),
    ('image', image),
    ('result', result),
)
for title, picture in windows:
    cv2.imshow(title, picture)
cv2.waitKey()