OpenCV Python: Closed Contour Approximation For A Speech Bubble Shape - python

I've got a shape like a speech bubble. And I only want to detect the ellipse of this shape like in the image with the green encircled one.
I tried with closed morphology, but certain parts of the bubbles are also removed. I used a Kernel with a matrix of 20, 20. The shape becomes more rectangular. Maybe I have to change the kernel matrix more like this:
0 1 0
1 1 1
0 1 0
I also tried to draw a convex hull, but it also has no effect. And a inner convex hull is not possible. Here is my code for drawing a convex hull:
for i in range (max_index):
hull = cv2.convexHull(contours[i])
cv2.drawContours(image, [hull], 0, (0, 255, 0), 2)
I retrieved the contours with the parameters cv2.RETR_EXTERNAL and cv2.CHAIN_APPROX_NONE

This is the best I was able to get:
It is not the most clever way to do it. What I am doing here is actually simple, despite the verbose code.
First, I get the gray image and add a lot of blur and in the same way you tried, apply threshold and find contours. I then take the biggest contour and find the ellipse that fits this contour with fitEllipse. This is all in getEllipse function.
In this first round, the ellipse will be skewed because the tail is getting in the way. So, I use this not so good ellipse to process the original image and give another try.
The function grayEllipse filters an image by an ellipse. So, I use the ellipse from the first try to get process the original image and highlight the points the are inside the first ellipse. I use this image as input in this second round.
By repeating the process, the final ellipse I get in the second time is much less skewed.
Here is the code:
import cv2
import numpy as np
def getEllipse(imgray):
ret, thresh = cv2.threshold(imgray, 20, 255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
maxArea = 0
best = None
for contour in contours:
area = cv2.contourArea(contour)
if area > maxArea :
maxArea = area
best = contour
ellipse = cv2.fitEllipse(best)
el = np.zeros(imgray.shape)
cv2.ellipse(el, ellipse,(255,255,255),-1)
return el
def grayEllipse(el, img):
el = np.dstack((el,el,el))
el = el*img
el = el/(255)
el = el.astype('uint8')
imgray = cv2.cvtColor(el, cv2.COLOR_BGR2LAB)[...,0]
return imgray
image = cv2.imread("./baloon.png", cv2.IMREAD_COLOR)
img = image.copy()
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)[...,0]
imgray = cv2.GaussianBlur(imgray, (79,79), 0)
el = getEllipse(imgray)
imgray = grayEllipse(el, img.copy())
imgray = cv2.GaussianBlur(imgray, (11,11), 0)
el = getEllipse(imgray)
imgray = grayEllipse(el, img.copy())
ret, thresh = cv2.threshold(imgray, 20, 255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
maxArea = 0
best = None
for contour in contours:
area = cv2.contourArea(contour)
if area > maxArea :
maxArea = area
best = contour
ellipse = cv2.fitEllipse(best)
cv2.ellipse(image, ellipse, (0,255,0),3)
while True:
cv2.imshow("result", image)
k = cv2.waitKey(30) & 0xff
if k == 27:
break

Related

How to rotate an image to align the text for extraction?

I am using pytessearct to extract the text from images. But it doesn't work on images which are inclined. Consider the image given below:
Here is the code to extract text, which is working fine on images which are not inclined.
img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
def findSignificantContours (img, edgeImg):
contours, heirarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Find level 1 contours
level1 = []
for i, tupl in enumerate(heirarchy[0]):
# Each array is in format (Next, Prev, First child, Parent)
# Filter the ones without parent
if tupl[3] == -1:
tupl = np.insert(tupl, 0, [i])
level1.append(tupl)
significant = []
tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small
for tupl in level1:
contour = contours[tupl[0]];
area = cv2.contourArea(contour)
if area > tooSmall:
significant.append([contour, area])
# Draw the contour on the original image
cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1)
significant.sort(key=lambda x: x[1])
#print ([x[1] for x in significant]);
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi = img[y:y+h,x:x+w]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
text = pytesseract.image_to_string(roi);
print(text); print("\n"); print(pytesseract.image_to_string(thresh));
print("\n")
return [x[0] for x in significant];
edgeImg_8u = np.asarray(thresh, np.uint8)
# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)
#Finally remove the background
img[mask] = 0;
Tesseract can't extract the text from this image. Is there a way I can rotate it to align the text perfectly and then feed it to pytesseract? Please let me know if my question require any more clarity.
Here's a simple approach:
Obtain binary image. Load image, convert to grayscale,
Gaussian blur, then Otsu's threshold.
Find contours and sort for largest contour. We find contours then filter using contour area with cv2.contourArea() to isolate the rectangular contour.
Perform perspective transform. Next we perform contour approximation with cv2.contourArea() to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.
Binary image
Result
To actually extract the text, take a look at
Use pytesseract OCR to recognize text from an image
Cleaning image for OCR
Detect text area in an image using python and opencv
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
To Solve this problem you can also use minAreaRect api in opencv which will give you a minimum area rotated rectangle with an angle of rotation. You can then get the rotation matrix and apply warpAffine for the image to straighten it. I have also attached a colab notebook which you can play around on.
Colab notebook : https://colab.research.google.com/drive/1SKxrWJBOHhGjEgbR2ALKxl-dD1sXIf4h?usp=sharing
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
def rotate_image(image, angle):
image_center = tuple(np.array(image.shape[1::-1]) / 2)
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
return result
img = cv2.imread("/content/sxJzw.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
mask = np.zeros((img.shape[0], img.shape[1]))
blur = cv2.GaussianBlur(gray, (5,5),0)
ret, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
largest_countour = max(contours, key = cv2.contourArea)
binary_mask = cv2.drawContours(mask, [largest_countour], 0, 1, -1)
new_img = img * np.dstack((binary_mask, binary_mask, binary_mask))
minRect = cv2.minAreaRect(largest_countour)
rotate_angle = minRect[-1] if minRect[-1] < 0 else -minRect[-1]
new_img = rotate_image(new_img, rotate_angle)
cv2_imshow(new_img)

Python/OpenCV — Intelligent Centroid Tracking in Bacterial Images?

I'm currently working on an algorithm to detect bacterial centroids in microscopy images.
This question is a continuation of: OpenCV/Python — Matching Centroid Points of Bacteria in Two Images: Python/OpenCV — Matching Centroid Points of Bacteria in Two Images
I am using a modified version of the program proposed by Rahul Kedia.
https://stackoverflow.com/a/63049277/13696853
Currently, the issues in segmentation I am working on are:
Low Contrast
Clustering
The images below are sampled a second apart. However, in the latter image, one of the bacteria does not get detected.
Bright-field Image #1
Bright-Field Image #2
Bright-Field Contour Image #1
Bright-Field Contour Image #2
Bright-Field Image #1 (Unsegmented)
Bright-Field Image #2 (Unsegmented)
I want to know, given that I can successfully determine bacterial centroids in an image, can I use the data to intelligently look for the same bacteria in the subsequent image?
I haven't been able to find anything substantial online; I believe SIFT/SURF would likely be ineffective as the bacteria have the same appearance. Moreover, I am looking for specific points in the images. You can view my program below. Insert a specific path as indicated if you'd like to run the program.
import cv2
import numpy as np
import os
kernel = np.array([[0, 0, 1, 0, 0],
[0, 1, 1, 1, 0],
[1, 1, 1, 1, 1],
[0, 1, 1, 1, 0],
[0, 0, 1, 0, 0]], dtype=np.uint8)
def e_d(image, it):
image = cv2.erode(image, kernel, iterations=it)
image = cv2.dilate(image, kernel, iterations=it)
return image
path = r"[INSERT PATH]"
img_files = [file for file in os.listdir(path)]
def segment_index(index: int):
segment_file(img_files[index])
def segment_file(img_file: str):
img_path = path + "\\" + img_file
print(img_path)
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Applying adaptive mean thresholding
th = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2)
# Removing small noise
th = e_d(th.copy(), 1)
# Finding contours with RETR_EXTERNAL flag and removing undesired contours and
# drawing them on a new image.
cnt, hie = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cntImg = th.copy()
for contour in cnt:
x, y, w, h = cv2.boundingRect(contour)
# Eliminating the contour if its width is more than half of image width
# (bacteria will not be that big).
if w > img.shape[1] / 2:
continue
cntImg = cv2.drawContours(cntImg, [cv2.convexHull(contour)], -1, 255, -1)
# Removing almost all the remaining noise.
# (Some big circular noise will remain along with bacteria contours)
cntImg = e_d(cntImg, 3)
# Finding new filtered contours again
cnt2, hie2 = cv2.findContours(cntImg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Now eliminating circular type noise contours by comparing each contour's
# extent of overlap with its enclosing circle.
finalContours = [] # This will contain the final bacteria contours
for contour in cnt2:
# Finding minimum enclosing circle
(x, y), radius = cv2.minEnclosingCircle(contour)
center = (int(x), int(y))
radius = int(radius)
# creating a image with only this circle drawn on it(filled with white colour)
circleImg = np.zeros(img.shape, dtype=np.uint8)
circleImg = cv2.circle(circleImg, center, radius, 255, -1)
# creating a image with only the contour drawn on it(filled with white colour)
contourImg = np.zeros(img.shape, dtype=np.uint8)
contourImg = cv2.drawContours(contourImg, [contour], -1, 255, -1)
# White pixels not common in both contour and circle will remain white
# else will become black.
union_inter = cv2.bitwise_xor(circleImg, contourImg)
# Finding ratio of the extent of overlap of contour to its enclosing circle.
# Smaller the ratio, more circular the contour.
ratio = np.sum(union_inter == 255) / np.sum(circleImg == 255)
# Storing only non circular contours(bacteria)
if ratio > 0.55:
finalContours.append(contour)
finalContours = np.asarray(finalContours)
# Finding center of bacteria and showing it.
bacteriaImg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for bacteria in finalContours:
M = cv2.moments(bacteria)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
bacteriaImg = cv2.circle(bacteriaImg, (cx, cy), 5, (0, 0, 255), -1)
cv2.imshow("bacteriaImg", bacteriaImg)
cv2.waitKey(0)
# Segment Each Image
for i in range(len(img_files)):
segment_index(i)
Edit #1: Applying frmw42's approach, this image seems to get lost. I have tried adjusting a number of parameters but the image does not seem to show up.
Bright-Field Image #3
Bright-Field Image #4
Here is my Python/OpenCV code to extract your bacteria. I simply threshold, then get the contours and draw filled contours for those within a certain area range. I will let you do any further processing that you want. I simply viewed each step to make sure I have tuned the arguments appropriately before moving to the next step.
Input 1:
Input 2:
import cv2
import numpy as np
# read image
#img = cv2.imread("bacteria1.png")
img = cv2.imread("bacteria2.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255 - gray
# do adaptive threshold on inverted gray image
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 21, 5)
result = np.zeros_like(img)
contours = cv2.findContours(thresh , cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 600 and area < 1100:
cv2.drawContours(result, [cntr], 0, (255,255,255), -1)
# write results to disk
#cv2.imwrite("bacteria_filled_contours1.png", result)
cv2.imwrite("bacteria_filled_contours2.png", result)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("result", result)
cv2.waitKey(0)
Result 1:
Result 2:
Adjust as desired.
It would seem that adaptive threshold is not able to handle all your various images. I suspect nothing simple will. You may need to use AI with training. Nevertheless, this works for your images: 1, 2 and 4 in Python/OpenCV. I make no guarantee that it will work for any of your other images.
First I found a simple threshold that seems to work, but brings in other regions. So since all your bacteria have similar shapes and range of orientations, I fit and ellipse to your bacteria and get the orientation of the major axis and filter the contours with area and angle.
import cv2
import numpy as np
# read image
#img = cv2.imread("bacteria1.png")
#img = cv2.imread("bacteria2.png")
img = cv2.imread("bacteria4.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255 - gray
# median filter
#gray = cv2.medianBlur(gray, 1)
# do simple threshold on inverted gray image
thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]
result = np.zeros_like(img)
contours = cv2.findContours(thresh , cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 600 and area < 1100:
ellipse = cv2.fitEllipse(cntr)
(xc,yc),(d1,d2),angle = ellipse
if angle > 90:
angle = angle - 90
else:
angle = angle + 90
print(angle,area)
if angle >= 150 and angle <= 250:
cv2.drawContours(result, [cntr], 0, (255,255,255), -1)
# write results to disk
#cv2.imwrite("bacteria_filled_contours1.png", result)
#cv2.imwrite("bacteria_filled_contours2.png", result)
cv2.imwrite("bacteria_filled_contours4.png", result)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("result", result)
cv2.waitKey(0)
Result for image 1:
Result for image 2:
Result for image 4:
You might explore noise reduction before thresholding. I had some success with using some of ImageMagick tools and there is a Python version called Python Wand that uses ImageMagick.

Opencv : How to remove rectangle black box without removing any other characters

I would like to remove a rectangle black box from the below image.
I do some preprocessing operation to keep the upper top of the image only. My problem with the rectangle in the middle of the image
This is the preprocessing operation I do on this image
gray = cv2.cvtColor(cropped_top, cv2.COLOR_BGR2GRAY)
binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 2)
binary = cv2.fastNlMeansDenoising(binary, None, 65, 5, 21)
ret, thresh1 = cv2.threshold(binary, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
k = np.ones((4,4))
binary = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, k)
This is the output till now
Here it appears 3 lines connected together. I have used cv2.findContours. But till now I failed to remove this rectangle. I know I am doing something wrong regarding contours.
Here is the code I used for detecting contours
_,binary = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY)
# find external contours of all shapes
_,contours,_ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# create a mask for floodfill function, see documentation
h,w= image.shape
mask = np.zeros((h+2,w+2), np.uint8)
# determine which contour belongs to a square or rectangle
for cnt in contours:
poly = cv2.approxPolyDP(cnt, 0.05*cv2.arcLength(cnt,True),True)
if len(poly) == 4:
# if the contour has 4 vertices then floodfill that contour with black color
cnt = np.vstack(cnt).squeeze()
_,binary,_,_ = cv2.floodFill(binary, mask, tuple(cnt[0]), 0)
how I can successfully remove this black rectangle without distorting letter Q
I used cv2.fillConvexPoly() rather than cv2.floodFill(). Why?
I first found the contour having the highest perimeter and stored its points in a variable. I then used cv2.fillConvexPoly() to fill the contour of having highest perimeter with any color (in this case black (0, 0, 0) ).
Code:
_, binary = cv2.threshold(im, 150, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('binary', binary)
#--- taking a copy of the image above ---
b = binary.copy()
#--- finding contours ---
i, contours, hierarchy = cv2.findContours(binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
max_peri = 0 #--- variable to store the maximum perimeter
max_contour = 0 #--- variable to store the contour with maximum perimeter
# determine which contour belongs to a square or rectangle
for cnt in contours:
peri = cv2.arcLength(cnt, True)
print(peri)
if peri > max_peri:
max_peri = peri
max_contour = cnt
#---- filling the particular contour with black ---
res = cv2.fillConvexPoly(b, max_contour, 0)
cv2.imshow('res.jpg', res)

Finding edges of road using opencv

I am interested in finding the edges of the road. The output image should have only edges marked.
Here is one of my input image:
But the edges in output are either distorted or have lot of noise.
Here is its output:
I have tried applying watershed algorithm, but it does not detect the roads properly.
Here is my code:
import cv2
import numpy as np
img = cv2.imread('road2.jpg',0)
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
kernel = np.ones((5,5),np.uint8)
erosion = cv2.erode(thresh1,kernel,iterations = 1)
#Removing noise from image
blur = cv2.blur(img,(5,5))
#finding edges using edge detection
edges = cv2.Canny(blur, 100 ,200)
laplacian = cv2.Laplacian(edges, cv2.CV_8UC1)
sobely = cv2.Sobel(laplacian,cv2.CV_8UC1, 0, 1, ksize=5)
im2, contours, hierarchy = cv2.findContours(sobely,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
frame = cv2.drawContours(im2, contours, -1, (255,0,0), 3)
cv2.imshow('window',frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
What should do inoder to mark only the edges. I want only the edges in the output beacuse I would later need these edges to find the middle lane of the road.
Result can be seen here. Not perfect, but best I could do. Idea taken from here
The code works for this image on the assumption that:
If there are no cars on the road, then the two markings(left and right), will meet at a vanishing point on the horizon and form a triangle. So, I keep only the largest contour that can be approximated by a triangle.
import cv2
import numpy as np
img = cv2.imread('road2.jpg',0)
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
kernel = np.ones((5,5),np.uint8)
erosion = cv2.erode(thresh1,kernel,iterations = 1)
#Removing noise from image
blur = cv2.blur(img,(5,5))
#finding edges using edge detection
edges = cv2.Canny(blur, 100 ,200)
laplacian = cv2.Laplacian(edges, cv2.CV_8UC1)
sobely = cv2.Sobel(laplacian,cv2.CV_8UC1, 0, 1, ksize=5)
# Do a dilation and erosion to accentuate the triangle shape
dilated = cv2.dilate(sobely,kernel,iterations = 1)
erosion = cv2.erode(dilated,kernel,iterations = 1)
im2, contours, hierarchy = cv2.findContours(erosion,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#keep 10 largest contours
cnts = sorted(contours, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.05 * peri, True)
# if our approximated contour has three points, then
# it must be the road markings
if len(approx) == 3:
screenCnt = approx
break
cv2.drawContours(img, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("Road markings", img)
cv2.waitKey(0)

How to extract white region in an image

I have a sample image like this
I'm looking for a way to black out the noise from the image such that I end up with an image that just has black text on white background so that I may send it to tesseract.
I've tried morphing with
kernel = np.ones((4,4),np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imshow("opening", opening)
but it doesn't seem to work.
I've also tried to find contours
img = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
(cnts, _) = cv2.findContours(img, cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
roi=rotated[y:y+h,x:x+w].copy()
cv2.imwrite("roi.png", roi)
With the above code, I get the following contours:
which leads to this image when cropped:
which is still not good enough. I want black text on white background, so that I can send it to tesseract OCR and have good success rate.
Is there anything else I can try?
Update
Here is an additional similar image. This one is a bit easier because it has a smooth rectangle in it
The following works for your given example, although it might need tweaking for a wider range of images.
import numpy as np
import cv2
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
dst = cv2.bitwise_and(image_src, mask)
mask = 255 - mask
roi = cv2.add(dst, mask)
roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(roi_gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
max_x = 0
max_y = 0
min_x = image_src.shape[1]
min_y = image_src.shape[0]
for c in contours:
if 150 < cv2.contourArea(c) < 100000:
x, y, w, h = cv2.boundingRect(c)
min_x = min(x, min_x)
min_y = min(y, min_y)
max_x = max(x+w, max_x)
max_y = max(y+h, max_y)
roi = roi[min_y:max_y, min_x:max_x]
cv2.imwrite("roi.png", roi)
Giving you the following type of output images:
And...
The code works by first locating the largest contour area. From this a mask is created which is used to first select only the area inside, i.e. the text. The inverse of the mask is then added to the image to convert the area outside the mask to white.
Lastly contours are found again for this new image. Any contour areas outside a suitable size range are discarded (this is used to ignore any small noise areas), and a bounding rect is found for each. With each of these rectangles, an outer bounding rect is calculated for all of the remaining contours, and a crop is made using these values to give the final image.
Update - To get the remainder of the image, i.e. with the above area removed, the following could be used:
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 10, 255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
image_remainder = cv2.bitwise_and(image_src, 255 - mask)
cv2.imwrite("remainder.png", image_remainder)
I get this:
Result
Source Code:
if __name__ == '__main__':
SrcImg = cv2.imread('./Yahi9.png', cv2.CV_LOAD_IMAGE_GRAYSCALE)
_, BinImg = cv2.threshold(SrcImg, 80, 255, cv2.THRESH_OTSU)
Contours, Hierarchy = cv2.findContours(image=copy.deepcopy(SrcImg),
mode=cv2.cv.CV_RETR_EXTERNAL,
method=cv2.cv.CV_CHAIN_APPROX_NONE)
MaxContour, _ = getMaxContour(Contours)
Canvas = np.ones(SrcImg.shape, np.uint8)
cv2.drawContours(image=Canvas, contours=[MaxContour], contourIdx=0, color=(255), thickness=-1)
mask = (Canvas != 255)
RoiImg = copy.deepcopy(BinImg)
RoiImg[mask] = 255
RoiImg = cv2.morphologyEx(src=RoiImg, op=cv2.MORPH_CLOSE, kernel=np.ones((3,3)), iterations=4)
cv2.imshow('RoiImg', RoiImg)
cv2.waitKey(0)
Function:
def getMaxContour(contours):
MaxArea = 0
Location = 0
for idx in range(0, len(contours)):
Area = cv2.contourArea(contours[idx])
if Area > MaxArea:
MaxArea = Area
Location = idx
MaxContour = np.array(contours[Location])
return MaxContour, MaxArea
Ehh, it's python code.
It only works when the white region is the max contour.
Basic idea of this answer is to use border around text.
1) Erode horizontally with a very large kernel, say size of 100 px or 8 times size of single expected character, something like that. It should be done row-wise. The extreme ordinate will give y-location of boundaries around text.
2) Process vertically same way to get x-location of boundaries around text. Then use these locations to crop out image you want.
-- One benefit of this method is you will get every sentence/word segmented separately which, I presume, is good for an OCR.
Happy Coding :)
Edited in by Mark Setchell
Here is a demo of 1)
Here is a demo of 2)

Categories