Extract bounding box and save it as an image

Extract bounding box and save it as an image - python

Suppose you have the following image:
Now I want to extract each of the independent letters into individual images. Currently, I've recovered the contours and then drew a bounding box, in this case for the character a:
After this, I want to extract each of the boxes (in this case for the letter a) and save it to an image file.
Expected result:
Here's my code so far:
import numpy as np
import cv2
im = cv2.imread('abcd.png')
im[im == 255] = 1
im[im == 0] = 255
im[im == 1] = 0
im2 = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(im2,127,255,0)
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(0, len(contours)):
if (i % 2 == 0):
cnt = contours[i]
#mask = np.zeros(im2.shape,np.uint8)
#cv2.drawContours(mask,[cnt],0,255,-1)
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imshow('Features', im)
cv2.imwrite(str(i)+'.png', im)
cv2.destroyAllWindows()
Thanks in advance.

The following will give you a single letter
letter = im[y:y+h,x:x+w]

Here's an approach:
Convert image to grayscale
Otsu's threshold to obtain a binary image
Find contours
Iterate through contours and extract ROI using Numpy slicing
After finding contours, we use cv2.boundingRect() to obtain the bounding rectangle coordinates for each letter.
x,y,w,h = cv2.boundingRect(c)
To extract the ROI, we use Numpy slicing
ROI = image[y:y+h, x:x+w]
Since we have the bounding rectangle coordinates, we can draw the green bounding boxes
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
Here's the detected letters
Here's each saved letter ROI
import cv2
image = cv2.imread('1.png')
copy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray,0,255,cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
ROI_number = 0
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
ROI = image[y:y+h, x:x+w]
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
ROI_number += 1
cv2.imshow('thresh', thresh)
cv2.imshow('copy', copy)
cv2.waitKey()

def bounding_box_img(img,bbox):
x_min, y_min, x_max, y_max = bbox
bbox_obj = img[y_min:y_max, x_min:x_max]
return bbox_obj
img = cv2.imread("image.jpg")
cropped_img = bounding_box_img(img,bbox)
cv2.imshow(cropped_img)
this returns cropped image (bounding box)
in this aproach, bounding box coordinates bases on pascal-voc annotation formats like here

Related

How to rotate an image to align the text for extraction?

I am using pytessearct to extract the text from images. But it doesn't work on images which are inclined. Consider the image given below:
Here is the code to extract text, which is working fine on images which are not inclined.
img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
def findSignificantContours (img, edgeImg):
contours, heirarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Find level 1 contours
level1 = []
for i, tupl in enumerate(heirarchy[0]):
# Each array is in format (Next, Prev, First child, Parent)
# Filter the ones without parent
if tupl[3] == -1:
tupl = np.insert(tupl, 0, [i])
level1.append(tupl)
significant = []
tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small
for tupl in level1:
contour = contours[tupl[0]];
area = cv2.contourArea(contour)
if area > tooSmall:
significant.append([contour, area])
# Draw the contour on the original image
cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1)
significant.sort(key=lambda x: x[1])
#print ([x[1] for x in significant]);
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi = img[y:y+h,x:x+w]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
text = pytesseract.image_to_string(roi);
print(text); print("\n"); print(pytesseract.image_to_string(thresh));
print("\n")
return [x[0] for x in significant];
edgeImg_8u = np.asarray(thresh, np.uint8)
# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)
#Finally remove the background
img[mask] = 0;
Tesseract can't extract the text from this image. Is there a way I can rotate it to align the text perfectly and then feed it to pytesseract? Please let me know if my question require any more clarity.

Here's a simple approach:
Obtain binary image. Load image, convert to grayscale,
Gaussian blur, then Otsu's threshold.
Find contours and sort for largest contour. We find contours then filter using contour area with cv2.contourArea() to isolate the rectangular contour.
Perform perspective transform. Next we perform contour approximation with cv2.contourArea() to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.
Binary image
Result
To actually extract the text, take a look at
Use pytesseract OCR to recognize text from an image
Cleaning image for OCR
Detect text area in an image using python and opencv
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()

To Solve this problem you can also use minAreaRect api in opencv which will give you a minimum area rotated rectangle with an angle of rotation. You can then get the rotation matrix and apply warpAffine for the image to straighten it. I have also attached a colab notebook which you can play around on.
Colab notebook : https://colab.research.google.com/drive/1SKxrWJBOHhGjEgbR2ALKxl-dD1sXIf4h?usp=sharing
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
def rotate_image(image, angle):
image_center = tuple(np.array(image.shape[1::-1]) / 2)
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
return result
img = cv2.imread("/content/sxJzw.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
mask = np.zeros((img.shape[0], img.shape[1]))
blur = cv2.GaussianBlur(gray, (5,5),0)
ret, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
largest_countour = max(contours, key = cv2.contourArea)
binary_mask = cv2.drawContours(mask, [largest_countour], 0, 1, -1)
new_img = img * np.dstack((binary_mask, binary_mask, binary_mask))
minRect = cv2.minAreaRect(largest_countour)
rotate_angle = minRect[-1] if minRect[-1] < 0 else -minRect[-1]
new_img = rotate_image(new_img, rotate_angle)
cv2_imshow(new_img)

How to extract oval contours from an image and save into different variables?

I need to extract the 12 oval shapes from the image and store them in separate variables say 1 to 12.
The original image was as follows
Original Image:
Output image:
Can someone help me extract all those oval shapes into different variables ?
my code is
import cv2
import numpy as np
path = r'/home/parallels/Desktop/Opencv/data/test.JPG'
i = cv2.imread(path, -1)
img_rgb = cv2.resize(i, (1280,720))
cv2.namedWindow("Original Image",cv2.WINDOW_NORMAL)
img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
img = cv2.bilateralFilter(img,9,105,105)
r,g,b=cv2.split(img)
equalize1= cv2.equalizeHist(r)
equalize2= cv2.equalizeHist(g)
equalize3= cv2.equalizeHist(b)
equalize=cv2.merge((r,g,b))
equalize = cv2.cvtColor(equalize,cv2.COLOR_RGB2GRAY)
ret,thresh_image = cv2.threshold(equalize,0,255,cv2.THRESH_OTSU+cv2.THRESH_BINARY)
equalize= cv2.equalizeHist(thresh_image)
canny_image = cv2.Canny(equalize,250,255)
canny_image = cv2.convertScaleAbs(canny_image)
kernel = np.ones((3,3), np.uint8)
dilated_image = cv2.dilate(canny_image,kernel,iterations=1)
contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours= sorted(contours, key = cv2.contourArea, reverse = True)[:10]
c=contours[0]
print(cv2.contourArea(c))
final = cv2.drawContours(img, [c], -1, (255,0, 0), 3)
mask = np.zeros(img_rgb.shape,np.uint8)
new_image = cv2.drawContours(mask,[c],0,255,-1,)
new_image = cv2.bitwise_and(img_rgb, img_rgb, mask=equalize)
cv2.namedWindow("new",cv2.WINDOW_NORMAL)
cv2.imshow("new",new_image)
cv2.waitKey(0)

You're on the right track. After obtaining your binary image, you can perform contour area + aspect ratio filtering. We can sort the contours in order from left-to-right using imutils.contours.sort_contours(). We find contours then filter using cv2.contourArea
and aspect ratio with cv2.approxPolyDP + cv2.arcLength. If they pass this filter, we draw the contours and append it to a oval list to keep track of the contours. Here's the results:
Filtered mask
Results
Isolated ovals
Output from oval list
Oval contours: 12
Code
import cv2
import numpy as np
from imutils import contours
# Load image, resize, convert to HSV, bilaterial filter
image = cv2.imread('1.jpg')
resize = cv2.resize(image, (1280,720))
original = resize.copy()
mask = np.zeros(resize.shape[:2], dtype=np.uint8)
hsv = cv2.cvtColor(resize, cv2.COLOR_RGB2HSV)
hsv = cv2.bilateralFilter(hsv,9,105,105)
# Split into channels and equalize
r,g,b=cv2.split(hsv)
equalize1 = cv2.equalizeHist(r)
equalize2 = cv2.equalizeHist(g)
equalize3 = cv2.equalizeHist(b)
equalize = cv2.merge((r,g,b))
equalize = cv2.cvtColor(equalize,cv2.COLOR_RGB2GRAY)
# Blur and threshold for binary image
blur = cv2.GaussianBlur(equalize, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours, sort from left-to-right
# Filter using contour area and aspect ratio filtering
ovals = []
num = 0
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="left-to-right")
for c in cnts:
area = cv2.contourArea(c)
x,y,w,h = cv2.boundingRect(c)
ar = w / float(h)
if area > 1000 and ar < .8:
cv2.drawContours(resize, [c], -1, (36,255,12), 3)
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
cv2.putText(resize, str(num), (x,y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (36,55,12), 2)
ovals.append(c)
num += 1
result = cv2.bitwise_and(original, original, mask=mask)
result[mask==0] = (255,255,255)
print('Oval contours: {}'.format(len(ovals)))
cv2.imshow('equalize', equalize)
cv2.imshow('thresh', thresh)
cv2.imshow('resize', resize)
cv2.imshow('result', result)
cv2.imshow('mask', mask)
cv2.waitKey()

Extract car license plate number in image

I want to find the car plate number to search in a database. Since Saudi plates are different, I face this problem
The result of the code
My current approach is to search for the cross in openCV using edge detection. How can I found the cross and take the below character (using container and edge detection)?
import numpy as np
import pytesseract
from PIL import Image
import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
img = cv2.imread('M4.png')
img = cv2.resize(img, (820,680) )
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #convert to grey scale
gray = cv2.blur(gray, (3,3))#Blur to reduce noise
edged = cv2.Canny(gray, 10, 100) #Perform Edge detection
# find contours in the edged image, keep only the largest
# ones, and initialize our screen contour
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.1 * peri, True)
# if our approximated contour has four points, then
# we can assume that we have found our screen
if len(approx) == 4:
screenCnt = approx
break
if screenCnt is None:
detected = 0
print "No contour detected"
else:
detected = 1
if detected == 1:
cv2.drawContours(img, [screenCnt], -1, (0, 255, 0), 3)
# Masking the part other than the number plate
imgs = img
mask = np.zeros(gray.shape,np.uint8)
new_image = cv2.drawContours(mask,[screenCnt],0,255,-1,)
new_image = cv2.bitwise_and(imgs,imgs,mask=mask)
# Now crop
(x, y) = np.where(mask == 255)
(topx, topy) = (np.min(x), np.min(y))
(bottomx, bottomy) = (np.max(x), np.max(y))
Cropped = gray[topx:bottomx+1, topy:bottomy+1]
#Read the number plate
text = pytesseract.image_to_string(Cropped, config='--psm 11')
print("Detected Number is:",text)
plt.title(text)
plt.subplot(1,4,1),plt.imshow(img,cmap = 'gray')
plt.title('Original'), plt.xticks([]), plt.yticks([])
plt.subplot(1,4,2),plt.imshow(gray,cmap = 'gray')
plt.title('gray'), plt.xticks([]), plt.yticks([])
plt.subplot(1,4,3),plt.imshow(Cropped,cmap = 'gray')
plt.title('Cropped'), plt.xticks([]), plt.yticks([])
plt.subplot(1,4,4),plt.imshow(edged,cmap = 'gray')
plt.title('edged'), plt.xticks([]), plt.yticks([])
plt.show()
#check data base
#recoed the entre
cv2.waitKey(0)
cv2.destroyAllWindows()
Thanks for your help

Here's an approach:
Convert image to grayscale and Gaussian blur
Otsu's threshold to get a binary image
Find contours and sort contours from left-to-right to maintain order
Iterate through contours and filter for the bottom two rectangles
Extract ROI and OCR
After converting to grayscale and Gaussian blurring, we Otsu's threshold to get a binary image. We find contours then sort the contours using imutils.contours.sort_contours() with the left-to-right parameter. This step keeps the contours in order. From here we iterate through the contours and perform contour filtering using these three filtering conditions:
The contour must be larger than some specified threshold area (3000)
The width must be larger than the height
The center of each ROI must be in the bottom half of the image. We find the center of each contour and compare it to where it is located on the image.
If a ROI passes these filtering conditions, we extract the ROI using numpy slicing and then throw it into Pytesseract. Here's the detected ROIs that pass the filter highlighted in green
Since we already have the bounding box, we extract each ROI
We throw each individual ROI into Pytesseract one at a time to construct our license plate string. Here's the result
License plate: 430SRU
Code
import cv2
import pytesseract
from imutils import contours
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.png')
height, width, _ = image.shape
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts, _ = contours.sort_contours(cnts, method="left-to-right")
plate = ""
for c in cnts:
area = cv2.contourArea(c)
x,y,w,h = cv2.boundingRect(c)
center_y = y + h/2
if area > 3000 and (w > h) and center_y > height/2:
ROI = image[y:y+h, x:x+w]
data = pytesseract.image_to_string(ROI, lang='eng', config='--psm 6')
plate += data
print('License plate:', plate)

Cropping greyscale images within larger images using OpenCV and python

Hi I'm new to python and opencv. I've got this image:
I'm trying to crop the greyscale images from the picture. At the moment, the code finds the biggest bounding box i.e. the top right image and then crops it. What I want to do is find all the greyscale images even if there are more than 4 in the picture and crop all of them. I'm thinking of using a loop to do it but I don't want to set a loop where it finds the largest bounding boxes 4 times and then stops as other images that I'm processing would have more than 4 images in it. Any help would be greatly appreciated!
import cv2
import numpy as np
# load image
img = cv2.imread('multi.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
# threshold to get just the signature (INVERTED)
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_LIST, \
cv2.CHAIN_APPROX_SIMPLE)
# Find object with the biggest bounding box
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Find object with the biggest bounding box
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi=img[y:y+h,x:x+w]
cv2.imwrite('Image_crop.jpg', roi)
cv2.rectangle(img,(x,y),(x+w,y+h),(200,0,0),2)
cv2.imwrite('Image_cont.jpg', img)

I have elaborated my comment.
In the code provided by you, the contours are found using cv2.RETR_LIST which every possible contour in the image including those present within contours. I have used cv2.RETR_EXTERNAL which ignores those contours within other contours.
image = cv2.imread(r'C:\Users\Desktop\g.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
cv2.imshow('thresh_gray.png', thresh_gray)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(contours):
if cv2.contourArea(c) > 10000:
x, y, w, h = cv2.boundingRect(c)
roi = image[y :y + h, x : x + w ]
cv2.imshow('Region_{}.jpg'.format(i), roi)
cv2.waitKey(0)
cv2.destroyAllWindows()

How to extract white region in an image

I have a sample image like this
I'm looking for a way to black out the noise from the image such that I end up with an image that just has black text on white background so that I may send it to tesseract.
I've tried morphing with
kernel = np.ones((4,4),np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imshow("opening", opening)
but it doesn't seem to work.
I've also tried to find contours
img = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
(cnts, _) = cv2.findContours(img, cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
roi=rotated[y:y+h,x:x+w].copy()
cv2.imwrite("roi.png", roi)
With the above code, I get the following contours:
which leads to this image when cropped:
which is still not good enough. I want black text on white background, so that I can send it to tesseract OCR and have good success rate.
Is there anything else I can try?
Update
Here is an additional similar image. This one is a bit easier because it has a smooth rectangle in it

The following works for your given example, although it might need tweaking for a wider range of images.
import numpy as np
import cv2
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
dst = cv2.bitwise_and(image_src, mask)
mask = 255 - mask
roi = cv2.add(dst, mask)
roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(roi_gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
max_x = 0
max_y = 0
min_x = image_src.shape[1]
min_y = image_src.shape[0]
for c in contours:
if 150 < cv2.contourArea(c) < 100000:
x, y, w, h = cv2.boundingRect(c)
min_x = min(x, min_x)
min_y = min(y, min_y)
max_x = max(x+w, max_x)
max_y = max(y+h, max_y)
roi = roi[min_y:max_y, min_x:max_x]
cv2.imwrite("roi.png", roi)
Giving you the following type of output images:
And...
The code works by first locating the largest contour area. From this a mask is created which is used to first select only the area inside, i.e. the text. The inverse of the mask is then added to the image to convert the area outside the mask to white.
Lastly contours are found again for this new image. Any contour areas outside a suitable size range are discarded (this is used to ignore any small noise areas), and a bounding rect is found for each. With each of these rectangles, an outer bounding rect is calculated for all of the remaining contours, and a crop is made using these values to give the final image.
Update - To get the remainder of the image, i.e. with the above area removed, the following could be used:
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 10, 255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
image_remainder = cv2.bitwise_and(image_src, 255 - mask)
cv2.imwrite("remainder.png", image_remainder)

I get this:
Result
Source Code:
if __name__ == '__main__':
SrcImg = cv2.imread('./Yahi9.png', cv2.CV_LOAD_IMAGE_GRAYSCALE)
_, BinImg = cv2.threshold(SrcImg, 80, 255, cv2.THRESH_OTSU)
Contours, Hierarchy = cv2.findContours(image=copy.deepcopy(SrcImg),
mode=cv2.cv.CV_RETR_EXTERNAL,
method=cv2.cv.CV_CHAIN_APPROX_NONE)
MaxContour, _ = getMaxContour(Contours)
Canvas = np.ones(SrcImg.shape, np.uint8)
cv2.drawContours(image=Canvas, contours=[MaxContour], contourIdx=0, color=(255), thickness=-1)
mask = (Canvas != 255)
RoiImg = copy.deepcopy(BinImg)
RoiImg[mask] = 255
RoiImg = cv2.morphologyEx(src=RoiImg, op=cv2.MORPH_CLOSE, kernel=np.ones((3,3)), iterations=4)
cv2.imshow('RoiImg', RoiImg)
cv2.waitKey(0)
Function:
def getMaxContour(contours):
MaxArea = 0
Location = 0
for idx in range(0, len(contours)):
Area = cv2.contourArea(contours[idx])
if Area > MaxArea:
MaxArea = Area
Location = idx
MaxContour = np.array(contours[Location])
return MaxContour, MaxArea
Ehh, it's python code.
It only works when the white region is the max contour.

Basic idea of this answer is to use border around text.
1) Erode horizontally with a very large kernel, say size of 100 px or 8 times size of single expected character, something like that. It should be done row-wise. The extreme ordinate will give y-location of boundaries around text.
2) Process vertically same way to get x-location of boundaries around text. Then use these locations to crop out image you want.
-- One benefit of this method is you will get every sentence/word segmented separately which, I presume, is good for an OCR.
Happy Coding :)
Edited in by Mark Setchell
Here is a demo of 1)
Here is a demo of 2)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Extract bounding box and save it as an image - python

The following will give you a single letter letter = im[y:y+h,x:x+w]

Related

How to rotate an image to align the text for extraction?

How to extract oval contours from an image and save into different variables?

Extract car license plate number in image

Cropping greyscale images within larger images using OpenCV and python

How to extract white region in an image

Categories

Resources