Hi I'm new to python and opencv. I've got this image:
I'm trying to crop the greyscale images from the picture. At the moment, the code finds the biggest bounding box i.e. the top right image and then crops it. What I want to do is find all the greyscale images even if there are more than 4 in the picture and crop all of them. I'm thinking of using a loop to do it but I don't want to set a loop where it finds the largest bounding boxes 4 times and then stops as other images that I'm processing would have more than 4 images in it. Any help would be greatly appreciated!
import cv2
import numpy as np
# load image
img = cv2.imread('multi.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
# threshold to get just the signature (INVERTED)
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_LIST, \
cv2.CHAIN_APPROX_SIMPLE)
# Find object with the biggest bounding box
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Find object with the biggest bounding box
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi=img[y:y+h,x:x+w]
cv2.imwrite('Image_crop.jpg', roi)
cv2.rectangle(img,(x,y),(x+w,y+h),(200,0,0),2)
cv2.imwrite('Image_cont.jpg', img)
I have elaborated my comment.
In the code provided by you, the contours are found using cv2.RETR_LIST which every possible contour in the image including those present within contours. I have used cv2.RETR_EXTERNAL which ignores those contours within other contours.
image = cv2.imread(r'C:\Users\Desktop\g.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
cv2.imshow('thresh_gray.png', thresh_gray)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(contours):
if cv2.contourArea(c) > 10000:
x, y, w, h = cv2.boundingRect(c)
roi = image[y :y + h, x : x + w ]
cv2.imshow('Region_{}.jpg'.format(i), roi)
cv2.waitKey(0)
cv2.destroyAllWindows()
Related
I am trying to detect texts and remove those thick letters. The goal is same proposed by the following link: How to detect text on an X-Ray image with OpenCV. ("to extract the oriented bounding boxes as a matrix")
Two sample images:
Sample Image #1
Sample Image #2
Here, the fonts L, J, C, O (from 1st image) and L, D, A, N, circle-shape (from 2nd image) must be detected. I tried the methods proposed by above link (How to detect text on an X-Ray image with OpenCV), however, it fails to detect the texts in the background.
Original Image --> Binary image
Binary Image (thresholding)
Morph close --> Detected text
Morph close
detected texts (nothing)
As you can see, it fails to detect the texts from the background. It just returns black image. Don't know what happened.
The code:
import cv2
import numpy as np
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('HandImage.png')
original = image.copy()
blank = np.zeros(image.shape[:2], dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find contours
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
# Filter using contour area and aspect ratio
x,y,w,h = cv2.boundingRect(c)
area = cv2.contourArea(c)
ar = w / float(h)
if (ar > 1.4 and ar < 4) or ar < .85 and area > 10 and area < 500:
# Find rotated bounding box
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
cv2.drawContours(blank,[box],0,(255,255,255),-1)
# Bitwise operations to isolate text
extract = cv2.bitwise_and(thresh, blank)
extract = cv2.bitwise_and(original, original, mask=extract)
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()
I am new to the computer vision and image manipulation... Please help!
Update:
I have figured out the error in my code. I had to adjust and identify the right size/area of the bounding box that selected for the texts.
The code:
# Find contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = lambda x: cv2.boundingRect(x)[0])
for c in cnts:
# Filter using contour area and aspect ratio (x1 = width, y1 = height)
x, y, x1, y1 = cv2.boundingRect(c)
area = cv2.contourArea(c)
if (30 < x1 < 200 or 350 < x1 < 500) and 50 < y1 < 350:
# Using the area above to calculate the rotation
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
translated_box = box - np.mean(box, axis=0)
scaled_box = translated_box * 2 # 2 is scale factor
retranslated_box = scaled_box + np.mean(box, axis=0)
box = np.int0(retranslated_box)
cv2.drawContours(image, [box], 0, (36,255,12), 2)
cv2.drawContours(blank, [box], 0, (255,255,255), -1)
But I also liked the suggestion by #Mikel B, where he used a neural network method.
Not a solution in OpenCV as requested. But I leave it here in case a neural network-based solution could be of interest for anybody
First of all install the package by:
pip install craft-text-detector
Then copy paste the following
from craft_text_detector import Craft
# import craft functions
from craft_text_detector import (
read_image,
load_craftnet_model,
load_refinenet_model,
get_prediction,
export_detected_regions,
export_extra_results,
empty_cuda_cache
)
# set image path and export folder directory
image = '/path/to/JP6x0.png' # can be filepath, PIL image or numpy array
output_dir = 'outputs/'
# read image
image = read_image(image)
# load models
refine_net = load_refinenet_model(cuda=False)
craft_net = load_craftnet_model(cuda=False)
# perform prediction
prediction_result = get_prediction(
image=image,
craft_net=craft_net,
refine_net=refine_net,
text_threshold=0.5,
link_threshold=0.1,
low_text=0.1,
cuda=False,
long_size=1280
)
# export detected text regions
exported_file_paths = export_detected_regions(
image=image,
regions=prediction_result["boxes"],
output_dir=output_dir,
rectify=True
)
# export heatmap, detection points, box visualization
export_extra_results(
image=image,
regions=prediction_result["boxes"],
heatmaps=prediction_result["heatmaps"],
output_dir=output_dir
)
# unload models from gpu
empty_cuda_cache()
My results with these parameters:
I am using pytessearct to extract the text from images. But it doesn't work on images which are inclined. Consider the image given below:
Here is the code to extract text, which is working fine on images which are not inclined.
img = cv2.imread(<path_to_image>)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
def findSignificantContours (img, edgeImg):
contours, heirarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Find level 1 contours
level1 = []
for i, tupl in enumerate(heirarchy[0]):
# Each array is in format (Next, Prev, First child, Parent)
# Filter the ones without parent
if tupl[3] == -1:
tupl = np.insert(tupl, 0, [i])
level1.append(tupl)
significant = []
tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small
for tupl in level1:
contour = contours[tupl[0]];
area = cv2.contourArea(contour)
if area > tooSmall:
significant.append([contour, area])
# Draw the contour on the original image
cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1)
significant.sort(key=lambda x: x[1])
#print ([x[1] for x in significant]);
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi = img[y:y+h,x:x+w]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5),0)
ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
text = pytesseract.image_to_string(roi);
print(text); print("\n"); print(pytesseract.image_to_string(thresh));
print("\n")
return [x[0] for x in significant];
edgeImg_8u = np.asarray(thresh, np.uint8)
# Find contours
significant = findSignificantContours(img, edgeImg_8u)
mask = thresh.copy()
mask[mask > 0] = 0
cv2.fillPoly(mask, significant, 255)
# Invert mask
mask = np.logical_not(mask)
#Finally remove the background
img[mask] = 0;
Tesseract can't extract the text from this image. Is there a way I can rotate it to align the text perfectly and then feed it to pytesseract? Please let me know if my question require any more clarity.
Here's a simple approach:
Obtain binary image. Load image, convert to grayscale,
Gaussian blur, then Otsu's threshold.
Find contours and sort for largest contour. We find contours then filter using contour area with cv2.contourArea() to isolate the rectangular contour.
Perform perspective transform. Next we perform contour approximation with cv2.contourArea() to obtain the rectangular contour. Finally we utilize imutils.perspective.four_point_transform to actually obtain the bird's eye view of the image.
Binary image
Result
To actually extract the text, take a look at
Use pytesseract OCR to recognize text from an image
Cleaning image for OCR
Detect text area in an image using python and opencv
Code
from imutils.perspective import four_point_transform
import cv2
import numpy
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Find contours and sort for largest contour
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
# Perform contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
displayCnt = approx
break
# Obtain birds' eye view of image
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()
To Solve this problem you can also use minAreaRect api in opencv which will give you a minimum area rotated rectangle with an angle of rotation. You can then get the rotation matrix and apply warpAffine for the image to straighten it. I have also attached a colab notebook which you can play around on.
Colab notebook : https://colab.research.google.com/drive/1SKxrWJBOHhGjEgbR2ALKxl-dD1sXIf4h?usp=sharing
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
def rotate_image(image, angle):
image_center = tuple(np.array(image.shape[1::-1]) / 2)
rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
return result
img = cv2.imread("/content/sxJzw.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
mask = np.zeros((img.shape[0], img.shape[1]))
blur = cv2.GaussianBlur(gray, (5,5),0)
ret, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2_imshow(thresh)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
largest_countour = max(contours, key = cv2.contourArea)
binary_mask = cv2.drawContours(mask, [largest_countour], 0, 1, -1)
new_img = img * np.dstack((binary_mask, binary_mask, binary_mask))
minRect = cv2.minAreaRect(largest_countour)
rotate_angle = minRect[-1] if minRect[-1] < 0 else -minRect[-1]
new_img = rotate_image(new_img, rotate_angle)
cv2_imshow(new_img)
I want to crop the biggest object in the image (Characters). This code only works if there is no line (shown in the first image). But I need to ignore the line and make the image of the second image. Only crop the biggest object image.
import cv2
x1, y1, w1, h1 = (0,0,0,0)
points = 0
# load image
img = cv2.imread('Image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
# threshold to get just the signature
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, type=cv2.THRESH_BINARY)
# find where the signature is and make a cropped region
points = np.argwhere(thresh_gray==0) # find where the black pixels are
points = np.fliplr(points) # store them in x,y coordinates instead of row,col indices
x, y, w, h = cv2.boundingRect(points) # create a rectangle around those points
crop = img[y:y+h, x:x+w]
cv2.imshow('save.jpg', crop)
cv2.waitkey(0)
Input
Output:
You can use function findContours to do this.
For example, like this:
#!/usr/bin/env python
import cv2
import numpy as np
# load image
img = cv2.imread('Image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
# threshold to get just the signature (INVERTED)
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_LIST, \
cv2.CHAIN_APPROX_SIMPLE)
# Find object with the biggest bounding box
mx = (0,0,0,0) # biggest bounding box so far
mx_area = 0
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
area = w*h
if area > mx_area:
mx = x,y,w,h
mx_area = area
x,y,w,h = mx
# Output to files
roi=img[y:y+h,x:x+w]
cv2.imwrite('Image_crop.jpg', roi)
cv2.rectangle(img,(x,y),(x+w,y+h),(200,0,0),2)
cv2.imwrite('Image_cont.jpg', img)
Note that I used THRESH_BINARY_INV instead of THRESH_BINARY.
Image_cont.jpg:
Image_crop.jpg:
You can also use this with skewed rectangles as #Jello pointed out. Unlike simpler solution above, this will correctly filter out diagonal lines.
For example:
#!/usr/bin/env python
import cv2
import numpy as np
# load image
img = cv2.imread('Image2.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
# threshold to get just the signature (INVERTED)
retval, thresh_gray = cv2.threshold(gray, 100, maxval=255, \
type=cv2.THRESH_BINARY_INV)
image, contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_LIST, \
cv2.CHAIN_APPROX_SIMPLE)
def crop_minAreaRect(img, rect):
# Source: https://stackoverflow.com/questions/37177811/
# rotate img
angle = rect[2]
rows,cols = img.shape[0], img.shape[1]
matrix = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)
img_rot = cv2.warpAffine(img,matrix,(cols,rows))
# rotate bounding box
rect0 = (rect[0], rect[1], 0.0)
box = cv2.boxPoints(rect)
pts = np.int0(cv2.transform(np.array([box]), matrix))[0]
pts[pts < 0] = 0
# crop and return
return img_rot[pts[1][1]:pts[0][1], pts[1][0]:pts[2][0]]
# Find object with the biggest bounding box
mx_rect = (0,0,0,0) # biggest skewed bounding box
mx_area = 0
for cont in contours:
arect = cv2.minAreaRect(cont)
area = arect[1][0]*arect[1][1]
if area > mx_area:
mx_rect, mx_area = arect, area
# Output to files
roi = crop_minAreaRect(img, mx_rect)
cv2.imwrite('Image_crop.jpg', roi)
box = cv2.boxPoints(mx_rect)
box = np.int0(box)
cv2.drawContours(img,[box],0,(200,0,0),2)
cv2.imwrite('Image_cont.jpg', img)
Image2.png (the input image):
Image_cont.jpg:
Image_crop.jpg:
If you use opencv-python 4.x, change image, contours, hierarchy to just contours, hierarchy.
I have a sample image like this
I'm looking for a way to black out the noise from the image such that I end up with an image that just has black text on white background so that I may send it to tesseract.
I've tried morphing with
kernel = np.ones((4,4),np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imshow("opening", opening)
but it doesn't seem to work.
I've also tried to find contours
img = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
(cnts, _) = cv2.findContours(img, cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
roi=rotated[y:y+h,x:x+w].copy()
cv2.imwrite("roi.png", roi)
With the above code, I get the following contours:
which leads to this image when cropped:
which is still not good enough. I want black text on white background, so that I can send it to tesseract OCR and have good success rate.
Is there anything else I can try?
Update
Here is an additional similar image. This one is a bit easier because it has a smooth rectangle in it
The following works for your given example, although it might need tweaking for a wider range of images.
import numpy as np
import cv2
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
dst = cv2.bitwise_and(image_src, mask)
mask = 255 - mask
roi = cv2.add(dst, mask)
roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(roi_gray, 250,255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
max_x = 0
max_y = 0
min_x = image_src.shape[1]
min_y = image_src.shape[0]
for c in contours:
if 150 < cv2.contourArea(c) < 100000:
x, y, w, h = cv2.boundingRect(c)
min_x = min(x, min_x)
min_y = min(y, min_y)
max_x = max(x+w, max_x)
max_y = max(y+h, max_y)
roi = roi[min_y:max_y, min_x:max_x]
cv2.imwrite("roi.png", roi)
Giving you the following type of output images:
And...
The code works by first locating the largest contour area. From this a mask is created which is used to first select only the area inside, i.e. the text. The inverse of the mask is then added to the image to convert the area outside the mask to white.
Lastly contours are found again for this new image. Any contour areas outside a suitable size range are discarded (this is used to ignore any small noise areas), and a bounding rect is found for each. With each of these rectangles, an outer bounding rect is calculated for all of the remaining contours, and a crop is made using these values to give the final image.
Update - To get the remainder of the image, i.e. with the above area removed, the following could be used:
image_src = cv2.imread("input.png")
gray = cv2.cvtColor(image_src, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 10, 255,0)
image, contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
largest_area = sorted(contours, key=cv2.contourArea)[-1]
mask = np.zeros(image_src.shape, np.uint8)
cv2.drawContours(mask, [largest_area], 0, (255,255,255,255), -1)
image_remainder = cv2.bitwise_and(image_src, 255 - mask)
cv2.imwrite("remainder.png", image_remainder)
I get this:
Result
Source Code:
if __name__ == '__main__':
SrcImg = cv2.imread('./Yahi9.png', cv2.CV_LOAD_IMAGE_GRAYSCALE)
_, BinImg = cv2.threshold(SrcImg, 80, 255, cv2.THRESH_OTSU)
Contours, Hierarchy = cv2.findContours(image=copy.deepcopy(SrcImg),
mode=cv2.cv.CV_RETR_EXTERNAL,
method=cv2.cv.CV_CHAIN_APPROX_NONE)
MaxContour, _ = getMaxContour(Contours)
Canvas = np.ones(SrcImg.shape, np.uint8)
cv2.drawContours(image=Canvas, contours=[MaxContour], contourIdx=0, color=(255), thickness=-1)
mask = (Canvas != 255)
RoiImg = copy.deepcopy(BinImg)
RoiImg[mask] = 255
RoiImg = cv2.morphologyEx(src=RoiImg, op=cv2.MORPH_CLOSE, kernel=np.ones((3,3)), iterations=4)
cv2.imshow('RoiImg', RoiImg)
cv2.waitKey(0)
Function:
def getMaxContour(contours):
MaxArea = 0
Location = 0
for idx in range(0, len(contours)):
Area = cv2.contourArea(contours[idx])
if Area > MaxArea:
MaxArea = Area
Location = idx
MaxContour = np.array(contours[Location])
return MaxContour, MaxArea
Ehh, it's python code.
It only works when the white region is the max contour.
Basic idea of this answer is to use border around text.
1) Erode horizontally with a very large kernel, say size of 100 px or 8 times size of single expected character, something like that. It should be done row-wise. The extreme ordinate will give y-location of boundaries around text.
2) Process vertically same way to get x-location of boundaries around text. Then use these locations to crop out image you want.
-- One benefit of this method is you will get every sentence/word segmented separately which, I presume, is good for an OCR.
Happy Coding :)
Edited in by Mark Setchell
Here is a demo of 1)
Here is a demo of 2)
Suppose you have the following image:
Now I want to extract each of the independent letters into individual images. Currently, I've recovered the contours and then drew a bounding box, in this case for the character a:
After this, I want to extract each of the boxes (in this case for the letter a) and save it to an image file.
Expected result:
Here's my code so far:
import numpy as np
import cv2
im = cv2.imread('abcd.png')
im[im == 255] = 1
im[im == 0] = 255
im[im == 1] = 0
im2 = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(im2,127,255,0)
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(0, len(contours)):
if (i % 2 == 0):
cnt = contours[i]
#mask = np.zeros(im2.shape,np.uint8)
#cv2.drawContours(mask,[cnt],0,255,-1)
x,y,w,h = cv2.boundingRect(cnt)
cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imshow('Features', im)
cv2.imwrite(str(i)+'.png', im)
cv2.destroyAllWindows()
Thanks in advance.
The following will give you a single letter
letter = im[y:y+h,x:x+w]
Here's an approach:
Convert image to grayscale
Otsu's threshold to obtain a binary image
Find contours
Iterate through contours and extract ROI using Numpy slicing
After finding contours, we use cv2.boundingRect() to obtain the bounding rectangle coordinates for each letter.
x,y,w,h = cv2.boundingRect(c)
To extract the ROI, we use Numpy slicing
ROI = image[y:y+h, x:x+w]
Since we have the bounding rectangle coordinates, we can draw the green bounding boxes
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
Here's the detected letters
Here's each saved letter ROI
import cv2
image = cv2.imread('1.png')
copy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray,0,255,cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
ROI_number = 0
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
ROI = image[y:y+h, x:x+w]
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
cv2.rectangle(copy,(x,y),(x+w,y+h),(36,255,12),2)
ROI_number += 1
cv2.imshow('thresh', thresh)
cv2.imshow('copy', copy)
cv2.waitKey()
def bounding_box_img(img,bbox):
x_min, y_min, x_max, y_max = bbox
bbox_obj = img[y_min:y_max, x_min:x_max]
return bbox_obj
img = cv2.imread("image.jpg")
cropped_img = bounding_box_img(img,bbox)
cv2.imshow(cropped_img)
this returns cropped image (bounding box)
in this aproach, bounding box coordinates bases on pascal-voc annotation formats like here