I would like to find all the big elements in the document, but I do not know how to control the size (the document is downloaded from the Internet :))
I have a document
And I wrote a simple code
import cv2
import pytesseract
# Load the page. cv2.imread returns None (it does not raise) when the file
# is missing or unreadable, so fail early with a clear message.
image = cv2.imread('2.png')
if image is None:
    raise FileNotFoundError("could not read image: 2.png")

# Grayscale -> blur -> inverted Otsu threshold, so dark ink becomes white.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7, 7), 0)
thresh = cv2.threshold(
    blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Dilate with a 3-wide x 50-high rectangle (ksize is (width, height)), which
# merges ink mostly in the vertical direction.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 50))
dilate = cv2.dilate(thresh, kernel, iterations=1)
cv2.imwrite('1_dilated.png', dilate)

# findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3.
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# Process the blobs from top to bottom (sorted by bounding-box y).
cnts = sorted(cnts, key=lambda x: cv2.boundingRect(x)[1])
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    # Keep only regions larger than 100 px in both dimensions.
    if h > 100 and w > 100:
        roi = image[y:y+h, x:x+w]
        cv2.rectangle(image, (x, y), (x+w, y+h), (36, 255, 12), 2)
        # ocr = pytesseract.image_to_string(roi)
        # print(ocr)

cv2.imwrite('1_boxes4.png', image)
But it only detects this:
And I would like this
How to control the size of the detected area ?
Thank you very much for all your comments
You are close, but you need to increase the number of iterations of the dilate operation. Also, a rectangular structuring element might help form the blobs of text better. Let's check out some possible improvements to your code:
# imports:
import cv2
import numpy as np
# Set image path
imagePath = "D://opencvImages//"
imageName = "F74Yq.png"

# Read image. cv2.imread returns None instead of raising when the file
# cannot be read, so check explicitly:
inputImage = cv2.imread(imagePath + imageName)
if inputImage is None:
    raise FileNotFoundError("Could not read image: " + imagePath + imageName)

# Store a deep copy for results:
inputCopy = inputImage.copy()

# Convert BGR to grayscale:
grayInput = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)

# Threshold via Otsu (inverted, so dark text becomes white foreground):
_, binaryImage = cv2.threshold(grayInput, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
The first part produces the binary image of the input image, there's nothing fancy going on here - just a direct thresholding via Otsu's method. This is the binary image obtained:
Now, let's apply the dilate operation. Let's use a 9 x 9 rectangular kernel and set the number of iterations to 5. Gotta be careful you don't dilate too much, because blobs of text from different portions of the document could end up joined:
# Dilation parameters: a 9 x 9 rectangular structuring element, applied
# opIterations times in a single call.
kernelSize = (9, 9)
opIterations = 5
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernelSize)

# Dilate the binary image; destination and anchor are passed as None.
dilateImage = cv2.morphologyEx(
    binaryImage,
    cv2.MORPH_DILATE,
    morphKernel,
    dst=None,
    anchor=None,
    iterations=opIterations,
    borderType=cv2.BORDER_REFLECT101,
)
This is the result:
Ok, now let's just detect external contours and get their bounding boxes so we can draw rectangles around the target areas. Note that I'm drawing the rectangles on a deep copy of the input:
# Find the contours on the dilated binary image. RETR_EXTERNAL keeps only
# the outermost contour of each blob:
contours, hierarchy = cv2.findContours(dilateImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Rectangle color (red in BGR); loop-invariant, so defined once:
color = (0, 0, 255)

# Draw the bounding rectangle of every outer contour on the deep copy:
for c in contours:
    # boundingRect returns (x, y, width, height) as integers:
    rectX, rectY, rectWidth, rectHeight = cv2.boundingRect(c)
    cv2.rectangle(inputCopy, (rectX, rectY),
                  (rectX + rectWidth, rectY + rectHeight), color, 5)

# Show the annotated copy once all rectangles have been drawn:
cv2.imshow("Bounding Rectangles", inputCopy)
cv2.waitKey()
This is the final result:
Related
I have some sketched images where the images contain text captions. I am trying to remove those caption.
I am using this code:
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read image: 1.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and filter using contour area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Erase (fill with black) every blob larger than 500 px in the mask.
    if cv2.contourArea(c) > 500:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Invert image and OCR
invert = 255 - thresh
# The original assigned to `Output` but displayed `output` (NameError).
# cv2.subtract saturates at 0; plain `thresh - invert` on uint8 arrays
# wraps around (0 - 255 becomes 1).
output = cv2.subtract(thresh, invert)

cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.imshow('output', output)
cv2.waitKey()
Code not working for these images.
The cv2 pre-processing is unnecessary here; Tesseract is able to find the text on its own. See the example below, commented inline:
# Let Tesseract locate the text itself; the result is a dict of parallel
# lists ("left", "top", "width", "height", "conf", ...), one entry per box.
results = pytesseract.image_to_data('1.png', config='--psm 11', output_type='dict')

# NOTE(review): `image` is assumed to be the already-loaded image of
# '1.png' (e.g. via cv2.imread) -- confirm against the calling code.
boxes = zip(results["left"], results["top"], results["width"],
            results["height"], results["conf"])
for x, y, w, h, conf in boxes:
    # float() accepts int, float and numeric-string confidences, whereas
    # int() truncates floats and rejects strings such as "96.5".
    if float(conf) > 60:  # adjust to your liking
        # Cover the text with a white rectangle
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1)
Detected text on the left, cleaned image on the right:
Another option, without using Tesseract. Just use the area of the contours to filter the smaller ones by covering them with white-filled rectangles:
# Imports
import cv2
import numpy as np
# Read image (cv2.imread returns None instead of raising on failure):
imagePath = "C://opencvImages//"
inputImage = cv2.imread(imagePath+"0enxN.png")
if inputImage is None:
    raise FileNotFoundError("Could not read image: " + imagePath + "0enxN.png")

# Convert BGR to grayscale. No threshold is applied: findContours treats
# every non-zero pixel as foreground.
binaryImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)

# Invert image so the dark strokes become the non-zero foreground:
binaryImage = 255 - binaryImage

# Find the external contours on the inverted image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Invert back to dark-on-light before painting over the small blobs:
binaryImage = 255 - binaryImage

# Minimum bounding-box area (px^2); smaller boxes are covered.
# Loop-invariant, so it is set once:
minArea = 1000

for c in contours:
    # Bounding rectangle as (x, y, width, height):
    rectX, rectY, rectWidth, rectHeight = cv2.boundingRect(c)
    rectArea = rectWidth * rectHeight

    if rectArea < minArea:
        # Draw white rectangle to cover small contour:
        cv2.rectangle(binaryImage, (rectX, rectY), (rectX + rectWidth, rectY + rectHeight),
                      (255, 255, 255), -1)

cv2.imshow("Binary Mask", binaryImage)
cv2.waitKey(0)
This produces:
I would like to get the coordinates of the box around the initial ("H") on the following page (and similar ones with other initials, so opencv template matching is not an option):
Following this tutorial, I tried to solve the problem with opencv contours:
import cv2
import matplotlib.pyplot as plt
page = "image.jpg"
# read the image (cv2.imread returns None instead of raising on failure)
image = cv2.imread(page)
if image is None:
    raise FileNotFoundError("could not read image: " + page)
# convert to RGB so matplotlib shows the colors correctly
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# create a binary thresholded image (foreground value 150; findContours
# only distinguishes zero from non-zero)
_, binary = cv2.threshold(gray, 0,150,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# find the contours from the thresholded image
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# draw all contours: contourIdx must be -1 (an index of 3 draws only
# contours[3])
image = cv2.drawContours(image, contours, -1, (0, 255, 0), 2)
# the image has to be put on the figure before saving, otherwise
# savefig writes an empty canvas
plt.imshow(image)
plt.savefig("result.png")
The result is of course not exactly what I wanted:
Does anyone know of a viable algorithm (and possibly an implementation thereof) that could provide an easy solution to my task?
You can find the target area by filtering your contours. There are at least two filtering criteria that you can use. One is to filter by area - that is, discard contours that are too small or too large until you get the contour you are looking for. The other is to compute the extent of every contour. The extent is the ratio of the contour's area to its bounding rectangle's area. You are looking for a square-like contour, so its extent should be close to 1.0.
Let's see the code:
# imports:
import cv2
import numpy as np
# Reading an image in default mode (3-channel BGR). `path` and `fileName`
# must be defined beforehand. cv2.imread returns None instead of raising
# when the file cannot be read:
inputImage = cv2.imread(path + fileName)
if inputImage is None:
    raise FileNotFoundError("Could not read image: " + path + fileName)

# Deep copy for results:
inputImageCopy = inputImage.copy()

# Convert BGR to grayscale:
grayscaleImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)

# Get binary image via Otsu (inverted: dark pixels become white):
_, binaryImage = cv2.threshold(grayscaleImage, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
The first portion of the code gets you a binary image that you can use as a mask to compute contours:
Now, let's filter contours. Let's use the area approach first. You need to define a range of minimum area and maximum area to filter everything that does not fall in this range. I've heuristically determined a range of areas from 30000 px to 150000 px:
# Find the contours on the binary image. RETR_EXTERNAL returns only the
# outermost contours:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Accepted area range in pixels (exclusive bounds) and the box color (red
# in BGR). Both are loop-invariant, so they are set once:
minArea = 30000
maxArea = 150000
color = (0, 0, 255)

for c in contours:
    # Get blob area:
    currentArea = cv2.contourArea(c)
    print("Contour Area: "+str(currentArea))

    if minArea < currentArea < maxArea:
        # Bounding rectangle as (x, y, width, height):
        rectX, rectY, rectWidth, rectHeight = cv2.boundingRect(c)
        cv2.rectangle(inputImageCopy, (rectX, rectY),
                      (rectX + rectWidth, rectY + rectHeight), color, 2)

# Show the annotated copy once every contour has been checked:
cv2.imshow("Rectangles", inputImageCopy)
cv2.waitKey(0)
Once you successfully filter the area, you can then compute the bounding rectangle of the contour with cv2.boundingRect. You can retrieve the bounding rectangle's x, y (top left) coordinates as well as its width and height. After that just draw the rectangle on a deep copy of the original input.
Now, let's see the second option, using the contour's extent. The for loop gets modified as follows:
# Maximum allowed deviation of the extent from 1.0 and the box color (red
# in BGR). Both are loop-invariant, so they are set once:
epsilon = 0.1
color = (0, 0, 255)

# Look for the outer bounding boxes (no children):
for c in contours:
    # Get blob area:
    currentArea = cv2.contourArea(c)

    # Bounding rectangle as (x, y, width, height):
    rectX, rectY, rectWidth, rectHeight = cv2.boundingRect(c)

    # Extent = contour area / bounding-rectangle area:
    extent = float(currentArea)/(rectWidth *rectHeight)
    print("Extent: " + str(extent))

    # Keep contours whose extent is within epsilon of 1.0:
    delta = abs(1.0 - extent)
    if delta < epsilon:
        cv2.rectangle(inputImageCopy, (rectX, rectY),
                      (rectX + rectWidth, rectY + rectHeight), color, 2)

# Show the annotated copy once every contour has been checked:
cv2.imshow("Rectangles", inputImageCopy)
cv2.waitKey(0)
Both approaches yield this result:
You almost have it. You just need to filter contours on area and aspect ratio. Here is my approach in Python/OpenCV.
Input:
import cv2
import numpy as np
# read image (loaded as 3-channel BGR; cv2.imread returns None on failure)
img = cv2.imread('syriados.jpg')
if img is None:
    raise FileNotFoundError("could not read image: syriados.jpg")

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold to binary
#thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]

# invert threshold
thresh = 255 - thresh

# apply morphology to remove small white regions and to close the rectangle boundary
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
# Close the OPENED image. The original closed `thresh` again, which
# silently discarded the result of the open step above.
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)

# find contours (2 return values in OpenCV 4, 3 in OpenCV 3)
result = img.copy()
cntrs = cv2.findContours(morph, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]

# filter on area and aspect ratio: keep large contours whose bounding box
# is roughly square (width and height differ by less than 100 px)
for c in cntrs:
    area = cv2.contourArea(c)
    x,y,w,h = cv2.boundingRect(c)
    if area > 10000 and abs(w-h) < 100:
        cv2.drawContours(result, [c], 0, (0,0,255), 2)

# write results
cv2.imwrite("syriados_thresh.jpg", thresh)
cv2.imwrite("syriados_morph.jpg", morph)
cv2.imwrite("syriados_box.jpg", result)

# show results
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.imshow("result", result)
cv2.waitKey(0)
Threshold image:
Morphology image:
Resulting contour image:
To get a result like this:
You'll need to detect the contour in the image with the second to the greatest area, as the one possessing the greatest area would be the border of the image.
So with the list of contours, we can get the one with the second greatest area via the built-in sorted method, using the cv2.contourArea method as the custom key:
import cv2
import numpy as np
def process(img):
    """Return an edge image of ``img`` prepared for contour detection.

    The BGR input is converted to grayscale, blurred with a 7 x 7 Gaussian
    (sigma 2), run through Canny (both thresholds 50), dilated once and
    eroded twice with a 6 x 6 kernel of ones.
    """
    blurred = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (7, 7), 2)
    edges = cv2.Canny(blurred, 50, 50)
    structuring = np.ones((6, 6))
    thickened = cv2.dilate(edges, structuring, iterations=1)
    return cv2.erode(thickened, structuring, iterations=2)
def get_contours(img):
    """Outline the second-largest external contour of ``img`` in green.

    The contours are found on ``process(img)``, ranked by ``cv2.contourArea``,
    and the second-largest one is approximated by a polygon (tolerance 2% of
    its perimeter) that is drawn onto ``img`` in place.

    If fewer than two contours are found, ``img`` is left untouched instead
    of failing with an IndexError. Returns None.
    """
    contours, _ = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if len(contours) < 2:
        # There is no "second largest" contour to pick.
        return
    cnt = sorted(contours, key=cv2.contourArea)[-2]
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
    cv2.drawContours(img, [approx], -1, (0, 255, 0), 2)
page = "image.jpg"
image = cv2.imread(page)
if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read image: " + page)

# get_contours draws directly onto `image`.
get_contours(image)
cv2.imshow("Image", image)
cv2.waitKey(0)
The above only takes the area of the contours into consideration; if you want more reliable results, you can make it detect only contours that are 4-sided.
I have this binary image (a NumPy array) that represents an approximation of a rectangle:
I'm trying to extract the real shape of the rectangle but can't seem to find a way.
The expected result is the following:
I'm using this code
# cv2.RETR_LIST and cv2.CHAIN_APPROX_NONE are the named forms of the
# literal arguments 1, 1 used originally.
contours, _ = cv2.findContours(numpymask.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)  # not copying here will throw an error
if not contours:
    raise ValueError("no contours found in mask")

# Minimum-area (possibly rotated) rectangle around the first contour.
rect = cv2.minAreaRect(contours[0])  # basically you can feed this rect into your classifier
(x, y), (w, h), a = rect  # a - angle

# Four corner points of the rotated rectangle, converted to integers.
# np.int0 was an alias of np.intp and was removed in NumPy 2.0.
box = cv2.boxPoints(rect)
box = box.astype(np.intp)

rect2 = cv2.drawContours(img.copy(), [box], 0, (0, 0, 255), 10)
plt.imshow(rect2)
plt.show()
But the result I'm getting is the following, which is not what I need:
For this i'm using Python with opencv.
This is something i played around with before. It should work with your image.
import imutils
import cv2
# load the image, convert it to grayscale, and blur it slightly
image = cv2.imread("test.jpg")
if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read image: test.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)

# threshold the image: pixels brighter than 200 become white
thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

# find contours in thresholded image, then grab the largest
# one
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
if len(cnts) == 0:
    # max() on an empty sequence raises an unhelpful ValueError.
    raise ValueError("no contours found in thresholded image")
c = max(cnts, key=cv2.contourArea)

# draw the contours of c
cv2.drawContours(image, [c], -1, (0, 0, 255), 2)

# show the output image
cv2.imshow("Image", image)
cv2.waitKey(0)
I'm using the following code to detect the brightly illuminated lamp. The illumination might vary.
img = cv2.imread("input_img.jpg")
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read image: input_img.jpg")
rgb = img.copy()
img_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# The input is one still image, so all processing is loop-invariant and is
# done once; only the display/key polling stays inside the loop below.
th3 = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY, 11, 2)
edged = cv2.Canny(th3, 50, 100)
edged = cv2.dilate(edged, None, iterations=1)
edged = cv2.erode(edged, None, iterations=1)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# Rank the contours by area, largest first, as (area, contour) pairs.
areaArray = [cv2.contourArea(c) for c in cnts]
sorteddata = sorted(zip(areaArray, cnts), key=lambda x: x[0], reverse=True)

# Only annotate when a third-largest contour actually exists.
if len(sorteddata) >= 3:
    thirdlargestcontour = sorteddata[2][1]
    x, y, w, h = cv2.boundingRect(thirdlargestcontour)
    # drawContours expects a LIST of contours; a bare contour is
    # interpreted as many one-point contours and drawn as isolated dots.
    cv2.drawContours(rgb, [thirdlargestcontour], -1, (255, 0, 0), 2)
    cv2.rectangle(rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Keep the windows open until ESC (key code 27) is pressed.
while True:
    cv2.imshow("th3", th3)
    cv2.imshow("edge", edged)
    cv2.imshow("rgb", rgb)
    if cv2.waitKey(1) == 27:
        break
The above code works but,
It only gives the rectangle that encompasses the lamp. How do I get the four corner points of the lamp precisely?
How can I improve detection? at the moment I'm picking the third-largest contour which does not guarantee that it will always be the lamp as the environment poses challenge?
approxPolyDP works when the contour is complete, but if the contour is incomplete, approxPolyDP does not return the proper coordinates. For instance, in the following image approxPolyDP returns the wrong coordinates.
Here is one way to do that in Python/OpenCV.
Read the input image and convert to grayscale
Use adaptive thresholding to get a thick outline of the lamp region
Find the contours
Filter the contours on area to remove extraneous regions and keep only the larger of the two (inner and outer contours of thresholded region)
Get the perimeter
Fit the perimeter to a polygon, which should be a quadrilateral with the right choice of arguments.
Draw the contour (red) and polygon (blue) over a copy of the input image as the result
Input:
import cv2
import numpy as np
# load image (cv2.imread returns None instead of raising on failure)
img = cv2.imread("lamp.jpg")
if img is None:
    raise FileNotFoundError("could not read image: lamp.jpg")

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold image, then invert it
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 10)
thresh = 255 - thresh

# find contours (2 return values in OpenCV 4, 3 in OpenCV 3)
cntrs = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]

# Contour filtering -- ignore small objects (area <= 200) and keep the
# largest remaining contour (the larger of the inner and outer contours
# from thresh)
big_contour = None
area_thresh = 0
for c in cntrs:
    area = cv2.contourArea(c)
    if area > 200 and area > area_thresh:
        big_contour = c
        area_thresh = area

# Without this check a failed search surfaces later as a NameError.
if big_contour is None:
    raise ValueError("no contour with area > 200 found")

# draw big_contour on image in red and polygon in blue and print corners
results = img.copy()
cv2.drawContours(results,[big_contour],0,(0,0,255),1)
peri = cv2.arcLength(big_contour, True)
# Polygon approximation with a tolerance of 4% of the perimeter.
corners = cv2.approxPolyDP(big_contour, 0.04 * peri, True)
cv2.drawContours(results,[corners],0,(255,0,0),1)
print(len(corners))
print(corners)

# write result to disk
cv2.imwrite("lamp_thresh.jpg", thresh)
cv2.imwrite("lamp_corners.jpg", results)

cv2.imshow("THRESH", thresh)
cv2.imshow("RESULTS", results)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded Image:
Result Image:
Corner Coordinates:
[[[233 145]]
[[219 346]]
[[542 348]]
[[508 153]]]
Right now I am trying to create a program that removes text from a background, but I am running into a lot of problems.
My approach is to use pytesseract to get text boxes and once I get boxes, I use cv2.inpaint to paint it and remove text from there. In short:
# Get text boxes: the result is a dict of parallel lists, one entry per box.
d = pytesseract.image_to_data(img, output_type=Output.DICT)

# Loop through the boxes
for x, y, w, h in zip(d['left'], d['top'], d['width'], d['height']):
    # An empty crop would make cv2.cvtColor fail, so skip zero-size boxes.
    if w <= 0 or h <= 0:
        continue
    crop_img = img[y:y+h, x:x+w]  # Crop image
    gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): `inverte` is not defined in this snippet; presumably
    # it inverts the grayscale image -- confirm against its definition.
    gray = inverte(gray)  # Inverse it
    # Otsu threshold produces the inpainting mask.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
    dst = cv2.inpaint(crop_img, thresh, 10, cv2.INPAINT_TELEA)  # Then Inpaint
    img[y:y+h, x:x+w] = dst  # Place back cropped image back to the source image
Now the problem is that I am not able to remove text completely
Image:
I am not sure what other method I can use to remove text from the image. I am new to this, which is why I am having trouble. Any help is much appreciated.
Note: Image looks stretched because I resized it to show it in screen size
Original Image:
Here's an approach using morphological operations + contour filtering
Convert image to grayscale
Otsu's threshold to obtain a binary image
Perform morph close to connect words into a single contour
Dilate to ensure that all bits of text are contained in the contour
Find contours and filter using contour area
Remove text by "filling" in the contour rectangle with the background color
I used chrome developer tools to determine the background color of the image which was (222,228,251). If you want to dynamically determine the background color, you could try finding the dominant color using k-means. Here's the result
import cv2
image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read image: 1.jpg")

# Grayscale + inverted Otsu threshold: dark text becomes white foreground.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph close with a wide 15 x 3 kernel, then dilate with a 5 x 3 kernel.
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, close_kernel, iterations=1)

dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
dilate = cv2.dilate(close, dilate_kernel, iterations=1)

# findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3.
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    # Only contours with an area strictly between 800 and 15000 px are
    # filled.
    if 800 < area < 15000:
        x,y,w,h = cv2.boundingRect(c)
        # Fill the bounding box with the hard-coded color (222,228,251).
        cv2.rectangle(image, (x, y), (x + w, y + h), (222,228,251), -1)

cv2.imshow('image', image)
cv2.waitKey()