I'm having some difficulty detecting text on the following type of image:
It seems that Tesseract has difficulty distinguishing the numbers from the diagrams. My goal is to find every digit and its location.
On this image I run the following code, which is supposed to draw rectangles around the text it finds:
import cv2
import pytesseract
from pytesseract import Output
import numpy as np
# Point pytesseract at the locally installed Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

img = cv2.imread('Temp/VE_cropped.png')
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError('Temp/VE_cropped.png')

# Pre-processing: grayscale -> median blur -> inverted Otsu threshold -> dilation.
kernel = np.ones((2, 2), np.uint8)
img_processed = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_processed = cv2.medianBlur(img_processed, 3)
img_processed = cv2.threshold(img_processed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
img_processed = cv2.dilate(img_processed, kernel, iterations=1)

# One entry per detected element; the 'left'/'top'/'width'/'height' lists are parallel.
dict_wordsDetected = pytesseract.image_to_data(img_processed, output_type=Output.DICT)

# Back to three channels so the boxes can be drawn in colour.
img_processed = cv2.cvtColor(img_processed, cv2.COLOR_GRAY2RGB)

boxes = zip(dict_wordsDetected['left'],
            dict_wordsDetected['top'],
            dict_wordsDetected['width'],
            dict_wordsDetected['height'])
for x, y, w, h in boxes:
    # 10 px of padding around every reported box.
    img_processed = cv2.rectangle(img_processed, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 0, 255), 2)

cv2.imshow("processed", img_processed)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Which gives us this result:
I think that I understood what you wanted. First of all, Tesseract works well for many problems, especially when we see examples with images that are easily OCR'ed. That means, images without a complex background. In your case, the image is not simple enough to be treated using just Tesseract or image thresholding. You must do more image preprocessing to OCR your image. To solve your problem, you must clean your image, trying to obtain just the numbers. It can be hard work.
Recently, I was looking for a code to apply OCR to an image with a complex background. I found some solutions. The code that I'll show you is based on this solution.
To extract the numbers (or at least try to), follow these steps:
convert your image into the gray scale
apply image threshold using Otsu method and inverse operation
apply distance transform
apply morphological operation to clean up small points in your image
apply dilate operation to enlarge your numbers
find contours and filter them according to the width and height of each contour
create a list of hull objects, one for each contour
draw the hull objects
apply a dilate operation to your mask
apply a bitwise operation to retrieve the segmented areas
OCR the pre-processed image
print out your results
The code that I present here is not perfect and, I think that it can be improved, but I want to show you a start point for your problem resolution.
import cv2
import pytesseract
from pytesseract import Output
import numpy as np
import imutils
# loading and resizing image
img = cv2.imread('ABV5H.png')
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError('ABV5H.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = imutils.resize(img, width=900)

# gray scale
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
cv2.imshow("Gray", gray)

# thresholding with Otsu method and inverse operation
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
cv2.imshow("Threshold", thresh)

# distance transform, rescaled to 0..255 and binarised again with Otsu
dist = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
dist = cv2.normalize(dist, dist, 0, 1.0, cv2.NORM_MINMAX)
dist = (dist*255).astype('uint8')
dist = cv2.threshold(dist, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imshow("Distance Transformation", dist)

# Morphological operation kernel (2,2) and OPEN method (removes small specks)
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2))
opening = cv2.morphologyEx(dist, cv2.MORPH_OPEN, kernel)
cv2.imshow("Morphology", opening)
cv2.imwrite("morphology.jpg", opening)

# dilate operation to enlarge the numbers
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
dilation = cv2.dilate(opening, kernel, iterations=1)
cv2.imshow("dilated", dilation)
cv2.imwrite("dilated.jpg", dilation)

# finding and grabbing the contours
# (imutils.grab_contours hides the differing findContours return shapes)
cnts = cv2.findContours(dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
output = img.copy()
for c in cnts:
    cv2.drawContours(output, [c], -1, (0, 0, 255), 3)
cv2.imshow("Contours", output)
# Save the image the contours were drawn on (not the plain dilation again).
cv2.imwrite("contours.jpg", output)

# filtering the contours: keep only bounding boxes that are roughly digit-sized
nums = []
output2 = img.copy()
for c in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    if 5 <= w < 75 and 15 < h <= 35:
        nums.append(c)
for c in nums:
    cv2.drawContours(output2, [c], -1, (0, 0, 255), 2)
cv2.imshow("Filter", output2)
cv2.imwrite("filter.jpg", output2)

# making a list with the hull points: one convex hull per kept contour
hull = [cv2.convexHull(c, False) for c in nums]

# create an empty black image
mask = np.zeros(dilation.shape[:2], dtype='uint8')

# draw the hull outlines; the mask is single-channel, so the colour is just 255
for single_hull in hull:
    cv2.drawContours(mask, [single_hull], -1, 255, 1, 8)

# dilating the mask to have a proper image for bitwise
# (reuses the 3x3 cross kernel; 15 iterations grow the outlines into solid blobs)
mask = cv2.dilate(mask, kernel, iterations=15)
cv2.imshow("Dilated Mask", mask)
cv2.imwrite("dilated-mask.jpg", mask)

# bitwise operation: keep the dilated image only inside the mask
final = cv2.bitwise_and(dilation, dilation, mask=mask)
cv2.imshow("Pre-processed Image", final)
cv2.imwrite("pre-processed.jpg", final)

config = '--psm 12 -c tessedit_char_whitelist=0123456789'  # page segmentation mode and white lists

# OCR'ing the image
dict_wordsDetected = pytesseract.image_to_data(final, config=config, output_type=Output.DICT)

# filtering the detections and making a list of index
# (only entries with non-empty text are kept; embedded spaces are removed)
index = []
for idx, txt in enumerate(dict_wordsDetected['text']):
    if len(txt) >= 1:
        index.append(idx)
        dict_wordsDetected['text'][idx] = txt.replace(" ", "")

# draw a padded box and the recognised text for every kept detection
for i in index:
    x = dict_wordsDetected['left'][i]
    y = dict_wordsDetected['top'][i]
    w = dict_wordsDetected['width'][i]
    h = dict_wordsDetected['height'][i]
    cv2.rectangle(img, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 0, 255), 2)
    text = "{}".format(dict_wordsDetected['text'][i])
    cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

cv2.imshow("Voilà le résultat", img)
cv2.imwrite('result.jpg', img)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Visualizing some operations
(I cannot upload my images for the moment. There are some hyperlinks with images. These images correspond to some image pre-processing steps)
Output image after dilation:
filtered contours:
Mask after the hull operation and dilation:
Pre-processed image (the image that will be OCR'ed):
The results
As you can see, we can find numbers in the input image. We have good detection. On the other hand, we also have inaccurate outputs. The main reason is the image preprocessing. The image is noisy, even if we have performed some transformations. The key to your problem is image preprocessing. Another point you must keep in mind is that Tesseract is not perfect; it requires good images to work well. Beyond that, you must know the --psm modes (page segmentation) to improve your OCR, as well as using white lists to avoid undesirable detection. As I said, we have good results, but I guess you can improve them if your task requires just OpenCV and Tesseract. Because there are others that are way less complicated than this one.
Si tu as besoin d'aide, tu peux me contacter, je préfère parler français que l'anglais.
So, I have already detected all the edges of an object but the problem is that I can't find the two points of each edge, that is, the starting point and the ending point with its coordinates.
I am actually trying to find the measurements of an object, but I am stuck on this problem. The image shows the ROI of the original image.
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Read Image of the Object
img = cv2.imread("C:\\Users\\Desktop\\Project\\captured.jpg")
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("C:\\Users\\Desktop\\Project\\captured.jpg")
cv2.imshow('Original Image', img)

# Convert Image To GrayScale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('Gray', gray)

# Binary Thresholding
ret, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('Binary Image', thresh)

# Crop Image
cropped = thresh[150:640, 150:500]
cv2.imshow('Cropped Image', cropped)

# Edge Detection
edges = cv2.Canny(cropped, 100, 200)
cv2.imshow('Edges', edges)

# find contours
# ([-2:] works whether findContours returns 2 values or 3, depending on the OpenCV version)
ctrs, hier = cv2.findContours(cropped, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]


def _reading_order(ctr):
    """Sort key: top-to-bottom, then left-to-right, from the bounding box origin."""
    left, top, _, _ = cv2.boundingRect(ctr)
    return left + top * cropped.shape[1]


# Sort Contours
sorted_ctrs = sorted(ctrs, key=_reading_order)

for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # Getting ROI
    roi = cropped[y:y + h, x:x + w]
    # show ROI
    # cv2.imshow('segment no:'+str(i),roi)
    cv2.rectangle(cropped, (x, y), (x + w, y + h), (150, 0, 255), 2)

cv2.imshow('marked areas', cropped)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Original Image
These are 5 points and the five edges that I need with coordinates so I can calculate the distance between them for the measurement.
Harris Corner Output.
Try using Harris Corner Detection instead:
import cv2
import numpy as np
def find_centroids(dst):
    """Return sub-pixel corner coordinates from a Harris response map.

    The response is thresholded at 1% of its maximum, the resulting blobs are
    labelled, and each blob centroid is refined with ``cv2.cornerSubPix``.

    Args:
        dst: Harris corner response (output of ``cv2.cornerHarris``).

    Returns:
        The refined corners as returned by ``cv2.cornerSubPix``, one (x, y)
        row per labelled component.

    Note:
        The refinement reads the module-level ``gray`` image, which must be
        defined before this function is called.
    """
    ret, dst = cv2.threshold(dst, 0.01 * dst.max(), 255, 0)
    dst = np.uint8(dst)
    # find centroids
    ret, labels, stats, centroids = cv2.connectedComponentsWithStats(dst)
    # define the criteria to stop and refine the corners:
    # at most 100 iterations or a move smaller than 0.001
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
    # 5x5 half-window search, no dead zone (-1, -1)
    corners = cv2.cornerSubPix(gray, np.float32(centroids), (5, 5), (-1, -1), criteria)
    return corners
image = cv2.imread("corner.jpg")
if image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("corner.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# cornerHarris is fed a float32 single-channel image
gray = np.float32(gray)
dst = cv2.cornerHarris(gray, 2, 3, 0.04)
# dilate the response so each corner becomes a small blob
dst = cv2.dilate(dst, None)

# Threshold for an optimal value, it may vary depending on the image.
# image[dst > 0.01*dst.max()] = [0, 0, 255]

# Get coordinates
corners = find_centroids(dst)

# To draw the corners: corner is (x, y), the image is indexed [row, column]
for corner in corners:
    image[int(corner[1]), int(corner[0])] = [0, 0, 255]

cv2.imshow('dst', image)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
You might need to fine tune the parameters for cornerHarris.
I've written some code, to crop an object (in this case the Data Matrix Code) from an image:
import numpy as np
import cv2
image = cv2.imread("datamatrixc.png")
if image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("datamatrixc.png")
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
img_height, img_width = image.shape[:2]
WHITE = [255, 255, 255]

# Threshold filter
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV)

# Get Contours
# ([-2:] works whether findContours returns 2 values or 3, depending on the OpenCV version)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Get Last element of the contours object
cnt = contours[-1]

# Get coordinates for the bounding box
x, y, w, h = cv2.boundingRect(cnt)

# NOTE(review): the row range is derived from the image height, not from the
# bounding box's y; kept as in the original, which reportedly works for the
# sample image -- confirm before reusing on other inputs.
image_region = image[int(((img_height / 2) - h) / 2):int(((img_height / 2) - h) / 2 + h), int(x):int(x + w)]

# 10 px white frame around the cropped code
dmc = cv2.copyMakeBorder(image_region, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=WHITE)
cv2.imshow("Test", dmc)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
The code works fine and I received as result:
However, the next image is a little more complicated.
I receive the same result as in the previous image, but I have no idea how to detect the two other objects.
Is there an easier way to show every object in its own window?
For this specific image, take the biggest contours you have and check whether the object is a 4-sided shape. If the midpoint between the bounding box's corners (see the pairs below) is in the contour array, then voilà, problem solved.
Pairs: TopRight-TopLeft, TopRight-BottomRight, TopLeft-BottomLeft, BottomLeft-BottomRight
Or you could check whether there are pixels that are not black/white inside the bounding box.
And to plot each object individually, just wrap what you already have in a for loop.
How about this?
import numpy as np
import cv2
image = cv2.imread("datamatrixc.png")
if image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("datamatrixc.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Close small gaps so that the separate cells of one object merge into one blob.
kernel = np.ones((3, 3), np.uint8)
closing = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE, kernel, iterations=4)

# Label the closed image (the closing result was previously computed but never used).
n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(closing)

size_thresh = 5000
# Label 0 is skipped (treated as background).
for i in range(1, n_labels):
    if stats[i, cv2.CC_STAT_AREA] >= size_thresh:
        print(stats[i, cv2.CC_STAT_AREA])
        x = stats[i, cv2.CC_STAT_LEFT]
        y = stats[i, cv2.CC_STAT_TOP]
        w = stats[i, cv2.CC_STAT_WIDTH]
        h = stats[i, cv2.CC_STAT_HEIGHT]
        cv2.imshow('img', image[y:y+h, x:x+w])
        # Show the objects one after another: wait for a key press before the next one.
        cv2.waitKey(0)
cv2.destroyAllWindows()
How can I crop a concave polygon from an image. My Input image look like
and the coordinates of closed polygon are
[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]. I want the region bounded by the concave polygon to be cropped using OpenCV. I searched for similar questions but was not able to find a correct answer, which is why I am asking. Can you help me?
Any help would be highly appreciated.!!!
find region using the poly points
create mask using the poly points
do mask op to crop
add white bg if needed
The code:
# 2018.01.17 20:39:17 CST
# 2018.01.17 20:50:35 CST
import numpy as np
import cv2
img = cv2.imread("test.png")
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("test.png")
# OpenCV point functions need 32-bit ints; NumPy's default integer is 64-bit on many platforms.
pts = np.array([[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]], dtype=np.int32)

## (1) Crop the bounding rect
rect = cv2.boundingRect(pts)
x,y,w,h = rect
croped = img[y:y+h, x:x+w].copy()

## (2) make mask
# shift the polygon so it is expressed in the cropped image's coordinates
pts = pts - pts.min(axis=0)

mask = np.zeros(croped.shape[:2], np.uint8)
cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)

## (3) do bit-op
dst = cv2.bitwise_and(croped, croped, mask=mask)

## (4) add the white background
# bg starts all white and is inverted (to black) inside the mask, so bg + dst
# is the polygon content on a white background
bg = np.ones_like(croped, np.uint8)*255
cv2.bitwise_not(bg,bg, mask=mask)
dst2 = bg+ dst

cv2.imwrite("croped.png", croped)
cv2.imwrite("mask.png", mask)
cv2.imwrite("dst.png", dst)
cv2.imwrite("dst2.png", dst2)
Source image:
You can do it in 3 steps:
Create a mask out of the image
# The mask must be 8-bit single-channel to be usable as `mask=` in cv2.bitwise_and.
mask = np.zeros((height, width), dtype=np.uint8)
# fillPoly expects 32-bit integer points, shaped (number of polygons, points, 2).
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]], dtype=np.int32)
cv2.fillPoly(mask, points, (255))
Apply mask to original image
# AND the image with itself, restricted to the pixels where mask is non-zero.
res = cv2.bitwise_and(img,img,mask = mask)
Optionally, you can crop the result to get a smaller image
# Bounding rectangle of the polygon, then slice rows y..y+h and columns x..x+w.
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
With this you should have at the end the image cropped
For the sake of completeness here is the complete code:
import numpy as np
import cv2
img = cv2.imread("test.png")
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("test.png")
height = img.shape[0]
width = img.shape[1]

# 8-bit single-channel mask, white inside the polygon
mask = np.zeros((height, width), dtype=np.uint8)
# fillPoly expects 32-bit integer points, shaped (number of polygons, points, 2).
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]], dtype=np.int32)
cv2.fillPoly(mask, points, (255))

# AND the image with itself, restricted to the pixels where mask is non-zero
res = cv2.bitwise_and(img,img,mask = mask)

rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]

cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
For the colored background version use the code like this:
import numpy as np
import cv2
img = cv2.imread("test.png")
if img is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("test.png")
height = img.shape[0]
width = img.shape[1]

# 8-bit single-channel mask, white inside the polygon
mask = np.zeros((height, width), dtype=np.uint8)
# fillPoly expects 32-bit integer points, shaped (number of polygons, points, 2).
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]], dtype=np.int32)
cv2.fillPoly(mask, points, (255))

# polygon content, masked from the source image
res = cv2.bitwise_and(img,img,mask = mask)

rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect

im2 = np.full((res.shape[0], res.shape[1], 3), (0, 255, 0), dtype=np.uint8 ) # you can also use other colors or simply load another image of the same size
# the background colour is applied only where the polygon mask is zero
maskInv = cv2.bitwise_not(mask)
colorCrop = cv2.bitwise_or(im2,im2,mask = maskInv)
# the two images are non-zero on complementary regions, so the sum composes them
finalIm = res + colorCrop
cropped = finalIm[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]

cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
For the blurred-background version, use code like this:
# NOTE(review): this is a template, not runnable as-is -- `<box points>` is a
# placeholder, and img_path, h, w, x1, x2, y1, y2 are not defined in this snippet.
# Presumably h, w are the image height/width and (x1, y1)-(x2, y2) the crop
# bounds -- confirm against the caller.
img = cv2.imread(img_path)
box = <box points>
# -- background
# blurred copy of the whole image
# NOTE(review): cv2.blur takes ksize as (width, height); (h, w) looks swapped -- confirm.
blur_bg = cv2.blur(img, (h, w))
# mask1: black image with the box polygon filled white; mask2: all-white reference
mask1 = np.zeros((h, w, 3), np.uint8)
mask2 = np.ones((h, w, 3), np.uint8) * 255
cv2.fillPoly(mask1, box, (255, 255, 255))
# -- indexing
# positions where mask1 equals the white reference vs. positions where it differs
img_idx = np.where(mask1 == mask2)
bg_idx = np.where(mask1 != mask2)
# -- fill box
# original pixels at the img_idx positions, blurred pixels at the bg_idx positions
# (res is allocated as int64, not uint8)
res = np.zeros((h, w, 3), np.int64)
res[img_idx] = img[img_idx]
res[bg_idx] = blur_bg[bg_idx]
# crop to rows y1..y2 and columns x1..x2
res = res[y1:y2, x1:x2, :]
I'm trying to make OpenCV detect a bed in an image. I am running the usual grayscale, blur and Canny steps, and I've also tried a convex hull. However, there is quite a lot of "noise", which produces extra contours and messes up the object detection. Because of this, I am unable to detect the bed properly.
Here is the input image as well as the Canny Edge Detection result:
As you can see, it's almost there. I have the outline of the bed already, albeit, that the upper right corner has a gap - which is preventing me from detecting a closed rectangle.
Here's the code I'm running:
import cv2
import numpy as np
def contoursConvexHull(contours):
    """Return the convex hull enclosing the points of all given contours.

    Args:
        contours: Sequence of contour arrays as produced by
            ``cv2.findContours`` (each of shape ``(k, 1, 2)``).

    Returns:
        The hull points as returned by ``cv2.convexHull``. When ``contours``
        is empty, an empty ``(0, 1, 2)`` int32 array is returned instead of
        failing.
    """
    print("contours length = ", len(contours))
    if len(contours) == 0:
        # Nothing to enclose; cv2.convexHull cannot be called without points.
        return np.empty((0, 1, 2), dtype=np.int32)
    # Index 0 is the first item (index 1 fails when there is a single contour).
    print("contours length of first item = ", len(contours[0]))
    # Stack every point of every contour into one (N, 1, 2) array.
    pts = np.vstack(contours)
    result = cv2.convexHull(pts)
    return result
def auto_canny(image, sigma = 0.35):
    """Run Canny edge detection with thresholds derived from the image median.

    Args:
        image: Single-channel image.
        sigma: Relative width of the band around the median; the thresholds
            are ``(1 - sigma) * median`` and ``(1 + sigma) * median``, clamped
            to the range 0..255.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    median = np.median(image)
    low = int(max(0, (1.0 - sigma) * median))
    high = int(min(255, (1.0 + sigma) * median))
    return cv2.Canny(image, low, high)
# Get our image in color mode (1)
src = cv2.imread("bed_cv.jpg", 1)
if src is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("bed_cv.jpg")

# Convert the color from BGR to Gray
srcGray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)

# Use Gaussian Blur
srcBlur = cv2.GaussianBlur(srcGray, (3, 3), 0)

# ret is the returned value, otsu is an image
##ret, otsu = cv2.threshold(srcBlur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Use canny
##srcCanny = cv2.Canny(srcBlur, ret, ret*2, 3)
srcCanny1 = auto_canny(srcBlur, 0.70)

# contours is the contour list
# hierarchy describes how the contours are nested inside each other
# ([-2:] works whether findContours returns 2 values or 3, depending on the OpenCV version)
# NOTE(review): retrieval mode and approximation flags are an assumption -- confirm.
contours, hierarchy = cv2.findContours(srcCanny1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

##cv2.drawContours(src, contours, -1, (0, 255, 0), 3)

ConvexHullPoints = contoursConvexHull(contours)
##cv2.polylines(src, [ConvexHullPoints], True, (0, 0, 255), 3)

cv2.imshow("Source", src)
cv2.imshow("Canny1", srcCanny1)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
Since the contour of the bed isn't closed, I can't fit a rectangle nor detect the contour with the largest area.
The solution I can think of is to extrapolate the largest possible rectangle using the contour points in the hopes of bridging that small gap, but I'm not too sure how to proceed since the rectangle is incomplete.
Since you haven't provided any other examples, I'm providing an algorithm that works for this case. But bear in mind that you will have to find ways of adapting it to changes in lighting and background in other samples.
Since there is a lot of noise and a relatively high dynamic range, I suggest not to use Canny and instead use Adaptive Thresholding and Find Contours on that (it doesn't need edges as an input), that helps with choosing different threshold values for different parts of the image.
My result:
import cv2
import numpy as np
def clahe(img, clip_limit=2.0, grid_size=(8,8)):
    """Apply CLAHE (contrast-limited adaptive histogram equalisation) to img.

    Args:
        img: Image passed to the CLAHE object's ``apply``.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The equalised image.
    """
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=grid_size)
    return equalizer.apply(img)
src = cv2.imread("bed.png")
if src is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    raise FileNotFoundError("bed.png")

# HSV thresholding to get rid of as much background as possible
# (despite the names, the range keeps any hue with saturation <= 38 and value >= 120)
hsv = cv2.cvtColor(src.copy(), cv2.COLOR_BGR2HSV)
lower_blue = np.array([0, 0, 120])
upper_blue = np.array([180, 38, 255])
mask = cv2.inRange(hsv, lower_blue, upper_blue)
result = cv2.bitwise_and(src, src, mask=mask)
b, g, r = cv2.split(result)
g = clahe(g, 5, (3, 3))

# Adaptive Thresholding to isolate the bed
img_blur = cv2.blur(g, (9, 9))
img_th = cv2.adaptiveThreshold(img_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 51, 2)

# ([-2:] works whether findContours returns 2 values or 3, depending on the OpenCV version)
# NOTE(review): retrieval mode and approximation flags are an assumption -- confirm.
contours, hierarchy = cv2.findContours(img_th, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Filter the rectangle by choosing only the big ones
# and choose the brightest rectangle as the bed
max_brightness = 0
brightest_rectangle = None
canvas = src.copy()
for cnt in contours:
    rect = cv2.boundingRect(cnt)
    x, y, w, h = rect
    if w*h > 40000:
        # copy only this rectangle onto a black image and sum its pixel values
        mask = np.zeros(src.shape, np.uint8)
        mask[y:y+h, x:x+w] = src[y:y+h, x:x+w]
        brightness = np.sum(mask)
        if brightness > max_brightness:
            brightest_rectangle = rect
            max_brightness = brightness
        cv2.imshow("mask", mask)

if brightest_rectangle is None:
    # No contour passed the area filter, so there is nothing to draw.
    raise RuntimeError("no candidate rectangle larger than 40000 px was found")

x, y, w, h = brightest_rectangle
cv2.rectangle(canvas, (x, y), (x+w, y+h), (0, 255, 0), 1)
cv2.imshow("canvas", canvas)
cv2.imwrite("result.jpg", canvas)
# imshow only renders once the GUI event loop runs; block until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()