How to detect numbers inside a yellow cube with pytesseract - python

So I've been trying to detect a number (1-9) inside a yellow cube, but without a solid solution.
These are two of my pictures.
This is one solution I've been trying, but without any luck:
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

img = cv2.imread("ROI_0.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY_INV, 59, 88)
bnt = cv2.bitwise_not(thr)
txt = pytesseract.image_to_string(bnt, config="--psm 6 digits")
txt = txt.strip().split("\n")
print(txt)
cv2.imshow("bnt", bnt)
cv2.waitKey(0)
Is there another way to do this, because it's not working?

Steps:
Binarize (Otsu's method)
Correct skew using minAreaRect
Find the max-area contour
Crop the number
Pass the crop to pytesseract
import cv2
import pytesseract

image = cv2.imread("y6.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 1. Binarize with Otsu's method
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# 2. Correct skew using the angle of the largest contour's min-area rect
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
big = max(contours, key=cv2.contourArea)
(x, y), (w, h), angle = cv2.minAreaRect(big)
print(angle)
(h, w) = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(thresh, M, (w, h), flags=cv2.INTER_CUBIC,
                         borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0))

# 3-4. Find the max-area contour again and crop the number
big = cv2.findContours(rotated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
big = max(big, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(big)
# cropped = rotated[y:y+h, x:x+w]
cropped = rotated[y:y + h - h // 10, w // 6:x + w - w // 6]

# 5. Pass the crop to pytesseract
data = pytesseract.image_to_string(cropped, config='--psm 6 digits')
# alternatively: config='--psm 6 -c tessedit_char_whitelist=0123456789'
print(data)
There are a few hardcoded values in the cropping, like h//10, so some optimization is still needed; see the sketch below for one way to parameterize them.
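As one possible refinement (my illustration, not part of the original answer): instead of scattering constants like h//10 and w//6, a single relative margin can be applied to the bounding rect. A minimal sketch, assuming the rotated binary image and the largest contour from the code above:

import cv2

def crop_with_margin(rotated, contour, margin=0.12):
    # Crop the contour's bounding rect, shrunk by a relative margin on
    # every side, so the cube's border is stripped without per-axis
    # hardcoded constants. `margin` is an assumed fraction to tune.
    x, y, w, h = cv2.boundingRect(contour)
    dx, dy = int(w * margin), int(h * margin)
    return rotated[y + dy:y + h - dy, x + dx:x + w - dx]

# usage with the variables from the answer above:
# cropped = crop_with_margin(rotated, big)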

You need to remove the black border first for tesseract to work. Just replace the black background with white, then apply thresholding so that it removes both the white border and the yellow fill at the same time, and then use tesseract to detect the character.
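A minimal sketch of that idea (not the answerer's exact code; the flood-fill seed, tolerance, and file name are assumptions). Flood-filling from a corner whitens only the connected black border, not the dark digit inside the cube:

import cv2
import numpy as np
import pytesseract

img = cv2.imread("ROI_0.png")  # assumed input from the question
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Flood-fill white from a corner so only the connected black
# border/background turns white; tolerance is a value to tune
h, w = gray.shape
mask = np.zeros((h + 2, w + 2), np.uint8)
cv2.floodFill(gray, mask, (0, 0), 255, loDiff=30, upDiff=30)

# Otsu now separates the dark digit from the (white + yellow) rest
_, bw = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

print(pytesseract.image_to_string(bw, config="--psm 6 digits"))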

Related

Poor Result in Detecting Meter Reading Using Pytesseract

I am trying to develop a meter reading detection system. This is the picture
I need to get the meter reading 27599 as the output.
I used this code:
import pytesseract
import cv2
image = cv2.imread('read2.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
(H, W) = gray.shape
rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 7))
gray = cv2.GaussianBlur(gray, (1, 3), 0)
blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, rectKernel)
res = cv2.threshold(blackhat, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
print(pytesseract.image_to_string(res, config='--psm 12 --oem 3 digits'))
I get this output:
'.\n\n-\n\n3\n\n7\n\n7\n\n3\n\n-2105 566.261586\n\n161200\n\n310010\n\n--\n\n.-\n\n.\n\n5\n\x0c'
This is my first OCR project. Any help will be appreciated.
Well, there is a lot of text there that can be removed before we start reading the actual meter number. On the other hand, we can limit our OCR to digits only in order to prevent false positives (a few 7-segment digits look like alphabetical letters).
Since tesseract does not work well enough on 7-segment digits, I will use EasyOCR.
So the procedure would be like this:
There are large spaces around the actual counter, which can be cropped away.
We blur the image and run a Hough transform to get the circular meter face.
We know for sure that the number is in the upper half of that circle, so we crop again based on the center and radius of the detected circle.
The cropped image can then be fed to EasyOCR, limited, as I said previously, to the English language and digits.
import cv2 as cv
import numpy as np
import easyocr

# 1. Crop away the large empty space around the counter
orig_image = cv.imread('read2.jpg')  # image from the question
cropped = orig_image[300:850, 200:680]
cropped_height, cropped_width, _ = cropped.shape

# 2. Blur and run a Hough transform to find the circular meter face
gray = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
blurred = cv.GaussianBlur(gray, (17, 17), 0)

minDist = 100
param1 = 30
param2 = 50
minRadius = 100
maxRadius = 300

circle_img = cropped.copy()
circles = cv.HoughCircles(blurred, cv.HOUGH_GRADIENT, 1, minDist,
                          param1=param1, param2=param2,
                          minRadius=minRadius, maxRadius=maxRadius)

if circles is not None:
    print(f"{len(circles[0])} circles detected", circles[0][0])
    circles = np.uint16(np.around(circles))
    for i in circles[0, :]:
        cv.circle(circle_img, (i[0], i[1]), i[2], (0, 255, 0), 2)

    # 3. The number sits in the upper half of the detected circle
    circle = circles[0][0]
    circle_center = (circle[0], circle[1])  # x, y
    circle_radius = circle[2]
    color_cropped = cropped[circle_center[1] - circle_radius:circle_center[1], :]

    # 4. Feed the crop to EasyOCR, restricted to digits
    reader = easyocr.Reader(['en'], gpu=False)
    result = reader.readtext(color_cropped, allowlist='0123456789')
    if result:
        print("detected number:", result[0][1])
detected number: 27599

OpenCV Contour Detection in color

I'm trying to detect the optic disc in an image of the back of the eye using OpenCV and findContour, then fitEllipse, but my program isn't detecting the optic disc at all. How do I fix this? Code and images are below
import cv2

def find_elongation(image):
    img = cv2.imread(image, 0)
    ret, thresh = cv2.threshold(img, 127, 255, 0)
    contour, hierarchy = cv2.findContours(thresh, 1, 2)
    contours = []
    for i in range(len(contour)):
        if len(contour[i]) >= 5:  # fitEllipse needs at least 5 points
            contours.append(contour[i])
    cnt = contours[0]
    k = cv2.isContourConvex(cnt)
    ellipse = cv2.fitEllipse(cnt)
    im = cv2.ellipse(img, ellipse, (0, 255, 0), 2)
    (x, y), (ma, Ma), angle = cv2.fitEllipse(cnt)
    return Ma / ma

print(find_elongation('eye.png'))
print(find_elongation('eye2.png'))
print(find_elongation('eye3.png'))
Image (one of them):
I'm trying to get the brightly colored circle in the middle:
Thanks for the help!
I have developed a piece of code to implement what you have asked. It mainly uses the Value channel of the HSV color space, followed by some morphological operations.
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Read the image
img = cv2.imread('so.png')
# Transform the image to HSV color-space and keep only the value channel
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h,s,v = cv2.split(hsv)
# Threshold the image at 95% of the brightest possible pixel value
maxVal = np.max(v)
per = 0.95
_, th = cv2.threshold(v, maxVal*per, 255, cv2.THRESH_BINARY)
# Erode the image and find the connected components
th = cv2.erode(th, np.ones((2,2), np.uint8))
n, conComp, stats, centroids = cv2.connectedComponentsWithStats(th)
# Obtain the sizes of the connectedComponents skipping the background
sizes = stats[1:,-1]
# Obtain the number of the connectedComponent with biggest size
nComp = np.argmax(sizes) + 1
# Isolate the connectedComponent and apply closing
out = np.zeros((img.shape[0], img.shape[1]), np.uint8)
out[conComp==nComp] = 1
out = cv2.morphologyEx(out, cv2.MORPH_CLOSE, np.ones((10,10), np.uint8))
# Apply gradient to mask to obtain the border of the ellipse
out = cv2.morphologyEx(out, cv2.MORPH_GRADIENT, np.ones((2,2), np.uint8))
# Join the border of the ellipse with the image to display it
img[out==1] = (0,0,0)
plt.imshow(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
plt.show()
I attach the output I have obtained with the picture you posted:

How to detect subscript numbers in an image using OCR?

I am using tesseract for OCR, via the pytesseract bindings. Unfortunately, I encounter difficulties when trying to extract text including subscript-style numbers - the subscript number is interpreted as a letter instead.
For example, in the basic image:
I want to extract the text as "CH3", i.e. I am not concerned about knowing that the number 3 was a subscript in the image.
My attempt at this using tesseract is:
import cv2
import pytesseract
img = cv2.imread('test.jpeg')
# Note that I have reduced the region of interest to the known
# text portion of the image
text = pytesseract.image_to_string(
    img[200:300, 200:320], config='-l eng --oem 1 --psm 13'
)
print(text)
Unfortunately, this will incorrectly output
'CHs'
It's also possible to get 'CHa', depending on the psm parameter.
I suspect that this issue is related to the "baseline" of the text being inconsistent across the line, but I'm not certain.
How can I accurately extract the text from this type of image?
Update - 19th May 2020
After seeing Achintha Ihalage's answer, which doesn't provide any configuration options to tesseract, I explored the psm options.
Since the region of interest is known (in this case, I am using EAST detection to locate the bounding box of the text), the --psm config option for tesseract, which in my original code treats the text as a single line, may not be necessary. Running image_to_string against the region of interest given by the bounding box above gives the output
CH
3
which can, of course, be easily processed to get CH3.
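For example, joining the whitespace-separated pieces recovers the flat string (a trivial sketch; roi stands for the EAST-detected crop mentioned above):

raw = pytesseract.image_to_string(roi)  # roi: the EAST-detected text region
text = ''.join(raw.split())             # "CH\n3" -> "CH3"
print(text)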
This is because the subscript font is too small. You could resize the image using a Python package such as cv2 or PIL and use the resized image for OCR, as coded below.
import pytesseract
import cv2
img = cv2.imread('test.jpg')
img = cv2.resize(img, None, fx=2, fy=2) # scaling factor = 2
data = pytesseract.image_to_string(img)
print(data)
OUTPUT:
CH3
You want to apply pre-processing to your image before feeding it into tesseract, to increase the accuracy of the OCR. I use a combination of PIL and cv2 here because cv2 has good filters for blur/noise removal (dilation, erosion, threshold) and PIL makes it easy to enhance the contrast (distinguish the text from the background), and I wanted to show how pre-processing could be done using either (using both together is not 100% necessary, as shown below). You could write this more elegantly; it's just the general idea.
import cv2
import pytesseract
import numpy as np
from PIL import Image, ImageEnhance

def cv2_preprocess(image_path):
    img = cv2.imread(image_path)
    # convert to black and white if not already
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # remove noise
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # apply a blur, then Otsu threshold (handles gaussian noise)
    img = cv2.threshold(cv2.GaussianBlur(img, (9, 9), 0), 0, 255,
                        cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # this can be used for salt-and-pepper noise (not necessary here)
    # img = cv2.adaptiveThreshold(cv2.medianBlur(img, 7), 255,
    #                             cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    cv2.imwrite('new.jpg', img)
    return 'new.jpg'

def pil_enhance(image_path):
    image = Image.open(image_path)
    contrast = ImageEnhance.Contrast(image)
    contrast.enhance(2).save('new2.jpg')
    return 'new2.jpg'

img = cv2.imread(pil_enhance(cv2_preprocess('test.jpg')))
text = pytesseract.image_to_string(img)
print(text)
Output:
CH3
The cv2 pre-process produces an image that looks like this:
The enhancement with PIL gives you:
In this specific example, you can actually stop after the cv2_preprocess step because that is clear enough for the reader:
img = cv2.imread(cv2_preprocess('test.jpg'))
text = pytesseract.image_to_string(img)
print(text)
output:
CH3
But if you are working with images that don't necessarily start with a white background (i.e. grayscaling converts to light grey instead of white), I have found the PIL step really helps there.
The main point is that the methods to increase tesseract's accuracy are typically:
fix the DPI (rescaling)
fix the brightness/noise of the image
fix the text size/lines (deskewing/dewarping the text)
Doing one of these, or all three of them, will help (a deskew sketch for the last point follows below)... but the brightness/noise fix can be more generalizable than the other two (at least from my experience).
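For the deskew point, here is a minimal sketch of the common minAreaRect recipe (my illustration, not from the answer above; note that OpenCV changed the minAreaRect angle convention in 4.5+, so the mapping may need adjusting for your version):

import cv2
import numpy as np

def deskew(binary):
    # Rotate a binarized image (white text on black) so the text is level,
    # using the min-area rect of all on-pixels to estimate the skew angle.
    coords = np.column_stack(np.where(binary > 0)).astype(np.float32)
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    h, w = binary.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(binary, M, (w, h),
                          flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)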
I think this approach can be more suitable for the general situation.
import cv2
import pytesseract
from pathlib import Path

image = cv2.imread('test.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# suitable for sharp black-and-white pictures
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]  # OpenCV 2.4/4.x vs 3.x return values

result_list = []
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    if area > 200:
        detect_area = image[y:y + h, x:x + w]
        # detect_area = cv2.GaussianBlur(detect_area, (3, 3), 0)
        predict_char = pytesseract.image_to_string(detect_area, lang='eng', config='--oem 0 --psm 10')
        result_list.append((x, predict_char))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)

result = ''.join([char for _, char in sorted(result_list, key=lambda _x: _x[0])])
print(result)  # CH3

output_dir = Path('./temp')
output_dir.mkdir(parents=True, exist_ok=True)
cv2.imwrite(f"{output_dir / 'image.png'}", image)
cv2.imwrite(f"{output_dir / 'clean.png'}", thresh)
MORE REFERENCES
I strongly suggest you refer to the following examples, which are useful references for OCR:
Get the location of all text present in image using opencv
Using YOLO or other image recognition techniques to identify all alphanumeric text present in images

How to extract and recognize the vehicle plate number with Python?

I have tried using pytesseract in collaboration with PIL to identify the vehicle registration number from the number plate image, but I am not able to get the text from these images.
The code:
from PIL import Image
from pytesseract import image_to_string
img = Image.open('D://carimage1')
text = image_to_string(img)
print(text)
While this is working for normal scanned documents, it is not working for vehicle number plates.
Sample Image 1
Sample image 2
Here is a rough idea of how you can solve your problem; you can build on top of it. You need to extract the number plate from the image and then send that crop to tesseract. Read the code comments to understand what I am trying to do.
import numpy as np
import cv2
import pytesseract
import matplotlib.pyplot as plt

img = cv2.imread('/home/muthu/Documents/3r9OQ.jpg')
# convert my image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# perform adaptive thresholding so that I can extract proper contours from the image;
# I need this to extract the number plate from the image
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
contours, h = cv2.findContours(thresh, 1, 2)

# once I have the contours list, I need to find the contours which form rectangles;
# the contours can be approximated to minimal polygons, and polygons of size 4 are probably rectangles
largest_rectangle = [0, 0]
for cnt in contours:
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    if len(approx) == 4:  # polygons with 4 points are what I need
        area = cv2.contourArea(cnt)
        if area > largest_rectangle[0]:
            # keep the polygon which has the largest area
            largest_rectangle = [cv2.contourArea(cnt), cnt, approx]

x, y, w, h = cv2.boundingRect(largest_rectangle[1])
# crop the rectangle to get the number plate
roi = img[y:y + h, x:x + w]
# cv2.drawContours(img, [largest_rectangle[1]], 0, (0, 0, 255), -1)
plt.imshow(roi, cmap='gray')
plt.show()
The output is the number plate as attached below:
Now pass this cropped image into your tesseract.
gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
text = pytesseract.image_to_string(roi)
print(text)
I get the below output for the sample image you shared.
The parsing will be more accurate if you perspective-transform your number plate image to the bounding-box rectangle and also remove the extra borders around it; a sketch of that transform follows below. Let me know if you need help with that too.
The code above doesn't work for the second image if used as it is, because I am filtering the search to polygons with 4 sides. Hope you got the idea.
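A minimal sketch of that perspective transform (my illustration, not the answerer's code), assuming the 4-point quad stored in largest_rectangle[2] by the code above:

import numpy as np
import cv2

def warp_plate(img, quad):
    # Map the 4-point plate contour `quad` (shape (4,1,2), as returned by
    # cv2.approxPolyDP) onto an upright rectangle, removing skew and borders.
    pts = quad.reshape(4, 2).astype(np.float32)
    # order corners: top-left, top-right, bottom-right, bottom-left
    s = pts.sum(axis=1)
    d = np.diff(pts, axis=1).ravel()
    ordered = np.float32([pts[np.argmin(s)], pts[np.argmin(d)],
                          pts[np.argmax(s)], pts[np.argmax(d)]])
    w = int(max(np.linalg.norm(ordered[0] - ordered[1]),
                np.linalg.norm(ordered[3] - ordered[2])))
    h = int(max(np.linalg.norm(ordered[0] - ordered[3]),
                np.linalg.norm(ordered[1] - ordered[2])))
    dst = np.float32([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]])
    M = cv2.getPerspectiveTransform(ordered, dst)
    return cv2.warpPerspective(img, M, (w, h))

# usage with the answer's variables:
# plate = warp_plate(img, largest_rectangle[2])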
This one works only for the second image:
from PIL import Image, ImageFilter
import pytesseract
img = Image.open('TcjXJ.jpg')
img2 = img.filter(ImageFilter.BLUR)
pixels = img2.load()
width, height = img2.size
x_ = []
y_ = []
for x in range(width):
for y in range(height):
if pixels[x, y] == (255, 255, 255):
x_.append(x)
y_.append(y)
img = img.crop((min(x_), min(y_), max(x_), max(y_)))
text = pytesseract.image_to_string(img, lang='eng', config='-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
print(text)
You get this output:
TN 99 F 2378
You can use the OpenVINO engine; it contains a pretrained model and a sample for plate detection and recognition.
There is also OpenALPR for Python.
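For reference, a minimal sketch of how the OpenALPR Python binding is typically used (the country code, config path, runtime-data path, and image name are assumptions; check your install for the correct locations):

from openalpr import Alpr

# paths depend on your OpenALPR installation
alpr = Alpr("us", "/etc/openalpr/openalpr.conf", "/usr/share/openalpr/runtime_data")
if not alpr.is_loaded():
    raise RuntimeError("Error loading OpenALPR")

alpr.set_top_n(5)  # number of candidate reads per plate
results = alpr.recognize_file("plate.jpg")  # hypothetical input image
for plate in results['results']:
    print(plate['plate'], plate['confidence'])

alpr.unload()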

OpenCV MSER detect text areas - Python

I have an invoice image, and I want to detect the text on it. So I plan to use 2 steps: first, identify the text areas, and then use OCR to recognize the text.
I am using OpenCV 3.0 in Python for that. I am able to identify the text (including some non-text areas), but I further want to identify text boxes from the image (also excluding the non-text areas).
My input image is:
And the output is:
I am using the below code for this:
img = cv2.imread('/home/mis/Text_Recognition/bill.jpg')
mser = cv2.MSER_create()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #Converting to GrayScale
gray_img = img.copy()
regions = mser.detectRegions(gray, None)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
cv2.polylines(gray_img, hulls, 1, (0, 0, 255), 2)
cv2.imwrite('/home/mis/Text_Recognition/amit.jpg', gray_img) #Saving
Now, I want to identify the text boxes, and remove/ignore any non-text areas on the invoice. I am new to OpenCV and a beginner in Python. I am able to find a MATLAB example and a C++ example, but converting them to Python would take a lot of time for me.
Is there any example with python using OpenCV, or can anyone help me with this?
Below is the code:
# Import packages
import cv2
import numpy as np

# Create MSER object
mser = cv2.MSER_create()

# Your image path, i.e. the receipt path
img = cv2.imread('/home/rafiullah/PycharmProjects/python-ocr-master/receipts/73.jpg')

# Convert to gray scale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
vis = img.copy()

# Detect regions in the gray scale image
regions, _ = mser.detectRegions(gray)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
cv2.polylines(vis, hulls, 1, (0, 255, 0))
cv2.imshow('img', vis)
cv2.waitKey(0)

mask = np.zeros((img.shape[0], img.shape[1], 1), dtype=np.uint8)
for contour in hulls:
    cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)

# this keeps only the text regions; the rest is masked out
text_only = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow("text only", text_only)
cv2.waitKey(0)
This is an old post, yet I'd like to contribute: if you are trying to extract all the text out of an image, here is code to get that text in an array.
import cv2
import re
import pytesseract
from pytesseract import image_to_string
from PIL import Image

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

image_obj = Image.open("screenshot.png")
rgb = cv2.imread('screenshot.png')
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# threshold the image
_, bw = cv2.threshold(small, 0.0, 255.0, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# get a wide horizontal mask, since text lines are horizontal components
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

# find all the contours
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# segment the text lines
array_of_texts = []
for idx in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[idx])
    cropped_image = image_obj.crop((x - 10, y, x + w + 10, y + h))
    str_store = re.sub(r'([^\s\w]|_)+', '', image_to_string(cropped_image))
    array_of_texts.append(str_store)

print(array_of_texts)
