How to read digits from some images using Python - python

I have some pics from which I want to read digits. I used pytesseract as well as cv2 threshold.
import cv2
import pytesseract

# Point pytesseract at the local Tesseract installation (Windows path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Image files to run digit OCR on.
crop = ['crop.png', 'crop1.png', 'crop2.png', 'crop3.png']

for c in crop:
    # Load directly as grayscale (flag 0) so thresholding applies as-is.
    image = cv2.imread(c, 0)
    # Otsu's method picks the binarisation threshold automatically.
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_OTSU)[1]
    # --psm 6: assume a single uniform block of text.
    data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
    print(data)
    print('\nnext')
    cv2.imshow('thresh', thresh)
    # Fix: imshow never renders without a waitKey pump; wait for a key
    # press before moving on to the next image.
    cv2.waitKey(0)
cv2.destroyAllWindows()
but I am not getting good output.
Please tell me where I am going wrong.
here are the pics
https://ibb.co/thgXTSn
https://ibb.co/cYGYL2W
https://ibb.co/R2nbt0g
https://ibb.co/ZgPKy2N

You can try to do image processing before recognition. For example, like this:
# Morphological preprocessing: close small gaps, keep only the difference
# from the original so the strokes stand out, then invert so the result is
# dark text on a light background for OCR.
image = cv2.imread(c, 0)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
diff = cv2.absdiff(closed, image)
image = cv2.bitwise_not(diff)
Also try upsampling the image.
Hope this solves your problem.

Related

Extracting text from image - OCR

I'm using cv2 and pytesseract library to extract text from image. Here is the image (image3_3.png) and the python code:
def threshold_image(img_src):
    """Convert a BGR image to grayscale and binarise it with Otsu's threshold."""
    gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
    # Otsu ignores the supplied threshold (0) and computes its own.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
# Load via PIL and hand the pixels to OpenCV as a numpy array.
img = np.array(Image.open('image3_3.png'))

# 1x1 dilation followed by 1x1 erosion to knock out isolated noise pixels.
kernel = np.ones((1, 1), np.uint8)
img = cv2.erode(cv2.dilate(img, kernel, iterations=1), kernel, iterations=1)

# Stretch the intensity range to the full 0-255 span.
scratch = np.zeros((img.shape[0], img.shape[1]))
img = cv2.normalize(img, scratch, 0, 255, cv2.NORM_MINMAX)

# Gaussian blur smooths jagged edges before binarisation.
img = cv2.GaussianBlur(img, (5, 5), 0)

# Binarise with Otsu and OCR assuming one uniform text block (--psm 6).
string_ocr = pytesseract.image_to_string(threshold_image(img), lang='eng', config='--psm 6')
print(string_ocr)
Here is the result:
Image A3. This is image A3 with more texts.
ISAS Visual Analytics
INow everyone can easily discover and share powerful
Nsights that inspire action
Why am I not getting the same exact text? Any help highly appreciated.

Pytesseract read coloured text

I am trying to read coloured (red and orange) text with Pytesseract.
I tried to not grayscale the image, but that didn't work either.
Images, that it CAN read
Images, that it CANNOT read
My current code is:
# OCR the captured frame: convert BGR -> grayscale first, then run
# Tesseract with --psm 7 (treat the image as a single text line).
# NOTE(review): `nm` is presumably an alias for numpy and `cap` the
# captured frame — confirm against the surrounding code.
tesstr = pytesseract.image_to_string(
cv2.cvtColor(nm.array(cap), cv2.COLOR_BGR2GRAY),
config="--psm 7")
This little function (below) will do for any color
ec9Ut.png
Thresh result
x18MN.png
Thresh result
SFr48.png
Thresh result
import cv2
from pytesseract import image_to_string


def getText(filename):
    """Read *filename*, binarise the HSV value channel with inverted Otsu,
    and return the digits Tesseract finds (--psm 6, digits only)."""
    bgr = cv2.imread(filename)
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    # The V (brightness) channel works for any text colour.
    value = cv2.split(hsv)[2]
    binary = cv2.threshold(value, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    return image_to_string(binary, config="--psm 6 digits")


for name in ('ec9Ut.png', 'x18MN.png', 'SFr48.png'):
    text = getText(name)
    print(text)
Output
46
31
53
You can apply:
Erosion
Adaptive-threshold
Erosion
Erosion will decrease the thickness of the image like:
Original Image
Erosion
When we apply erosion to the 53 and 31 images
Original Image
Erosion
For adaptive-threshold:
When blockSize= 27
Erosion
Threshold
When blockSize= 11
Erosion
 Threshold
For each image, we need to apply a different thresholding
Code:
import cv2
from pytesseract import image_to_string

for img_pth in ["fifty_three.png", "thirty_one.png"]:
    img = cv2.imread(img_pth)
    h, w = img.shape[:2]
    # Upscale 2x so thin strokes survive the erosion below.
    img = cv2.resize(img, (2 * w, 2 * h))
    gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    erd = cv2.erode(gry, None, iterations=2)
    # Stroke thickness differs between the two images, so each needs
    # its own adaptive-threshold block size.
    block = 27 if img_pth == "fifty_three.png" else 11
    thr = cv2.adaptiveThreshold(erd, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, block, 5)
    txt = image_to_string(thr, config="--psm 6 digits")
    print(txt)
    cv2.imshow("thr", thr)
    cv2.waitKey(0)
Result:
53
31
Possible Question1: Why two different block size parameters?
Well, the stroke thickness of each image is different, so two different parameters are required for text recognition.
Possible Question 2: Why is None given as the kernel for the erode method?
Unfortunately, I couldn't find a suitable kernel for erosion, therefore I set it to None (which uses the default 3x3 structuring element).

Can't get numbers from image with python, Tesseract and opencv

I have to get numbers from a water-meter image using Python, Tesseract and OpenCV.
I have tried to change the --psm but it doesn't work.
Here the image without modification :
enter image description here
Here the outpout image :
enter image description here
I need your help guys, i'm starting python and i'm already blocked :'(
My code :
from PIL import Image
import pytesseract
import cv2
import numpy as np
import urllib
import requests

pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Hymed\AppData\Local\Tesseract-OCR\tesseract.exe'

# Binarise with PIL first: grayscale, then a hard 128 cut-off to 1-bit.
col = Image.open("pts.jpg")
bw = col.convert('L').point(lambda px: 0 if px < 128 else 255, '1')
bw.save("cp19.png")

# Re-load with OpenCV and apply inverted Otsu, flipped back via 255 - x.
image = cv2.imread('cp19.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = 255 - cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Light blur before extraction; --psm 11 = sparse text, digits only.
thresh = cv2.GaussianBlur(thresh, (3, 3), 0)
data = pytesseract.image_to_string(np.array(thresh), config='--psm 11 digits')
print(data)
cv2.imshow('thresh', thresh)
cv2.waitKey()
You have nearly finished the task.
I use the divide operation, after the GaussianBlur.
div = cv2.divide(gray, thresh, scale=192)
Result:
When I read from the image:
data = pytesseract.image_to_string(div, config='--psm 11 digits')
print(data)
Result:
00000161
Code: (Just added div = cv2.divide(gray, thresh, scale=192) rest are your code)
from PIL import Image
import pytesseract
import cv2
import numpy as np

# 1-bit binarisation with PIL, saved and re-read for the OpenCV pipeline.
col = Image.open("TOaEW.jpg")
bw = col.convert('L').point(lambda px: 0 if px < 128 else 255, '1')
bw.save("cp19.png")

image = cv2.imread('cp19.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = 255 - cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3, 3), 0)

# Dividing the grayscale image by the blurred mask flattens the
# background, which is what makes the digits legible to Tesseract.
div = cv2.divide(gray, thresh, scale=192)
data = pytesseract.image_to_string(div, config='--psm 11 digits')
print(data)
I tried to read the number from an image using Tesseract. Apart from the numbers shown in the first line, it also returned an unidentified symbol on the second line. I don't understand what I did wrong. Here are the code and the results:
code and output
This is the image I extracted the number from:
Image used for number extraction

opencv thresholding and pytesseract

Currently I am trying to develop some simple computer vision code to read the number of kills that I have in a Call of Duty game and save it to an array as an integer. The code screenshots my screen every second and, using OpenCV, I threshold the image and input it into pytesseract. Although the numbers stay the same, the background noise changes the image a lot and forces a lot of null results. I am OK with it missing a few inputs, but it misses 50% or more of all of the digits. If anyone has any tips on thresholding a single-digit image with varying backgrounds, it would be a huge help.
'''
# Point pytesseract at the local Tesseract installation.
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

# Grab the kill-counter region of the screen.
pyautogui.screenshot('pictures/Kill.png', region=(1822, 48, 30, 23))
img = cv2.imread('pictures/Kill.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Clip highlights with TRUNC (saved for inspection), then binarise at 180
# and invert so the digit is dark on light.
ret, thresh1 = cv2.threshold(img, 255, 255, cv2.THRESH_TRUNC)
cv2.imwrite('pictures/killthresh1.png', thresh1)
ret, thresh1 = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY)
thresh1 = cv2.bitwise_not(thresh1)
cv2.imwrite('pictures/Killthresh2.png', thresh1)

# Fix: the whitelist option must be part of ONE string literal — the
# original split it across two physical lines, which is a SyntaxError.
custom_config = r'-l eng --oem 3 --psm 7 -c tessedit_char_whitelist="1234567890"'
killnumber = pytesseract.image_to_string(thresh1, config=custom_config)
'''
Original pyautogui screenshot
TRUNC thresholded
BINARY thresholded
NOTE: These images yielded a 'NULL' result and I don't know why.
After you read the image, img = cv2.imread('pictures/Kill.png')
Apply adaptive-threshold on Original pyautogui screenshot:
Now read:
txt = pytesseract.image_to_string(thr, config="--psm 7")
print(txt)
Result:
3
Code:
import cv2
import pytesseract

# Adaptive mean threshold (inverted) copes with the varying background
# behind the digit; --psm 7 treats the image as a single text line.
img = cv2.imread("0wHAy.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.adaptiveThreshold(
    gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 21, 9
)
txt = pytesseract.image_to_string(thr, config="--psm 7")
print(txt)

Incorrect thresholding before string recognition

I'm trying to recognize some text with pytesseract, but before that I have to turn the picture I have into a binary one.
Note that I first resize the picture to make it easier to read for pytesseract.
See below the original picture, the resized one, my code and the result I get, so you can understand my issue..
Original picture
# Load as grayscale and upscale 2x so Tesseract has more pixels per glyph.
image = cv2.imread('original.png', 0)
image = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
cv2.imwrite("resized.png", image)

# Otsu binarisation, then inversion so the text is dark on light.
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
result = 255 - thresh
cv2.imwrite("after_threshold.png", result)
Resized picture
Picture after threshold
Thank you for your help :)
If you remove the resize, it seems to work
Output from Pytesseract
32 Force
120 Initiative
Prospection
25 agilité
53 Vitalité
5 Dommages
1 Résistance Neutre
1 Portée
7% Résistance Feu
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Grayscale read, Otsu binarisation, then inversion to dark-on-light.
image = cv2.imread('1.png', 0)
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
result = 255 - thresh

# --psm 6: assume one uniform block of text.
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()

Categories