Captcha recognition using OCR - Python

I am trying to write code to solve captchas for images like this:
Here is the processed image:
And my code:
image = cv2.resize(image, (300,120))
image = cv2.dilate(image, None, iterations=1)
image = cv2.GaussianBlur(image,(1,9),0)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
image = cv2.medianBlur(image,5)
cv2.imshow("Image", image)
cv2.imwrite("im.jpg",image)
text = pytesseract.image_to_string(image, config='--psm 8 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz')
print(text)
But the code predicts "9922s" for the given captcha instead of "pez2s". Please help me solve this problem.
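One variation worth trying (a sketch of my own, not the asker's pipeline): binarize first, then clean up, so the median filter and dilation operate on the thresholded glyphs rather than on the colour image. The input filename is hypothetical.
import cv2
import pytesseract
image = cv2.imread("captcha.png")  # hypothetical input file
image = cv2.resize(image, (300, 120))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Otsu picks the threshold automatically; INV gives white glyphs on black
binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
binary = cv2.medianBlur(binary, 5)  # remove speckle noise after binarization
binary = cv2.dilate(binary, None, iterations=1)  # thicken the strokes
text = pytesseract.image_to_string(binary, config='--psm 8 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz')
print(text)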

Get text from image using tesseract and cv2 in python

I am trying to get text from an image. I have an image something like this:
I have got all the other text, but I am unable to get the "File return >" button text from the image. I have tried this:
import pytesseract
import cv2
from pytesseract import Output
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
filepath = 'Capture.png'
image = cv2.imread(filepath, 1)
# converting image to grayscale image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
threshold_img = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# displays the image
# cv2.imshow('threshold image', threshold_img)
status = cv2.imwrite('many_1.png',threshold_img)
# Holds the output window until the user presses a key
# cv2.waitKey(0)
# Destroying windows present on the screen
# cv2.destroyAllWindows()
# setting parameters for tesseract
custom_config = r'--oem 3 --psm 6'
# now feeding image to tesseract
details = pytesseract.image_to_data(threshold_img, output_type=Output.DICT, config=custom_config, lang='eng')
boxes = len(details['level'])
for i in range(boxes):
    if details['text'][i] != '':
        # print(details['left'][i], details['top'][i], details['width'][i], details['height'][i], details['text'][i])
        # print(details['text'][i])
        if details['text'][i] == 'File':
            print(details['left'][i], details['top'][i], details['width'][i], details['height'][i], details['text'][i])
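One variation that sometimes helps with isolated UI labels such as button captions (my suggestion, not something from the question): page segmentation mode 11, which looks for sparse text in no particular order instead of assuming a uniform block of text.
# same pipeline, but with sparse-text page segmentation
custom_config = r'--oem 3 --psm 11'
details = pytesseract.image_to_data(threshold_img, output_type=Output.DICT,
                                    config=custom_config, lang='eng')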

OCR not Extracting any Text

I am trying to extract text from an image that looks like something OCR should handle easily, but it extracts nothing, or garbage in some cases.
I have tried the following OpenCV techniques from other Stack Overflow resources, but nothing seems to help:
Image resizing
Grayscaling
Dilation and erosion
adaptiveThreshold
If someone could help me extract text from the attached image using OpenCV, that would be really helpful.
My Code
import pytesseract
import cv2
import numpy as np
# Configuration
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
image_path = "C:/Users//opencv_test/images/"
ocr_path = "C:/Users//opencv_test/ocr/"
doc_file_name = "Image.jpeg"
ocr_file_name = doc_file_name[:-4] + "txt"
# To read image from disk, we use
# cv2.imread function, in below method,
img = cv2.imread(image_path + doc_file_name, 1)
# Creating GUI window to display an image on screen
# first Parameter is windows title (should be in string format)
# Second Parameter is image array
cv2.imshow("Actual Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def remove_noise(img):
    # note: cv2.adaptiveThreshold expects a single-channel (grayscale) image
    filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 199, 5)
    cv2.imshow("Noise Free Image", filtered)  # show the filtered result, not the input
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return filtered
def remove_noise_2(img):
    img = cv2.resize(img, None, fx=.9, fy=.8, interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    cv2.imshow("Pre-Processed Image", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return img
noise_free_img = remove_noise_2(img)
# Run Tesseract and write the result to the output file
text = str(pytesseract.image_to_string(noise_free_img, config='--psm 3'))
text = text.replace('-\n', '')
with open(ocr_path + ocr_file_name, 'w') as f:
    f.write(text)
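As an aside, the (1, 1) kernel makes the dilate/erode steps effective no-ops, and the resize slightly shrinks the image. A sketch of an alternative preprocessing step (my assumption about what may help, not the asker's code) that upscales and binarizes with Otsu before OCR:
def preprocess_for_ocr(img):
    # upscale rather than downscale: tiny glyphs are a common cause of empty OCR output
    img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # global Otsu binarization; adaptiveThreshold remains an option for uneven lighting
    return cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]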

Why does Tesseract not recognize the text on the licence plate while easyocr does?

I am working on automatic licence plate recognition. I was able to crop the plate from the initial image, but Tesseract does not recognize the text on the plate while easyocr does. What is the reason? Thanks in advance for the answers. I used the code below to extract the plate from the car and recognize the text.
import cv2 as cv
import pytesseract
import imutils
import numpy as np
import easyocr
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
img3 = cv.imread("4.png")
cv.imshow("Car", img3)
img3 = cv.cvtColor(img3, cv.COLOR_BGR2RGB)
gray = cv.cvtColor(img3, cv.COLOR_RGB2GRAY)
bfilter_img3 = cv.bilateralFilter(img3, 11, 17, 17)
edged_img3 = cv.Canny(bfilter_img3, 30, 200)
keypoints = cv.findContours(edged_img3.copy(), cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
contours = sorted(contours, key=cv.contourArea, reverse=True)[:10]
location = None
for contour in contours:
    approx = cv.approxPolyDP(contour, 10, True)
    if len(approx) == 4:
        location = approx
        break
mask = np.zeros(gray.shape, np.uint8)
new_img = cv.drawContours(mask, [location], 0, 255, -1)
new_img = cv.bitwise_and(img3, img3, mask=mask)
print(location)
cv.imshow("Plate", new_img)
(x,y)=np.where(mask==255)
(x1,y1)=(np.min(x),np.min(y))
(x2,y2)=(np.max(x),np.max(y))
cropped_img=gray[x1:x2+1, y1:y2+1]
ret, cropped_img=cv.threshold(cropped_img,127,255,cv.THRESH_BINARY)
cv.imshow("Plate3", cropped_img)
cropped_img = cv.resize(cropped_img, None, fx=2/3, fy=2/3, interpolation=cv.INTER_AREA)
#"cropped_img= the plate image in the question"***********
text = pytesseract.image_to_string(cropped_img)
print("Text by tesseract: ",text)
""""
reader=easyocr.Reader(['en'])
text2=reader.readtext(cropped_img)
print(text2)
"""
k = cv.waitKey(0)
I'm kind of curious why you used bilateralFilter, Canny, findContours, etc. Did you look at the result of each step?
Anyway, if you set the page segmentation mode to 6, which is:
Assume a single uniform block of text.
The result will be:
34 DUA34
Code:
import cv2
import pytesseract
# Load the image
img = cv2.imread("vHQ5q.jpg")
# Convert to the gray-scale
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# OCR
print(pytesseract.image_to_string(gry, config="--psm 6"))
# Display
cv2.imshow("", gry)
cv2.waitKey(0)
You should know the page segmentation methods (tesseract --help-psm lists them all).
I got this result using pytesseract version 0.3.7.
You can try PaddleOCR; they publish a license plate recognition model and also provide a training guide for license plate recognition.
link: https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/applications/%E8%BD%BB%E9%87%8F%E7%BA%A7%E8%BD%A6%E7%89%8C%E8%AF%86%E5%88%AB.md
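A minimal usage sketch based on PaddleOCR's quick-start (the image path is hypothetical, and the exact result structure varies between versions):
from paddleocr import PaddleOCR  # pip install paddlepaddle paddleocr
ocr = PaddleOCR(lang='en')   # detection/recognition models download on first run
result = ocr.ocr('plate.jpg')  # hypothetical image path
for line in result[0]:       # each entry: [bounding box, (text, confidence)]
    print(line[1])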

Tesseract OCR for Semiconductor wafer ID detection (Python)

I am trying to read semiconductor wafer IDs using Tesseract OCR in Python, but it is not very successful. Also, the -c tessedit_char_whitelist=0123456789XL config doesn't work; it reads out the chip ID as: po4>1.
My original image:
My image before processing:
Part of my code is below:
# identify
optCode = pytesseract.image_to_string("c:/opencv/ID_fine_out22.jpg", lang="eng",
                                      config='--psm 6 -c tessedit_char_whitelist=0123456789XL')
# print chip ID
print("ChipID:", optCode)
Any ideas to improve the OCR? I would also like to read the digits only.
I am considering ML as one approach as well, since I have a large number of sample images.
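One caveat worth checking first (a note added here, not from the question or the answer below): Tesseract 4.0's default LSTM engine ignored tessedit_char_whitelist; it is honored again in 4.1+, and the legacy engine always supported it. A hedged sketch that forces the legacy engine (requires the legacy 'eng' traineddata to be installed):
# --oem 0 selects the legacy engine, which honors character whitelists
optCode = pytesseract.image_to_string("c:/opencv/ID_fine_out22.jpg", lang="eng",
                                      config='--oem 0 --psm 6 -c tessedit_char_whitelist=0123456789XL')
print("ChipID:", optCode)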
For this I wrote a quick-and-dirty script with pytesseract and a few techniques from the OpenCV library. You can choose different params here and view the results.
For example, I have an image named softserve.png:
Suppose you have ocr.py with the following code:
# import the necessary packages
import argparse
import cv2
import numpy as np
import os
from PIL import Image
import pytesseract
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
ap.add_argument("-b", "--blur", type=str, default=None,
                help="type of blurring to be done")
ap.add_argument("-t", "--thresh", type=str, default=None,
                help="type of thresholding to be done")
ap.add_argument("-r", "--resize", type=float, default=1.0,
                help="resize factor")
ap.add_argument("-m", "--morph", type=str, default=None,
                help="type of morphological transform to be done")
args = vars(ap.parse_args())
# load the example image
image = cv2.imread(args["image"])
# resize if requested
if args["resize"] != 1:
    image = cv2.resize(image, None,
                       fx=args["resize"], fy=args["resize"],
                       interpolation=cv2.INTER_CUBIC)
# convert it to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
kernel = np.ones((5, 5), np.uint8)
# make a check to see which kind of blurring, if any, should be done
# to remove noise
if args["blur"] == "medianblur":
    gray = cv2.medianBlur(gray, 3)
if args["blur"] == "avgblur":
    gray = cv2.blur(gray, (5, 5))
if args["blur"] == "gaussblur":
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
if args["blur"] == "filter":
    gray = cv2.bilateralFilter(gray, 9, 75, 75)
if args["blur"] == "filter2d":
    # normalize the kernel so the averaging filter preserves brightness
    gray = cv2.filter2D(gray, -1, kernel / kernel.size)
# check to see if thresholding should be applied to preprocess the
# image
if args["thresh"] == "thresh":
    gray = cv2.threshold(gray, 0, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
if args["thresh"] == "thresh1":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
if args["thresh"] == "thresh2":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)[1]
if args["thresh"] == "thresh3":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TRUNC)[1]
if args["thresh"] == "thresh4":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO)[1]
if args["thresh"] == "thresh5":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO_INV)[1]
if args["thresh"] == "thresh6":
    gray = cv2.adaptiveThreshold(gray, 255,
                                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 115, 1)
if args["thresh"] == "thresh7":
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 115, 1)
if args["thresh"] == "thresh8":
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
if args["thresh"] == "thresh9":
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
if args["thresh"] == "thresh10":
    # gray = cv2.GaussianBlur(gray, (5, 5), 0)
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# check to see if a morphological transform should be applied
if args["morph"] == "erosion":
    gray = cv2.erode(gray, kernel, iterations=1)
if args["morph"] == "dilation":
    gray = cv2.dilate(gray, kernel, iterations=1)
if args["morph"] == "opening":
    gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
if args["morph"] == "closing":
    gray = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print(text)
with open("output.py", "w") as text_file:
    text_file.write(text)
# show the output images
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
If I simply run the usual OCR without any preprocessing (i.e. pytesseract.image_to_string()):
python3 ocr.py --image softserve.png
I get this text:
uray ['Amir', 'Barry', 'Chales', ‘Dao']
‘amir’ rss
tee)
print(2)
It's a very bad result, isn't it?
But after playing with resize and thresh you can get a much nicer output:
python3 ocr.py --image softserve.png --thresh thresh6 --resize 2.675
And you can see in the two opened windows how the image looks before OCR:
Output:
names1 = ['Amir', ‘Barry’, ‘Chales', ‘Dao']
if ‘amir' in names1:
# print(1)
else: «=
# print(2)
You can also apply morph and blur. You can read more about blurring, thresholding, and morphological transformations in the OpenCV docs. I hope you will find that information useful in your work.

How to call a Python function through a button in web2py?

I am creating a web2py app; I want a button that calls a Python function defined in default.py in the controllers folder and shows the text result.
The function is:
import cv2
import numpy as np
import pytesseract
from PIL import Image

src_path = "/home/globalstat/web2py/applications/image_resize/controllers/"

def get_string(img_path):
    # Read image with opencv
    img_0 = cv2.imread(img_path)
    # Convert to gray
    img = cv2.cvtColor(img_0, cv2.COLOR_BGR2GRAY)
    # thresh = 127
    # im_bw = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY)[1]
    # cv2.imwrite('bw_image.png', im_bw)
    # Apply threshold to get image with only black and white
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    # Write the image after applying opencv to do some ...
    # cv2.imwrite(src_path + "thres.png", img)
    # Apply dilation and erosion to remove some noise
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    cv2.imwrite(src_path + "removednoise.jpg", img)
    # Apply adaptive threshold to binarize the image
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    cv2.imwrite(src_path + "thres.jpg", img)
    # Recognize text with tesseract for python
    # img_ref = Image.open(src_path + "removednoise.jpg")
    img_ref = Image.open(src_path + "thres.jpg")
    # 'doing' in the original is presumably a typo for 'dpi'
    img_ref.save("test-600.png", dpi=(600, 600))
    result = pytesseract.image_to_string(img_ref)
    return result
and the code I'm using in the view file for the button is:
<button type="button" name="setting_button"
        onclick='window.location="{{=URL('default', 'get_string')}}";'>
How can I pass an argument so it displays the result?
Try changing
def get_string(img_path):
    img_0 = cv2.imread(img_path)
into
def get_string():
    img_path = request.args[0]
    img_0 = cv2.imread(img_path)
Any function with parameters inside a controller file is ignored by web2py; arguments are passed through request.args.
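For example, the view-side link can pass the filename as a URL argument, which the controller then reads via request.args[0]; 'removednoise.jpg' below is just a stand-in for whatever image you want to OCR:
<button type="button" name="setting_button"
        onclick='window.location="{{=URL('default', 'get_string', args=['removednoise.jpg'])}}";'>
  Get text
</button>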
