Tesseract OCR for Semiconductor wafer ID detection (Python) - python

I am trying to ready Semiconductor wafer ID by using Tesseract OCR in Python,
but it is not very successful, also, -c tessedit_char_whitelist=0123456789XL config doesn't work. Readout chip ID as: po4>1.
My OG image as
my image before process
Part of my code as below:
# identify
optCode = pytesseract.image_to_string("c:/opencv/ID_fine_out22.jpg",lang="eng", config=' --psm 6 -c tessedit_char_whitelist=0123456789XL')
# print chip iD
print("ChipID:", optCode)
Any ideas to improve the OCR? Also try to read the digits only.
I think about ML as one approach as well since I have large amount of sample images.

For myself I wrote some dirty script with pytesseract and few techniques from opencv library. You can choose different params here and view results.
For example, I have image with name softserve.png:
Suppose you have ocr.py with following code:
# import the necessary packages
import argparse
import cv2
import numpy as np
import os
from PIL import Image
import pytesseract
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
ap.add_argument("-b", "--blur", type=str, default=None,
help="type of preprocessing to be done")
ap.add_argument("-t", "--thresh", type=str, default=None,
help="type of preprocessing to be done")
ap.add_argument("-r", "--resize", type=float, default=1.0,
help="type of preprocessing to be done")
ap.add_argument("-m", "--morph", type=str, default=None,
help="type of preprocessing to be done")
args = vars(ap.parse_args())
# load the example image and convert it to grayscale
image = cv2.imread(args["image"])
# Resize to 2x
if args["resize"] != 1:
image = cv2.resize(image, None,
fx=args["resize"], fy=args["resize"],
interpolation=cv2.INTER_CUBIC)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
kernel = np.ones((5, 5), np.uint8)
# make a check to see if median blurring should be done to remove
# noise
if args["blur"] == "medianblur":
gray = cv2.medianBlur(gray, 3)
if args["blur"] == "avgblur":
gray = cv2.blur(gray, (5, 5))
if args["blur"] == "gaussblur":
gray = cv2.GaussianBlur(gray, (5, 5), 0)
if args["blur"] == "medianblur":
gray = cv2.medianBlur(gray, 3)
if args["blur"] == "filter":
gray = cv2.bilateralFilter(gray, 9, 75, 75)
if args["blur"] == "filter2d":
smoothed = cv2.filter2D(gray, -1, kernel)
# check to see if we should apply thresholding to preprocess the
# image
if args["thresh"] == "thresh":
gray = cv2.threshold(gray, 0, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
if args["thresh"] == "thresh1":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
if args["thresh"] == "thresh2":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)[1]
if args["thresh"] == "thresh3":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TRUNC)[1]
if args["thresh"] == "thresh4":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO)[1]
if args["thresh"] == "thresh5":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_TOZERO_INV)[1]
if args["thresh"] == "thresh6":
gray = cv2.adaptiveThreshold(gray, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 115, 1)
if args["thresh"] == "thresh7":
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 115, 1)
if args["thresh"] == "thresh8":
gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 11, 2)
if args["thresh"] == "thresh9":
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
if args["thresh"] == "thresh10":
# gray = cv2.GaussianBlur(gray, (5, 5), 0)
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
if args["morph"] == "erosion":
gray = cv2.erode(gray, kernel, iterations=1)
if args["morph"] == "dilation":
gray = cv2.dilate(gray, kernel, iterations=1)
if args["morph"] == "opening":
gray = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
if args["morph"] == "closing":
gray = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print(text)
with open("output.py", "w") as text_file:
text_file.write(text)
# show the output images
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
If I simply use usual OCR without anything (such as pytesseract.image_tostring()):
python3 ocr.py --image softserve.png
I would got this text:
uray ['Amir', 'Barry', 'Chales', ‘Dao']
‘amir’ rss
tee)
print(2)
It's a very bad result, isn't it?
But after playing with resize and thresh you can got a more nice output:
python3 ocr.py --image softserve.png --thresh thresh6 --resize 2.675
And see in two opened windows how looks image before OCR:
Output:
names1 = ['Amir', ‘Barry’, ‘Chales', ‘Dao']
if ‘amir' in names1:
# print(1)
else: «=
# print(2)
You also can apply morph and blur. You can read more about blur, thresholding and morphological transformations from opencv docs. I am hope, you will find that information useful in your work

Related

Pytesseract OCR doesn't recognize the digits

I am trying to read these images:
I have tried several options but I can't seem to read them correctly as 15/0, 30/0, 40/0.
frame = frame[900:1000, 450:500]
scale_percent = 200 # percent of original size
width = int(frame.shape[1] * scale_percent / 100)
height = int(frame.shape[0] * scale_percent / 100)
dim = (width, height)
frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
cv2.imshow("cropped", frame)
cv2.waitKey(0)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.imshow("cropped", frame)
cv2.waitKey(0)
pytesseract.pytesseract.tesseract_cmd = (
r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
results = pytesseract.image_to_data(
frame,
output_type=Output.DICT,
config="--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789",
)
# results = replace_chars(results)
print(("-").join(results["text"]), "\n")
One way of solving is using inRange thresholding
The result will be:
If you set page-segmentation-mode 6
15
0
30
0
40
0
Code:
import cv2
import pytesseract
from numpy import array
image_list = ["LZxCs.png", "W06I0.png", "vvzE5.png"]
for image in image_list:
bgr_image = cv2.imread(image)
hsv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_image, array([0, 0, 0]), array([165, 10, 255]))
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=1)
thresh = cv2.bitwise_and(dilate, mask)
text = pytesseract.image_to_string(thresh, config='--psm 6')
print(text)
The second way is applying global-threshold
If you set page-segmentation-mode 6
15
0
30
0
40
0
Code:
import cv2
import pytesseract
image_list = ["LZxCs.png", "W06I0.png", "vvzE5.png"]
for image in image_list:
bgr_image = cv2.imread(image)
gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
text = pytesseract.image_to_string(thresh, config='--psm 6')
print(text)
cv2.imwrite(f"/Users/ahx/Desktop/{image}", thresh)
cv2.imshow('', thresh)
cv2.waitKey(0)
For more, you can check the documentation

OCR not Extracting any Text

I am trying to extract text from an image that looks like OCR should be able to easily extract but it just extracting nothing or garbage in some cases.
I have tried the following OpenCV techniques from other stackoverflow resources but nothing seems to help.
Image Resisizing
GrayScaling
Dilation and Erosion
adaptiveThreshold
If someone could help me how to extract text from the attached image using OpenCV that would be really helpful
My Code
import pytesseract
import cv2
import numpy as np
# Configuration
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
image_path = "C:/Users//opencv_test/images/"
ocr_path = "C:/Users//opencv_test/ocr/"
doc_file_name = "Image.jpeg"
ocr_file_name = doc_file_name[:-4] + "txt"
# To read image from disk, we use
# cv2.imread function, in below method,
img = cv2.imread(image_path + doc_file_name, 1)
# Creating GUI window to display an image on screen
# first Parameter is windows title (should be in string format)
# Second Parameter is image array
cv2.imshow("Actual Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def remove_noise(img):
filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 199, 5)
cv2.imshow("Noise Free Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
return filtered
def remove_noise_2(img):
img = cv2.resize(img, None, fx=.9, fy=.8, interpolation=cv2.INTER_CUBIC)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
cv2.imshow("Pre-Processed Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
return img
noise_free_img = remove_noise_2(img)
# Run Tesseract
f = open(ocr_path + ocr_file_name, 'w')
text = str((pytesseract.image_to_string(noise_free_img, config='--psm 3')))
text = text.replace('-\n', '')
f.write(text)

Captcha recognition using ocr

I am trying to make a code for solving captcha for such images:
Here is the processed image:
And my code:
image = cv2.resize(image, (300,120))
image = cv2.dilate(image, None, iterations=1)
image = cv2.GaussianBlur(image,(1,9),0)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
image = cv2.medianBlur(image,5)
cv2.imshow("Image", image)
cv2.imwrite("im.jpg",image)
text =pytesseract.image_to_string(image,config='--psm 8 -c
tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz')
print(text)
But the code predicts 9922s for the given captcha. Instead of pez2s. Please help me solve this problem.

what's rules for cv2.approxPolyDP to detect point?

I have below two input images:
First:
Second:
Now I wanna detect the square area.
And here is my code:
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import cv2
import argparse
import math
import sys
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to input image")
args = vars(ap.parse_args())
img = cv2.imread(args["image"])
# img = cv2.bitwise_not(img,img)
# gray = cv2.imread(args["image"],0)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
cv2.imshow('gray', gray)
cv2.waitKey(0)
gray = cv2.normalize(gray, gray, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
cv2.imshow('normalized', gray)
cv2.waitKey(0)
# sys.exit()
ret, thresh = cv2.threshold(gray, 230, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('thresh', thresh)
# cv2.imwrite('./wni230.png',thresh)
cv2.waitKey(0)
square_cnts = []
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
cv2.imshow('dilated', thresh)
cv2.waitKey(0)
# sys.exit()
tmpimage, contours, h = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnt0 = contours[0]
tmp_h = h[::-1]
for index, cnt in enumerate(contours[::-1]):
print '!!!!!!!!!!! new round !!!!!!!!!!'
print cv2.arcLength(cnt, True)
# print cnt
approx = cv2.approxPolyDP(cnt, 0.1*cv2.arcLength(cnt, True), True)
print '**********approx*****'
print approx
print len(approx)
print '**********approx end*****'
if len(approx) == 4:
print 'h[i][2]:%s' % tmp_h[0][index][2]
print 'h[i][3]:%s' % tmp_h[0][index][3]
if ret > 0.5:
print "Parallelogram"
elif 0.3 < ret < 0.5:
print "Rectangle"
elif 0 < ret < 0.3:
print "Rhombus"
else:
print "square"
print cv2.arcLength(cnt, True)
print approx
cv2.drawContours(img, [cnt], 0, (0, 0, 255), 2)
cv2.imshow('tmpsquare', img)
cv2.waitKey(0)
if int(cv2.arcLength(cnt, True)) >= 96:
x, y, w, h = cv2.boundingRect(approx)
for j in approx:
cv2.circle(img, (int(j[0][0]), int(j[0][1])), 1, (0, 255, 0), 2)
cv2.imshow('final', img[y:y+h, x:x+w])
cv2.waitKey(0)
print 'target but long squere detected...'
cv2.waitKey(0)
cv2.imshow('img', img)
cv2.imwrite('tmp.png', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
So, I got the two output images:
First:
Second:
Line in red is contour, point in green is points detected by approxPolyDP.
So, the first one is what I wanna.
However, how approxPolyDP locate the up most point for the second one?
Could anyone please explain the rules?
Env:
Python: 2.7.10
Opencv: 3.2.0
Thanks.
Wesley

how to callpython function through button in web2py?

I am creating a web2py app; I want a button that can call a Python function available in controller folder in default.py and show the text result.
The function is:
src_path =
"/home/globalstat/web2py/applications/image_resize/controllers/"
def get_string(img_path):
# Read image with opencv
img_0 = cv2.imread(img_path)
# Convert to gray
img = cv2.cvtColor(img_0, cv2.COLOR_BGR2GRAY)
# thresh = 127
# im_bw = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY)[1]
# cv2.imwrite('bw_image.png', im_bw)
# Apply threshold to get image with only black and white
#img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, v2.THRESH_BINARY, 11, 2)
# Write the image after apply opencv to do some ...
#cv2.imwrite(src_path + "thres.png", img)
# Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
cv2.imwrite(src_path + "removednoise.jpg", img)
# Apply threshold to get image with only gray scaled
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite(src_path + "thres.jpg", img)
# Recognize text with tesseract for python
#img_ref=Image.open(src_path + "removednoise.jpg")
img_ref=Image.open(src_path + "thres.jpg")
img_ref.save("test-600.png",doing=(600,600))
result = pytesseract.image_to_string(img_ref)
return result
and the code I'm using in view file for button is:
<Button type="button" name ="seeting_button" onclick =
'window.location="{{=URL('default', 'get_string',)}}";'>
How can I pass argument so it displays result?
Try changing
def get_string(img_path):
img_0 = cv2.imread(img_path)
Into
def get_string():
img_path = request.args[0]
img_0 = cv2.imread(img_path)
Any function with parameters inside a controller file is ignored by web2py. Arguments are passed through request.args

Categories