python OCR recognize image into text - python

It seems that the resolution of the image affects whether the OCR output is successful or not.
Usually the resolution/quality of the images from the production line is like test image 1. Instead of changing the camera quality, is there any way to make the success rate higher, such as improving the code or using a simple AI to help with detection? I need a hand, thanks.
the demo .py code I found from tutorial
from PIL import Image
import pytesseract
# Open the test image and OCR it with Tesseract's English language data.
picture = Image.open('new_003.png')
recognized = pytesseract.image_to_string(picture, lang='eng')
# Emit the "size" label, the image size tuple and the recognized text,
# each on its own line.
print("size", picture.size, recognized, sep="\n")
(pic) test image 1: https://ibb.co/VLsM9LL
size
(122, 119)
# the output is:
R carac7
(pic) test image 2: https://ibb.co/XyRcf45
size
(329, 249)
# the output is:
R1 oun,
2A
R ca7ac2
(pic) test image 3: https://ibb.co/fNtDRc7
this one just for test but is the only one 100% correct
size
(640, 640)
# the output is:
BREAKING THE STATUE
i have always known
i just didn't understand
the inner conflictions
arresting our hands
gravitating close enough
expansive distamce between
i couldn't give you more
but i meant everything
when the day comes
you find your heart
wants something more
than a viece and a part
your life will change
like astatue set free
to walk among us
to created estiny
we didn't break any rules
we didn't make mistakes
making beauty in loving
making lovine for days
SHILOW
I am trying to find out (and prove) whether image resolution is the only solution, or whether there are alternative ways to solve this issue.
I tried dilation and erosion on the image, hoping to get a clearer image for OCR, like the demo picture at https://ibb.co/3pDgDnF
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from IPython.display import clear_output
def show_img(img, bigger=False):
    """Display a BGR OpenCV image with matplotlib.

    When *bigger* is true a 15x15 figure is created first, so the image is
    drawn on a larger canvas.
    """
    if bigger:
        plt.figure(figsize=(15,15))
    # matplotlib expects RGB channel order, so convert from BGR first.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()
def sharpen(img, sigma=100):
    """Sharpen *img* with an unsharp mask.

    The image is Gaussian-blurred with standard deviation *sigma* (kernel
    size (0, 0) lets OpenCV derive it from sigma) and the result is
    1.5 * img - 0.5 * blurred.
    """
    # Candidate sigma values noted by the author: 5, 15, 25.
    blurred = cv2.GaussianBlur(img, (0, 0), sigma)
    return cv2.addWeighted(img, 1.5, blurred, -0.5, 0)
def img_processing(img):
    """Preprocess *img*; at the moment the only step is sharpening."""
    # Further preprocessing steps can be added here.
    return sharpen(img)
image_path = "/home/joy/桌面/all_pic_OCR/simple_pic/03.png"
img = cv2.imread(image_path)
# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, which would otherwise fail later inside cv2.imshow with an
# obscure assertion error.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
cv2.imshow('03', img)  # Original image
# NOTE(review): img2 (grayscale) is computed but never used below; the
# morphology runs on the colour image. Confirm which one was intended.
img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
# Dilation followed by erosion with the same kernel (a morphological closing).
img = cv2.dilate(img, kernel)  # tried Dilation
cv2.imshow('image_after_Dilation', img)  # image after Dilation
img = cv2.erode(img, kernel)  # tried Erosion
cv2.imshow('then_Erosion', img)  # image after Erosion
cv2.waitKey(0)
cv2.destroyAllWindows()
result: https://ibb.co/TbZjg3d
So I am still trying to get Python OCR to recognize images as text with 99.9999% accuracy.

Related

Pyzbar Can't Decode QRCode

Have a bunch of QR Code labels printed from the same label printer, all can be read except for this one.
Have tried all solutions from Preprocessing images for QR detection in python
Losing my mind... any help appreciated!
Code is here:
import cv2
import numpy as np
from pyzbar.pyzbar import decode
from pyzbar.pyzbar import ZBarSymbol
from kraken import binarization
from PIL import Image
from qreader import QReader
image_path = r"C:\Users\ASinger\Pictures\hdi_pdfs\page1.png"

# Method 1: fixed global threshold at 127, then decode QR codes only.
im = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
ret, bw_im = cv2.threshold(im, 127, 255, cv2.THRESH_BINARY)
barcodes = decode(bw_im, symbols=[ZBarSymbol.QRCODE])
print(f'barcodes: {barcodes}')

# Method 2: binarize the PIL image with kraken's nlbin, then decode.
im = Image.open(image_path)
bw_im = binarization.nlbin(im)
decoded = decode(bw_im, symbols=[ZBarSymbol.QRCODE])
print(f'decoded: {decoded}')

# Method 3: 5x5 Gaussian blur followed by Otsu thresholding, then decode.
im = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
blur = cv2.GaussianBlur(im, (5, 5), 0)
ret, bw_im = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
decoded = decode(bw_im, symbols=[ZBarSymbol.QRCODE])
print(f'decoded: {decoded}')

# Method 4: QReader detector/decoder on the colour image.
qreader = QReader()
image = cv2.imread(image_path)
decoded_text = qreader.detect_and_decode(image=image)
print(f'decoded_text: {decoded_text}')

# Method 5: resize to 2800x2800, threshold at 127, then decode.
# The resize is done in memory. Previously the resized image was saved back
# over image_path, which permanently replaced the source scan (so every
# later run of methods 1-4 read the resized file), and it was saved with
# quality=500, which is not a PNG save option.
im2 = Image.open(image_path).resize((2800, 2800))
im2.show()
im3 = np.array(im2.convert("L"))  # 8-bit single-channel array for OpenCV
ret, bw_im = cv2.threshold(im3, 127, 255, cv2.THRESH_BINARY)
decoded = decode(bw_im, symbols=[ZBarSymbol.QRCODE])
print(f'decoded: {decoded}')
It's difficult to tell why Pyzbar fails, but we may guess that the issue is related to low quality scanning artifacts, and maybe compression artifacts.
Here is a small ROI in native resolution:
As you can see there is a lot of noise and artifacts.
For improving the quality I recommend using cv2.medianBlur filter:
clean_im = cv2.medianBlur(im, 25)
Median filter was selected because it applies fine threshold between black and white.
The size of the filter was selected to be 25 (relatively large) because the resolution of the image is relatively high compared to the details of the QR Code.
Same ROI after filtering:
As you can see the noise is much lower, but the details are blurred.
For improving the issue, we may downscale the image using cv2.resize:
small_clean_im = cv2.resize(clean_im, (512, 512), interpolation=cv2.INTER_AREA)
Downscaling the image with cv2.INTER_AREA interpolation is merging multiple pixels into one pixel (kind of concentrating the data), and also remove noise.
The size 512x512 seems like a good tradeoff between keeping details and removing noise.
Image after medianBlur and resize:
Same image with resize only (without medianBlur) for comparison:
I suppose it's better not to apply a threshold before using Pyzbar decode method.
I assume the decode method uses an internal thresholding algorithm that may be better than our own thresholding.
Complete code sample:
import cv2
from pyzbar.pyzbar import decode
from pyzbar.pyzbar import ZBarSymbol
# Read the page as a single-channel grayscale image.
im = cv2.imread('page1.png', cv2.IMREAD_GRAYSCALE)

# Reduce noise with a size-25 median filter, then downscale to 512x512
# using area interpolation.
clean_im = cv2.medianBlur(im, ksize=25)
small_clean_im = cv2.resize(clean_im, dsize=(512, 512), interpolation=cv2.INTER_AREA)

# Decode QR codes straight from the grayscale image (no explicit threshold).
barcodes = decode(small_clean_im, symbols=[ZBarSymbol.QRCODE])
print(f'barcodes: {barcodes}')

# Show image for testing
cv2.imshow('small_clean_im', small_clean_im)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
barcodes: [Decoded(data=b'P1693921.001', type='QRCODE', rect=Rect(left=137, top=112, width=175, height=175), polygon=[Point(x=137, y=280), Point(x=304, y=287), Point(x=312, y=119), Point(x=143, y=112)])]
Note:
The processing worked with the sample image, but it is not guaranteed to work with other images.
You may try different filter sizes, and different image sizes for improving the success rate.

Pytesseract Image to String issue

Does anyone know how I can get these results better?
Total Kills: 15,230,550
Kill Details: (recorded after 2019/10,/Z3]
993,151 331,129
1,330,450 33,265,533
5,031,168
This is what it returns however it is meant to be the same as the image posted below, I am new to python so are there any parameters that I can add to make it read the image better?
# Read the screenshot from disk with OpenCV.
img = cv2.imread("kills.jpeg")
# Run Tesseract with its default settings (no page segmentation mode given).
text = pytesseract.image_to_string(img)
print(text)
This is my code to read the image, Is there anything I can add to make it read better? Also, the black boxes are to cover images that were interfering with the reading. I would like to also say that I have added the 2 black boxes to see if the images behind them were causing the issue, but I still get the same issue.
The missing piece is the page segmentation mode (psm). You should set it explicitly when the default mode does not give the desired result.
If we look at your image, the only artifacts are the black columns. Other than that, the image looks like a binary image. Suitable for tesseract to recognize the characters and the digits.
Lets try reading the image by setting the psm to 6.
6 Assume a single uniform block of text.
# OCR the image treating it as a single uniform block of text (--psm 6).
print(pytesseract.image_to_string(img, config="--psm 6"))
The result will be:
Total Kills: 75,230,550
Kill Details: (recorded after 2019/10/23)
993,161 331,129
1,380,450 33,265,533
5,031,168
Update
The second way to solve the problem is getting binary mask and applying OCR to the mask features.
Binary-mask
Features of the binary-mask
As we can see the result is slightly different from the input image. Now when we apply OCR result will be:
Total Kills: 75,230,550
Kill Details: (recorded after 2019/10/23)
993,161 331,129
1,380,450 33,265,533
5,031,168
Code:
import cv2
import numpy as np
import pytesseract
# Load the image
source = cv2.imread("LuKz3.jpg")

# Convert to HSV
hsv_image = cv2.cvtColor(source, cv2.COLOR_BGR2HSV)

# Binary mask of the pixels whose HSV values lie inside the given bounds
lower_bound = np.array([0, 0, 0])
upper_bound = np.array([179, 255, 154])
mask = cv2.inRange(hsv_image, lower_bound, upper_bound)

# Dilate the mask, AND it with the original mask, then invert the result
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilated = cv2.dilate(mask, kernel, iterations=5)
features = 255 - cv2.bitwise_and(dilated, mask)

# OCR, assuming a single uniform block of text
print(pytesseract.image_to_string(features, config="--psm 6"))

# Display
cv2.imshow("res", features)
cv2.waitKey(0)

I want to increase brightness and contrast of images in dynamic way so that the program is applicable for any new images

I have few images where I need to increase or decrease the contrast and brightness of the image in a dynamic way so that it is visible clearly. And the program needs to be dynamic so that it even works for new images also. I also want character should be dark.
I was able to increase brightness and contrast but it is not working properly for each image.
import cv2
import numpy as np
# Raw string so the backslash in the Windows path is never read as an escape.
img = cv2.imread(r'D:\Bright.png')
image = cv2.GaussianBlur(img, (5, 5), 0)
#image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY)[1]
#kernel = np.ones((2,1),np.uint8)
#dilation = cv2.dilate(img,kernel)
cv2.imshow('test', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
imghsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Push the V channel apart around 190: darker pixels get 25 darker, brighter
# pixels get 25 brighter. The arithmetic is done in a signed type and then
# clipped, because uint8 scalar arithmetic can wrap around (e.g. 10 - 25)
# instead of going negative, which defeats a max(..., 0) / min(..., 255)
# guard. Vectorising also avoids a Python-level loop over every pixel.
value = imghsv[:, :, 2].astype(np.int16)
value = np.where(value < 190, value - 25, value + 25)
imghsv[:, :, 2] = np.clip(value, 0, 255).astype(np.uint8)
cv2.imshow('contrast', cv2.cvtColor(imghsv, cv2.COLOR_HSV2BGR))
#cv2.imwrite('D:\\112.png',cv2.cvtColor(imghsv, cv2.COLOR_HSV2BGR))
cv2.waitKey(0)
cv2.destroyAllWindows()
I want a program which works fine for every image and words are a little darker so that they are easily visible.
As Tilarion suggested, you could try "Auto Brightness And Contrast" to see if it works well. The theory behind this is explained well here in the solution section. The solution is in C++. I've written a version of it in python which you can directly use, works only on 1 channel at a time for colour images:
def auto_brightandcontrast(input_img, channel, clip_percent=1, hist_size=180):
    """Stretch brightness/contrast based on one channel's clipped histogram.

    A 256-bin histogram of ``channel`` is computed; the first ``hist_size``
    bins are accumulated, ``clip_percent`` percent of that pixel count is
    discarded (half from each tail), and the remaining [min, max] range is
    linearly mapped onto [0, hist_size - 1].

    Args:
        input_img: image passed to ``cv2.calcHist`` / ``cv2.convertScaleAbs``.
        channel: index of the channel the histogram is taken from.
        clip_percent: total percentage of pixels to clip; 0 disables the
            adjustment and returns ``input_img`` unchanged.
        hist_size: number of leading histogram bins considered and the size
            of the output range. Defaults to 180, the previously hard-coded
            value (presumably chosen for OpenCV's 0..179 hue range - confirm);
            pass 256 for a full 8-bit channel.

    Returns:
        The rescaled image, or an unmodified copy when the histogram is
        empty or collapses to a single level (nothing to stretch).

    Note:
        The scale/offset derived from one channel is applied by
        ``cv2.convertScaleAbs`` to the whole of ``input_img``.
    """
    if clip_percent == 0:
        return input_img

    hist = cv2.calcHist([input_img], [channel], None, [256], [0, 256])

    # Cumulative pixel count over the first hist_size bins, built in one pass
    # (the previous list.insert-based accumulation was needlessly quadratic).
    accumulator = []
    running = 0.0
    for count in hist[:hist_size, 0]:
        running += float(count)
        accumulator.append(running)

    if not accumulator or accumulator[-1] <= 0:
        return input_img.copy()
    total = accumulator[-1]

    # Half of the clip budget is taken from each end of the histogram.
    clip_count = clip_percent * (total / 100.0) / 2.0

    # Lowest level that survives clipping of the dark tail.
    min_gray = 0
    while min_gray < hist_size - 1 and accumulator[min_gray] < clip_count:
        min_gray += 1

    # Highest level that survives clipping of the bright tail. The lower
    # bound stops the index from walking below min_gray (and into negative
    # indices) on flat images.
    max_gray = hist_size - 1
    while max_gray > min_gray and accumulator[max_gray] >= total - clip_count:
        max_gray -= 1

    input_range = max_gray - min_gray
    if input_range <= 0:
        # Single-level image: avoid dividing by zero.
        return input_img.copy()

    # float() keeps true division even under Python 2.
    alpha = (hist_size - 1) / float(input_range)
    beta = -min_gray * alpha
    return cv2.convertScaleAbs(input_img, alpha=alpha, beta=beta)
It is a very few lines of code to do it in Python Wand (which is based upon ImageMagick). Here is a script.
#!/bin/python3.7
from wand.image import Image
# Open the input, stretch its contrast between the given black and white
# points, and write the result to a new file. The context manager closes the
# image when the block exits.
with Image(filename='task4.jpg') as img:
    img.contrast_stretch(black_point=0.02, white_point=0.99)
    img.save(filename='task4_stretch2_99.jpg')
Input:
Result:
Increase the black point value to make the text darker and/or decrease the white point value to make the lighter parts brighter.
Thanks to Eric McConville (the Wand developer) for correcting my arguments to make the code work.

How can i read input only a part of the image without creating another image?

import cv2
fname = '1.png'
img = cv2.imread(fname, 0)  # flag 0: load as grayscale
print(img)  # the outcome is an array of values from 0 to 255 (grayscale)
# Pixels above 254 become 255 and everything else 0; inverting then makes
# the non-white pixels the foreground.
ret, thresh = cv2.threshold(img, 254, 255, cv2.THRESH_BINARY)
thresh = cv2.bitwise_not(thresh)
# Label 4-connected foreground regions.
nums, labels = cv2.connectedComponents(thresh, None, 4, cv2.CV_32S)
# Spread the label ids over 0..255 so they are visible as gray levels.
dst = cv2.convertScaleAbs(255.0*labels/nums)
# NOTE(review): dest_dir is not defined in this snippet; it must be set beforehand.
cv2.imwrite(dest_dir+"output.png", dst)
that code works just fine, so i moved on to adjusting my code so it can take a portion of the image not the entire image:
from PIL import Image
img = Image.open(fname)
# crop takes a single (left, upper, right, lower) box tuple; the closing
# parenthesis of the call was missing.
# NOTE(review): the coordinate names mix xmin with yMin/xMax/yMax - confirm
# they match the variables defined by the caller.
img2 = img.crop((int(xmin), int(yMin), int(xMax), int(yMax)))
xmin ymin xmax ymax simply being the top left bottom right coordinates of the box.
then i did img = cv2.imread(img2) to continue as the previous code but got an error, i printed img2 and got <PIL.Image.Image image mode=RGB size=54x10 at 0x7F4D283AFB70> how can i adjust it to be able to input that crop or image portion instead of fname in my code above, and kindly note i don't want to save img2 as an image and carry on from there because i need to work on the main image.
try cv2.imshow() instead of printing it. In order to see an image you cropped, you need to use cv2 function. here is a sample code:
import numpy as np
import cv2
# Load a colour image as grayscale (flag 0)
img = cv2.imread('messi5.jpg',0)
# Show it in a window named 'image' and wait for a key press before closing.
cv2.imshow('image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
The simple answer is NO you cannot.
Open up your terminal /IDE and type in help(cv2.imread).
It clearly states that The function imread loads an image from the specified file and returns it. So in order to use cv2.imread() you must pass it in as a file not an image.
Your best bet would be to save your cropped image as a file and then read it.

How to implement imbinarize in OpenCV

I developed a script in Matlab which analyses engraved text on coloured steel. I'm using a range of morphological techniques to extract the text and read it with OCR. I need to implement it on a Raspberry Pi, therefore I decided to port my Matlab code to OpenCV (in Python). I tried to port some methods and they work similarly, but how do I implement imreconstruct and imbinarize (shown below) in OpenCV? (The challenge here is to appropriately differentiate foreground and background.)
Maybe I should try adding grabCut or getStructuringElement or morphologyEx or dilate? I tried them in range of combinations but have not found a perfect solution.
I will put the whole script for both if anyone could give me suggestions on how to generally improve this extraction and accuracy of OCR process I would greatly appreciate it.
Based on bin values of grey-scale image. I change some parameters in
those functions:
Matlab:
% Top-hat filter with a disk structuring element of radius 300.
se = strel('disk', 300);
img = imtophat(img, se);
% Marker image for reconstruction. Each assignment overwrites the previous
% one; these look like alternatives chosen per image type (only the last
% takes effect if all three are run in sequence).
maker = imerode(img, strel('line',100,0)); %for whiter ones
maker = imerode(img, strel('line',85,0)); %for medium
maker = imerode(img, strel('line',5,0));
% Morphological reconstruction of img from the marker.
imgClear = imreconstruct(maker, img);
imgBlur = imgaussfilt(imgClear,1); %less blur for whiter frames
% Adaptive binarization. As above, the second call overwrites the first, so
% these also look like per-image-type alternatives.
BW = imbinarize(imgBlur,'adaptive','ForegroundPolarity','Bright',...
'Sensitivity',0.7); %process for medium
BW = imbinarize(imgBlur, 'adaptive', 'ForegroundPolarity',...
'Dark', 'Sensitivity', 0.4); % process for black and white
% OCR restricted to digits, treating the image as one block of text.
res = ocr(BW, 'CharacterSet', '0123456789', 'TextLayout', 'Block');
res.Text;
OpenCv
# 5x5 all-ones structuring element shared by the morphology steps below.
kernel = numpy.ones((5,5),numpy.uint8)
# Smooth, erode once, then apply a morphological opening.
blur = cv2.GaussianBlur(img,(5,5),0)
erosion = cv2.erode(blur,kernel,iterations = 1)
opening = cv2.morphologyEx(erosion, cv2.MORPH_OPEN, kernel)
#bremove = cv2.grabCut(opening,mask,rect,bgdModelmode==GC_INIT_WITH_MASK)
#th3 = cv2.adaptiveThreshold(opening,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU,11,2)
# Global Otsu threshold of the opened image.
ret, thresh= cv2.threshold(opening,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# NOTE(review): OCR is run on the file 'image2.png', not on `thresh`; unless
# that file is the saved threshold result, the preprocessing above is not used.
# NOTE(review): the leading 'stdout' in config looks like a leftover from the
# tesseract command line - confirm it is intended.
ocr = pytesseract.image_to_string(Image.open('image2.png'),config='stdout -c tessedit_char_whitelist=0123456789')
Here is the input image:
I am surprised at how much difference between matlab and opencv there is when they both appear to use the same algorithm. Why do you run imbinarize twice? What does the sensitivity keyword actually do (mathematically, behind the background). Because they obviously have several steps more than just the bare OTSU.
import cv2
import numpy as np
import matplotlib.pyplot as plt
def show(img):
    """Render *img* with matplotlib using the grayscale colormap."""
    plt.imshow(img, cmap="gray"); plt.show()
# Load the sample as a single-channel grayscale image.
img = cv2.imread("letters.jpg", cv2.IMREAD_GRAYSCALE)
kernel = np.ones((3,3), np.uint8)
blur = cv2.GaussianBlur(img,(3,3), 0)
# Three erosion passes followed by a single dilation; despite the variable
# name this is not a symmetric morphological opening.
erosion = cv2.erode(blur, kernel, iterations=3)
opening = cv2.dilate(erosion, kernel)
# Gaussian-weighted adaptive threshold with a 45-pixel neighbourhood and C=2.
th3 = cv2.adaptiveThreshold(opening, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY, 45, 2)
show(th3)
# Build a 2-D Gaussian kernel from the outer product of the 1-D kernel.
# NOTE(review): OpenCV documents the getGaussianKernel size as odd and
# positive; 6 is even - confirm this is intended.
kernel2 = cv2.getGaussianKernel(6, 2) #np.ones((6,6))
kernel2 = np.outer(kernel2, kernel2)
# Clean up: dilate with the Gaussian-shaped kernel, then erode with the 3x3 one.
th3 = cv2.dilate(th3, kernel2)
th3 = cv2.erode(th3, kernel)
show(th3)
The images that get displayed are:
After a bit of cleaning up:
So all in all not the same and certainly not as nice as matlab. But the basic principle seems the same, it's just that the numbers need playing with.
A better approach would probably be to do a threshold by the mean of the image and then use the output of that as a mask to adaptive threshold the original image. Hopefully then the results would be better than both opencv and matlab.
Try doing it with ADAPTIVE_THRESH_MEAN_C: you can get some really nice results, but there's more trash lying around. Again, maybe if you can use it as a mask to isolate the text and then do thresholding again, it might turn out to be better. Also, the shape of the erosion and dilation kernels will make a big difference here.
I worked out the code to have a positive result based on your engraved text sample.
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
def show(img):
    """Display *img* in grayscale via matplotlib."""
    plt.imshow(img, cmap="gray"); plt.show()
# Load the input image as grayscale (flag 0)
img = cv2.imread('./imagesStackoverflow/engraved_text.jpg', 0)
show(img)
# Binary threshold: pixels above 60 become 120, the rest 0.
ret, mask = cv2.threshold(img, 60, 120, cv2.THRESH_BINARY)  # tune 60, 120 for the best OCR results
kernel = np.ones((5,3),np.uint8)
mask = cv2.erode(mask,kernel,iterations = 1)
show(mask)
# I used a version of OpenCV with Tesseract; you may use pytesseract instead and set the modes as:
# OCR Engine Mode (OEM) = 3 (default = 3)
# Page Segmentation Mode (PSmode) = 11 (default = 3)
# NOTE(review): OpenCV documents the trailing arguments as (oem, psmode), so
# 11, 3 may be passed in the opposite order to the comment above - confirm.
tesser = cv2.text.OCRTesseract_create('C:/Program Files/Tesseract 4.0.0/tessdata/','eng','0123456789',11,3)
retval = tesser.run(mask, 0)  # returns a string
# print() call form: the Python 2 print statement is a syntax error on Python 3.
print('OCR:' + retval)
Processed image and OCR output:
It would be great if you can feedback your test results with more sample images.
opencvpythontesseractocr
What I can see from your code is you have used tophat filtering in your Matlab code as the first step. However, I couldn't see the same in your python OpenCV code.
Python has built in tophat filter try applying that for getting similar result
# Top-hat transform of img using a 5x5 all-ones structuring element.
kernel = np.ones((5,5),np.uint8)
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
Also, try using CLAHE it gives better contrast to your image and then apply blackhat to filter out small details.
# Contrast Limited Adaptive Histogram Equalization with clip limit 2.0 on
# an 8x8 tile grid, applied to img.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
cl1 = clahe.apply(img)
I have got better results by applying these transformations.
Tried below, it works to recognize the lighter engraved text sample. Hope it helps.
def show(img):
    """Plot *img* with a grayscale colormap and show the figure."""
    plt.imshow(img, cmap="gray"); plt.show()
# Load the input image as grayscale (flag 0)
img = cv2.imread('./imagesStackoverflow/engraved_text2.jpg', 0)
show(img)
# Apply CLAHE to adjust the contrast
clahe = cv2.createCLAHE(clipLimit=5.1, tileGridSize=(5,3))
cl1 = clahe.apply(img)
img = cl1.copy()
show(img)
img = cv2.GaussianBlur(img,(3,3), 1)
# Binary threshold: pixels above 125 become 150, the rest 0.
ret, mask = cv2.threshold(img, 125, 150, cv2.THRESH_BINARY)  # tune 125, 150 for the best OCR results
kernel = np.ones((5,3),np.uint8)
mask = cv2.erode(mask,kernel,iterations = 1)
show(mask)
# I used a version of OpenCV with Tesseract; you may use pytesseract instead and set the modes as:
# Page Segmentation Mode (PSmode) = 11 (default = 3)
# OCR Engine Mode (OEM) = 3 (default = 3)
# NOTE(review): OpenCV documents the trailing arguments as (oem, psmode), so
# 11, 3 may not map to the modes listed above - confirm.
tesser = cv2.text.OCRTesseract_create('C:/Program Files/Tesseract 4.0.0/tessdata/','eng','0123456789',11,3)
retval = tesser.run(mask, 0)  # returns a string
# print() call form: the Python 2 print statement is a syntax error on Python 3.
print('OCR:' + retval)

Categories