I am using the script below to remove the black spots near the text and the strike-through line across the number, but it does not remove the noise properly.
def get_string(img_path):
    """Preprocess the image at ``img_path`` and return the text Tesseract finds.

    The image is converted to grayscale, passed through a dilate/erode pair,
    binarised with a Gaussian adaptive threshold, and the binarised image is
    handed to Tesseract.  Two intermediate images are written under the
    module-level ``src_path`` prefix: ``removed_noise.png`` and ``thres.png``.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Read image with opencv
    img = cv2.imread(img_path)
    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise.
    # NOTE: with a 1x1 structuring element both operations are the identity,
    # whatever the iteration count, so this pair does not alter the image.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=12)
    img = cv2.erode(img, kernel, iterations=12)
    # Write image after removed noise
    cv2.imwrite(src_path + "removed_noise.png", img)
    # Apply threshold to get image with only black and white
    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2
    )
    # Write the thresholded image so it can be inspected afterwards
    thres_path = src_path + "thres.png"
    cv2.imwrite(thres_path, img)
    # Recognize text in the image that was just produced.  The previous code
    # opened "vertical_final.jpg", a file this function never writes, so the
    # preprocessing above had no influence on the OCR result.
    with Image.open(thres_path) as thresholded:
        result = pytesseract.image_to_string(thresholded)
    return result
but it's not working properly.
Input image:
Output Image:-
Any help with these problems would be highly appreciated.
Source Code:-
def get_string(img_path):
    """Attempt to strip the horizontal line from an image, then OCR the rest.

    Reads ``img_path``, applies a dilate/erode pair with a 1x20 kernel,
    subtracts ``tax_amount.png`` from the result, builds a mask of the dark
    pixels of that difference and passes the mask to Tesseract.  Three
    intermediate PNG files are written under the module-level ``src_path``.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Read image with opencv
    img = cv2.imread(img_path)
    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise.
    # The kernel is one row by twenty columns, i.e. horizontal; dilating and
    # then eroding with it is the closing spelled out in the disabled line below.
    kernel = np.ones((1,20), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    #img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    # kernel is rebound to 1x1 here; its only remaining use is the MORPH_CLOSE
    # call near the end, where a 1x1 element leaves the mask unchanged.
    kernel = np.ones((1, 1), np.uint8)
    #img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    cv2.imwrite(src_path + "removed_noise.png", img)
    # Both operands are re-read from disk before subtracting.
    # NOTE(review): "tax_amount.png" is not written by this function;
    # presumably it is the unprocessed input image - confirm.
    img3 = cv2.subtract(cv2.imread(src_path + "removed_noise.png"),cv2.imread(src_path + "tax_amount.png"))
    cv2.imwrite(src_path + "removed_noise_makes_00.png", img3)
    # Per-channel bounds [0, 70] selecting the dark pixels of the difference
    lower_black = np.array([0,0,0], dtype = "uint16")
    upper_black = np.array([70,70,70], dtype = "uint16")
    black_mask = cv2.inRange(img3, lower_black, upper_black)
    # Every mask row that is entirely 0 is overwritten with 255
    black_mask[np.where((black_mask == [0] ).all(axis = 1))] = [255]
    # Despite the name, this is a MORPH_CLOSE with the 1x1 kernel set above
    opening = cv2.morphologyEx(black_mask, cv2.MORPH_CLOSE, kernel)
    cv2.imwrite(src_path + "removed_noise_makes_00_1.png", opening)
    # Recognize text with tesseract for python
    result = pytesseract.image_to_string(Image.open(src_path + "removed_noise_makes_00_1.png"))
    # Remove template file
    #os.remove(temp)
    return result
Where you do
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=12)
You apply 12 times a dilation with a 1x1 structuring element (SE). Unless OpenCV does something special with such a SE, this code should not change your image at all.
You should create a larger SE:
# 7x7 all-ones structuring element
kernel = np.ones((7, 7), np.uint8)
# Dilate once, then erode once with the same element
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
This will first dilate and then erode the result. What this accomplishes is that small (thin) black regions disappear. These are the regions where the SE didn't fit. This is the same as
img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
To remove the long line, you want to apply a closing with an elongated SE:
# One row by thirty columns: an elongated, horizontal structuring element
kernel = np.ones((1, 30), np.uint8)
# Closing of img with that element, kept separately from img as `line`
line = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
This leaves only the horizontal line. The difference of img and line is the text without the line.
If you think of img as the sum of line and text, then img - line will be text. However, there is a small problem still: img has white background (255), and black foreground. So really, it is img = 255 - text - line, and the line image you found above is really 255 - line, because it also has white background. So directly taking the difference will not produce the desired effect.
The solution is to invert your images first:
# Invert both images so that the background becomes 0
img, line = 255 - img, 255 - line
# Difference of the inverted images
text = img - line
Related
I'm using cv2 and pytesseract library to extract text from image. Here is the image (image3_3.png) and the python code:
def threshold_image(img_src):
    """Return a binary version of ``img_src`` obtained with Otsu's method.

    Args:
        img_src: Colour image array; it is converted with
            ``cv2.COLOR_BGR2GRAY`` before thresholding.

    Returns:
        The thresholded image, i.e. the second element of the tuple returned
        by ``cv2.threshold``.
    """
    # Collapse the colour channels to a single gray channel
    gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
    # The threshold value 0 is a placeholder: THRESH_OTSU selects the level
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(gray, 0, 255, otsu_flags)
    return binary
# Load the image through Pillow and turn it into a NumPy array.
# NOTE(review): Pillow presumably delivers RGB(A) channel order, whereas
# threshold_image() converts with COLOR_BGR2GRAY - confirm the order matters here.
img = np.array(Image.open('image3_3.png'))
# Apply dilation and erosion to remove some noise.
# NOTE: a 1x1 structuring element makes both calls leave the image unchanged.
kernel = np.ones((1, 1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
img = cv2.erode(img, kernel, iterations=1)
# normalise the image: min-max stretch to the range 0..255; norm_img is
# passed as the destination argument of cv2.normalize
norm_img = np.zeros((img.shape[0], img.shape[1]))
img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)
# Apply blur to smooth out the edges
img = cv2.GaussianBlur(img, (5, 5), 0)
# Binarise with threshold_image() and run Tesseract with page segmentation mode 6
string_ocr = pytesseract.image_to_string(threshold_image(img), lang = 'eng', config = '--psm 6')
print(string_ocr)
Here is the result:
Image A3. This is image A3 with more texts.
ISAS Visual Analytics
INow everyone can easily discover and share powerful
Nsights that inspire action
Why am I not getting the same exact text? Any help highly appreciated.
I have a cropped image and I am trying to get the numbers on that cropped image
Here's the code I am using
# Load the crop and reduce it to a lightly smoothed gray image
image = cv2.imread('Cropped.png')
grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)

# Inverted Otsu binarisation (the threshold value 0 is ignored by THRESH_OTSU)
_, binary_inv = cv2.threshold(
    smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)

# One opening pass with a 3x3 rectangle
rect_3x3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opened = cv2.morphologyEx(binary_inv, cv2.MORPH_OPEN, rect_3x3)

# Flip the 8-bit image back before handing it to Tesseract
invert = cv2.bitwise_not(opened)
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)
Here's the sample cropped image
All I get is some of the numbers, not all of them. How can I enhance such an image so that only the numbers are extracted?
I tried the code on this image, but it doesn't return the correct numbers.
You can easily solve this with three-main steps
Upsampling
Applying simple-threshold
set configuration to digits
Upsampling is needed for accurate recognition; otherwise Tesseract may misinterpret the digits.
Thresholding keeps only the features of the image.
Setting the configuration to digits restricts recognition to digits.
Result
Upsampling
Threshold
Pytesseract
277032200746
Code:
import cv2
import pytesseract
# Read the sample and convert it to gray
img1 = cv2.imread("kEpyN.png")  # "FX2in.png"
gry1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)

# Upsample: double the width and the height
height, width = gry1.shape[:2]
gry1 = cv2.resize(gry1, (2 * width, 2 * height))

# Otsu binarisation; only the image (second tuple element) is kept
_, thr1 = cv2.threshold(gry1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# OCR with the "digits" configuration, then print only the alphanumeric characters
txt1 = pytesseract.image_to_string(thr1, config="digits")
print("".join(filter(str.isalnum, txt1)))

# Show the thresholded image until a key is pressed
cv2.imshow("thr1", thr1)
cv2.waitKey(0)
Update:
Most probably a version mismatch causes the extra words and digits.
One way to solve this is to take only a region of the image.
For instance, from the thresholded image:
# Crop the thresholded image: drop the bottom 10 rows and keep the columns
# from 400 px left of the horizontal centre to 200 px right of it.
(h_thr, w_thr) = thr1.shape[:2]
centre_x = w_thr // 2
# Clamp the left edge at 0.  A negative slice start would be counted from the
# right-hand side and silently select the wrong (usually empty) region when
# the image is narrower than 800 px.
thr1 = thr1[0:h_thr-10, max(centre_x - 400, 0):centre_x + 200]
Result will be:
If you now run the OCR on this crop, the output should be:
277032200746
This is the image. I am trying to give a proper shape to the images in my folder, but I am unable to get a perfect result. The following is one example:
Following is the coding that I have done for my folder containing this type of images:
''''code''''
import cv2
import numpy as np
import glob
path = r'C:\Users\User\Desktop\A\*.jpg'
def k_function(image,k):
    """Quantise ``image`` to ``k`` levels with k-means clustering.

    Every pixel is one k-means sample.  A 2-D (single-channel) image gives
    one feature per sample; an image with a trailing channel axis gives one
    feature per channel.  The previous implementation always reshaped to four
    columns, which packed four unrelated values into one sample and raised
    an error whenever the element count was not a multiple of four.

    Args:
        image: Image array (2-D, or 3-D with the channels on the last axis).
        k: Number of clusters.

    Returns:
        A ``uint8`` array with the shape of ``image`` in which each pixel is
        replaced by the centre of the cluster it was assigned to.
    """
    channels = 1 if image.ndim == 2 else image.shape[-1]
    # cv2.kmeans expects float32 samples, one row per sample
    samples = np.float32(image.reshape((-1, channels)))
    # Stop after 10 iterations or when the centres move by less than 1.0
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(
        samples, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )
    centers = np.uint8(centers)
    # Look up the centre of every sample and restore the original shape
    quantised = centers[labels.flatten()]
    return quantised.reshape(image.shape)
def noise_function(image):
    """Close small gaps in ``image`` and keep the interior of its regions.

    The image is closed twice with a 2x2 kernel, the L2 distance transform of
    the result is computed, and that distance map is thresholded at 2 % of
    its maximum.  The unused dilation of the closed image (``bg``) that the
    earlier version computed has been removed; the return value is unchanged.

    Args:
        image: Binary single-channel image accepted by
            ``cv2.distanceTransform``.

    Returns:
        The thresholded distance map returned by ``cv2.threshold``.
    """
    kernel = np.ones((2, 2), np.uint8)
    closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=2)
    dist_transform = cv2.distanceTransform(closing, cv2.DIST_L2, 0)
    # Threshold type 0 is cv2.THRESH_BINARY
    _, fg = cv2.threshold(
        dist_transform, 0.02 * dist_transform.max(), 255, cv2.THRESH_BINARY
    )
    return fg
def filling(thresh, seed_point=(60, 60)):
    """Fill the holes of a binary image by flood-filling from ``seed_point``.

    A copy of ``thresh`` is flood-filled with 255 starting at ``seed_point``;
    the pixels the fill did not reach are then OR-ed into the original image.

    Args:
        thresh: Binary single-channel 8-bit image.
        seed_point: ``(x, y)`` start of the flood fill.  It has to lie inside
            the image; the default keeps the previously hard-coded ``(60, 60)``.
            NOTE(review): presumably the seed must sit on the background for
            the hole filling to work - confirm for your images.

    Returns:
        ``thresh`` OR-ed with the inverted flood-filled copy.
    """
    im_floodfill = thresh.copy()
    h, w = thresh.shape[:2]
    # floodFill requires a mask two pixels larger than the image in each dimension
    mask = np.zeros((h + 2, w + 2), np.uint8)
    cv2.floodFill(im_floodfill, mask, seed_point, 255)
    # Pixels the fill could not reach become 255 after inversion
    im_floodfill_inv = cv2.bitwise_not(im_floodfill)
    return thresh | im_floodfill_inv
# Run every file matched by `path` through the pipeline and save the result
for index, file_name in enumerate(glob.glob(path)):
    colour = cv2.imread(file_name)
    gray = cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY)
    # Reduce to two levels, then binarise (inverted) at 127
    two_level = k_function(gray, 2)
    _, inverted = cv2.threshold(two_level, 127, 255, cv2.THRESH_BINARY_INV)
    # Fill holes, then clean up the result
    cleaned = noise_function(filling(inverted))
    cv2.imwrite(r'C:\Users\User\Desktop\New folder\image{}.jpg'.format(index), cleaned)
Try using copyMakeBorder to make a border. It looks like you're trying to use floodFill and I've never figured out how that is supposed to work.
import cv2
# Border widths in pixels: only the top and the right edge are extended
top, bottom, left, right = 10, 0, 0, 10
image = cv2.imread('elbow.png')
image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT)
cv2.imwrite('elbow_border.png', image)
elbow.png:
elbow_border.png:
I would approach it a bit differently in Python/OpenCV. I would convert to HSV and threshold the saturation channel. Then use morphology open to smooth outline.
Input (cropped from your post):
import cv2
# load image as HSV and select saturation
# Read the image, convert it to HSV and take channel 1 (saturation)
img = cv2.imread("finger.png")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
sat = hsv[:, :, 1]

# Binary threshold of the saturation channel at 25
_, thresh = cv2.threshold(sat, 25, 255, cv2.THRESH_BINARY)

# Morphological opening with a 19x19 ellipse to smooth the outline
ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (19, 19))
smoothed = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, ellipse)

# Save the result, then display every stage until a key is pressed
cv2.imwrite("finger_smoothed.png", smoothed)
for title, stage in (("SAT", sat), ("THRESH", thresh), ("SMOOTHED", smoothed)):
    cv2.imshow(title, stage)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
I am using pytesseract to extract text from images. Before extracting text with pytesseract, I use Pillow and cv2 to reduce noise and enhance the image:
import numpy as np
import pytesseract
from PIL import Image, ImageFilter, ImageEnhance
import cv2
# Upscale the screenshot three times and store it as new.png
img = cv2.imread('ss.png')
img = cv2.resize(img, (0,0), fx=3, fy=3)
cv2.imwrite("new.png", img)
# Flag 0 re-reads the upscaled file as a single-channel grayscale image
img1 = cv2.imread("new.png", 0)
#Apply dilation and erosion
kernel = np.ones((2, 2), np.uint8)
img1 = cv2.dilate(img1, kernel, iterations=1)
img1 = cv2.erode(img1, kernel, iterations=1)
# Inverted Gaussian adaptive threshold (block size 11, constant 2)
img1 = cv2.adaptiveThreshold(img1,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,2)
cv2.imwrite("new1.png", img1)
img2 = Image.open("new1.png")
#Enhance the image
# The two lines below used an undefined name `im`, which raised NameError;
# the median filter and the contrast enhancer now both operate on img2, so
# the contrast step is applied to the filtered image.
img2 = img2.filter(ImageFilter.MedianFilter())
enhancer = ImageEnhance.Contrast(img2)
img2 = enhancer.enhance(2)
img2.save('new2.png')
result = pytesseract.image_to_string(Image.open("new2.png"))
print(result)
I mostly get good results, but when I use some low quality/resolution images, I do not get the expected output. Can I improve this in my code?
Example:
Input:
new1.png:
new2.png:
The string that I get from the console is play. What could I change in my algorithm, so that I get the whole string extracted?
Any help would be greatly appreciated.
This is a late answer, but I just came across this. we can use Pillow and cv2 to reduce noise and enhance the image before extracting text from images using pytesseract. I hope it would help someone in future.
#import required library
src_path = "C:/Users/chethan/Desktop/"
def get_string(img_path):
    """Preprocess the image at ``img_path`` and return the text Tesseract finds.

    The image is converted to grayscale and passed through a dilate/erode
    pair; the result is written to ``removed_noise.png`` and ``thres.png``
    under the module-level ``src_path`` prefix, and ``thres.png`` is OCR-ed.

    The earlier version ran Tesseract a second time on the untouched input
    file and returned that result, which discarded the first OCR pass and
    made the preprocessing irrelevant to the output.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        preprocessed image.
    """
    # Read image with opencv
    img = cv2.imread(img_path)
    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise.
    # NOTE: a 1x1 structuring element leaves the image unchanged; use a
    # larger kernel if actual noise removal is wanted.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # Write image after removed noise
    cv2.imwrite(src_path + "removed_noise.png", img)
    # Optional binarisation step, currently disabled:
    #img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    # Write the image that is handed to Tesseract
    processed_path = src_path + "thres.png"
    cv2.imwrite(processed_path, img)
    # Recognize text with tesseract for python
    with Image.open(processed_path) as processed:
        return pytesseract.image_to_string(processed)
print(get_string(src_path + "dummy.png"))
Here I want to read the image from the database, apply some operations to it (such as noise removal), and finally apply pytesseract to get the text.
def GetData(request):
    """Fetch the ``Photo`` with id 1 and run it through a denoise/threshold chain.

    As written, the function neither returns a value nor stores its final
    image; ``rgb_img`` and ``new_image`` are assigned but not used afterwards.

    Args:
        request: Not referenced in the body.
    """
    # Photo.objects.get returns a model instance, not a file path
    img = Photo.objects.get(id=1)
    #wrapper = FileWrapper(open(img.file))
    # Read image with opencv
    # NOTE(review): cv2.imread receives the model instance here rather than
    # the path of the stored file - this is where the reported error arises.
    img = cv2.imread(img)
    # Apply dilation and erosion to remove some noise
    # (a 1x1 structuring element leaves the image unchanged)
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    b,g,r = cv2.split(img)
    # get b,g,r
    rgb_img = cv2.merge([r,g,b])
    # switch it to rgb
    # Denoising (applied to img, not to rgb_img)
    dst = cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21)
    img = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
    # Apply threshold to get image with only black and white
    # (the maximum output value passed here is 127, not 255)
    img = cv2.adaptiveThreshold(img, 127, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11,2)
    # Box blur with a 1x1 window
    new_image = cv2.blur(img, (1, 1))
The error comes from cv2.imread(img): imread takes a string containing the path of the image file, but you are passing a Django model instance, which is something quite different.
Assuming that your Photo model has an ImageField named image, you could fix the issue by changing
img = cv2.imread(img)
to something like
# cv2.imread needs the filesystem path of the stored file; .url would give
# the address the file is served under instead.
img = cv2.imread(img.image.path)