I have captcha image as attached in this question.
I am trying to extract the text in the image. My following code is able to make all areas except the text and lines in white color
import cv2
from PIL import Image
import numpy as np
image1 = Image.open("E:\\python\\downloaded\\captcha.png").convert('L')
image2 = Image.open("E:\\python\\downloaded\\captcha.png").convert('L')
pix = image1.load()
for column in range(0, image1.height):
for row in range(0, image1.width):
if pix[row, column] >= 90:
pix[row, column] = 255
cv2.imshow("1", np.array(image2))
cv2.imshow("2", np.array(image1))
cv2.waitKey(0)
But I am trying to remove the line crossing the text, but it does not seem to work. I tried with below portion of code which is posted on other question in StackOverflow. But it does not work
def eliminate_zeros(self,vector):
return [(dex,v) for (dex,v) in enumerate(vector) if v!=0 ]
def get_line_position(self,img):
sumx=img.sum(axis=0)
list_without_zeros=self.eliminate_zeros(sumx)
min1,min2=heapq.nsmallest(2,list_without_zeros,key=itemgetter(1))
l=[dex for [dex,val] in enumerate(sumx) if val==min1[1] or val==min2[1]]
mindex=[l[0],l[len(l)-1]]
cols=img[:,mindex[:]]
col1=cols[:,0]
col2=cols[:,1]
col1_without_0=self.eliminate_zeros(col1)
col2_without_0=self.eliminate_zeros(col2)
line_length=len(col1_without_0)
dex1=col1_without_0[round(len(col1_without_0)/2)][0]
dex2=col2_without_0[round(len(col2_without_0)/2)][0]
p1=[dex1,mindex[0]]
p2=[dex2,mindex[1]]
return p1,p2,line_length
def remove_line(self,p1,p2,LL,img):
m=(p2[0]-p1[0])/(p2[1]-p1[1]) if p2[1]!=p1[1] else np.inf
w,h=len(img),len(img[0])
x=list(range(h))
y=list(map(lambda z : int(np.round(p1[0]+m*(z-p1[1]))),x))
img_removed_line=list(img)
for dex in range(h):
i,j=y[dex],x[dex]
i=int(i)
j=int(j)
rlist=[]
while i>=0 and i<len(img_removed_line)-1:
f1=i
if img_removed_line[i][j]==0 and img_removed_line[i-1][j]==0:
break
rlist.append(i)
i=i-1
i,j=y[dex],x[dex]
i=int(i)
j=int(j)
while i>=0 and i<len(img_removed_line)-1:
f2=i
if img_removed_line[i][j]==0 and img_removed_line[i+1][j]==0:
break
rlist.append(i)
i=i+1
if np.abs(f2-f1) in [LL+1,LL,LL-1]:
rlist=list(set(rlist))
for k in rlist:
img_removed_line[k][j]=0
return img_removed_line
I am new to CV and can someone help here to suggest the way?. Original and partially processed image files are attached here.
My approach is based on the fact that the line is thinner than the characters. In this example I used blurring, threshold and morphology to get rid of the line between the characters. The result is this:
import cv2
import numpy as np
image = cv2.imread('captcha.png')
image = cv2.blur(image, (3, 3))
ret, image = cv2.threshold(image, 90, 255, cv2.THRESH_BINARY)
image = cv2.dilate(image, np.ones((3, 1), np.uint8))
image = cv2.erode(image, np.ones((2, 2), np.uint8))
cv2.imshow("1", np.array(image))
cv2.waitKey(0)
You can use CV2 functions like threshold, dilate, bitwise_and and bitwise_not for removing unwanted lines from captcha
import numpy as np
import cv2
img = cv2.imread('captcha.jpg',0)
horizontal_inv = cv2.bitwise_not(img)
masked_img = cv2.bitwise_and(img, img, mask=horizontal_inv)
masked_img_inv = cv2.bitwise_not(masked_img)
kernel = np.ones((5,5),np.uint8)
dilation = cv2.dilate(masked_img_inv,kernel,iterations = 3)
ret,thresh2 = cv2.threshold(dilation,254,255,cv2.THRESH_BINARY_INV)
thresh2=cv2.bitwise_not(thresh2)
cv2.waitKey(0)
cv2.destroyAllWindows()
Related
seems resolution of image effect the output is success or not
usually the image's resolution/quality from production line is like test image 1, instead of change camera quality, is there any way to make success rate higher? like improve code make simple AI to help detect or something? I need a hand thanks.
the demo .py code I found from tutorial
from PIL import Image
import pytesseract
img = Image.open('new_003.png')
text = pytesseract.image_to_string(img, lang='eng')
print("size")
print(img.size)
print(text)
(pic) test image 1: https://ibb.co/VLsM9LL
size
(122, 119)
# the output is:
R carac7
(pic) test image 2: https://ibb.co/XyRcf45
size
(329, 249)
# the output is:
R1 oun,
2A
R ca7ac2
(pic) test image 3: https://ibb.co/fNtDRc7
this one just for test but is the only one 100% correct
size
(640, 640)
# the output is:
BREAKING THE STATUE
i have always known
i just didn't understand
the inner conflictions
arresting our hands
gravitating close enough
expansive distamce between
i couldn't give you more
but i meant everything
when the day comes
you find your heart
wants something more
than a viece and a part
your life will change
like astatue set free
to walk among us
to created estiny
we didn't break any rules
we didn't make mistakes
making beauty in loving
making lovine for days
SHILOW
I tried to find out/proof the solution can only be the image resolution or there can be other alternative way to solve this issue
I try Dilation and Erosion to image, hoped can get more clear image for OCR recognize like link demo pic https://ibb.co/3pDgDnF
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from IPython.display import clear_output
def show_img(img, bigger=False):
if bigger:
plt.figure(figsize=(15,15))
image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(image_rgb)
plt.show()
def sharpen(img, sigma=100):
# sigma = 5、15、25
blur_img = cv2.GaussianBlur(img, (0, 0), sigma)
usm = cv2.addWeighted(img, 1.5, blur_img, -0.5, 0)
return usm
def img_processing(img):
# do something here
img = sharpen(img)
return img
img = cv2.imread("/home/joy/桌面/all_pic_OCR/simple_pic/03.png")
cv2.imshow('03', img) # Original image
img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
img = cv2.dilate(img, kernel) # tried Dilation
cv2.imshow('image_after_Dilation', img) # image after Dilation
img = cv2.erode(img, kernel) # tried Erosion
cv2.imshow('then_Erosion', img) # image after Erosion
cv2.waitKey(0)
cv2.destroyAllWindows()
result: https://ibb.co/TbZjg3d
so still trying to achieve python OCR recognize image into text with 99.9999% correct
I am making an AI that can play the game connect 4, from a picture of one state of the game e.g : click to see
This script below, is detecting red elements from a picture:
import cv2
import numpy as np
img = cv2.imread('connect.png')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
#get red color
lower_range = np.array([169, 100, 100])
upper_range = np.array([189, 255, 255])
mask = cv2.inRange(hsv, lower_range, upper_range)
cv2.imshow('image', img)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
I would like to insert these data into a 2D array to be able to use this array as a game state and determine which move the AI should make.
I have tried to find a solution on Stack Overflow and on Internet but overall I didn't find anything about it.
This is a way to read a picture and to cast it in a 2-dimensional numpy array with np.array(in_image):
import numpy as np
import skimage
from skimage import io, transform
path = "C:/my/path/"
pic = 'myPic.png'
imgName = path+pic
in_image_0 = skimage.io.imread(imgName) # read the image
in_image_1 = skimage.color.rgb2gray(in_image_0) # transform it to grey-scale
in_image_2 = skimage.transform.rescale(in_image_1, 0.5) # change the resolution
in_image_3 = np.flipud(np.array(in_image_2)) # make a numpy array and flip it up/down
Hi,
I want to get difference between the first image and the second,
I want to cut the numbers from the image.
I am getting the difference between the pixels but the result is:
But what I want is:
Is it possible cut the image like this?
Here is what I did:
import cv2
import numpy as np
from PIL import Image
import pytesseract
import os
import sys
img = Image.open("recherche.png").convert("RGBA")
pattern = Image.open("pattern.png").convert("RGBA")
pixels = img.load()
pixelsPattern = pattern.load()
new = Image.open("new.png").convert("RGBA")
pixelNew = new.load()
for i in range(img.size[0]):
for j in range(img.size[1]):
if(pixels[i,j] != pixelsPattern[i,j]):
pixelNew[i,j] = pixels[i,j]
I am directly getting the bit difference, but It is not giving me what I want, I tried medianBlur and similar things to do it like 4th image but I am not able to make it sharp like in the 4th image. (I created 4th image manually with paint.)
It's a challenging problem, because the pattern was deliberately designed to make it difficult to solve by software.
I suggest the following steps:
Convert img and pattern to binary images (gray levels are not part of the number).
Compute absolute difference of img and pattern.
Apply closing morphological operation for closing small gaps.
Here is the code:
import cv2
import numpy as np
# Read image and pattern as Grayscale images (output of cv2.imread is numpty array).
img = cv2.imread("recherche.png", cv2.IMREAD_GRAYSCALE)
pattern = cv2.imread("pattern.png", cv2.IMREAD_GRAYSCALE)
# Convert img and pattern to binary images (all values above 1 goes to 255)
_, img = cv2.threshold(img, 1, 255, cv2.THRESH_BINARY)
_, pattern = cv2.threshold(pattern, 1, 255, cv2.THRESH_BINARY)
# Compute absolute difference of img and pattern (result is 0 where equal and 255 when not equal)
dif = cv2.absdiff(img, pattern)
# Apply closing morphological operation
dif = cv2.morphologyEx(dif, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5)));
dif = 255 - dif # Inverse polarity
# Display result
cv2.imshow('dif', dif)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
As you can see, solution is not perfect, but getting a perfect result is very challenging...
I use NumPy to create a new 2D array with 0 on the border and the array of the original image inside. I print the new array, it's what I expect. But when I plot it, it's all black.
I tried for-loop and NumPy, it's useless.
import cv2
import numpy as np
path = 'test.jpg'
img = cv2.imread(path,0)
print(img)
height,width = img.shape # 440 * 455
new_arr = np.zeros((height+2,width+2), dtype = int)
#for i in range(height):
# for j in range(width):
# new_arr[i+1][j+1] = img[i][j]
new_arr[1:height+1,1:width+1] = img
print(new_arr)
cv2.imshow('new image',new_arr)
cv2.waitKey(0)
cv2.destroyAllWindows()
The original image is here:
I expect an image with black border (just 1 pixel), and the inside is the original image to do median filtering, but the actual output is a black image.
Not sure how you are getting black image, as your code should throw an error. You need to set dtype value in proper namespace (np.) and the value should be uint8:
import cv2
import numpy as np
path = 'test.png'
img = cv2.imread(path,0)
height,width = img.shape
new_arr = np.zeros((height+2,width+2), dtype = np.uint8)
new_arr[1:height+1,1:width+1] = img
print(new_arr)
cv2.imshow('new image',new_arr)
cv2.waitKey(0)
cv2.destroyAllWindows()
Please note that the image you have given is png, not jpg. Code tested on that image.
I am working on a project where I should apply and OCR on some documents.
The first step is to threshold the image and let only the writing (whiten the background).
Example of an input image: (For the GDPR and privacy reasons, this image is from the Internet)
Here is my code:
import cv2
import numpy as np
image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
h = image.shape[0]
w = image.shape[1]
for y in range(0, h):
for x in range(0, w):
if image[y, x] >= 120:
image[y, x] = 255
else:
image[y, x] = 0
cv2.imwrite('output.jpg', image)
Here is the result that I got:
When I applied pytesseract to the output image, the results were not satisfying (I know that an OCR is not perfect). Although I tried to adjust the threshold value (in this code it is equal to 120), the result was not as clear as I wanted.
Is there a way to make a better threshold in order to only keep the writing in black and whiten the rest?
After digging deep in StackOverflow questions, I found this answer which is about removing watermark using opencv.
I adapted the code to my needs and this is what I got:
import numpy as np
import cv2
image = cv2.imread('a.png')
img = image.copy()
alpha =2.75
beta = -160.0
denoised = alpha * img + beta
denoised = np.clip(denoised, 0, 255).astype(np.uint8)
#denoised = cv2.fastNlMeansDenoising(denoised, None, 31, 7, 21)
img = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)
h = img.shape[0]
w = img.shape[1]
for y in range(0, h):
for x in range(0, w):
if img[y, x] >= 220:
img[y, x] = 255
else:
img[y, x] = 0
cv2.imwrite('outpu.jpg', img)
Here is the output image:
The good thing about this code is that it gives good results not only with this image, but also with all the images that I tested.
I hope it helps anyone who had the same problem.
You can use adaptive thresholding. From documentation :
In this, the algorithm calculate the threshold for a small regions of the image. So we get different thresholds for different regions of the same image and it gives us better results for images with varying illumination.
import numpy as np
import cv2
image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.medianBlur(image ,5)
th1 = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
cv2.THRESH_BINARY,11,2)
th2 = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY,11,2)
cv2.imwrite('output1.jpg', th1 )
cv2.imwrite('output2.jpg', th2 )