Pytesseract OCR doesn't recognize the digits - python

I am trying to read these images:
I have tried several options but I can't seem to read them correctly as 15/0, 30/0, 40/0.
frame = frame[900:1000, 450:500]
scale_percent = 200 # percent of original size
width = int(frame.shape[1] * scale_percent / 100)
height = int(frame.shape[0] * scale_percent / 100)
dim = (width, height)
frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
cv2.imshow("cropped", frame)
cv2.waitKey(0)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.imshow("cropped", frame)
cv2.waitKey(0)
pytesseract.pytesseract.tesseract_cmd = (
r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
results = pytesseract.image_to_data(
frame,
output_type=Output.DICT,
config="--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789",
)
# results = replace_chars(results)
print(("-").join(results["text"]), "\n")

One way of solving is using inRange thresholding
The result will be:
If you set page-segmentation-mode 6
15
0
30
0
40
0
Code:
import cv2
import pytesseract
from numpy import array
image_list = ["LZxCs.png", "W06I0.png", "vvzE5.png"]
for image in image_list:
bgr_image = cv2.imread(image)
hsv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_image, array([0, 0, 0]), array([165, 10, 255]))
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=1)
thresh = cv2.bitwise_and(dilate, mask)
text = pytesseract.image_to_string(thresh, config='--psm 6')
print(text)
The second way is applying global-threshold
If you set page-segmentation-mode 6
15
0
30
0
40
0
Code:
import cv2
import pytesseract
image_list = ["LZxCs.png", "W06I0.png", "vvzE5.png"]
for image in image_list:
bgr_image = cv2.imread(image)
gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
text = pytesseract.image_to_string(thresh, config='--psm 6')
print(text)
cv2.imwrite(f"/Users/ahx/Desktop/{image}", thresh)
cv2.imshow('', thresh)
cv2.waitKey(0)
For more, you can check the documentation

Related

Using opencv to mask the background

I'm trying to use tesseract to read text from a game with poor results.
What I would like to accomplish is to remove the background from the image so that only the text is visible to improve OCR results.
I've tried cv2.inRange, thresholding yet I can't seem to get it to work.
import numpy as np
import pytesseract
from tesserocr import PyTessBaseAPI, OEM
def _img_to_bytes(image: np.ndarray, colorspace: str = 'LAB'):
# Sets an OpenCV-style image for recognition: https://github.com/sirfz/tesserocr/issues/198
bytes_per_pixel = image.shape[2] if len(image.shape) == 3 else 1
height, width = image.shape[:2]
bytes_per_line = bytes_per_pixel * width
if bytes_per_pixel != 1 and colorspace != 'RGB':
# non-RGB color image -> convert to RGB
image = cv2.cvtColor(image, getattr(cv2, f'COLOR_{colorspace}2RGB'))
elif bytes_per_pixel == 1 and image.dtype == bool:
# binary image -> convert to bitstream
image = np.packbits(image, axis=1)
bytes_per_line = image.shape[1]
width = bytes_per_line * 8
bytes_per_pixel = 0
# else image already RGB or grayscale
return image.tobytes(), width, height, bytes_per_pixel, bytes_per_line
img = cv2.imread("ref.png")
img = ~img
clahe = cv2.createCLAHE(clipLimit=3., tileGridSize=(8,8))
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
l2 = clahe.apply(l)
lab = cv2.merge((l2,a,b))
img = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
with PyTessBaseAPI(psm=3, oem=OEM.LSTM_ONLY, path=f"ocr", lang=d2r ) as api:
api.ReadConfigFile("ocr/config.txt")
api.SetVariable("user_words_file","ocr/dict.txt")
api.SetImageBytes(*_img_to_bytes(img))
print(api.GetUTF8Text())
cv2.imshow('res',img)
cv2.waitKey()```
Inverting color may help? try this & let me know.
import cv2
image = cv2.imread("Bytelock.jpg")
image = ~image
cv2.imwrite("Bytelock.jpg",image)
Inverted image
Red varient
import numpy as np
import imutils
import cv2
img_rgb = cv2.imread('ss.jpg')
Conv_hsv_Gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
ret, mask = cv2.threshold(Conv_hsv_Gray, 0, 255,cv2.THRESH_BINARY_INV |cv2.THRESH_OTSU)
img_rgb[mask == 255] = [0, 0, 255]
cv2.imshow("red", img_rgb)
cv2.imwrite("red.jpg", img_rgb)
More sharpen? Try
import numpy as np
import imutils
import cv2
img_rgb = cv2.imread('ss.jpg')
Conv_hsv_Gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
ret, mask = cv2.threshold(Conv_hsv_Gray, 0, 255,cv2.THRESH_BINARY_INV |cv2.THRESH_OTSU)
img_rgb[mask == 255] = [0, 0, 255]
cv2.imwrite("mask.jpg", mask)
cv2.imshow("mask", mask) # show windows
cv2.waitKey(0)
**
Much more better option
**
import cv2
image = cv2.imread("ss1.jpg")
image = ~image
img = image
clahe = cv2.createCLAHE(clipLimit=3., tileGridSize=(8,8))
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
l2 = clahe.apply(l)
lab = cv2.merge((l2,a,b))
img2 = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
cv2.imshow('Increased contrast', img2)
cv2.waitKey(0)
cv2.destroyAllWindows()
More sharpen

Load multiple images with a time interval to overlay an object in a webcam feed

I'm trying to load several images from a folder so that they are processed in an exact same manner. The code below detects a blue object in webcam feed and overlays it with the template image img where the webcam frame is im0
hsv = cv2.cvtColor(im0, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, (0, 120, 120), (180, 255, 255))#<- blue # RED: (0, 120, 120), (10, 255, 255))
thresh = cv2.dilate(mask, None, iterations=2)
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0]
for contour in contours:
x, y, w, h = cv2.boundingRect(contour)
height = 480
width = 640
if y + h < height and x + w < width:
logo = cv2.resize(img, (w, h))
img2gray = cv2.cvtColor(logo, cv2.COLOR_BGR2GRAY)
_, logo_mask = cv2.threshold(img2gray, 1, 255, cv2.THRESH_BINARY)
roi = im0[y:y+h, x:x+w]
roi[np.where(logo_mask)] = 0
roi += logo
cv2.imshow(str(p), im0)#im0 2
cv2.waitKey(1) # 1 millisecond
I am wondering how should I create a timer here so that the exact same processing happens to the img2, img3 and so on?

Detecting and ignoring rectangles that fall under another rectangle and efficient cropping

I am working on a project where I take a floor plan image as input that contains 2-3 floor plans and I need to detect each floorplan and crop them and save in as a different file.
Following are the sample input and output images.
Input:
Output:
So as you can see the second output is wrongly cropped. Also I get smaller rectangles (which are part of the output images) as byproducts.
Following is the code that I am using:
import cv2
import download_imgs_R1
import floorplan_threshold_R2
import pandas as pd
import os
filename = 'project_image.csv'
df = pd.read_csv(filename)
pids = df['id']
urls = df['complete_url']
for pid,url in zip(pids,urls):
name = url.split('/')[-1]
ext = name.split('.')[-1]
filepath = './xxxx/{}/original_images/'.format(pid)
savepath = './xxxx/{}/processed_images/'.format(pid)
savename = name.split('.')[0]
save = savepath+savename+'{}.png'
if ext == 'pdf':
image_name = download_imgs_R1.extract_from_pdf(filename=name, dest=filepath)
else:
image_name = filepath+name
print(image_name)
no_padding_image, crop_img_name = floorplan_threshold_R2.remove_white_space(image_name)
feature_dict = floorplan_threshold_R2.get_img_features(no_padding_image)
cont, hier = floorplan_threshold_R2.contour_method(no_padding_image)
area_dict = floorplan_threshold_R2.max_rect(cont)
roi_area = []
print(feature_dict)
img_area = feature_dict['area']
for area in area_dict:
if area >= img_area*0.1 and area < img_area:
roi_area.append(area)
plan_no = 1
for a in roi_area:
plan = area_dict[a]
# del area_dict[a]
x,y,w,h = plan
aspect_ratio = h/w
if x <=50 or y <= 25:
roi = no_padding_image[y:y+h, x:x+w]
else:
roi = no_padding_image[y-50:y+h+10, x-20:x+w+10]
print('PID: {}, No. {}'.format(pid,plan_no))
# cv2.rectangle(no_padding_image, (x-10,y-10), (x+w+10, y+h+10), (255,255,255), 2)
# roi = cv2.copyMakeBorder(roi, 10, 10, 10, 10, cv2.BORDER_CONSTANT, None, value = [255,255,255])
# cv2.imshow('ROI-{}'.format(image_name),roi)
cv2.imwrite(save.format(plan_no),roi)
cv2.waitKey(0)
plan_no += 1
floor_plan_threshold_R2.py:
import cv2
from cv2 import dilate
from cv2 import findContours
import imutils
import numpy as np
import download_imgs_R1
def remove_white_space(filename:str):
savename = filename
img = cv2.imread(filename=filename)
orignal = img.copy()
gray = cv2.cvtColor(orignal, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (25,25), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Perform morph operations, first open to remove noise, then close to combine
noise_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, noise_kernel, iterations=2)
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
close = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, close_kernel, iterations=3)
# Find enclosing boundingbox and crop ROI
coords = cv2.findNonZero(close)
x,y,w,h = cv2.boundingRect(coords)
# cv2.rectangle(orignal, (x, y), (x + w, y + h), (36,255,12), 2)
if x <= 50 or y <= 10:
crop = orignal[y:y+h, x:x+w]
else:
crop = orignal[y-10:y+h+10, x-60:x+w+10]
cv2.imwrite(savename,crop)
# cv2.imshow('Removed White space (Preprocess - 1)',crop)
cv2.waitKey(0)
return crop, savename
def get_img_features(image, filename:str=None,resize:bool=False):
if not filename:
res = image.copy()
else:
res = cv2.imread(filename)
img_height, img_width, img_channel = image.shape
if resize is True and image.shape[0] > 800:
res = imutils.resize(res, height=720)
img_height, img_width, img_channel = res.shape
img_area = img_width * img_height
img_aspect_ratio = img_width/img_height
img_features = {'height':img_height, 'width':img_width, 'area':img_area, 'aspect_ratio':img_aspect_ratio}
return img_features
def mask_method(image, filename:str=None):
if not filename:
res = image.copy()
else:
res = cv2.imread(filename)
hsv_plan = cv2.cvtColor(res, cv2.COLOR_BGR2HSV)
#define range for blue color (HSV Range)
blue_min = np.array([14,100,76])
blue_max = np.array([130,255,255])
bluemask = cv2.inRange(hsv_plan,blue_min,blue_max)
blue_output = cv2.bitwise_and(hsv_plan, hsv_plan, mask=bluemask)
grey_mask = cv2.cvtColor(blue_output, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(grey_mask, 100, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
kernel = np.ones((3,3), np.uint8)
dil = dilate(thresh, kernel, iterations=2)
cont,hier = findContours(dil, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return cont, hier
def contour_method(image, filename:str=None):
if not filename:
res = image.copy()
else:
res = cv2.imread(filename)
grey_plan = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
ret2, thresh2 = cv2.threshold(grey_plan, 160, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((3,3), np.uint8)
dil_grey = dilate(thresh2, kernel, iterations=2)
cont,hier = findContours(dil_grey, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return cont,hier
def max_rect(cntrs):
ar = {}
for cnt in cntrs:
x,y,w,h = cv2.boundingRect(cnt)
area = w*h
ar[area] = (x,y,w,h)
return ar
I need to find a generic solution for cropping the image as just providing a number for cropping will affect other images as well.

Rasperry Pi handwriting recognition

I'm trying to make a handwriting recognition system using Raspberry Pi.My code detects the text but doesn't print it to screen.When I run the code,the img file detects the handwritings as they appear with blue borders.Another interesting thing is that it detects only in presence of source of light,I mean it's daytime and the environment isn't dark.When I don't turn on the light,the test1.jpg file is completely dark and handwritings aren't detected(I think this topic is about the camera I use or my environment isn't illuminated enough).
I'm doing something wrong but couldn't figure it out.
I'm newby to programming.I hope that someone explains me these stuffs.
Can you help me?
Here is my code.
from picamera import PiCamera
import cv2
import pytesseract
from PIL import Image
from time import sleep
from picamera.array import PiRGBArray
import numpy as np
print ("Capturing Image")
camera = PiCamera()
camera.resolution = (640, 480)
camera.brightness = 50
camera.contrast = 70
camera.framerate = 30
rawCapture = PiRGBArray(camera, size=(640, 480))
camera.capture('/home/pi/Desktop/Photos/test1.jpg')
print("Captured")
sleep(1)
print("Wait, Processing")
img = cv2.imread('/home/pi/Desktop/Photos/test1.jpg', cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
items = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]
base = np.zeros(thresh.shape, dtype=np.uint8)
base = cv2.bitwise_not(base)
max_area = 0
for i in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[i])
ratio = h / w
area = cv2.contourArea(contours[i])
cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)
if 1 < ratio < 3:
max_area = max(area, max_area)
#print("area: " + str(area) + ", max area: " + str(max_area) + ", ratio: " + str(ratio))
# if 1000 < area < max_area / 2:
if 1000 < area < 40000:
mask = np.zeros(thresh.shape, dtype=np.uint8)
cv2.drawContours(mask, [contours[i]], -1, color=255, thickness=-1)
mean = cv2.mean(thresh, mask=mask)
segment = np.zeros((h, w), dtype=np.uint8)
segment[:h, :w] = thresh[y:y + h, x:x + w]
if mean[0] > 150:
# white, invert
segment = cv2.bitwise_not(segment)
base[y:y + h, x:x + w] = segment[:h, :w]
cv2.imshow("base", base)
cv2.drawContours(img, [contours[i]], 0, (255, 0, 0), 2)
cv2.waitKey(0)
custom_config = r'-l eng --oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ " '
text = pytesseract.image_to_string(base, config=custom_config)
print (text)
cv2.imshow("img", img)
cv2.imshow("base", base)
cv2.waitKey(0)
cv2.destroyAllWindows()
I think there's something wrong with the pytesseract.image_to_string(base, config=custom_config) part.
By the way I'm using raspberry pi 4 2 gb,32 bit os and python 3.7.My camera is v1.3

Why doesn't my OCR opencv in python code work at al?

i am new to this forum so sorry if this is question is too long for this place. I just started coding and i never really had a lesson so i am searching wat i can find on the internet. I first looked at an youtube video where he tried this on python 2, so thats why it's sort of messy but i really want it to work.
import numpy as np
import cv2
import math
from PIL import Image
filepathinQ = "images/isomeren.jpg"
filepathinQPNG = 'images/imageinq.png'
filepathPNG = "images/atomen.png"
###First pictuce scan
class shapeRecognition():
def __init__(self, img):
self.img = img
def Voorbeeldenmaker(self):
im = Image.open('images/isomeren.png')
image = np.array(im)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
max_dimension = max(image.shape)
scale = 700/max_dimension
image = cv2.resize(image, None, fx=scale, fy=scale)
lower = np.array([0,0,0], dtype=np.uint8)
upper = np.array([15,15,15], dtype=np.uint8)
mask = cv2.inRange(self.img, lower, upper)
(flags, contours, h) = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(mask, contours, 0,(120,0,145), 3)
cv2.imshow("vorm", mask)
return contours
#second picture scan
class shape2Recognition():
def __init__(self2, img2):
self2.img2 = img2
def inquestion(self2):
im = Image.open(filepathinQPNG)
image = np.array(im)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
max_dimension = max(image.shape)
scale = 700/max_dimension
image = cv2.resize(image, None, fx=scale, fy=scale)
lower2 = np.array([0,0,0], dtype=np.uint8)
upper2 = np.array([15,15,15], dtype=np.uint8)
mask2 = cv2.inRange(image, lower2, upper2)
(flags, contours2, h) = cv2.findContours(mask2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(mask2, contours2, 4,(120,0,145), 3)
cv2.imshow("vorm2", mask2)
return contours2
#threshold for the second picture
#to make it in black and white
def treshold(imageArray):
newAr = imageArray
img = Image.open(filepathinQ)
img = img.convert("RGBA")
datas = img.getdata()
newData = []
for eachRow in newAr:
for eachPix in eachRow:
if ((int(eachPix[0]) + int(eachPix[1]) + int(eachPix[2]))/3) > 220:
newData.append((255,255,255,255))
else:
newData.append((0,0,0,255))
img.putdata(newData)
img.save('images/imageinq.png', "PNG")
#comparing the two
def zijnzegelijk(image):
x = 0
while x < 21:
cnt = contours[x]
cnt2 = contours2[4]
ret = cv2.matchShapes(cnt, cnt2 ,1, 0.0)
print(ret)
print("\n")
print(x)
print("\n")
x+= 1
i = Image.open(filepathinQ)
iAr = np.array(i)
treshold(iAr)
img2 = cv2.imread(filepathinQPNG)
shape2Recognition = shape2Recognition(img2)
contours2 = shape2Recognition.inquestion()
i2 = Image.open(filepathinQPNG)
iAr2 = np.array(i2)
img = cv2.imread(filepathPNG)
shapeRecognition = shapeRecognition(img)
contours = shapeRecognition.Voorbeeldenmaker()
zijnzegelijk(iAr2)

Categories