so my code is using opencv with tesseract to extract a text from a image
and what i want to do is to blacklist some parts of the image so the code don't check if there is a text here
the code :
import numpy as np
img = cv2.imread('test.jpeg')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
sensitivity = 70
lower_range = np.array([0,0,255-sensitivity])
upper_range = np.array([255,sensitivity,255])
mask = cv2.inRange(hsv, lower_range, upper_range)
cv2.imshow('image', img)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
Base image:
Parts of the image i want to blacklist (in red):
can someone help me doing this if this is possible ?
Your objective here is to extract only the text from the resulting mask image and you already did most of the heavy lifting. I have tried using easyOCR library on mask which gives the result you are looking for.
Using mask as input image, here is the remaining code:
# import library and initialize the reader
from easyocr import Reader
reader = Reader(['en'])
# pass input image
results = reader.readtext(mask)
Output:
[([[93, 85], [245, 85], [245, 129], [93, 129]], 'SWHSY', 0.9746386534414473)]
It returns the bounding box position of the text, the text itself along with confidence score.
The following snippet allows you to draw the bounding box around the detected text:
for (bbox, text, prob) in results[:5]:
(tl, tr, br, bl) = bbox
top_left = (int(tl[0]), int(tl[1]))
bottom_right = (int(br[0]), int(br[1]))
img = cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 3)
img = cv2.putText(img, text, (tl[0], tl[1] - 20), cv2.FONT_HERSHEY_SIMPLEX, 1.1, (255, 255, 0), 5)
Related
My OpenCV code works just fine. It find the license plate, extracts a black and white version of it, using the contours, and then when I pass it to pytesseract, it won't read any of the letters. I've tracked the program throughout each line of the code and the OpenCV works fine, but pytesseract won't extract the text from the image. There are no errors, it just doesn't read any text. The license plate is mine.
import cv2
# pip install imutils
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
# Read the image file
image = cv2.imread('LP.jpg')
# image = imutils.resize(image, width=500)
# Convert to Grayscale Image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Removes Noise
gray_image = cv2.bilateralFilter(gray_image, 11, 17, 17)
# Canny Edge Detection
canny_edge = cv2.Canny(gray_image, 100, 200)
# Find contours based on Edges
# The code below needs an - or else you'll get a ValueError: too many values to unpack (expected 2) or a numpy error
_, contours, new = cv2.findContours(canny_edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:30]
# # Initialize license Plate contour and x,y coordinates
contour_with_license_plate = None
license_plate = None
x = None
y = None
w = None
h = None
# Find the contour with 4 potential corners and create a Region of Interest around it
for contour in contours:
# Find Perimeter of contour and it should be a closed contour
perimeter = cv2.arcLength(contour, True)
approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
# This checks if it's a rectangle
if len(approx) == 4:
contour_with_license_plate = approx
x, y, w, h = cv2.boundingRect(contour)
license_plate = gray_image[y:y + h, x:x + w]
break
# # approximate_contours = cv2.drawContours(image, [contour_with_license_plate], -1, (0, 255, 0), 3)
# Text Recognition
text = pytesseract.image_to_string(license_plate, lang='eng')
print(text)
# Draw License Plate and write the Text
image = cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 3)
image = cv2.putText(image, text, (x-100, y-50), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 6, cv2.LINE_AA)
print("License Plate: ", text)
cv2.imshow("License Plate Detection", image)
cv2.waitKey(0)
Here is my partial answer, maybe you can perfect it.
Apply adaptive-threshold + bitwise-not operations to the license_plate variable.
The result will be:
Now if you read it:
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)
Result:
277 BOY
Unfortunately Q is recognized as O.
Code: (Just replace text recogniiton commented part with the below)
thr = cv2.adaptiveThreshold(license_plate, 252, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV, 91, 93)
bnt = cv2.bitwise_not(thr)
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)
I'm trying to detect colorful dots on a white/gray background. The dots are 3 different colors (yellow, purple, blue) of different sizes. Here is the original image:
I converted the image to HSV and found lower and upper bounds for each image then applied contour detection to find those dots. The following code detects most of the dots:
import cv2
import numpy as np
from matplotlib import pyplot as plt
img = cv2.imread('image1_1.png')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
lower_yellow = np.array([22,25,219])
upper_yellow = np.array([25,75,225])
lower_purple = np.array([141,31,223])
upper_purple = np.array([143,83,225])
lower_blue = np.array([92,32,202])
upper_blue = np.array([96,36,208])
mask_blue = cv2.inRange(hsv, lower_blue, upper_blue)
mask_purple = cv2.inRange(hsv, lower_purple, upper_purple)
mask_yellow = cv2.inRange(hsv, lower_yellow, upper_yellow)
res_blue = cv2.bitwise_and(img,img, mask=mask_blue)
res_purple = cv2.bitwise_and(img,img, mask=mask_purple)
res_yellow = cv2.bitwise_and(img,img, mask=mask_yellow)
gray_blue = cv2.cvtColor(res_blue, cv2.COLOR_BGR2GRAY)
gray_purple = cv2.cvtColor(res_purple, cv2.COLOR_BGR2GRAY)
gray_yellow = cv2.cvtColor(res_yellow, cv2.COLOR_BGR2GRAY)
_,thresh_blue = cv2.threshold(gray_blue,10,255,cv2.THRESH_BINARY)
_,thresh_purple = cv2.threshold(gray_purple,10,255,cv2.THRESH_BINARY)
_,thresh_yellow = cv2.threshold(gray_yellow,10,255,cv2.THRESH_BINARY)
contours_blue, hierarhy1 = cv2.findContours(thresh_blue,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
contours_purple, hierarhy2 = cv2.findContours(thresh_purple,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
contours_yellow, hierarhy3 = cv2.findContours(thresh_yellow,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
result = img.copy()
cv2.drawContours(result, contours_blue, -1, (0, 0, 255), 2)
cv2.drawContours(result, contours_purple, -1, (0, 0, 255), 2)
cv2.drawContours(result, contours_yellow, -1, (0, 0, 255), 2)
cv2.imwrite("_allContours.jpg", result)
Here are the detected contours:
The problem is that some of the colored dots are not detected. I understand by fine-tuning the color ranges (lower and upper) it's possible to detect more dots. But that is very time consuming and not generalizable to similar images. For example the following image looks similar to the first image above and has the same colorful dots but the background is slightly different, once I ran it through above code it was not able to detect even one of the dots. Am I on the right track? Is there a more scalable and reliable solution with less need to tune color parameters in order to solve this problem? Here is the other image I tried:
I would suggest simply using adaptiveThreshold in Python/OpenCV
import cv2
import numpy as np
# read image
img = cv2.imread("dots.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# do adaptive threshold on gray image
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 25, 6)
# write results to disk
cv2.imwrite("dots_thresh.jpg", thresh)
# display it
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
enter image description here from PIL import Image
import pytesseract
from pdf2image import convert_from_path
import os
import pandas as pd
import numpy as np
img = cv2.imread(filename)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imwrite('grey.png',gray)
binary,thresh1 = cv2.threshold(gray, 0, 255,cv2.THRESH_OTSU|cv2.THRESH_BINARY_INV)
cv2.imwrite('Thresh1.png',thresh1)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
dilation = cv2.dilate(thresh1, rect_kernel, iterations = 2)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
ROI_number = 0
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
ROI=im2[y:y+h, x:x+w]
print(str(w),str(h))
#cv2.putText(im2, str(h), (x,y - 10 ), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (255, 0, 0), 1)
#cv2.putText(im2, str(w), (x,y + 10 ), cv2.FONT_HERSHEY_SIMPLEX, 0.1, (0, 0, 255), 1)
#cv2.imwrite('ROI_{}.png'.format(ROI_number),ROI)
cv2.rectangle(im2,(x,y),(x+w,y+h),(36,255,12),1)
ROI_number += 1
cv2.imwrite('contours1.png',im2)
How to make a filter which detect the bold lines part from image Bold text line should be the output for me .Don't know to approach this thing but i have a suggestion to get that thing in order of Binary inversion Threshold covert to get the white bigger part should come from that need suggestions regarding this Problem.
I am trying to extract the number from this image using MSER contours.
Using this code..
import cv2
import numpy as np
mser = cv2.MSER_create()
img = cv2.imread('C:\\Users\\Link\\Desktop\\7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
vis = img.copy()
regions, _ = mser.detectRegions(gray)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
poly = cv2.polylines(vis, hulls, 1, (0, 255, 0))
mask = np.zeros((img.shape[0], img.shape[1], 1), dtype=np.uint8)
mask = cv2.dilate(mask, np.ones((150, 150), np.uint8))
for i,contour in enumerate(hulls):
cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)
text_only = cv2.bitwise_and(img, img, mask=mask)
cv2.imwrite('poly{}.png'.format(i), text_only)
cv2.imshow('img', vis)
cv2.waitKey(0)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.imshow('text', text_only)
cv2.waitKey(0)
...I obtain this as image (apart from the whole image itself as result of MSER detection):
How can I made the background transparent ? All that I found about this procedure, which use Opencv and Python, is this other question: NumPy/OpenCV 2: how do I crop non-rectangular region?
Applying the code suggested there produce the next output, which is not what I would like to have:
The goal is to have something like this:
If it's not possible at that level, atleast how can I turn the black area into transparent ?
Thanks
I have an invoice image, and I want to detect the text on it. So I plan to use 2 steps: first is to identify the text areas, and then using OCR to recognize the text.
I am using OpenCV 3.0 in python for that. I am able to identify the text(including some non text areas) but I further want to identify text boxes from the image(also excluding the non-text areas).
My input image is:
And the output is:
I am using the below code for this:
img = cv2.imread('/home/mis/Text_Recognition/bill.jpg')
mser = cv2.MSER_create()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #Converting to GrayScale
gray_img = img.copy()
regions = mser.detectRegions(gray, None)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
cv2.polylines(gray_img, hulls, 1, (0, 0, 255), 2)
cv2.imwrite('/home/mis/Text_Recognition/amit.jpg', gray_img) #Saving
Now, I want to identify the text boxes, and remove/unidentify any non-text areas on the invoice. I am new to OpenCV and am a beginner in Python. I am able to find some examples in MATAB example and C++ example, but If I convert them to python, it will take a lot of time for me.
Is there any example with python using OpenCV, or can anyone help me with this?
Below is the code
# Import packages
import cv2
import numpy as np
#Create MSER object
mser = cv2.MSER_create()
#Your image path i-e receipt path
img = cv2.imread('/home/rafiullah/PycharmProjects/python-ocr-master/receipts/73.jpg')
#Convert to gray scale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
vis = img.copy()
#detect regions in gray scale image
regions, _ = mser.detectRegions(gray)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
cv2.polylines(vis, hulls, 1, (0, 255, 0))
cv2.imshow('img', vis)
cv2.waitKey(0)
mask = np.zeros((img.shape[0], img.shape[1], 1), dtype=np.uint8)
for contour in hulls:
cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)
#this is used to find only text regions, remaining are ignored
text_only = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow("text only", text_only)
cv2.waitKey(0)
This is an old post, yet I'd like to contribute that if you are trying to extract all the texts out of an image, here is the code to get that text in an array.
import cv2
import numpy as np
import re
import pytesseract
from pytesseract import image_to_string
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
from PIL import Image
image_obj = Image.open("screenshot.png")
rgb = cv2.imread('screenshot.png')
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
#threshold the image
_, bw = cv2.threshold(small, 0.0, 255.0, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# get horizontal mask of large size since text are horizontal components
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# find all the contours
contours, hierarchy,=cv2.findContours(connected.copy(),cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#Segment the text lines
counter=0
array_of_texts=[]
for idx in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[idx])
cropped_image = image_obj.crop((x-10, y, x+w+10, y+h ))
str_store = re.sub(r'([^\s\w]|_)+', '', image_to_string(cropped_image))
array_of_texts.append(str_store)
counter+=1
print(array_of_texts)