I have an image that was converted from PDF to PNG. The converted image contains several keywords that I want to extract using Tesseract OCR.
Right now, I have to determine each ROI manually in order to crop it. Since I have more than five ROIs to apply, what would be the most efficient way to define them instead of finding their exact locations by trial and error?
Below is the code:
def cropped(self, event):
    """Crop the fixed regions of interest out of ``Output.png`` for OCR.

    Every region is written to its own PNG file and also kept in memory,
    so the OCR step can use the crops without reading them back from disk.

    Args:
        event: The triggering GUI event; it is not used by this method.

    Raises:
        FileNotFoundError: If ``Output.png`` cannot be read.
    """
    # One entry per ROI: (row_start, row_end, col_start, col_end, output file).
    # NumPy images are indexed [row, column], i.e. [y, x], so the bounds are
    # named by axis instead of the misleading x/y/w/h of the first draft.
    regions = [
        (405, 425, 20, 230, "Cropped.png"),
        (305, 525, 30, 330, "Cropped1.png"),
    ]

    # open the converted image; cv2.imread returns None instead of raising
    image = cv2.imread("Output.png")
    if image is None:
        raise FileNotFoundError("Output.png could not be read")

    # perform image cropping and save each cropped image
    crops = []
    for row_start, row_end, col_start, col_end, out_name in regions:
        crop = image[row_start:row_end, col_start:col_end]
        cv2.imwrite(out_name, crop)
        crops.append(crop)

    ## Do the text extraction here: pass each entry of `crops` to the OCR engine
You can use mouse events to select multiple ROIs interactively and crop the image based on the selected locations:
#!/usr/bin/env python3
import argparse
import cv2
import numpy as np
from PIL import Image
import os
# Module-level state shared by the mouse callback and the display loop.
drawing = False   # True while the left mouse button is held down
ix = iy = -1      # corner where the current drag started
refPt = []        # corner points of the rectangle being drawn
ROIRegion = []    # one corner-point list per selected region
# Placeholders; both are replaced by image arrays once the file is loaded.
img = ""
clone = ""
# mouse callback function
def draw_rectangle(event, x, y, flags, param):
    """Mouse callback that lets the user drag out rectangular ROIs.

    Left button down starts a new region and registers it in ``ROIRegion``;
    moving the mouse while the button is held previews the rectangle on a
    fresh copy of ``clone``; left button up finalises the region.

    A finished region is stored as ``[(left, top), (right, bottom)]`` with
    the corners ordered and clamped to the image, so it can be used directly
    as ``clone[top:bottom, left:right]`` whichever way the user dragged.

    Args:
        event: OpenCV mouse event code.
        x: Column of the mouse pointer.
        y: Row of the mouse pointer.
        flags: Unused.
        param: Unused.
    """
    global ix, iy, drawing, img, refPt

    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
        refPt = [(x, y)]
        # The same list object is completed in place on button release.
        ROIRegion.append(refPt)
    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            # Redraw on a clean copy so earlier previews do not pile up.
            # The old `a != x | b != y` test parsed as a chained comparison
            # on `x | b` and painted a filled black box at many positions,
            # so that branch is dropped.
            img = clone.copy()
            cv2.rectangle(img, (ix, iy), (x, y), (0, 255, 0), 3)
    elif event == cv2.EVENT_LBUTTONUP:
        if not drawing:
            # Release without a matching press: nothing to finalise.
            return
        drawing = False

        # Order and clamp the corners. A negative or reversed bound would
        # otherwise give a wrapped-around or empty slice when cropping.
        height, width = clone.shape[:2]
        left, right = sorted((ix, x))
        top, bottom = sorted((iy, y))
        left, right = max(left, 0), min(right, width)
        top, bottom = max(top, 0), min(bottom, height)
        refPt[:] = [(left, top), (right, bottom)]

        img = clone.copy()
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

# load the image, clone it, and setup the mouse callback function
img = cv2.imread(args["image"])
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise SystemExit("Could not read image: {}".format(args["image"]))
clone = img.copy()
cv2.namedWindow('image')
cv2.setMouseCallback('image', draw_rectangle)

# Display loop: "r" removes the most recent region, <Esc> finishes.
while True:
    cv2.imshow('image', img)
    k = cv2.waitKey(1) & 0xFF
    if k == ord("r"):
        # Guard against pressing "r" with nothing selected. Removing the
        # region from ROIRegion is enough; refPt is reset on the next click.
        if ROIRegion:
            ROIRegion.pop()
        img = clone.copy()
    elif k == 27:
        break
cv2.destroyAllWindows()

# Do your cropping here
rois = []  # RGB crops, one per valid region, in selection order
for region in ROIRegion:
    if len(region) != 2:
        # Region was started but never completed (no button release).
        continue
    (x0, y0), (x1, y1) = region
    # Order the corners so that dragging up/left still gives a valid slice,
    # and keep the start indices non-negative so slicing cannot wrap around.
    left, right = sorted((x0, x1))
    top, bottom = sorted((y0, y1))
    left, top = max(left, 0), max(top, 0)
    if right <= left or bottom <= top:
        # Zero-area selection (plain click): nothing to crop.
        continue
    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
    roi = clone[top:bottom, left:right]
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    rois.append(roi)
Here is one way in Python/OpenCV.
Read the input
Threshold on box outline color
Apply morphology to ensure closed
Get the external contours
Loop over each contour, get its bounding box, crop the region in the input and write the output
Input:
import cv2
import numpy as np
# Load the picture that contains the outlined text boxes.
img = cv2.imread('text_boxes.jpg')

# Binary mask of every pixel whose colour lies inside the outline range.
lowerBound = (80,120,100)
upperBound = (160,200,180)
thresh = cv2.inRange(img, lowerBound, upperBound)

# Morphological closing with a 3x3 kernel, so small gaps in the outlines
# are bridged before contour extraction.
kernel = np.ones((3,3), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# Outer contours only. The result holds the contour list either first (when
# two values come back) or second (otherwise), so pick it by tuple length.
found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# Crop the bounding box of every contour and write it to a numbered file.
crop_name = "text_boxes_crop_{0}.png"
for i, cntr in enumerate(contours, start=1):
    x, y, w, h = cv2.boundingRect(cntr)
    crop = img[y:y+h, x:x+w]
    cv2.imwrite(crop_name.format(i), crop)

# Keep the mask on disk as well, then display it until a key is pressed.
cv2.imwrite("text_boxes_thresh.png",thresh)
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Cropped Images:
Related
I am using Tesseract 5.3.0. With the code below I am able to identify the licence plate and mask it out; however, when I resize the image, the licence plate portion does not get any larger. How do I grab just the licence plate portion and enlarge it? Any tips on reading the licence plate would also be appreciated.
import cv2
import numpy as np
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'

# Load the image and convert it to grayscale
image = cv2.imread('/Users/PythonProg/IMG_4592.JPG')
if image is None:
    print("Error: Could not load the image")
    raise SystemExit(1)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Bilateral filter: reduces noise while keeping edges sharp for Canny
gray = cv2.bilateralFilter(gray, 13, 15, 15)
edged = cv2.Canny(gray, 30, 200)

contours = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

# Walk the ten largest contours (largest first) and keep the first one whose
# polygon approximation has four corners.
screenCnt = None
licence_plate = None
for c in contours:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.018 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

detected = 0 if screenCnt is None else 1
if screenCnt is None:
    # Everything below needs a contour, so stop here instead of passing
    # None to drawContours.
    print ("No contour detected")
else:
    # Draw the outline on a copy so the red border does not end up inside
    # the masked/cropped plate that is later handed to OCR.
    annotated = image.copy()
    cv2.drawContours(annotated, [screenCnt], -1, (0, 0, 255), 3)
    cv2.imwrite('/Users/PythonProg/output.jpg', annotated)

    # Black out everything outside the detected quadrilateral.
    mask = np.zeros(gray.shape, np.uint8)
    cv2.drawContours(mask, [screenCnt], 0, 255, -1)
    masked = cv2.bitwise_and(image, image, mask=mask)
    cv2.imwrite('/Users/anthonywilson/PythonProg/mask.jpg', masked)

    # Crop to the bounding box of the contour. Resizing the full masked
    # frame only rescales the whole picture, so the plate never fills it.
    x, y, w, h = cv2.boundingRect(screenCnt)
    licence_plate = masked[y:y + h, x:x + w]

    # Resize the cropped plate to a specific size. INTER_AREA is meant for
    # shrinking; use cubic interpolation when the crop has to be enlarged.
    shrinking = w > 400 and h > 200
    interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
    resized = cv2.resize(licence_plate, (400, 200), interpolation=interpolation)

    # Save the resized image
    cv2.imwrite('/Users/PythonProg/resized.jpg', resized)
I need help fixing my OpenCV project, which detects licence plates and uses Tesseract to extract their text. I don't understand why the text is only recognised when it is written on a plain white background and not when I use a real plate. I tried to pre-process the image to make it more legible, and to frame only the white part of European licence plates in order to simplify the text extraction, but I cannot isolate just the white area. I am using a Raspberry Pi 4, in case that is relevant.
Could anyone help me? Many thanks in advance.
print("Aspetta 5 secondi per catturare l'immagine, oppure premi <space> per scattare...")
cam = cv2.VideoCapture(0)
num_frames = 0
while True:
ret, image = cam.read()
if not ret:
print("La webcam non funziona...")
sys.exit(1)
cv2.imshow('image', image)
# Catturo l'immagine se premo <space>
if (cv2.waitKey(1) & 0xFF) == ord(' '):
break
# Aspetto 5 secondi prima di catturare l'immagine
num_frames += 1
if num_frames / 10 == 5:
break
cam.release()
cv2.destroyAllWindows()
cv2.imwrite("/home/pi/Desktop/Riconoscimento Targa/imagee.jpg", image)
filename = 'imagee.jpg'
img = np.array(Image.open(filename))
img = cv2.resize(img, (600,400) )
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 13, 15, 15)
edged = cv2.Canny(gray, 30, 200) #Perform Edge detection
contours=cv2.findContours(edged.copy(),cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
contours = sorted(contours,key=cv2.contourArea, reverse = True)[:10]
screenCnt = None
for c in contours:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.018 * peri, True)
# if our approximated contour has four points, then
# we can assume that we have found our screen
if len(approx) == 4:
screenCnt = approx
break
# Masking the part other than the number plate
mask = np.zeros(gray.shape,np.uint8)
new_image = cv2.drawContours(mask,[screenCnt],0,255,-1,)
new_image = cv2.bitwise_and(img,img,mask=mask)
# Now crop
(x, y) = np.where(mask == 255)
(topx, topy) = (np.min(x), np.min(y))
(bottomx, bottomy) = (np.max(x), np.max(y))
Cropped = gray[topx:bottomx+1, topy:bottomy+1]
cv2.imwrite("/home/pi/Desktop/Riconoscimento Targa/Da eliminare.jpg", Cropped)
text = pytesseract.image_to_string(Cropped, config='--psm 11 -l ita')
return text
I am creating a dataset for a U-Net where the image I want to use as the target (Y) is a binarized image. I have written some OpenCV code that takes user input from trackbars (sliders) and saves the image when a given key is pressed.
Could someone please help me apply a perspective transformation to different images? I can get the respective values from the trackbars if needed.
My images look something like this:
Below is the code.
import numpy as np
import cv2
import skimage.filters as filters
from os import listdir
from os.path import isfile, join
class InteractiveBinarization():
    '''
    Interactive tool that binarizes the images of a directory. Every
    processing parameter is read from an OpenCV trackbar, and the result is
    previewed live and saved on request.
    '''
    def __init__(self,path='./images/',out='./out/'):
        '''
        args:
            path: Path of the directory which has all the images
            out: Path of directory where your binarized images will be saved
        '''
        self.path = path
        self.images = [f for f in listdir(path) if isfile(join(path, f))]
        self.N = len(self.images)
        self.out = out

    def dummy(self,x=None)->None:
        '''
        Does not do anything. Used to pass to createTrackbar as it needs a callback
        '''
        pass

    @staticmethod
    def _binarize_frame(img, g_k, g_x_sigma, g_y_sigma, s_a, s_r, angle):
        '''
        Run the processing pipeline on one BGR image and return the rotated
        binary result.
        args:
            img: BGR image as read by cv2.imread
            g_k: Odd Gaussian kernel size
            g_x_sigma: Gaussian sigma along X
            g_y_sigma: Gaussian sigma along Y
            s_a: Unsharp-mask amount
            s_r: Unsharp-mask radius
            angle: Rotation angle in degrees
        '''
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Dividing by a blurred copy flattens uneven illumination.
        smooth = cv2.GaussianBlur(gray, (g_k, g_k), g_x_sigma, sigmaY=g_y_sigma)
        division = cv2.divide(gray, smooth, scale=255)
        # No `multichannel` keyword: newer scikit-image releases no longer
        # accept it, and the default already treats the input as grayscale.
        sharp = filters.unsharp_mask(division, radius=s_r, amount=s_a, preserve_range=False)
        sharp = (255*sharp).clip(0,255).astype(np.uint8)
        thresh = cv2.threshold(sharp, 0, 255, cv2.THRESH_OTSU)[1]

        # rotate around the image centre, filling exposed corners with white
        (h, w) = thresh.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1)
        return cv2.warpAffine(thresh, M, (w, h), flags=cv2.INTER_CUBIC, borderMode = cv2.BORDER_CONSTANT, borderValue=255)

    def binarize(self,window_width:int=350,window_height:int=350)->None:
        '''
        Method to binarize the Image based on the sliding values from the bars. It accepts Gauss Kernel, Sharpen Amount, Sharpen Radius, Rotation Angle
        Press 'esc' or 'q' to quit, 's' to save the binarized image, 't' for printing the current bar values to image, 'p' for previous image and 'n' for next image
        Files that OpenCV cannot decode are skipped. Returns immediately when the directory holds no files.
        args:
            window_width: Width of the Window which has sliding bars
            window_height: Height of window for the sliding bars
        '''
        from os import makedirs  # only needed when an image is saved

        # Work on a copy so skipping unreadable files leaves self.images intact.
        names = list(self.images)
        if not names:
            return

        window = 'Tracking Window'
        cv2.namedWindow(window,cv2.WINDOW_FULLSCREEN)
        cv2.resizeWindow(window, window_width, window_height)

        cv2.createTrackbar('kernel',window,3,513,self.dummy) # gauss kernel size
        cv2.createTrackbar('x_sigma',window,0,100,self.dummy) # gauss X sigma
        cv2.createTrackbar('y_sigma',window,0,100,self.dummy) # gauss Y sigma
        cv2.createTrackbar('amount1',window,0,7,self.dummy) # sharpen amount, integer part
        cv2.createTrackbar('amount2',window,1,100,self.dummy) # sharpen amount, hundredths
        cv2.createTrackbar('radius1',window,0,7,self.dummy) # sharpen radius, integer part
        cv2.createTrackbar('radius2',window,1,100,self.dummy) # sharpen radius, hundredths
        cv2.createTrackbar('angle',window,0,360,self.dummy) # rotation angle

        def pos(bar):
            return cv2.getTrackbarPos(bar, window)

        put_text = False
        read_image = True
        counter = 0
        while True:
            if read_image:
                img_name = names[counter]
                img = cv2.imread(join(self.path, img_name))
                if img is None:
                    # Not a decodable image: drop it for this session and
                    # load whichever file now occupies the slot.
                    del names[counter]
                    if not names:
                        break
                    counter = min(counter, len(names)-1)
                    continue
                read_image = False

            g_k = pos('kernel')
            if g_k % 2 == 0:
                g_k+=1 # GaussianBlur needs an odd kernel size
            g_x_sigma = pos('x_sigma')
            g_y_sigma = pos('y_sigma')
            # integer part plus hundredths, e.g. 3 and 25 -> 3.25
            s_a = round(pos('amount1') + pos('amount2')/100,2)
            s_r = round(pos('radius1') + pos('radius2')/100,2)
            angle = pos('angle')

            thresh = self._binarize_frame(img, g_k, g_x_sigma, g_y_sigma, s_a, s_r, angle)

            if put_text:
                text = f"g_k: {g_k} , g_x_sigma: {g_x_sigma} , g_y_sigma: {g_y_sigma} , s_a: {s_a} , s_r: {s_r} , angle: {angle}"
                cv2.putText(thresh,text,org=(30,30),fontFace=cv2.FONT_HERSHEY_SIMPLEX,fontScale=0.5,color=(0,128,0),thickness=1)

            cv2.imshow('Image', thresh)

            key = cv2.waitKey(1) # 1 ms per iteration keeps the preview live while the bars are moved
            if key==27 or key == ord('q'): # Press escape / q to close all windows
                break
            elif key == ord('s'): # save binary image
                makedirs(self.out, exist_ok=True)
                cv2.imwrite(join(self.out, 'binary_'+img_name), thresh)
            elif key == ord('t'): # show or hide text on image
                put_text = not put_text
            elif key == ord('n'):
                if counter < len(names)-1:
                    read_image = True
                    counter += 1
            elif key == ord('p'):
                if counter > 0:
                    read_image = True
                    counter -= 1

        cv2.destroyAllWindows()
I'm looking to save/write photos that are not blurry. How would one go about combining the two codes below?
import cv2
# Read the sample image from disk.
image = cv2.imread('./facesData/ID.jpg')
# Variance of the image's Laplacian. The value is computed here but neither
# stored nor compared against anything.
cv2.Laplacian(image, cv2.CV_64F).var()
# Grab frames, save a grayscale crop of every detected face, and stop once
# more than 20 samples have been written.
# `cap`, `face_cascade` and `sampleN` are expected to be set up beforehand.
while True:
    ret, img = cap.read()
    if not ret:
        # No frame delivered (camera unplugged / stream ended); cvtColor
        # would fail on it, so stop capturing.
        break
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.2, 8)
    for x, y, w, h in faces:
        sampleN += 1
        cv2.imwrite("./facesData/ID." + str(sampleN) + ".jpg", gray[y:y+h, x:x+w])
        cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
    cv2.imshow('user', img)
    # One waitKey per frame is enough to refresh the window; the extra
    # per-face waitKey(2) only added delay.
    cv2.waitKey(1)
    if sampleN > 20:
        break
cap.release()
cv2.destroyAllWindows()
This moves your blurry images into a separate folder (code source).
# import the necessary packages
import cv2
import os
from pathlib import Path
#%% Setup paths
# Directory that contains this script, kept as a plain string.
script_dir = str(Path(__file__).parent)
# The images to examine live in an "images" folder next to the script.
source_images_dir = os.path.join(script_dir, 'images')
#%%
def variance_of_laplacian(image):
    """Return the focus measure of *image*: the variance of its Laplacian."""
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()
#%% loop over the input images
threshold = 200
blurry_images_dir = os.path.join(script_dir, 'blurry_images')
# os.rename fails when the target folder is missing, so create it up front.
os.makedirs(blurry_images_dir, exist_ok=True)

for file_name in os.listdir(source_images_dir):
    image_path = os.path.join(source_images_dir, file_name)
    image = cv2.imread(image_path)  # load the image
    if image is None:
        # Not a readable image (sub-folder, hidden file, ...): skip it.
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    fm = variance_of_laplacian(gray)  # focus measure of the grayscale image

    # if the focus measure is less than the supplied threshold,
    # then the image should be considered "blurry"
    if fm < threshold:
        text = "Blurry"
        #%% Once ready to move blurry images, keep this section enabled
        os.rename(image_path, os.path.join(blurry_images_dir, file_name))
    else:
        text = "Not Blurry"

    #%% Comment out the following section when ready to batch move your images
    cv2.putText(image, "{}: {:.2f}".format(text, fm), (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 3)
    cv2.imshow("Image", image)
    cv2.waitKey(0)  # press a key to cycle to the next image
To use it, set up your folder structure like this:
# Fragment of a capture loop: a face crop is written to disk only when its
# Laplacian variance exceeds 500.
# NOTE(review): indentation was lost in this excerpt; the nesting described
# in the comments below is presumed and should be confirmed.
ret, frame = cap.read()
# Continue only when face_extractor returns something for this frame.
if face_extractor(frame) is not None:
count = count+1;
# NOTE(review): `face` is never assigned in this snippet. Presumably it should
# hold the result of face_extractor(frame); confirm.
face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
# NOTE(review): the pieces joined here produce two consecutive dots before
# "jpg", and `id` is not defined in this snippet. Verify both.
file_name_path = './facesData/ID.' +str(id)+ '.' +str(count)+ '.' + '.jpg'
# Write the crop only when its Laplacian variance is above 500.
if cv2.Laplacian(face, cv2.CV_64F).var() >500:
cv2.imwrite(file_name_path, face)
else:
# Undo the increment above (presumably so rejected crops are not counted).
count -= 1
cv2.imshow('user', frame)
else:
pass
# NOTE(review): this statement has no trailing colon and no body; the snippet
# appears to be cut off here.
if cv2.waitKey(1) == 13 or count == 20
I am trying to detect the pupil in an image using Python with the OpenCV package. In my test images, I am able to detect the pupil in part (a), but whenever there is a reflection or glare, I am unable to accurately detect the blob of pupil pixels in part (b) of the image. Can anybody help me out? Here is the code I am trying.
import numpy as np
import cv2
name = 'two_eyes1.png'

# Load the picture in colour, then derive the inverted, grayscale and
# thresholded versions that are displayed at the end.
img = cv2.imread(name, cv2.IMREAD_COLOR)
img_inv = cv2.bitwise_not(img)
gray = cv2.cvtColor(img_inv, cv2.COLOR_BGR2GRAY)
ret, threshold = cv2.threshold(gray, 225, 255, cv2.THRESH_BINARY)

#----- Blob detector configuration
params = cv2.SimpleBlobDetector_Params()

# threshold sweep covers the full 8-bit range
params.minThreshold = 0
params.maxThreshold = 255

# keep blobs with an area of at least 70
params.filterByArea = True
params.minArea = 70

# circularity filter
params.filterByCircularity = True
params.minCircularity = 0.1

# convexity filter
params.filterByConvexity = True
params.minConvexity = 0.87

# inertia-ratio filter
params.filterByInertia = True
params.minInertiaRatio = 0.01

det = cv2.SimpleBlobDetector_create(params)

# Detection runs on the original colour image, not on the thresholded one.
keypoints = det.detect(img)
im_with_key = cv2.drawKeypoints(
    img,
    keypoints,
    np.array([]),
    (0,0,255),
    cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
)
#----------

# Show every stage in its own window until a key is pressed.
titles = ['Input','Inverted','Grayscaled','Thresholded','blobpart']
images = [img, img_inv, gray, threshold, im_with_key]
for title, picture in zip(titles, images):
    cv2.imshow(title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()