I want to isolate every character in the following image:
and create a rectangular bounding box around each character. My code draws a circular bounding box instead. I need to feed these isolated character images to my trained model so it can predict each character. I haven't done image processing before, which is why I'm asking such a question.
This is the code I'm using:
# Standard imports
import cv2
import numpy as np
from PIL import Image
params = cv2.SimpleBlobDetector_Params()
# Change thresholds
params.minThreshold = 10
params.maxThreshold = 200
# Filter by Color
params.filterByColor = False
params.blobColor = 255
# Filter by Area
params.filterByArea = False
params.minArea = 50
# Filter by Circularity
params.filterByCircularity = False
params.minCircularity = 0.0785
# Filter by Convexity
params.filterByConvexity = False
params.minConvexity = 0.87
# Filter by Inertia
params.filterByInertia = False
params.minInertiaRatio = 0.01
# Read image
im = cv2.imread("C:\\xx\\testimages\\bw_plate.jpg", cv2.IMREAD_GRAYSCALE)
cv2.threshold(im,200,255,cv2.THRESH_BINARY_INV,im)
# Set up the detector with the above parameters.
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs.
keypoints = detector.detect(im)
# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the circle corresponds to the size of blob
im_with_keypoints = cv2.drawKeypoints(im, keypoints, np.array([]), (0, 0, 255),
cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Show keypoints
cv2.imshow("Keypoints", im_with_keypoints)
cv2.waitKey(0)
My output with the above code is:
Why is it not detecting 0 and 2 properly? Also, how can I create a separate JPEG file for every isolated character?
The C++ implementation of my project uses the CBlobResult class, which did the segmentation. Is there an equivalent library in Python?
This is what the final output must look like for every character after segmentation:
After removing the background noise, you can input an image like this:
Then you can get what you want using the following code:
import cv2

img = cv2.imread('test4.jpg', 0)
cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU, img)

# findContours returns 2 values in OpenCV 2.x/4.x and 3 values in 3.x,
# so grab the contours in a version-independent way
cnts = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = cnts[0] if len(cnts) == 2 else cnts[1]

# Sort contours left to right by the x coordinate of their bounding boxes
contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

cv2.imshow("contours", img)
cv2.waitKey(0)

d = 0
for ctr in contours:
    # Get the bounding box of the character
    x, y, w, h = cv2.boundingRect(ctr)

    # Crop the ROI and save it as a separate image file
    roi = img[y:y+h, x:x+w]
    cv2.imshow('character: %d' % d, roi)
    cv2.imwrite('character_%d.png' % d, roi)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    d += 1
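Since you want to feed the isolated characters to a trained model, you'll usually also want each crop at a fixed size. Here's a minimal sketch, assuming white characters on a black background (as in the thresholded image above) and a model that expects 28x28 grayscale inputs; adjust the size and padding to whatever your network was trained on:

import cv2
import numpy as np

def prepare_roi(roi, size=28, pad=4):
    # Pad the crop so the character isn't touching the borders
    roi = cv2.copyMakeBorder(roi, pad, pad, pad, pad,
                             cv2.BORDER_CONSTANT, value=0)
    # Resize to the fixed input size the model expects (assumed 28x28 here)
    roi = cv2.resize(roi, (size, size), interpolation=cv2.INTER_AREA)
    # Scale to [0, 1] and add batch/channel axes for a typical CNN
    return roi.astype(np.float32)[None, ..., None] / 255.0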
I have tried to remove the noise from the image using the approach from Remove wavy noise from image background using OpenCV, but it's not removing the noise. Is there any other way to remove this kind of noise?
You can do this with OpenCV morphology operations. The steps I used to remove the noise are as follows:
Blur (smooth) the image.
Threshold it, i.e. turn it into a binary image.
Apply morphological operations to reduce background artifacts.
Use SimpleBlobDetector to remove the remaining artifacts.
Here is the resulting image:
Here is the code. I will leave it up to you to play with the parameters and find good combinations to get the exact results you desire. I recommend displaying the image at each step to get a better understanding of what each operation does.
import numpy as np
import cv2
# read in the image and convert it to grayscale
image = cv2.imread(r"path\to\image")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# blurr image
blurred = cv2.GaussianBlur(image, (15,15), 0)
# threshold image
thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 3)
# apply Morphological operations
kernel_3 = np.ones((3,3), dtype=np.uint8)
kernel_5 = np.ones((5,5), dtype=np.uint8)
# perform Morphological Operations
dilation = cv2.dilate(thresh, kernel_5, iterations=1)
blackhat = cv2.morphologyEx(dilation, cv2.MORPH_BLACKHAT, kernel_3)
weighted = cv2.addWeighted(dilation, 1.0, blackhat, -1.0, 0)
erosion = cv2.erode(weighted, kernel_5, iterations=1)
# Use the simple blob detector to remove small unwanted artifacts
# Setup SimpleBlobDetector parameters.
params = cv2.SimpleBlobDetector_Params()
# Change thresholds
params.minThreshold = 0
params.maxThreshold = 255
# Filter by Area.
params.filterByArea = True
params.minArea = 10
params.maxArea = 250
# Filter by Circularity
params.filterByCircularity = False
params.minCircularity = 0.1
# Filter by Convexity
params.filterByConvexity = True
params.minConvexity = 0.1
# Filter by Inertia
params.filterByInertia = False
params.minInertiaRatio = 0.01
# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs.
keypoints = detector.detect(erosion)
# (OPTIONAL) Draw detected blobs as red circles.
# im_with_keypoints = cv2.drawKeypoints(erosion, keypoints, np.array([]), (0), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# instead use the key points to erase unwanted blobs
filtered = erosion.copy()
for kp in keypoints:
    # paint a filled white circle over each detected blob
    cv2.circle(filtered,
               center=tuple(np.round(kp.pt).astype(int)),
               radius=int(np.ceil(kp.size)),
               color=255,
               thickness=-1)
# display
cv2.imshow("image", filtered)
cv2.waitKey(0)
cv2.destroyAllWindows()
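As an alternative to the blob-detector step, you could remove the remaining specks with connected-component statistics instead. A rough sketch continuing from the code above, assuming erosion holds dark text and artifacts on a white background; the area threshold is a guess you'd need to tune:

# Invert so the dark specks/characters become white foreground components
inv = cv2.bitwise_not(erosion)
n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(inv, connectivity=8)

cleaned = erosion.copy()
for i in range(1, n_labels):  # label 0 is the background
    # Erase components smaller than the (tunable) area threshold
    if stats[i, cv2.CC_STAT_AREA] < 250:
        cleaned[labels == i] = 255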
I want to extract only the text and remove everything else from the following image:
Now, I want to remove everything except the text inside the rectangle shapes. That's my code:
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))
# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
Here's the output of the code:
The problem with my code is that it masks everything in the image, whereas I just want to extract the text, not the other elements:
Since you have "perfect" rectangles in your image, I came up with the following approach:
Grayscale and inverse binarize the input image to get rid of possible artifacts, and to have white boxes and text on black background.
In the following, template matching will be used to find the upper left corners of the boxes of interest. So, set up a template and mask mimicking those upper left corners.
The template itself resembles a "corner" 50 pixels wide and 20 pixels high, since all boxes of interest have at least these dimensions:
The corresponding mask limits the template to a 5 pixel wide "stripe" along the "corner":
Since all texts have a margin of at least 5 pixels to the boxes' borders, there'll be "perfect" matching results, since no texts interfere with the matching.
From the "perfect" matching results, the (x, y) coordinates of each box of interest are derived, and iterated.
The box is flood-filled with some gray color (there's only black and white in the image, due to the binarization in the beginning) and then masked using that gray color:
From that, the bounding rectangle is determined, and that portion is copy-pasted from the original to some clean image. Also, pytesseract is executed on the content.
Here's the full code:
import cv2
import numpy as np
import pytesseract
# Read image as grayscale
img = cv2.imread('M7X8C.png', cv2.IMREAD_GRAYSCALE)
# Inverse binarize image to get rid of possible artifacts, and to have
# white boxes and text on black background
thr = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)[1]
# Set up a template and mask mimicking the upper left corner of the
# boxes of interest
templ = np.full((20, 50), 255, dtype=np.uint8)
templ[1:, 1:] = 0
mask = np.full_like(templ, 255)
mask[5:, 5:] = 0
# Template matching
res = cv2.matchTemplate(thr, templ, cv2.TM_CCORR_NORMED, mask=mask)
# Extract upper left corners of the boxes of interest
boxes_tl = np.argwhere(res == 1)
# Initialize new clean image
clean = np.full_like(img, 255)
# For each upper left corner...
for i in np.arange(boxes_tl.shape[0]):

    # Get coordinates of upper left corner
    y, x = boxes_tl[i, :]
    print('x: {}, y: {}'.format(x, y))

    # Flood fill inner part of box, and mask that area
    box_mask = cv2.floodFill(thr.copy(), None, (x + 1, y + 1), 128)[1] == 128

    # Extract the bounding rectangle of that area
    x, y, w, h = cv2.boundingRect(box_mask.astype(np.uint8))

    # Copy box content to clean image
    clean[y:y+h, x:x+w] = img[y:y+h, x:x+w]

    # Run pytesseract on box content
    text = pytesseract.image_to_string(thr[y:y+h, x:x+w], config='--psm 6')
    print(text.replace('\f', ''))
# Output
cv2.imshow('clean', clean)
cv2.waitKey(0)
That's the clean image:
And these are the first two pytesseract results:
x: 1, y: 0
PGGEOS KKCI 100600
x: 199, y: 39
ISOL
EMBD
CB
400
XXX
As you can see, the results are not perfect (S instead of 5), most likely due to the monospace font. Getting (or generating) some Tesseract traineddata for that kind of font will surely help to overcome that issue.
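For reference, once you have such a traineddata file, you could point pytesseract at it through the lang and config arguments. A sketch reusing a box crop like the one from the loop above, where 'mono' and the tessdata path are hypothetical placeholders for whatever you actually install:

# 'mono' and the tessdata directory are hypothetical placeholders
text = pytesseract.image_to_string(
    thr[y:y+h, x:x+w],
    lang='mono',
    config=r'--tessdata-dir "C:\tessdata" --psm 6')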
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.19041-SP0
Python: 3.9.1
PyCharm: 2021.1.1
NumPy: 1.19.5
OpenCV: 4.5.2
pytesseract: 5.0.0-alpha.20201127
----------------------------------------
I'm trying to capture real-time hand-gesture input with a webcam and then process the images to feed them to a neural network. I wrote this processing function to make the hand features look prominent:
img = cv2.imread('hand.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),2)
th3 = cv2.adaptiveThreshold(blur,10,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY_INV,11,2)
ret, res = cv2.threshold(th3, 225, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
res = cv2.Canny(res,100,200)
cv2.imshow("Canny", res)
The input and the output images are as follows:
It's obvious that double lines, instead of single ones, are detected along the edges (all over the hand, not only the contour). I want to make them single. If I apply just the Canny edge detection algorithm, the edges are not very prominent.
One straightforward solution would be to flood-fill the background, first with white and then with black, using cv2.floodFill, like this:
import cv2
import numpy as np
# image path
path = "D://opencvImages//"
fileName = "hand.png"
# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)
# Convert the image to Grayscale:
binaryImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Flood fill the background (first white, then black):
cv2.floodFill(binaryImage, mask=None, seedPoint=(0, 0), newVal=255)
cv2.floodFill(binaryImage, mask=None, seedPoint=(0, 0), newVal=0)

cv2.imshow("floodFilled", binaryImage)
cv2.waitKey(0)
This is the result:
If you want to get a solid mask of the hand, you could try to fill the holes inside the hand's contour, also using flood-fill and some image arithmetic, like this:
# image path
path = "D://opencvImages//"
fileName = "hand.png"
# Reading an image in default mode:
inputImage = cv2.imread(path + fileName)
# Convert the image to Grayscale:
binaryImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
# Isolate holes on input image:
holes = binaryImage.copy()
# Get rows and cols from input:
(rows, cols) = holes.shape[:2]
# Remove the background via flood-fill near the 4 outermost corners:
cv2.floodFill(holes, mask=None, seedPoint=(0, 0), newVal=255)
cv2.floodFill(holes, mask=None, seedPoint=(10, rows - 10), newVal=255)
cv2.floodFill(holes, mask=None, seedPoint=(cols - 10, 10), newVal=255)
cv2.floodFill(holes, mask=None, seedPoint=(cols - 10, rows - 10), newVal=255)
# Get holes:
holes = 255 - holes
# Final image is the original input + isolated holes:
mask = binaryImage + holes
# Deep copy for further results:
maskCopy = mask.copy()
maskCopy = cv2.cvtColor(maskCopy, cv2.COLOR_GRAY2BGR)
These are the isolated holes and hand mask:
You can then detect the bounding rectangle by processing contours, filtering small-area blobs and approximating to a rectangle, like this:
# Find the big contours/blobs on the processed image:
contours, hierarchy = cv2.findContours(mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

# Get bounding rectangles:
for c in contours:

    # Filter contours by area, keeping only the larger blobs:
    blobArea = cv2.contourArea(c)
    minArea = 100

    if blobArea > minArea:

        # Approximate the contour to a polygon:
        contoursPoly = cv2.approxPolyDP(c, 3, True)

        # Get the polygon's bounding rectangle:
        boundRect = cv2.boundingRect(contoursPoly)

        # Get the dimensions of the bounding rect:
        rectX, rectY, rectWidth, rectHeight = boundRect

        # Draw rectangle:
        color = (0, 255, 0)
        cv2.rectangle(maskCopy, (int(rectX), int(rectY)),
                      (int(rectX + rectWidth), int(rectY + rectHeight)), color, 3)

        cv2.imshow("Bounding Rectangle", maskCopy)
        cv2.waitKey(0)
This is the result:
It looks like you are on the right track, but as @CrisLuengo mentioned, Canny is applied to grayscale images rather than binary images. Here is an approach.
import numpy as np
import matplotlib.pyplot as plt
import cv2
img_gray = cv2.imread('hand.png',0)
sigma = 2
threshold1=30
threshold2=60
img_blur = cv2.GaussianBlur(img_gray,(5,5),sigmaX=sigma,sigmaY=sigma)
res = cv2.Canny(img_blur,threshold1=threshold1,threshold2=threshold2)
fig,ax = plt.subplots(1,2,sharex=True,sharey=True)
ax[0].imshow(img_gray,cmap='gray')
ax[1].imshow(res,cmap='gray')
plt.show()
After playing around with the parameters of the Gaussian filter and the Canny threshold values, this is what I get:
As you can see, most of the fingers are clearly detected, except the thumb. The lighting conditions make it difficult for Canny to compute a proper gradient there. You could either try to improve the contrast of your images through your setup (the easiest solution, in my view), or apply a contrast enhancement method such as Contrast Limited Adaptive Histogram Equalization (CLAHE) before running Canny. I didn't get any better results than the one above after a few trials with CLAHE, though, but it might be worth looking into. Good luck!
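In case you want to try it, here's a minimal CLAHE sketch continuing from the code above; clipLimit and tileGridSize are just starting values to experiment with:

# Boost local contrast before blurring and Canny
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
img_eq = clahe.apply(img_gray)
img_blur = cv2.GaussianBlur(img_eq, (5, 5), sigmaX=sigma, sigmaY=sigma)
res = cv2.Canny(img_blur, threshold1=threshold1, threshold2=threshold2)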
I'm writing code that detects parakeets' eyes. Currently I'm using already-written code that I found on YouTube. It works great with the pictures I have, but I don't know how to display a colored version of the selected area.
The results: (https://imgur.com/a/zCARrVC)
I've tried creating masks and using cv2.drawContours to repeat the already-drawn contour on them. It worked, but I couldn't make the mask overlap the original image and crop it. I think that's because the contour wasn't filled, but I don't know for sure, and I don't know whether a filled contour would mess up the rest of the code.
import cv2
import numpy as np
import imutils

def nothing(x):
    pass

# Load an image
img = cv2.imread('papagaio.png')

# Resize the image
if img.shape[1] > 600:
    img = imutils.resize(img, width=600)

# Create a window
cv2.namedWindow('Treshed')

# Create a trackbar for threshold change
cv2.createTrackbar('Treshold', 'Treshed', 0, 255, nothing)

while True:
    # Clone the original image to not overlap drawings
    clone = img.copy()

    # Convert to gray
    gray = cv2.cvtColor(clone, cv2.COLOR_BGR2GRAY)

    # Get the current position of the trackbar
    r = cv2.getTrackbarPos('Treshold', 'Treshed')

    # Threshold the gray image
    ret, gray_threshed = cv2.threshold(gray, r, 255, cv2.THRESH_BINARY)

    # Blur the image
    bilateral_filtered_image = cv2.bilateralFilter(gray_threshed, 5, 175, 175)

    # Detect edges
    edge_detected_image = cv2.Canny(bilateral_filtered_image, 75, 200)

    # Find contours
    contours, _ = cv2.findContours(edge_detected_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contour_list = []
    for contour in contours:
        # Approximate for circles
        approx = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, True), True)
        area = cv2.contourArea(contour)
        if (len(approx) > 8) and (area > 30):
            contour_list.append(contour)

    # Draw contours on the original image
    cv2.drawContours(clone, contour_list, -1, (255, 0, 0), 2)

    # There is an outer and an inner boundary for each edge, so contours double
    print('Number of found circles: {}'.format(int(len(contour_list) / 2)))

    # Display the results
    cv2.imshow('Objects Detected', clone)
    cv2.imshow("Treshed", gray_threshed)

    # ESC to break
    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        break

# Close all open windows
cv2.destroyAllWindows()
Like you said, you can create a mask and then apply it to the RGB image. Here's one way to do it:
# Create a single-channel mask the same size as the image
mask = np.zeros((clone.shape[0], clone.shape[1]), np.uint8)

for contour in contours:
    # cv2.drawContours with thickness=-1 would also work here
    cv2.fillPoly(mask, pts=[contour], color=255)

# Keep only the masked pixels of the original image
result = cv2.bitwise_and(clone, clone, mask=mask)
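If you also want to crop the selected area, as in your question, you can take the bounding rectangle of the mask afterwards; a small sketch continuing from the code above:

# Bounding box of all non-zero mask pixels, then crop the masked result
x, y, w, h = cv2.boundingRect(mask)
cropped = result[y:y+h, x:x+w]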
I am trying to make a computer vision script that detects the orientation of objects. It works a majority of the time, but it seems that it is not able to have the same success for certain images.
This script relies on blurring and Canny edge detection to find the contours.
Working example:
Part which it fails:
For the part where it fails, it draws two orientation lines for one of the shapes and completely ignores another shape.
Main code:
import cv2
from imgops import imutils
import CVAlgo
z = 'am'
path = 'images/pca.jpg'
#path = 'images/pca2.jpg'
img = cv2.imread(path)
imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = imutils.resize(img, height = 600)
imgray = imutils.resize(img, height = 600)
final = img.copy()
thresh, imgray = CVAlgo.filtering(img, imgray, z)
__ , contours, hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# Iterate through all contours
test = CVAlgo.cnt_gui(final, contours)
#cv2.imwrite('1.jpg', final)
cv2.imshow('thresh', thresh)
cv2.imshow('contours', final)
cv2.waitKey(0)
CVAlgo.py
import cv2
from numpy import *
from pylab import *
from imgops import imutils
import math
def invert_img(img):
img = (255-img)
return img
def canny(imgray):
imgray = cv2.GaussianBlur(imgray, (11,11), 200)
canny_low = 0
canny_high = 100
thresh = cv2.Canny(imgray,canny_low,canny_high)
return thresh
def cnt_gui(img, contours):
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
for i in range(0,len(cnts)):
sel_cnts = sorted(contours, key = cv2.contourArea, reverse = True)[i]
area = cv2.contourArea(sel_cnts)
if area < 1000:
continue
# get orientation angle and center coord
center, axis,angle = cv2.fitEllipse(sel_cnts)
hyp = 100 # length of the orientation line
# Find out coordinates of 2nd point if given length of line and center coord
linex = int(center[0]) + int(math.sin(math.radians(angle))*hyp)
liney = int(center[1]) - int(math.cos(math.radians(angle))*hyp)
# Draw orientation
cv2.line(img, (int(center[0]),int(center[1])), (linex, liney), (0,0,255),5)
cv2.circle(img, (int(center[0]), int(center[1])), 10, (255,0,0), -1)
return img
def filtering(img, imgray, mode):
imgray = cv2.medianBlur(imgray, 11)
thresh = cv2.Canny(imgray,75,200)
return thresh, imgray
Does anyone know what the problem is? Anyone know how I can improve this script?
The shape that has not been detected is too close to the black background, so its contour has been merged with the contour of the white object area. The second orientation you found in one of the objects is in fact the orientation of the outer contour. To circumvent some of this you can dilate or close the binary image after thresholding, using cv2.dilate or cv2.morphologyEx, as sketched below.
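A minimal sketch of the closing step, assuming thresh is the Canny output from your filtering function; the kernel size is something you'd tune:

import numpy as np

kernel = np.ones((5, 5), np.uint8)
# Close small gaps in the edge map so each object yields one solid outer contour
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)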
I have a suggestion. Since you have extracted each of the objects in the image as a contour, try fitting an ellipse to each of them. Then find the major axis of each ellipse, and the angle of orientation of that major axis; see the sketch after this paragraph.
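A short sketch of that idea, using the contours from your main code (note that cv2.fitEllipse needs a contour with at least 5 points):

for c in contours:
    if len(c) >= 5 and cv2.contourArea(c) > 1000:
        # fitEllipse returns the center, the axis lengths, and the rotation angle in degrees
        (cx, cy), (ax1, ax2), angle = cv2.fitEllipse(c)
        print('center: ({:.0f}, {:.0f}), orientation: {:.1f} degrees'.format(cx, cy, angle))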