I have been tasked with finding the contours of a red boundary drawn on a site location map. From the detected contours, I need to extract the coordinates and save them to an array. I am able to filter for the red boundary and draw the contours; however, I don't know how to use this new image in my coordinate extraction. As a temporary solution I have taken a screenshot of the generated mask, saved it, and then used that screenshot in a separate program to find the coordinates. Is there a way to join all this code together?
This is the code for drawing the contours:
import cv2
# Load the site location map from disk.
img = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cornwall_cropped.png')
# Convert from BGR to HSV so that red can be selected by hue range.
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Gen lower mask (0-5) and upper mask (175-180) of RED
# Both hue ranges also require saturation >= 50 and value >= 20.
mask1 = cv2.inRange(img_hsv, (0,50,20), (5,255,255))
mask2 = cv2.inRange(img_hsv, (175,50,20), (180,255,255))
# Merge the mask and crop the red regions
mask = cv2.bitwise_or(mask1, mask2 )
# AND the image with itself under the mask: masked-in pixels keep their
# original colour, the remaining pixels are left black.
cropped = cv2.bitwise_and(img, img, mask=mask)
## Display
cv2.imshow("mask", mask)
cv2.imshow("cropped", cropped)
# Wait for a key press so the two windows stay on screen.
cv2.waitKey()
This is the code used to extract the coordinates - the image I read in is a screen shot of the 'cropped' image from the above code
# Find the contours in the saved 'cropped' screenshot, approximate each one by
# a polygon, draw/label the polygon vertices and collect them in `coordinates`.
font = cv2.FONT_HERSHEY_COMPLEX
# Colour copy: used only for drawing the results.
img2 = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cropped.png', cv2.IMREAD_COLOR)
# Grayscale copy of the same file: used for thresholding and contour search.
img = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cropped.png', cv2.IMREAD_GRAYSCALE)
# Converting image to a binary image (black and white only image).
_, threshold = cv2.threshold(img, 110, 255, cv2.THRESH_BINARY)
# Detecting contours in image.
contours, _ = cv2.findContours(threshold, cv2.RETR_LIST,
                               cv2.CHAIN_APPROX_SIMPLE)
# One entry per contour; each entry is a list of [x, y] vertex coordinates.
coordinates = []
# Going through every contour found in the image.
for cnt in contours:
    approx = cv2.approxPolyDP(cnt, 0.009 * cv2.arcLength(cnt, True), True)
    # draws boundary of contours.
    cv2.drawContours(img2, [approx], 0, (0, 0, 255), 5)
    # The flattened polygon is laid out as x0, y0, x1, y1, ...; viewing it as
    # rows of two gives one (x, y) pair per vertex.
    vertices = approx.reshape(-1, 2)
    coordinates.append(vertices.tolist())
    for index, (x, y) in enumerate(vertices):
        # Plain Python ints are accepted by every OpenCV version as a point.
        x, y = int(x), int(y)
        if index == 0:
            # text on topmost co-ordinate.
            cv2.putText(img2, "Arrow tip", (x, y),
                        font, 0.5, (255, 0, 0))
        else:
            # text on remaining co-ordinates.
            cv2.putText(img2, str(x) + " " + str(y), (x, y),
                        font, 0.5, (0, 255, 0))
# Showing the final image.
cv2.imshow('image2', img2)
# Exiting the window if 'q' is pressed on the keyboard.
if cv2.waitKey(0) & 0xFF == ord('q'):
    cv2.destroyAllWindows()
So what I want to do is loop through two image crops and then see how many white pixels there are on both of these images. If a certain amount is detected on one image you print out something and if another amount is detected on the other you print out something etc etc.
I currently have this:
import numpy as np
import cv2

# Segment the region of interest with GrabCut, crop one fixed rectangle out of
# the result, threshold the crop and report whether it contains white pixels.
#img = cv2.imread('FIFA_Full_Crop_2_button.jpg')
img = cv2.imread('FIFA2.jpg')
#img = cv2.imread('FIFA_Full_Crop_2_button_3.jpg')
mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
rect = (1512, 20, 180, 185) # boundary of interest
cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
# Mask labels 0 and 2 are dropped (set to 0); every other label is kept (1).
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
# Broadcast the single-channel mask over the colour channels.
img = img * mask2[:, :, np.newaxis]
cv2.imwrite('Image_mask.jpg', img)
# NOTE: this tuple is unpacked as (x, y, h, w), i.e. height before width.
mx = (1510, 22, 110, 185)
x, y, h, w = mx
# x,y coordinates for specified "fixed" location
# Left button
# mx = (1525, 58, 27, 22)
# x, y, h, w = mx
# Circle button 2
# mz = (1664, 58, 27, 22)
# x, y, h, w = mz
# Output to files
crop = img[y:y+h, x:x+w]
cv2.imwrite('Image_crop.jpg', crop)
cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
cv2.imwrite('Image_cont.jpg', img)
# Detect white pixels from cropped image
img = cv2.imread('Image_crop.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Pixels brighter than 150 become 255, all others 0.
ret, gray = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
gray2 = gray.copy()
cv2.imshow('IMG', gray2)
# Example left button detection 72 pixels
# n_white_pix = np.sum(gray2 == 255)
# print('Number of white pixels:', n_white_pix)
# if crop.sum() >= 72:
# print("Left")
# else:
# print("No button detected")
n_white_pix = np.sum(gray2 == 255)
print('Number of white pixels:', n_white_pix)
if n_white_pix > 0:
    print("White pixel detected")
else:
    print("Nothing detected")
cv2.waitKey(0)
cv2.destroyAllWindows()
As seen above I have commented out the left button and circle part as I can only do one image crop at a time, how would I go about in doing things if I want to check both crops at the same time?
An ideal solution would be looping through the button crops and then checking them with n_white_pix to see if they exceed a certain amount.
So the main problem at hand now is, how do I loop through two image crops, # Left button and # Circle button 2.
I am using opencv to take images using my webcam.
# Show the webcam feed; SPACE saves the current frame to disk, ESC leaves the loop.
cam = cv2.VideoCapture(0)
cv2.namedWindow("Handwritten Number Recognition")
img_counter = 0
while True:
    ret, frame = cam.read()
    if not ret:
        print("failed to grab frame")
        break
    cv2.imshow("Handwritten Number Recognition", frame)
    k = cv2.waitKey(1)
    if k % 256 == 27:
        # ESC pressed
        print("Prediction is underway...")
        break
    elif k % 256 == 32:
        # SPACE pressed
        # NOTE: img_name is only assigned here, so it stays undefined if ESC
        # is pressed before any frame has been saved.
        img_name = "opencv_frame_{}.png".format(img_counter)
        cv2.imwrite(img_name, frame)
        print("Image taken!")
        img_counter += 1
cam.release()
cv2.destroyAllWindows()
I then convert the image into grayscale and downsize it:
# Binarise the most recently saved frame, invert it and shrink it to 28x28.
# NOTE(review): `Image` and `plt` are not imported in the visible code;
# presumably PIL.Image and matplotlib.pyplot - confirm.
user_test = img_name
col = Image.open(user_test)
# 'L' = single-channel grayscale.
gray = col.convert('L')
# Fixed global threshold: values below 100 become 0, everything else 255.
bw = gray.point(lambda x: 0 if x<100 else 255, '1')
bw.save("bw_image.jpg")
# Bare expression: has no effect when run as a script (presumably left over
# from a notebook cell, where it would display the image).
bw
# Re-read the saved file as a grayscale array and invert it.
img_array = cv2.imread("bw_image.jpg", cv2.IMREAD_GRAYSCALE)
img_array = cv2.bitwise_not(img_array)
plt.imshow(img_array, cmap = plt.cm.binary)
plt.show()
img_size = 28
new_array = cv2.resize(img_array, (img_size,img_size))
# Flatten to a single row (1 x 784 for a 28x28 image).
final_array = new_array.reshape(1,-1)
plt.imshow(new_array, cmap = plt.cm.binary)
plt.show()
But the images have a very dark patch in the bottom which hampers the predictions I want to make with my data:
Original image:
What can I do to get past this problem? Interestingly, this only happens if I capture the image using OpenCV. If I use an image taken with the same webcam but through the camera application, the error is not visible (adding the path of the image for preprocessing).
You have two options:
Choosing a better global threshold for the gray values. This is the easier less generic solution. Normally, people would choose the Otsu method to automatically select the optimal threshold. Have a look at: Opencv Thresholding Tutorial
# THRESH_OTSU lets OpenCV pick the threshold; the chosen value is returned first.
threshold, dst_img = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)
Using an adaptive threshold. Adaptive simply means using a calculated threshold for each sliding window location based on some criteria. Have a look at: Niblack's Binarization methods
Using option one:
# Read the image as grayscale, binarise it with an Otsu-selected threshold
# and write the result to disk.
img = cv2.imread("thresh.jpg", cv2.IMREAD_GRAYSCALE)
threshold, img = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)
cv2.imwrite("thresh_bin.jpg", img)
Output:
This problem is caused by the thresholding method used:
# Fixed global threshold: values below 100 become 0, everything else 255.
bw = gray.point(lambda x: 0 if x<100 else 255, '1')
To solve this, you can lower the threshold value from 100 to 75, or use OpenCV's automatic (Otsu) threshold:
# `the` receives the threshold value chosen by Otsu's method, `bw` the binary image.
# NOTE(review): gray_img is presumably a grayscale NumPy array rather than the
# PIL image used in the question - confirm.
the, bw = cv2.threshold(gray_img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
Here is one way to do that in Python/OpenCV using division normalization, thresholding and some morphology.
Input:
import cv2
import numpy as np
# Pipeline: grayscale -> heavy Gaussian blur -> divide -> invert -> Otsu
# threshold -> morphological close (with a temporary white border).
# read the image
img = cv2.imread('five.jpg')
# convert to gray
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# blur
# (555x555 kernel; sigma 0 lets OpenCV derive it from the kernel size)
smooth = cv2.GaussianBlur(gray, (555,555), 0)
# divide smooth by gray image
# (the per-pixel ratio is multiplied by scale=255)
division = cv2.divide(smooth, gray, scale=255)
# invert
division = 255 - division
# threshold
# ([1] selects the thresholded image; [0] would be the Otsu threshold value)
thresh = cv2.threshold(division, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# add white border to help morphology close
# (60 pixels on every side)
border = cv2.copyMakeBorder(thresh, 60,60,60,60, cv2.BORDER_CONSTANT, value=(255,255,255))
hh, ww = border.shape
# morphology close
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (29,29))
result = cv2.morphologyEx(border, cv2.MORPH_CLOSE, kernel)
# remove border
# (crop away the same 60 pixels that were added above)
result = result[60:hh-60, 60:ww-60]
# save results
cv2.imwrite('five_division_threshold.jpg',result)
# show results
cv2.imshow('smooth', smooth)
cv2.imshow('division', division)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
I'm trying to implement identification of optic nerve glioma identification using python and openCV.
I need to do the following steps in order for me to classify optic nerve glioma successfully.
Find the brightest part of an image and put a circle on it using cv2.circle - Done
Calculate the white part on the image inside cv2.circle - Needs help
Here's my code for identifying the brightest part of the image
# Blur with a 371x371 Gaussian kernel, then locate the extrema of the blurred
# image; maxLoc is the position of the brightest value.
gray = cv2.GaussianBlur(gray, (371, 371), 0)
(minVal, maxVal, minLoc, maxLoc) = cv2.minMaxLoc(gray)
image = orig.copy()
# Mark the brightest location with a circle of radius 371.
cv2.circle(image, maxLoc, 371, (255, 0, 0), 2)
sought = [254,254,254]
# Count the pixels whose three channels all equal `sought`.
# The comparison is done on the whole array at once: comparing a (b, g, r)
# tuple with the `sought` list is always False in Python, so a per-pixel
# `(b, g, r) == sought` test can never count anything.
amount = int((image == sought).all(axis=-1).sum())
print(amount)
image = imutils.resize(image, width=400)
# display the results of our newly improved method
cv2.imshow("Optic Image", image)
cv2.waitKey(0)
The code above returns the following output
What I'm trying to do now is to identify the size of the white region of the image inside the cv2.circle.
Thank you so much!
I am not sure what you consider as "white", but here is one way to do the counting in Python/OpenCV. Simply read the image. Convert to grayscale. Threshold it at some level. Then just count the number of white pixels in the thresholded image.
If I use your output image for my input (after removing your white border):
import cv2
import numpy as np

# Threshold the image and count how many pixels end up white.
# read image (cv2.imread returns the channels in BGR order)
img = cv2.imread('optic.png')
# convert to grayscale (BGR2GRAY matches the channel order produced by imread)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold: values above 175 become 255, the rest 0
thresh = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)[1]
# count number of white pixels
# (np.where returns the *indices* of the matches, so summing its output adds
# up row/column positions; count_nonzero counts the matching pixels instead)
count = np.count_nonzero(thresh == 255)
print("count =",count)
# write result to disk
cv2.imwrite("optic_thresh.png", thresh)
# display it
cv2.imshow("IMAGE", img)
cv2.imshow("THRESH", thresh)
cv2.waitKey(0)
Thresholded image:
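Count of white pixels in threshold (note: the figure below was obtained by summing the index arrays returned by np.where, so it is a sum of pixel positions rather than a true pixel count):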
count = 1025729
I am still not sure what you consider as white and what you consider as the yellow circle. But here is another attempt using Python/OpenCV.
Read the input
Convert the input to the range 0 to 1 as 1D data
Use kmeans clustering to reduce the number of colors and convert back to range 0 to 255 as 2D image
Use inRange color thresholding to isolate the "yellow" area
Clean it up with morphology and get the contour
Get the minimum enclosing circle center and radius and bias the center a little
Draw an unfilled white circle on the input
Draw a white filled circle on a black background as a circle mask for the yellow area
Convert the input to grayscale
Threshold the grayscale image
Apply the mask to the thresholded grayscale image
Count the number of white pixels
Input:
import cv2
import numpy as np
from sklearn import cluster

# Reduce the image to 5 colours with k-means, isolate one colour range, fit an
# enclosing circle to it and count the bright pixels that fall inside the circle.
# read image (cv2.imread returns the channels in BGR order)
img = cv2.imread('optic.png')
h, w, c = img.shape
# convert to range 0 to 1
image = img.copy()/255
# reshape to one row per pixel, one column per channel
image_1d = image.reshape(h*w, c)
# do kmeans processing
kmeans_cluster = cluster.KMeans(n_clusters=5)
kmeans_cluster.fit(image_1d)
cluster_centers = kmeans_cluster.cluster_centers_
cluster_labels = kmeans_cluster.labels_
# replace every pixel by its cluster centre and scale back to range 0-255
newimage = cluster_centers[cluster_labels].reshape(h, w, c)*255.0
newimage = newimage.astype('uint8')
# threshold brightest region
lowcolor = (150,180,230)
highcolor = (170,200,250)
thresh1 = cv2.inRange(newimage, lowcolor, highcolor)
# apply morphology open and close
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))
thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel, iterations=1)
thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, kernel, iterations=1)
# get contour (findContours returns 2 or 3 values depending on the OpenCV version)
cntrs = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
# own name for the contour so the channel count `c` is not overwritten
contour = cntrs[0]
# get enclosing circle and bias center, if desired, since it is slightly offset (or alternately, increase the radius)
bias = 5
center, radius = cv2.minEnclosingCircle(contour)
cx = int(round(center[0]))-bias
cy = int(round(center[1]))+bias
rr = int(round(radius))
# draw filled circle over black and also outline circle over input
mask = np.zeros_like(img)
cv2.circle(mask, (cx,cy), rr, (255, 255, 255), -1)
circle = img.copy()
cv2.circle(circle, (cx,cy), rr, (255, 255, 255), 1)
# convert to gray (BGR2GRAY matches the channel order produced by imread)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold gray image
thresh2 = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
# apply mask to thresh2 (one channel of the 3-channel mask is enough)
thresh2 = cv2.bitwise_and(thresh2, mask[:,:,0])
# count number of white pixels
# (np.where returns the *indices* of the matches, so summing its output adds
# up row/column positions; count_nonzero counts the matching pixels instead)
count = np.count_nonzero(thresh2 == 255)
print("count =",count)
# write result to disk
#cv2.imwrite("optic_thresh.png", thresh)
cv2.imwrite("optic_kmeans.png", newimage)
cv2.imwrite("optic_thresh1.png", thresh1)
cv2.imwrite("optic_mask.png", mask)
cv2.imwrite("optic_circle.png", circle)
cv2.imwrite("optic_thresh2.png", thresh2)
# display it
cv2.imshow("IMAGE", img)
cv2.imshow("KMEANS", newimage)
cv2.imshow("THRESH1", thresh1)
cv2.imshow("MASK", mask)
cv2.imshow("CIRCLE", circle)
cv2.imshow("GRAY", gray)
cv2.imshow("THRESH2", thresh2)
cv2.waitKey(0)
kmeans image:
inRange threshold image:
Circle on input:
Circle mask image:
Masked threshold image:
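Count Results (note: the figure below was obtained by summing the index arrays returned by np.where, so it is a sum of pixel positions rather than a true pixel count):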
count = 443239
I have these images
For which I want to remove the text in the background. Only the captcha characters should remain(i.e K6PwKA, YabVzu). The task is to identify these characters later using tesseract.
This is what I have tried, but it isn't giving much good accuracy.
import cv2
import pytesseract
# Point pytesseract at the local Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\HPO2KOR\AppData\Local\Tesseract-OCR\tesseract.exe"
img = cv2.imread("untitled.png")
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Binary mask: 255 where the gray value lies in [0, 75], 0 elsewhere.
gray_filtered = cv2.inRange(gray_image, 0, 75)
cv2.imwrite("cleaned.png", gray_filtered)
How can I improve the same?
Note :
I tried all the suggestion that I was getting for this question and none of them worked for me.
EDIT :
According to Elias, I tried finding the color of the captcha text using photoshop by converting it to grayscale which came out to be somewhere in between [100, 105]. I then threshold the image based on this range. But the result which I got did not give satisfactory result from tesseract.
# Binary mask: 255 where the gray value lies in [100, 105], 0 elsewhere.
gray_filtered = cv2.inRange(gray_image, 100, 105)
cv2.imwrite("cleaned.png", gray_filtered)
# Bitwise inversion of the mask.
gray_inv = ~gray_filtered
# Same file name as above, so the inverted image replaces the first write.
cv2.imwrite("cleaned.png", gray_inv)
data = pytesseract.image_to_string(gray_inv, lang='eng')
Output :
'KEP wKA'
Result :
EDIT 2 :
def get_text(img_name, lower=(100, 100, 100), upper=(104, 104, 104)):
    """Return the text Tesseract reads from a colour-filtered captcha image.

    The image is loaded with ``cv2.imread``, pixels whose three channels all
    lie within ``[lower, upper]`` are selected with ``cv2.inRange``, the mask
    is inverted, written to ``neg_img_rgb_inrange.png`` and passed to
    ``pytesseract.image_to_string``.

    Args:
        img_name: Path of the image file to read.
        lower: Inclusive per-channel lower bound of the colour to keep.
        upper: Inclusive per-channel upper bound of the colour to keep.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    img = cv2.imread(img_name)
    img_rgb_inrange = cv2.inRange(img, lower, upper)
    # Invert the mask before saving it and handing it to Tesseract.
    neg_rgb_image = ~img_rgb_inrange
    cv2.imwrite('neg_img_rgb_inrange.png', neg_rgb_image)
    data = pytesseract.image_to_string(neg_rgb_image, lang='eng')
    return data
gives :
and the text as
GXuMuUZ
Is there any way to soften it a little
Here are two potential approaches and a method to correct distorted text:
Method #1: Morphological operations + contour filtering
Obtain binary image. Load image, grayscale, then Otsu's threshold.
Remove text contours. Create a rectangular kernel with cv2.getStructuringElement() and then perform morphological operations to remove noise.
Filter and remove small noise. Find contours and filter using contour area to remove small particles. We effectively remove the noise by filling in the contour with cv2.drawContours()
Perform OCR. We invert the image then apply a slight
Gaussian blur. We then OCR using Pytesseract with the --psm 6 configuration option to treat the image as a single block of text. Look at Tesseract improve quality for other methods to improve detection and Pytesseract configuration options for additional settings.
Input image -> Binary -> Morph opening
Contour area filtering -> Invert -> Apply blur to get result
Result from OCR
YabVzu
Code
import cv2
import pytesseract
import numpy as np

# Method 1: Otsu threshold -> morphological opening -> erase small contours
# -> invert and blur -> OCR.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Find contours and remove small noise
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50:
        # Fill the small contour with 0 (thickness -1 = filled) to erase it.
        cv2.drawContours(opening, [c], -1, 0, -1)
# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3,3), 0)
# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
Method #2: Color segmentation
With the observation that the desired text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert to HSV format and then color threshold to obtain a mask using a lower/upper color range. From here we use the same process to OCR with Pytesseract.
Input image -> Mask -> Result
Code
import cv2
import pytesseract
import numpy as np
# Method 2: HSV colour threshold -> invert the mask -> OCR.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('2.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Inclusive (H, S, V) bounds of the pixels to keep.
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Invert image and OCR
invert = 255 - mask
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('invert', invert)
cv2.waitKey()
Correcting distorted text
OCR works best when the image is horizontal. To ensure that the text is in an ideal format for OCR, we can perform a perspective transform. After removing all the noise to isolate the text, we can perform a morph close to combine individual text contours into a single contour. From here we can find the rotated bounding box using cv2.minAreaRect and then perform a four point perspective transform using imutils.perspective.four_point_transform. Continuing from the cleaned mask, here's the results:
Mask -> Morph close -> Detected rotated bounding box -> Result
Output with the other image
Updated code to include perspective transform
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

# HSV colour threshold -> morph close -> rotated bounding box -> four-point
# perspective transform of the inverted mask -> OCR.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)
# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find rotated bounding box then perspective transform
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
# np.int0 was an alias of np.intp and no longer exists in NumPy 2.0.
box = np.intp(box)
cv2.drawContours(image, [box], 0, (36,255,12), 2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))
# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)
cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
Note: The color threshold range was determined using this HSV threshold script
import cv2
import numpy as np


def nothing(x):
    """Do nothing; passed to cv2.createTrackbar as its change callback."""
    pass


# Interactive tool: six trackbars select a lower/upper HSV bound, the masked
# image is shown live and the bounds are printed whenever they change.
# Load image
image = cv2.imread('2.png')
# `image` is never modified below, so it is converted to HSV only once.
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Create a window
cv2.namedWindow('image')
# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)
# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)
# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
# Values seen on the previous iteration, used to detect changes.
phMin = psMin = pvMin = phMax = psMax = pvMax = 0
while True:
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')
    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])
    # Color threshold the HSV image and apply the mask to the original
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)
    # Print if there is a change in HSV value
    current = (hMin, sMin, vMin, hMax, sMax, vMax)
    if current != (phMin, psMin, pvMin, phMax, psMax, pvMax):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % current)
        phMin, psMin, pvMin, phMax, psMax, pvMax = current
    # Display result image
    cv2.imshow('image', result)
    # 'q' ends the loop
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()
Your code produces better results than this. Here, I set a threshold for upperb and lowerb values based on histogram CDF values and a threshold. Press ESC button to get next image.
This code is unnecessarily complex and needs to be optimized in various ways. Code can be reordered to skip some steps. I kept it as some parts may help others. Some existing noise can be removed by keeping contour with area above certain threshold. Any suggestions on other noise reduction method is welcome.
Similar easier code for getting 4 corner points for perspective transform can be found here,
Accurate corners detection?
Code Description:
Original Image
Median Filter (Noise Removal and ROI Identification)
OTSU Thresholding
Invert Image
Use Inverted Black and White Image as Mask to keep mostly ROI part of original image
Dilation for largest Contour finding
Mark the ROI by drawing rectangle and corner points in original image
Straighten the ROI and extract it
Median Filter
OTSU Thresholding
Invert Image for mask
Mask the straight image to remove most noise further to text
In Range is used with lowerb and upperb values from histogram cdf as mentioned above to further reduce noise
Maybe eroding the image at this step will produce somewhat acceptable result. Instead here that image is dilated again and used as a mask to get less noisy ROI from perspective transformed image.
Code:
## Press ESC button to get next image
import cv2
import cv2 as cv
import numpy as np
frame = cv2.imread('extra/c1.png')
#frame = cv2.imread('extra/c2.png')
## keeping a copy of original
print(frame.shape)
original_frame = frame.copy()
original_frame2 = frame.copy()
## Show the original image
winName = 'Original'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Apply median blur
frame = cv2.medianBlur(frame,9)
## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
cv.imshow(winName, img_gray & frame)
cv.waitKey(0)
## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Get largest contour from contours
contours, hierarchy = cv2.findContours(frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
## Get minimum area rectangle and corner points
rect = cv2.minAreaRect(max(contours, key = cv2.contourArea))
print(rect)
box = cv2.boxPoints(rect)
print(box)
## Sorted points by x and y
## Not used in this code
print(sorted(box , key=lambda k: [k[0], k[1]]))
## draw anchor points on corner
frame = original_frame.copy()
z = 6
for b in box:
cv2.circle(frame, tuple(b), z, 255, -1)
## show original image with corners
box2 = np.int0(box)
cv2.drawContours(frame,[box2],0,(0,0,255), 2)
cv2.imshow('Detected Corners',frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
## https://stackoverflow.com/questions/11627362/how-to-straighten-a-rotated-rectangle-area-of-an-image-using-opencv-in-python
def subimage(image, center, theta, width, height):
    """Rotate ``image`` about ``center`` and crop a ``width`` x ``height`` window.

    Args:
        image: Source image array; ``image.shape[0]`` is its height and
            ``image.shape[1]`` its width.
        center: ``(x, y)`` point used both as the rotation centre and as the
            centre of the cropped window.
        theta: Rotation angle passed to ``cv2.getRotationMatrix2D``.
        width: Width of the crop in pixels.
        height: Height of the crop in pixels.

    Returns:
        The cropped part of the rotated image. If the window extends past the
        image, only the part that lies inside the image is returned.
    """
    # cv2.warpAffine expects dsize as (width, height)
    shape = (image.shape[1], image.shape[0])
    matrix = cv2.getRotationMatrix2D(center=center, angle=theta, scale=1)
    rotated = cv2.warpAffine(src=image, M=matrix, dsize=shape)
    x = int(center[0] - width / 2)
    y = int(center[1] - height / 2)
    # A negative slice bound counts from the far edge in NumPy, which would
    # return the wrong region; clamp the window to the image origin instead.
    top, left = max(y, 0), max(x, 0)
    bottom, right = max(y + height, 0), max(x + width, 0)
    return rotated[top:bottom, left:right]
## Stage 2: rotate/crop the detected rectangle, then repeat the
## blur -> Otsu -> invert -> dilate -> mask sequence on the crop.
## Window for the rotated and cropped ROI (the title 'Dilate Image' is reused)
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
## use the calculated rectangle attributes to rotate and extract it
frame = subimage(original_frame, center=rect[0], theta=int(rect[2]), width=int(rect[1][0]), height=int(rect[1][1]))
## From here on original_frame refers to the extracted ROI, not the input image
original_frame = frame.copy()
cv.imshow(winName, frame)
cv.waitKey(0)
## Second copy of the extracted ROI, kept for the final display step
perspective_transformed_image = frame.copy()
## Apply median blur
frame = cv2.medianBlur(frame,11)
## Show the blurred ROI
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)
# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n
## Show the thresholded ROI
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## invert color
frame = cv2.bitwise_not(frame)
## Show the inverted ROI
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)
##
## Mask the grayscale ROI with the dilated image and show it
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
frame = img_gray & frame
## Every pixel that is 0 after masking is set to 255
frame[np.where(frame==0)] = 255
cv.imshow(winName, frame)
cv.waitKey(0)
## Stage 3: derive an intensity band [low_index, broken_index] from the
## cumulative histogram and keep only the pixels inside it.
## NOTE(review): the loop and if bodies below have lost their indentation in
## this copy; in the second loop it is ambiguous whether `past_val = cur_val`
## belongs inside the `if cur_val < past_val:` branch - confirm against the
## original before running.
hist,bins = np.histogram(frame.flatten(),256,[0,256])
cdf = hist.cumsum()
## Cumulative histogram rescaled so that its maximum equals hist.max()
cdf_normalized = cdf * hist.max()/ cdf.max()
print(cdf)
print(cdf_normalized)
## Copy of the image, used later for the histogram plot
hist_image = frame.copy()
## two decreasing range algorithm
## low_index: first intensity whose cumulative count is above 0
low_index = -1
for i in range(0, 256):
if cdf[i] > 0:
low_index = i
break
print(low_index)
## broken_index: set to the loop index once `tol` exceeds `tol_limit`;
## stays -1 if that never happens
tol = 0
tol_limit = 20
broken_index = -1
past_val = cdf[low_index] - cdf[low_index + 1]
for i in range(low_index + 1, 255):
cur_val = cdf[i] - cdf[i+1]
if tol > tol_limit:
broken_index = i
break
if cur_val < past_val:
tol += 1
past_val = cur_val
print(broken_index)
##
## lower/upper are only referenced by the commented-out inRange call below
lower = min(frame.flatten())
upper = max(frame.flatten())
print(min(frame.flatten()))
print(max(frame.flatten()))
#img_rgb_inrange = cv2.inRange(frame_HSV, np.array([lower,lower,lower]), np.array([upper,upper,upper]))
## 255 where low_index <= pixel <= broken_index, then inverted
img_rgb_inrange = cv2.inRange(frame, (low_index), (broken_index))
neg_rgb_image = ~img_rgb_inrange
## Stage 4: show the clamped image, erode it, OR it onto the extracted ROI
## and plot the histogram.
## Show the inverted in-range mask
winName = 'Final'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, neg_rgb_image)
cv.waitKey(0)
## NOTE: the window is titled 'Final Dilate' but the operation is cv2.erode
kernel = np.ones((3,3),np.uint8)
frame = cv2.erode(neg_rgb_image,kernel,iterations = 1)
winName = 'Final Dilate'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)
##
winName = 'Final Subtracted'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
## Copy the single-channel result into all three channels so it can be
## combined with the colour ROI
img2 = np.zeros_like(perspective_transformed_image)
img2[:,:,0] = frame
img2[:,:,1] = frame
img2[:,:,2] = frame
frame = img2
## Bitwise OR of the colour ROI with the three-channel mask
cv.imshow(winName, perspective_transformed_image | frame)
cv.waitKey(0)
##
## Plot the rescaled cumulative histogram together with the histogram itself
import matplotlib.pyplot as plt
plt.plot(cdf_normalized, color = 'b')
plt.hist(hist_image.flatten(),256,[0,256], color = 'r')
plt.xlim([0,256])
plt.legend(('cdf','histogram'), loc = 'upper left')
plt.show()
1. Median Filter:
2. OTSU Threshold:
3. Invert:
4. Inverted Image Dilation:
5. Extract by Masking:
6. ROI points for transform:
7. Perspective Corrected Image:
8. Median Blur:
9. OTSU Threshold:
10. Inverted Image:
11. ROI Extraction:
12. Clamping:
13. Dilation:
14. Final ROI:
15. Histogram plot of step 11 image:
Didn't try , but this might work.
step 1:
Use an image editor such as Photoshop ("ps") to find out what color the captcha characters are. For example, "YabVzu" is (128,128,128),
Step 2:
Use Pillow's method getdata()/getcolor(); it will return a sequence which contains the colour of every pixel.
Then we map every item in the sequence back to the original captcha image.
Hence we know the position of every pixel in the image.
Step 3:
find all pixels whose colour with the most approximate values to (128,128,128).
You may set a threshold to control the accuracy. this step return another sequence.
Lets annotate it as Seq a
Step 4:
generate a picture with the very same height and width as the original one.
Plot every pixel in [Seq a] at its exact position in the picture. Here, we will get cleaned training items.
Step 5:
Use a Keras project to break the code. The precision should be over 72%.