Difficulty detecting digits with tesseract - python

I'm having some difficulty detecting text on the following type of image:
It seems that tesseract has difficulty distinguishing the numbers from the diagrams. And my goal is to find every digits and their location.
From this image I run the following code which is supposed to give me rectangles around text found :
import cv2
import pytesseract
from pytesseract import Output
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
img = cv2.imread('Temp/VE_cropped.png')
kernel = np.ones((2,2),np.uint8)
img_processed = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_processed = cv2.medianBlur(img_processed,3)
img_processed = cv2.threshold(img_processed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
img_processed = cv2.dilate(img_processed, kernel, iterations = 1)
dict_wordsDetected = pytesseract.image_to_data(img_processed, output_type=Output.DICT)
img_processed = cv2.cvtColor(img_processed, cv2.COLOR_GRAY2RGB)
n_boxes = len(dict_wordsDetected['text'])
for i in range(n_boxes):
(x, y, w, h) = (dict_wordsDetected['left'][i]
, dict_wordsDetected['top'][i]
, dict_wordsDetected['width'][i]
, dict_wordsDetected['height'][i])
img_processed = cv2.rectangle(img_processed, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 0, 255), 2)
cv2.imshow("processed", img_processed)
cv2.waitKey(0)
What gives us this result :

I think that I understood what you wanted. First of all, Tesseract works well for many problems, especially when we see examples with images that are easily OCR'ed. That means, images without a complex background. In your case, the image is not simple enough to be treated using just Tesseract or image thresholding. You must do more image preprocessing to OCR your image. To solve your problem, you must clean your image, trying to obtain just the numbers. It can be hard work.
Recently, I was looking for a code to apply OCR to an image with a complex background. I found some solutions. The code that I'll show you is based on this solution.
To extract the number (or try), you must follow some steps
convert your image into the gray scale
apply image threshold using Otsu method and inverse operation
apply distance transform
apply morphological operation to clean up small points in your image
apply dilate operation to enlarge your numbers
find contours and filter them according the width and height of each contours
create a list of hull objects to each contour
draw the hull objects
using dilate operation in your mask
bitwise operation to retrieval the the segmented areas
OCR the pre-processed image
print out your results
The code that I present here is not perfect and, I think that it can be improved, but I want to show you a start point for your problem resolution.
import cv2
import pytesseract
from pytesseract import Output
import numpy as np
import imutils
# loading and resizing image
img = cv2.imread('ABV5H.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = imutils.resize(img, width=900)
#gray scale
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
cv2.imshow("Gray", gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
# thresholding with Otsu method and inverse operation
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV |
cv2.THRESH_OTSU)[1]
cv2.imshow("Threshold", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
#distrance transform
dist = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
dist = cv2.normalize(dist, dist, 0, 1.0, cv2.NORM_MINMAX)
dist = (dist*255).astype('uint8')
dist = cv2.threshold(dist, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imshow("Distance Transformation", dist)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Morphological operation kernel (2,2) and OPEN method
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (2,2))
opening = cv2.morphologyEx(dist, cv2.MORPH_OPEN, kernel)
cv2.imshow("Morphology", opening)
cv2.imwrite("morphology.jpg", opening)
cv2.waitKey(0)
cv2.destroyAllWindows()
#dilate operation to enlarge the numbers
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3,3))
dilation = cv2.dilate(opening, kernel, iterations = 1)
cv2.imshow("dilated", dilation)
cv2.imwrite("dilated.jpg", dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()
#finding and grabbing the contours
cnts = cv2.findContours(dilation.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
output = img.copy()
for i in cnts:
cv2.drawContours(output, [i], -1, (0, 0, 255), 3)
cv2.imshow("Contours", output)
cv2.imwrite("contours.jpg", dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()
#filtering the contours
nums = []
output2 = img.copy()
for c in cnts:
(x, y, w, h) = cv2.boundingRect(c)
if w >= 5 and w < 75 and h > 15 and h <= 35:
nums.append(c)
for i in nums:
cv2.drawContours(output2, [i], -1, (0, 0, 255), 2)
cv2.imshow("Filter", output2)
cv2.imwrite("filter.jpg", output2)
cv2.waitKey(0)
cv2.destroyAllWindows()
# making a list with the hull points
hull = []
# calculate points for each contour
for i in range(len(nums)):
# creating convex hull object for each contour
hull.append(cv2.convexHull(nums[i], False))
# create an empty black image
mask = np.zeros(dilation.shape[:2], dtype='uint8')
# draw contours and hull points
for i in range(len(nums)):
color = (255, 0, 0) # blue - color for convex hull
# draw ith convex hull object
cv2.drawContours(mask, hull, i, color, 1, 8)
#dilating the mask to have a proper image for bitwise
mask = cv2.dilate(mask, kernel, iterations = 15)
cv2.imshow("Dilated Mask", mask)
cv2.imwrite("dilated-mask.jpg", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
#bitwise operation
final = cv2.bitwise_and(dilation, dilation, mask=mask)
cv2.imshow("Pre-processed Image", final)
cv2.imwrite("pre-processed.jpg", final)
cv2.waitKey(0)
cv2.destroyAllWindows()
config = '--psm 12 -c tessedit_char_whitelist=0123456789' #page segmentation mode and white lists
#OCR'ing the image
dict_wordsDetected = pytesseract.image_to_data(final, config = config,
output_type=Output.DICT)
#filtering the detections and making a list of index
index = []
for idx, txt in enumerate(dict_wordsDetected['text']):
if len(txt) >= 1:
dict_wordsDetected['text'][idx] = txt.replace(" ", "")
index.append(idx)
for i in index:
(x, y, w, h) = (dict_wordsDetected['left'][i]
, dict_wordsDetected['top'][i]
, dict_wordsDetected['width'][i]
, dict_wordsDetected['height'][i])
img_processed = cv2.rectangle(img, (x - 10, y - 10), (x + w + 10, y + h + 10), (0, 0, 255), 2)
text = "{}".format(dict_wordsDetected['text'][i])
cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
cv2.imshow("Voilà le résultat", img)
cv2.imwrite('result.jpg', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Visualizing some operations
(I cannot upload my images for the moment. There are some hyperlinks with images. These images correspond to some image pre-processing steps)
Output image after dilation:
filtered contours:
Mask after the hull operation and dilation:
pre-processed image (the image that will be OCR'ed:
The results
Results
As you can see, we can find numbers in the input image. We have good detection. On the other hand, we also have inaccurate outputs. The main reason is the image preprocessing. The image is noisy, even if we have performed some transformations. The key to your problem is image preprocessing. Another point you must keep in mind is that Tesseract is not perfect; it requires good images to work well. Beyond that, you must know the --psm modes (page segmentation) to improve your OCR, as well as using white lists to avoid undesirable detection. As I said, we have good results, but I guess you can improve them if your task requires just OpenCV and Tesseract. Because there are others that are way less complicated than this one.
Si tu as besoin d'aide, tu peux me contacter, je préfère parler français que l'anglais.

Related

Edge Detection for high resolution pictures

I am trying to locate three objects in my image and crop the sherd out. Any way that I can detect the edges better? This is the code I use to detect objects.
def getEdgedImg(img):
kernel = np.ones((3,3), np.uint8)
eroded = cv2.erode(img, kernel)
blur = cv2.medianBlur(eroded, 3)
med_val = np.median(eroded)
lower = int(max(0, 0.5*med_val))
upper = int(min(255, 1.3*med_val))
edged = cv2.Canny(blur, lower, upper)
return edged
edged = getEdgedImg(img)
_, contours= cv2.findContours(edged ,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE
for cnt in contours:
if cv2.contourArea(colour_cnts[i]) > 400:
x, y, w, h = cv2.boundingRect(colour_cnts[i])
cv2.rectangle(img2, (x, y), (x+w, y+h), (0, 255, 0), 2)
cv2.imshow('hi', img)
cv2.waitKey(0)
I am currently doing image processing in my raw images. I have been looking for a few methods to improve the results but still, it doesn't work very well for some photos.
edge detected:
Had a shot at it, but as expected the weak background contrast is giving trouble, as does the directed lighting. Just checking the stone right now, but the script should give you the tools to find the two reference cards as well. If you want to show the intermediate images, see the comments in the script.
Do you have the original image in another format then JPG by chance ? The color compression in the file is really not helping with extraction.
import cv2
# get image
img = cv2.imread("<YouPathHere>")
# extract stone shadow in RGB
chB, chG, chR = cv2.split(img)
threshShadow = 48
imgChanneldiff = chR-chB
imgshadow = cv2.threshold(imgChanneldiff,threshShadow,255,cv2.THRESH_BINARY)[1]
#cv2.namedWindow("imgshadow", cv2.WINDOW_NORMAL)
#cv2.imshow("imgshadow", imgshadow)
# extract stone together with shadow in HSV
imgHSV = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
threshHue = 10
chH, chS, chV = cv2.split(imgHSV)
imgbin = cv2.threshold(chH,threshHue,255,cv2.THRESH_BINARY)[1]
#cv2.namedWindow("imgbin", cv2.WINDOW_NORMAL)
#cv2.imshow("imgbin", imgbin)
imgResultMask = imgbin - imgshadow
MorphKernelSize = 25;
imgResultMask = cv2.erode(imgResultMask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, [MorphKernelSize,MorphKernelSize]))
imgResultMask = cv2.dilate(imgResultMask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, [MorphKernelSize,MorphKernelSize]))
cv2.namedWindow("imgResultMask", cv2.WINDOW_NORMAL)
cv2.imshow("imgResultMask", imgResultMask)
contours = cv2.findContours(imgResultMask,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)[0]
cv2.drawContours(img, contours, -1, (255,125,0), 3)
for c in contours:
cv2.rectangle(img, cv2.boundingRect(c), (0,125,255))
cv2.namedWindow("img", cv2.WINDOW_NORMAL)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Python/OpenCV — Intelligent Centroid Tracking in Bacterial Images?

I'm currently working on an algorithm to detect bacterial centroids in microscopy images.
This question is a continuation of: OpenCV/Python — Matching Centroid Points of Bacteria in Two Images: Python/OpenCV — Matching Centroid Points of Bacteria in Two Images
I am using a modified version of the program proposed by Rahul Kedia.
https://stackoverflow.com/a/63049277/13696853
Currently, the issues in segmentation I am working on are:
Low Contrast
Clustering
The images below are sampled a second apart. However, in the latter image, one of the bacteria does not get detected.
Bright-field Image #1
Bright-Field Image #2
Bright-Field Contour Image #1
Bright-Field Contour Image #2
Bright-Field Image #1 (Unsegmented)
Bright-Field Image #2 (Unsegmented)
I want to know, given that I can successfully determine bacterial centroids in an image, can I use the data to intelligently look for the same bacteria in the subsequent image?
I haven't been able to find anything substantial online; I believe SIFT/SURF would likely be ineffective as the bacteria have the same appearance. Moreover, I am looking for specific points in the images. You can view my program below. Insert a specific path as indicated if you'd like to run the program.
import cv2
import numpy as np
import os
kernel = np.array([[0, 0, 1, 0, 0],
[0, 1, 1, 1, 0],
[1, 1, 1, 1, 1],
[0, 1, 1, 1, 0],
[0, 0, 1, 0, 0]], dtype=np.uint8)
def e_d(image, it):
image = cv2.erode(image, kernel, iterations=it)
image = cv2.dilate(image, kernel, iterations=it)
return image
path = r"[INSERT PATH]"
img_files = [file for file in os.listdir(path)]
def segment_index(index: int):
segment_file(img_files[index])
def segment_file(img_file: str):
img_path = path + "\\" + img_file
print(img_path)
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Applying adaptive mean thresholding
th = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2)
# Removing small noise
th = e_d(th.copy(), 1)
# Finding contours with RETR_EXTERNAL flag and removing undesired contours and
# drawing them on a new image.
cnt, hie = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cntImg = th.copy()
for contour in cnt:
x, y, w, h = cv2.boundingRect(contour)
# Eliminating the contour if its width is more than half of image width
# (bacteria will not be that big).
if w > img.shape[1] / 2:
continue
cntImg = cv2.drawContours(cntImg, [cv2.convexHull(contour)], -1, 255, -1)
# Removing almost all the remaining noise.
# (Some big circular noise will remain along with bacteria contours)
cntImg = e_d(cntImg, 3)
# Finding new filtered contours again
cnt2, hie2 = cv2.findContours(cntImg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Now eliminating circular type noise contours by comparing each contour's
# extent of overlap with its enclosing circle.
finalContours = [] # This will contain the final bacteria contours
for contour in cnt2:
# Finding minimum enclosing circle
(x, y), radius = cv2.minEnclosingCircle(contour)
center = (int(x), int(y))
radius = int(radius)
# creating a image with only this circle drawn on it(filled with white colour)
circleImg = np.zeros(img.shape, dtype=np.uint8)
circleImg = cv2.circle(circleImg, center, radius, 255, -1)
# creating a image with only the contour drawn on it(filled with white colour)
contourImg = np.zeros(img.shape, dtype=np.uint8)
contourImg = cv2.drawContours(contourImg, [contour], -1, 255, -1)
# White pixels not common in both contour and circle will remain white
# else will become black.
union_inter = cv2.bitwise_xor(circleImg, contourImg)
# Finding ratio of the extent of overlap of contour to its enclosing circle.
# Smaller the ratio, more circular the contour.
ratio = np.sum(union_inter == 255) / np.sum(circleImg == 255)
# Storing only non circular contours(bacteria)
if ratio > 0.55:
finalContours.append(contour)
finalContours = np.asarray(finalContours)
# Finding center of bacteria and showing it.
bacteriaImg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for bacteria in finalContours:
M = cv2.moments(bacteria)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
bacteriaImg = cv2.circle(bacteriaImg, (cx, cy), 5, (0, 0, 255), -1)
cv2.imshow("bacteriaImg", bacteriaImg)
cv2.waitKey(0)
# Segment Each Image
for i in range(len(img_files)):
segment_index(i)
Edit #1: Applying frmw42's approach, this image seems to get lost. I have tried adjusting a number of parameters but the image does not seem to show up.
Bright-Field Image #3
Bright-Field Image #4
Here is my Python/OpenCV code to extract your bacteria. I simply threshold, then get the contours and draw filled contours for those within a certain area range. I will let you do any further processing that you want. I simply viewed each step to make sure I have tuned the arguments appropriately before moving to the next step.
Input 1:
Input 2:
import cv2
import numpy as np
# read image
#img = cv2.imread("bacteria1.png")
img = cv2.imread("bacteria2.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255 - gray
# do adaptive threshold on inverted gray image
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 21, 5)
result = np.zeros_like(img)
contours = cv2.findContours(thresh , cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 600 and area < 1100:
cv2.drawContours(result, [cntr], 0, (255,255,255), -1)
# write results to disk
#cv2.imwrite("bacteria_filled_contours1.png", result)
cv2.imwrite("bacteria_filled_contours2.png", result)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("result", result)
cv2.waitKey(0)
Result 1:
Result 2:
Adjust as desired.
It would seem that adaptive threshold is not able to handle all your various images. I suspect nothing simple will. You may need to use AI with training. Nevertheless, this works for your images: 1, 2 and 4 in Python/OpenCV. I make no guarantee that it will work for any of your other images.
First I found a simple threshold that seems to work, but brings in other regions. So since all your bacteria have similar shapes and range of orientations, I fit and ellipse to your bacteria and get the orientation of the major axis and filter the contours with area and angle.
import cv2
import numpy as np
# read image
#img = cv2.imread("bacteria1.png")
#img = cv2.imread("bacteria2.png")
img = cv2.imread("bacteria4.png")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = 255 - gray
# median filter
#gray = cv2.medianBlur(gray, 1)
# do simple threshold on inverted gray image
thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]
result = np.zeros_like(img)
contours = cv2.findContours(thresh , cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 600 and area < 1100:
ellipse = cv2.fitEllipse(cntr)
(xc,yc),(d1,d2),angle = ellipse
if angle > 90:
angle = angle - 90
else:
angle = angle + 90
print(angle,area)
if angle >= 150 and angle <= 250:
cv2.drawContours(result, [cntr], 0, (255,255,255), -1)
# write results to disk
#cv2.imwrite("bacteria_filled_contours1.png", result)
#cv2.imwrite("bacteria_filled_contours2.png", result)
cv2.imwrite("bacteria_filled_contours4.png", result)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("result", result)
cv2.waitKey(0)
Result for image 1:
Result for image 2:
Result for image 4:
You might explore noise reduction before thresholding. I had some success with using some of ImageMagick tools and there is a Python version called Python Wand that uses ImageMagick.

How to perform image segmentation of apples using Python OpenCV?

I have pictures of apple slices that have been soaked in an iodine solution. The goal is to segment the apples into individual regions of interest and evaluate the starch level of each one. This is for a school project so my goal is to test different methods of segmentation and objectively find the best solution whether it be a single technique or a combination of multiple techniques.
The problem is that so far I have only come close on one method. That method is using HoughCircles. I had originally planned to use the Watershed method, Morphological operations, or simple thresholding. This plan failed when I couldn't modify any of them to work.
The original images look similar to this, with varying shades of darkness of the apple
I've tried removing the background tray using cv2.inRange with HSV values, but it doesn't work well with darker apples.
This is what the HoughCircles produced on the original image with a grayscale and median blur applied, also with an attempted mask of the tray.
Any advice or direction on where to look next would be greatly appreciated. I can supply the code I'm using if that will help.
Thank you!
EDIT 1 : Adding some code and clarifying the question
Thank you for the responses. My real question is are there any other methods of segmentation that this scenario lends itself well to? I would like to try a couple different methods and compare results on a large set of photos. My next in line to try is using k-means segmentation. Also I'll add some code below to show what I've tried so far.
HSV COLOR FILTERING
import cv2
import numpy as np
# Load image
image = cv2.imread('ApplePic.jpg')
# Set minimum and max HSV values to display
lower = np.array([0, 0, 0])
upper = np.array([105, 200, 255])
# Create HSV Image and threshold into a range.
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)
maskedImage = cv2.bitwise_and(image, image, mask=mask)
# Show Image
cv2.imshow('HSV Mask', image)
cv2.waitKey(0)
HoughCircles
# import the necessary packages
import numpy as np
import argparse
import cv2
import os
directory = os.fsencode('Photos\\Sample N 100')
for file in os.listdir(directory):
filename = os.fsdecode(file)
if filename.endswith('.jpg'):
# Load the image
image = cv2.imread('Photos\\Sample N 100\\' + filename)
# Calculate scale
scale_factor = 800 / image.shape[0]
width = int(image.shape[1] * scale_factor)
height = 800
dimension = (width, height)
min_radius = int((width / 10) * .8)
max_radius = int((width / 10) * 1.2)
# Resize image
image = cv2.resize(image, dimension, interpolation=cv2.INTER_AREA)
# Copy Image
output = image.copy()
# Grayscale Image
gray = cv2.medianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 5)
# Detect circles in image
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, min_radius * 2, 4, 60, 20, min_radius, max_radius)
# ensure at least some circles were found
if circles is not None:
# convert the (x, y) coordinates and radius of the circles to integers
circles = np.round(circles[0, :]).astype("int")
# loop over the (x, y) coordinates and radius of the circles
for (x, y, r) in circles:
# draw the circle in the output image, then draw a rectangle
# corresponding to the center of the circle
cv2.circle(output, (x, y), r, (0, 255, 0), 4)
cv2.rectangle(output, (x - 5, y - 5), (x + 5, y + 5), (0, 128, 255), -1)
cv2.putText(output, '(' + str(x) + ',' + str(y) + ',' + str(r) + ')', (x, y),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, 255)
# show the output image
cv2.imshow("output", np.hstack([image, output, maskedImage]))
cv2.waitKey(0)
continue
else:
continue
An alternative approach to segmenting the apples is to perform Kmeans color segmentation before thresholding then using contour filtering to isolate the apple objects:
Apply Kmeans color segmentation. We load the image, resize smaller using imutils.resize then apply Kmeans color segmentation. Depending on the number of clusters, we can segment the image into the desired number of colors.
Obtain binary image. Next we convert to grayscale, Gaussian blur and Otsu's threshold.
Filter using contour approximation. We filter out non-circle contours and small noise.
Morphological operations. We perform a morph close to fill adjacent contours
Draw minimum enclosing circles using contour area as filter. We find contours and draw the approximated circles. For this we use two sections, one where there was a good threshold and another where we approximate the radius.
Kmeans color quantization with clusters=3 and binary image
Morph close and result
The "good" contours that had the radius automatically calculated using cv2.minEnclosingCircle is highlighted in green while the approximated contours are highlighted in teal. These approximated contours were not segmented well from the thresholding process so we average the "good" contours radius and use that to draw the circle.
Code
import cv2
import numpy as np
import imutils
# Kmeans color segmentation
def kmeans_color_quantization(image, clusters=8, rounds=1):
h, w = image.shape[:2]
samples = np.zeros([h*w,3], dtype=np.float32)
count = 0
for x in range(h):
for y in range(w):
samples[count] = image[x][y]
count += 1
compactness, labels, centers = cv2.kmeans(samples,
clusters,
None,
(cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10000, 0.0001),
rounds,
cv2.KMEANS_RANDOM_CENTERS)
centers = np.uint8(centers)
res = centers[labels.flatten()]
return res.reshape((image.shape))
# Load image, resize smaller, perform kmeans, grayscale
# Apply Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=600)
kmeans = kmeans_color_quantization(image, clusters=3)
gray = cv2.cvtColor(kmeans, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Filter out contours not circle
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.04 * peri, True)
if len(approx) < 4:
cv2.drawContours(thresh, [c], -1, 0, -1)
# Morph close
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find contours and draw minimum enclosing circles
# using contour area as filter
approximated_radius = 63
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
x,y,w,h = cv2.boundingRect(c)
# Large circles
if area > 6000 and area < 15000:
((x, y), r) = cv2.minEnclosingCircle(c)
cv2.circle(image, (int(x), int(y)), int(r), (36, 255, 12), 2)
# Small circles
elif area > 1000 and area < 6000:
((x, y), r) = cv2.minEnclosingCircle(c)
cv2.circle(image, (int(x), int(y)), approximated_radius, (200, 255, 12), 2)
cv2.imshow('kmeans', kmeans)
cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('image', image)
cv2.waitKey()

How to detect edge if edges of an object has same colour as background

I am tasked to build a license plate detection system and my code does not work if the plate has the same colour of the paint of car (background).
Take a look at this picture below.
I have tried a variety of edge detection technique and my findings are they hardly work.
Here is my image processing pipeline:
Extract the gray channel from the image.
Reduce noise with Iterative Bilaterial Filtering
Detect edges with Adaptive Thresholding
Dilate the edges slightly
Locate contours based on some heuristics.
The edge detection part performed miserably around the plate region.
The pipeline works good and I am able to detect license plates if the car is has a different paint colour than the plate.
Code
def rectangleness(hull):
rect = cv2.boundingRect(hull)
rectPoints = np.array([[rect[0], rect[1]],
[rect[0] + rect[2], rect[1]],
[rect[0] + rect[2], rect[1] + rect[3]],
[rect[0], rect[1] + rect[3]]])
intersection_area = cv2.intersectConvexConvex(np.array(rectPoints), hull)[0]
rect_area = cv2.contourArea(rectPoints)
rectangleness = intersection_area/rect_area
return rectangleness
def preprocess(image):
image = imutils.resize(image, 1000)
# Attenuate shadows by using H channel instead of converting to gray directly
imgHSV = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
_, _, gray = cv2.split(imgHSV)
# Reduce noise while preserve edge with Iterative Bilaterial Filtering
blur = cv2.bilateralFilter(gray, 11, 6, 6)
# Detect edges by thresholding
edge = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 5)
# Dilate edges, kernel size cannot be too big as some fonts are very closed to the edge of the plates
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
dilated = cv2.dilate(edge, kernel)
# Detect contours
edge, contours, _ = cv2.findContours(dilated, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# Loop through contours and select the most probable ones
contours = sorted(contours, key = cv2.contourArea, reverse=True)[:10]
for contour in contours:
perimeter = cv2.arcLength(contour, closed=True)
approximate = cv2.approxPolyDP(contour, 0.02*perimeter, closed=True)
if len(approximate) == 4:
(x, y, w, h) = cv2.boundingRect(approximate)
whRatio = w / h
# Heuristics:
# 1. Width of plate should at least be 2x greater than height
# 2. Width of contour should be more than 5 (eliminate false positive)
# 3. Height must not be too small
# 4. Polygon must resemble a rectangle
if (2.0 < whRatio < 6.0) and (w > 5.0) and (h > 20):
hull = cv2.convexHull(approximate, returnPoints=True)
if rectangleness(hull) > 0.75:
print("X Y {} {}".format(x, y))
print("Height: {}".format(h))
print("Width : {}".format(w))
print("Ratio : {}\n".format(w/h))
cv2.drawContours(image, [approximate], -1, (0, 255, 0), 2)
cv2.imshow("Edge", edge)
cv2.imshow("Frame", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
You can use cv2.morphologyEx for making the plate region become more visible. Next step is to find contours and set reasonable conditions to extract the contour that contains the plate. If you want, you can have a look at this github repository where my friend and I show detailed steps about license plate detection and recognition.
import cv2
import numpy as np
img = cv2.imread("a.png")
imgBlurred = cv2.GaussianBlur(img, (7, 7), 0)
gray = cv2.cvtColor(imgBlurred, cv2.COLOR_BGR2GRAY) # convert to gray
sobelx = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3) # sobelX to get the vertical edges
ret,threshold_img = cv2.threshold(sobelx, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
morph_img_threshold = threshold_img.copy()
element = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(22, 3))
cv2.morphologyEx(src=threshold_img, op=cv2.MORPH_CLOSE, kernel=element,
dst=morph_img_threshold)
cv2.imshow("img", img)
cv2.imshow("sobelx", sobelx)
cv2.imshow("morph_img_threshold", morph_img_threshold)
cv2.waitKey()
cv2.destroyAllWindows()

Python OpenCV - Extrapolating the largest rectangle off of a set of contour points

I'm trying to make an OpenCV detect a bed in the image. I am running the usual Grayscale, Blur, Canny, and I've tried Convex Hull. However, since there's quite a number of "noise" which gives extra contours and messes up the object detection. Because of this, I am unable to detect the bed properly.
Here is the input image as well as the Canny Edge Detection result:
As you can see, it's almost there. I have the outline of the bed already, albeit, that the upper right corner has a gap - which is preventing me from detecting a closed rectangle.
Here's the code I'm running:
import cv2
import numpy as np
def contoursConvexHull(contours):
print("contours length = ", len(contours))
print("contours length of first item = ", len(contours[1]))
pts = []
for i in range(0, len(contours)):
for j in range(0, len(contours[i])):
pts.append(contours[i][j])
pts = np.array(pts)
result = cv2.convexHull(pts)
print(len(result))
return result
def auto_canny(image, sigma = 0.35):
# compute the mediam of the single channel pixel intensities
v = np.median(image)
# apply automatic Canny edge detection using the computed median
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) *v))
edged = cv2.Canny(image, lower, upper)
# return edged image
return edged
# Get our image in color mode (1)
src = cv2.imread("bed_cv.jpg", 1)
# Convert the color from BGR to Gray
srcGray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
# Use Gaussian Blur
srcBlur = cv2.GaussianBlur(srcGray, (3, 3), 0)
# ret is the returned value, otsu is an image
##ret, otsu = cv2.threshold(srcBlur, 0, 255,
## cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# Use canny
##srcCanny = cv2.Canny(srcBlur, ret, ret*2, 3)
srcCanny1 = auto_canny(srcBlur, 0.70)
# im is the output image
# contours is the contour list
# I forgot what hierarchy was
im, contours, hierarchy = cv2.findContours(srcCanny1,
cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
##cv2.drawContours(src, contours, -1, (0, 255, 0), 3)
ConvexHullPoints = contoursConvexHull(contours)
##cv2.polylines(src, [ConvexHullPoints], True, (0, 0, 255), 3)
cv2.imshow("Source", src)
cv2.imshow("Canny1", srcCanny1)
cv2.waitKey(0)
Since the contour of the bed isn't closed, I can't fit a rectangle nor detect the contour with the largest area.
The solution I can think of is to extrapolate the largest possible rectangle using the contour points in the hopes of bridging that small gap, but I'm not too sure how to proceed since the rectangle is incomplete.
Since you haven't provided any other examples, I provide an algorithm working with this case. But bare in mind that you will have to find ways of adapting it to however the light and background changes on other samples.
Since there is a lot of noise and a relatively high dynamic range, I suggest not to use Canny and instead use Adaptive Thresholding and Find Contours on that (it doesn't need edges as an input), that helps with choosing different threshold values for different parts of the image.
My result:
Code:
import cv2
import numpy as np
def clahe(img, clip_limit=2.0, grid_size=(8,8)):
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=grid_size)
return clahe.apply(img)
src = cv2.imread("bed.png")
# HSV thresholding to get rid of as much background as possible
hsv = cv2.cvtColor(src.copy(), cv2.COLOR_BGR2HSV)
lower_blue = np.array([0, 0, 120])
upper_blue = np.array([180, 38, 255])
mask = cv2.inRange(hsv, lower_blue, upper_blue)
result = cv2.bitwise_and(src, src, mask=mask)
b, g, r = cv2.split(result)
g = clahe(g, 5, (3, 3))
# Adaptive Thresholding to isolate the bed
img_blur = cv2.blur(g, (9, 9))
img_th = cv2.adaptiveThreshold(img_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 51, 2)
im, contours, hierarchy = cv2.findContours(img_th,
cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_SIMPLE)
# Filter the rectangle by choosing only the big ones
# and choose the brightest rectangle as the bed
max_brightness = 0
canvas = src.copy()
for cnt in contours:
rect = cv2.boundingRect(cnt)
x, y, w, h = rect
if w*h > 40000:
mask = np.zeros(src.shape, np.uint8)
mask[y:y+h, x:x+w] = src[y:y+h, x:x+w]
brightness = np.sum(mask)
if brightness > max_brightness:
brightest_rectangle = rect
max_brightness = brightness
cv2.imshow("mask", mask)
cv2.waitKey(0)
x, y, w, h = brightest_rectangle
cv2.rectangle(canvas, (x, y), (x+w, y+h), (0, 255, 0), 1)
cv2.imshow("canvas", canvas)
cv2.imwrite("result.jpg", canvas)
cv2.waitKey(0)

Categories