Segmenting circle/ellipses from OMR bubble sheet image - python

I am trying to extract information about a single bubble from OMR bubble sheet to determine if its correctly marked or not.
Here is the Original bubble sheet :-
What I did :-
Got Wrapped Perspective of the section containing bubbles (say, img_wrap) img_wrap
Converted img_wrap to Grayscale and applied adaptive threshold (say, img_thresh) img_thresh
For segmenting it further to just single bubble I tried Blob detector method given here
However, I did not get expected results from step3 and couldn't find any other method or workaround for this
Complete Code :-
import cv2
import numpy as np
def extract_rect(contours): #Extracting rectangular area containing the bubbles
rect_contours = []
for c in contours:
if cv2.contourArea(c) > 10000:
perimeter = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02*perimeter, True) #approximates a curve or a polygon with another curve/polygon with less vertices so that the distance between them is less or equal to the specified precision. Uses Douglas-Peucker algorithm
# if len(approx) == 4: #cv2.boundingRect seems to be automatically taking care of this
# rect_contours.append(c)
rect_contours.append(c)
rect_contours = sorted(rect_contours, key=cv2.contourArea,reverse=True)
return rect_contours
def findlen(p1, p2):
length = ((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)**0.5 # length = sqroot[(x2-x1)**2 + (y2-y1)**2]
return length
def rect_points(rect_contour): #Finding corner points of rectangle extracted to further find wrapped perspective of the same in step 1
perimeter = cv2.arcLength(rect_contour, True)
approx = cv2.approxPolyDP(rect_contour, 0.02*perimeter, True)
print("APPROX")
print(type(approx))
print(approx)
cv2.drawContours(img, approx, -1, (100,10,55), 18)
cv2.drawContours(img, rect_contour, -1, (100,10,55), 1)
x, y, w, h = cv2.boundingRect(rect_contour)
print("printing x y w h")
print(x, y, w, h)
# Corner points of the rectangle further used to be used to wrap the rectangular section
point_1 = np.array([x, y])
point_2 = np.array([x+w, y])
point_3 = np.array([x, y+h])
point_4 = np.array([w, h])
paper_length = findlen(point_1, point_3)
paper_breadth = findlen(point_1, point_2)
if paper_breadth > paper_length: # here breadth should be smaller than length
temp = point_1.copy()
point_1 = point_3.copy()
point_3 = point_4.copy()
point_4 = point_2.copy()
point_2 = temp
corner_list = np.ndarray(shape=(4,2), dtype=np.int32)
np.append(corner_list, point_1)
np.append(corner_list, point_2)
np.append(corner_list, point_3)
np.append(corner_list, point_4)
print("corners list")
print(corner_list)
new_cornerlist = np.zeros((4, 1, 2), np.int32)
add = corner_list.sum(1)
# print(add)
# print(np.argmax(add))
new_cornerlist[0] = corner_list[np.argmin(add)] #[0,0]
new_cornerlist[3] = corner_list[np.argmax(add)] #[w,h]
diff = np.diff(corner_list, axis=1)
new_cornerlist[1] = corner_list[np.argmin(diff)] #[w,0]
new_cornerlist[2] = corner_list[np.argmax(diff)] #[h,0]
print(new_cornerlist.shape)
return new_cornerlist
def detect_blob(image): #Part of step 3
# Set our filtering parameters
# Initialize parameter setting using cv2.SimpleBlobDetector
params = cv2.SimpleBlobDetector_Params()
# Set Area filtering parameters
params.filterByArea = True
params.minArea = 90
# Set Circularity filtering parameters
params.filterByCircularity = True
params.minCircularity = 0.2
# Set Convexity filtering parameters
params.filterByConvexity = True
params.minConvexity = 0.7
# Set inertia filtering parameters
params.filterByInertia = True
params.minInertiaRatio = 0.05
# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs
keypoints = detector.detect(image)
# Draw blobs on our image as red circles
blank = np.zeros((1, 1))
blobs = cv2.drawKeypoints(image, keypoints, blank, (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
number_of_blobs = len(keypoints)
text = "Number of Circular Blobs: " + str(len(keypoints))
cv2.putText(blobs, text, (20, 550), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 100, 255), 2)
cv2.imshow("Filtering Circular Blobs Only", blobs)
cv2.waitKey(0)
img_path = 'bubblesheet.jpg' #Path of the input bubble image
img = cv2.imread(img_path)
print(img.shape) # Original size is 1600 * 1200 and 3 color channels
img_width = 600
img_height = 600
img = cv2.resize(img, (img_width, img_height), interpolation=cv2.INTER_AREA)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # grayscaled image Grayscaling bcoz further for edge detection we only require intensity info., and similar intensity pixels to detect the contours
img_blur = cv2.GaussianBlur(img_gray, (5,5), 0) # blurred image
cv2.imshow('Blurred', img_blur)
img_canny = cv2.Canny(img_blur, 20, 110) # Edge detection on processed image using Canny edge detection , binary thresholding could have been an alternative (i.e If the pixel value is smaller than the threshold, it is set to 0, otherwise it is set to a maximum value. )
cv2.imshow('Edge detection', img_canny)
contours, heirarchy = cv2.findContours(img_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) # Find contours.
img_contours = img.copy()
cv2.drawContours(img_contours, contours, -1, (0,255,0), 1) #parameters are (image, contours, countour_idx, contour_color, contour_thickness) . contour_idx is -1 for all contours
cv2.imshow('Contours', img_contours)
rect_contours = extract_rect(contours)
cv2.drawContours(img, rect_contours[1], -1, (0,255,0), 1)
wrap_points = rect_points(rect_contours[2])
if wrap_points.size != 0:
cv2.drawContours(img, wrap_points, -1, (0,0,255), 12)
wrap_points = rect_points(rect_contours[2])
cv2.drawContours(img, wrap_points, -1, (0,0,255), 12)
warp_img_width = int(img_width)
warp_img_height = int(img_height)
warp_from = np.float32(wrap_points)
warp_to = np.float32([[0,0], [warp_img_width, 0], [0, warp_img_height], [warp_img_width, warp_img_height]])
transformation_matrix = cv2.getPerspectiveTransform(warp_from, warp_to)
img_warp = cv2.warpPerspective(img, transformation_matrix, (warp_img_height, warp_img_height))
cv2.imshow('Wrapped Perspective', img_warp)
img_warp_gray = cv2.cvtColor(img_warp, cv2.COLOR_BGR2GRAY)
#Using adaptive threshold to get the best threshold value
img_thresh = cv2.adaptiveThreshold(img_warp_gray, 200, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
cv2.imshow('img_thresh', img_thresh)
detect_blob(img_thresh) #Attempt to Extract each bubble from the image
cv2.imshow('Original', img)
cv2.waitKey(0)

Related

How to deal with DICOM by OpenCV FindContours with ambiguous edge

I want to segment the left auricle DICOM image for the calculation of cv2.minAreaRect.
I use cv2.GaussianBlur to filtter noise first, then use cv2.kmeans to segment the image as mask.
Afterwards, I use cv2.Canny and cv2.findContours to find the edge I want.
But after I apply cv2.minAreaRect to the contours, I get many small rectangles.
This isn't what I expect, I want to find the whole minimum bounding rectangle as below.
The following is my code.
import cv2
import numpy as np
# read input and convert to range 0-1
image = cv2.imread('1.jpg')
image = cv2.GaussianBlur(image, (15, 15), 0)
h, w, c = image.shape
# reshape to 1D array
image_2d = image.reshape(h*w, c).astype(np.float32)
# set number of colors
numcolors = 2
numiters = 10
epsilon = 1
attempts = 10
# do kmeans processing
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, numiters, epsilon)
ret, labels, centers = cv2.kmeans(image_2d, numcolors, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)
# reconstitute 2D image of results
centers = np.uint8(centers)
newimage = centers[labels.flatten()]
newimage = newimage.reshape(image.shape)
cv2.imshow('new image', newimage)
cv2.waitKey(0)
k = 0
for center in centers:
# select color and create mask
layer = newimage.copy()
mask = cv2.inRange(layer, center, center)
#print(mask[203][130])
# apply mask to layer
layer[mask == 0] = [0,0,100]
#cv2.imshow('layer', layer)
#cv2.waitKey(0)
num = 0
# save kmeans clustered image and layer
if(k == 0):
edges = cv2.Canny(mask, 1, 10)
contours, hierarchy = cv2.findContours(edges,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)[-2:]
raw_img3 = cv2.drawContours(image.copy(), contours, -1, (0, 0, 255), 3)
cnts = contours
cv2.imshow('Canny', raw_img3)
cv2.waitKey(0)
for cnt in cnts:
# find minimum bounding rectangle
rect = cv2.minAreaRect(cnt)
box2 = cv2.boxPoints(rect)
box2 = np.int0(box2)
aa = cv2.drawContours(image.copy(), [box2], 0, (255, 0, 0), 4)
cv2.imshow('Canny', aa)
cv2.waitKey(0)
k = k + 1

How to detect and crop character from an image having some bounding lines in python?

I'm having an image and i want to crop only the number inside with out the lines bounding around image. Here is the sample image:
As you can see there is 2 lines on the right and bottom of the image, i want to crop only number 8, if nothing inside, i will return None for the result, but with my code, it also return the area which include these line. So is there anyway to fix this?
Result i got:
Or with empty image, i got:
Here is my code:
import cv2
def process(image, readFile=True, returnType='binary'):
out_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Performing OTSU threshold
ret, thresh1 = cv2.threshold(out_gray, 220, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)
# Specify structure shape and kernel size.
# Kernel size increases or decreases the area
# of the rectangle to be detected.
# A smaller value like (10, 10) will detect
# each word instead of a sentence.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8,8))
# Appplying dilation on the threshold image
dilation = cv2.dilate(thresh1, rect_kernel, iterations = 1)
# Finding contours
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if len(contours) <= 0:
return False
contours, boundingBoxes = sort_contours(contours, 'left-to-right')
# Creating a copy of image
im2 = img.copy()
idata = 0
# Looping through the identified contours
# Then rectangular part is cropped and passed on
# to pytesseract for extracting text from it
# Extracted text is then written into the text file
i = 1
minHeight = 10
minWidth = 10
result = []
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
# Drawing a rectangle on copied image
rect = cv2.rectangle(im2, (x, (y-1)), (x + w, y + h + 1), (0, 255, 0), 1)
cv2.imwrite(f'{constant.__RESULT_PATH}/cutted123withbounding.png', im2)
# Cropping the text block for giving input to OCR
cropped = im2[y:y + h, x:x + w]
if w >= minWidth and h >= minHeight:
result.append(thresh1)
i = i + 1
return result

Human fingernail segmanation without training a model

I want to mask human fingernails (fingernails white and everything including the hand is black). I do simple image operations then Canny edge detection after I smoothen the image then find contours to give internal contours white color which would be fingernails.
My problem is that when fingernails are painted it is quite easy to detect however when there is no paint it becomes really complicated and the program has to get 50 images and save outputs to a certain folder.
I am confused about how to proceed, if anybody did something similar I would appreciate some help.
import cv2
import numpy as np
import matplotlib.pyplot as plt
def display_img(img):
fig = plt.figure(figsize = (12,10))
ax = fig.add_subplot(111)
plt.imshow(img,cmap='gray')
img = cv2.imread('nail2.png')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
blur = cv2.blur(gray,ksize=(1,1))
kernel = np.ones((5,5),np.uint8)
display_img(blur)
med = np.median(gray)
gradient = cv2.Laplacian(blur,cv2.CV_64F)
gradient = cv2.convertScaleAbs(gradient)
plt.imshow(gradient,'gray')
lower = int(max(0,0.7*med))
upper = int(min(255,1.3*med))
edges = cv2.Canny(blur,lower,upper)
display_img(edges)
edges = cv2.GaussianBlur(edges, (11, 11), 0) # smoothing before applying threshold
display_img(edges)
image, contours, hierarchy = cv2.findContours(edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# Create empty array to hold internal contours
image_internal = np.zeros(image.shape)
# Iterate through list of contour arrays
for i in range(len(contours)):
# If third column value is NOT equal to -1 than its internal
if hierarchy[0][i][3] != -1:
# Draw the Contour
cv2.drawContours(image_internal, contours, i, 255, -1)
display_img(image_internal)
below is a good result:
some bad result even though fingers have pink paint:
Well, you have a big light and scale problem in these two images. But a possible solution is to segment the color channels and look for blobs.
Then you can segment with blob params.
The code you can try here:
import cv2
import numpy as np
fra = cv2.imread('nails.png')
height, width, channels = fra.shape
src = cv2.medianBlur(fra, 21)
hsv = cv2.cvtColor(src, cv2.COLOR_BGR2HSV_FULL)
mask = cv2.inRange(hsv, np.array([0, 0, 131]), np.array([62, 105, 255]))
mask = cv2.erode(mask, None, iterations=8)
mask = cv2.dilate(mask, None, iterations=8)
params = cv2.SimpleBlobDetector_Params()
params.filterByArea = True
params.minArea = int((height * width) / 500)
params.maxArea = int((height * width) / 10)
params.filterByCircularity = True
params.minCircularity = 0.5
params.filterByConvexity = True
params.minConvexity = 0.5
params.filterByInertia = True
params.minInertiaRatio = 0.01
detector = cv2.SimpleBlobDetector_create(params)
key_points = detector.detect(255 - mask)
vis = cv2.bitwise_and(hsv, hsv, mask=mask)
vis = cv2.addWeighted(src, 0.2, vis, 0.8, 0)
cv2.drawKeypoints(vis, key_points, vis, (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
for kp in key_points:
cv2.drawMarker(vis, (int(kp.pt[0]), int(kp.pt[1])), color=(0, 255, 0), markerType=cv2.MARKER_CROSS, thickness=3)
cv2.imshow("VIS", vis)
cv2.imwrite('nails_detected.png', vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
Good luck!

pytesseract detects the wrong integer values

I'm trying to detects the numbers found in my sqares, and I thought I could use the libary pytesseract, but for some reason I read the wrong values.
This is the console output:
And here I have all my pictures (they are seperated, this is just to show them all)
import numpy as np
import cv2
import re
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
lower = (0,240,160)
upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result1 = img.copy()
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
cv2.drawContours(result1,[c],0,(0,0,0),2)
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
# draw rotated rectangle on copy of img
cv2.drawContours(result2,[box],0,(0,0,0),2)
# Gør noget hvis arealet er større end 1.
# Whats the area of the component?
areal = cv2.contourArea(c)
if(areal > 1):
# get the center of mass
M = cv2.moments(c)
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
center = (cx, cy)
print("\nx: ",cx,"\ny: ",cy)
color = (0, 0, 255)
cv2.circle(result2, center, 3, color, -1)
cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
# LOOK AT THIS PART
x,y,w,h = cv2.boundingRect(c)
ROI = 255 - thresh[y:y+h, x:x+w]
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
print("Number ", Number)
ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thought I could write Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') print(Number)
and then get the number from the image, but I don't, how can that be?
EDIT NEW ERROR
how do i solve it with this picture?
from PIL import Image
from operator import itemgetter
import numpy as np
import easyocr
import cv2
import re
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
#Define empty array
Cubes = []
def getNumber(ROI):
img = cv2.imread(ROI)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
#cv2.imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)
## Get contours
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10
tmp_img = img.copy()
for cnt in contours:
approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True)
if cv2.contourArea(cnt) > lp_area:
# Draw box corners and minimum area rectangle
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
#cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
#cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
#cv2.imshow(tmp_img)
#cv2.imshow('Minimum Area Rectangle', tmp_img)
#cv2.waitKey(0)
## Correct orientation and crop
# Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
# Run OCR on cropped image
# If the predicted value is digit print else rotate first
result = reader.readtext(warped)
print(result)
predicted_digit = result[0][1]
if np.char.isdigit(predicted_digit) == True:
cv2.imshow("warped " + ROI,warped)
else:
rot_img = warped.copy()
for i in range(0, 3):
rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
result = reader.readtext(rotated_image)
#if np.array(result).size == 0:
# continue
if not result:
rot_img = rotated_image
continue
#if len(result) == 0:
# continue
predicted_digit = result[0][1]
#print(result)
#print(predicted_digit)
#cv2.imshow(rotated_image)
if np.char.isdigit(predicted_digit) == True:
cv2.imshow("Image " + ROI, rotated_image)
break
rot_img = rotated_image
return predicted_digit
def sortNumbers(Cubes):
Cubes = sorted(Cubes, key=lambda x: int(x[2]))
#Cubes.sort(key=itemgetter(2)) # In-place sorting
#Cubes = sorted(Cubes, key=itemgetter(2)) # Create a new list
return Cubes
#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
#Change these if cube colours changes?
lower =(20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
cv2.drawContours(result2,[c],0,(0,0,0),2)
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
# draw rotated rectangle on copy of img
cv2.drawContours(result2,[box],0,(0,0,0),2)
# Gør noget hvis arealet er større end 1.
# Whats the area of the component?
areal = cv2.contourArea(c)
if(areal > 1):
# get the center of mass
M = cv2.moments(c)
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
center = (cx, cy)
print("\nx: ",cx,"\ny: ",cy)
color = (0, 0, 255)
cv2.circle(result2, center, 3, color, -1)
cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
x,y,w,h = cv2.boundingRect(c)
ROI = 255 - thresh[y:y+h, x:x+w]
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
#Read saved image (number)
result = getNumber('ROI_{}.png'.format(ROI_number))
print("ROI_number: ", result)
Cubes.append([cx, cy, result])
ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()
I get the following error (it can't detect a number)
Traceback (most recent call last): File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module> result = getNumber('ROI_{}.png'.format(ROI_number)) File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber predicted_digit = result[0][1] IndexError: list index out of range
This is implementation of my comment. Since, I do not have individual images this code will work with given grid like processed image.
For OCR I used EasyOCR instead of Tesserect. You could also try pytesserect on each output cropped images. Instead of rotating 4 times by 90 degrees by confidence, I went with digit detection on OCR result. If a detection is not a number then only rotate and retry.
Tested on google colab. Replace cv2_imshow(...) with cv2.imshow(...) for working locally. Also remove from google.colab.patches import cv2_imshow import.
This is modified version of my answer on card orientation correction here, OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card. All previous code is left as comment.
Code
!pip install easyocr
import easyocr
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
img = cv2.imread('1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
#cv2_imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)
## Get contours
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10
#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################
def order_points(pts):
# initialzie a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# return the ordered coordinates
return rect
def four_point_transform(image, pts):
# obtain a consistent order of the points and unpack them
# individually
rect = order_points(pts)
(tl, tr, br, bl) = rect
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordiates or the top-right and top-left x-coordinates
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
# now that we have the dimensions of the new image, construct
# the set of destination points to obtain a "birds eye view",
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# compute the perspective transform matrix and then apply it
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# return the warped image
return warped
#################################################################
#print(len(contours))
tmp_img = img.copy()
for cnt in contours:
approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True)
## calculate number of vertices
#print(len(approx))
## Get the largest contours only
## Side count cannot be used since contours are not all rectangular
if cv2.contourArea(cnt) > lp_area:
#if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:
# print("\n\n")
# print("#################################################")
# print("rectangle")
# print("#################################################")
# print("\n\n")
#tmp_img = img.copy()
#cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
#cv2_imshow(tmp_img)
#cv2.imshow('Contour Borders', tmp_img)
#cv2.waitKey(0)
# tmp_img = img.copy()
# cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
# cv2_imshow(tmp_img)
# #cv2.imshow('Contour Filled', tmp_img)
# #cv2.waitKey(0)
# # Make a hull arround the contour and draw it on the original image
# tmp_img = img.copy()
# mask = np.zeros((img.shape[:2]), np.uint8)
# hull = cv2.convexHull(cnt)
# cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
# cv2_imshow(mask)
# #cv2.imshow('Convex Hull Mask', mask)
# #cv2.waitKey(0)
# # Draw minimum area rectangle
# #tmp_img = img.copy()
# rect = cv2.minAreaRect(cnt)
# box = cv2.boxPoints(rect)
# box = np.int0(box)
# cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Minimum Area Rectangle', tmp_img)
# #cv2.waitKey(0)
# Draw box corners and minimum area rectangle
#tmp_img = img.copy()
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
#print(rect)
#print(box)
cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
#cv2_imshow(tmp_img)
#cv2.imshow('Minimum Area Rectangle', tmp_img)
#cv2.waitKey(0)
## Correct orientation and crop
# Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
#cv2_imshow(warped)
# Run OCR on cropped image
# If the predicted value is digit print else rotate first
result = reader.readtext(warped)
predicted_digit = result[0][1]
print("Detected Text:")
if np.char.isdigit(predicted_digit) == True:
print(result)
print(predicted_digit)
cv2_imshow(warped)
else:
rot_img = warped.copy()
for i in range(0, 3):
rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
result = reader.readtext(rotated_image)
#if np.array(result).size == 0:
# continue
if not result:
rot_img = rotated_image
continue
#if len(result) == 0:
# continue
predicted_digit = result[0][1]
#print(result)
#print(predicted_digit)
#cv2_imshow(rotated_image)
if np.char.isdigit(predicted_digit) == True:
print(result)
print(predicted_digit)
cv2_imshow(rotated_image)
break
rot_img = rotated_image
# # Draw bounding rectangle
# #tmp_img = img.copy()
# x, y, w, h = cv2.boundingRect(cnt)
# cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Bounding Rectangle', tmp_img)
# #cv2.waitKey(0)
# # Bounding Rectangle and Minimum Area Rectangle
# #tmp_img = img.copy()
# rect = cv2.minAreaRect(cnt)
# box = cv2.boxPoints(rect)
# box = np.int0(box)
# cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
# x, y, w, h = cv2.boundingRect(cnt)
# cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Bounding Rectangle', tmp_img)
# #cv2.waitKey(0)
# # determine the most extreme points along the contour
# # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
# tmp_img = img.copy()
# extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
# extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
# extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
# extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
# cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
# cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
# cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
# cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
# cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)
# print("Corner Points: ", extLeft, extRight, extTop, extBot)
# cv2_imshow(tmp_img)
# #cv2.imshow('img contour drawn', tmp_img)
# #cv2.waitKey(0)
# #cv2.destroyAllWindows()
# ## Perspective Transform
# tmp_img = img.copy()
# pts = np.array([extLeft, extRight, extTop, extBot])
# warped = four_point_transform(tmp_img, pts)
# cv2_imshow(tmp_img)
# #cv2.imshow("Warped", warped)
# #cv2.waitKey(0)
cv2_imshow(tmp_img)
#cv2.destroyAllWindows()
Output Prediction
Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1
Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2
Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4
Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3
White Region Detection With Corners
Alternate methods,
Try pretrained digit classification model trained from MNIST and others on each large contours exceeding certain area.
Use multitask object detection with rotation. One output of network will be detections another angle regression to predict orientation.
Use text detector like, East and run OCR on each detected text.

Not enough background filtering

I am trying to filter the background of images presenting electric cables. I tried to do the following:
Transform from color to gray
Apply cv2.Laplacian or 2 times of cv2.Sobel for finding edges in both directions.
Apply thresholding cv2.THRESH_BINARY(_INV), cv2.THRESH_OTSU
Lastly, I tried to find edges with 'filtered' images using cv2.Canny together with cv2.HoughLinesP
Overall, the results aren't satisfying at all. I will give an example of 2 images:
And the output of my script:
I also played with the values in config, but the results weren't different much.
Here's the little script I managed to do:
import cv2
import matplotlib.pyplot as plt
import numpy as np
def img_show(images, cmap=None):
fig = plt.figure(figsize=(17, 10))
root = 3 # len(images) ** 0.5
for i, img in enumerate(images):
ax = fig.add_subplot(root, root, i + 1)
ax.imshow(img, cmap=cmap[i])
plt.show()
class Config:
scale = 0.4
min_threshold = 120
max_threshold = 200
canny_min_threshold = 100
canny_max_threshold = 200
config = Config()
def find_lines(img, rgb_img):
dst = cv2.Canny(img, config.canny_min_threshold, config.canny_max_threshold)
cdstP = np.copy(rgb_img)
lines = cv2.HoughLinesP(dst, 1, np.pi / 180, 150, None, 0, 0)
lines1 = lines[:, 0, :]
for x1, y1, x2, y2 in lines1[:]:
cv2.line(cdstP, (x1, y1), (x2, y2), (255, 0, 0), 5)
return cdstP
if __name__ == "__main__":
bgr_img = cv2.imread('DJI_0009.JPG')
bgr_img = cv2.resize(bgr_img, (0, 0), bgr_img, config.scale, config.scale)
rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
gray_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)
# _, threshold = cv2.threshold(gray_img, config.min_threshold, config.max_threshold, cv2.THRESH_BINARY)
# laplacian = cv2.Laplacian(rgb_img, cv2.CV_8UC1)
sobelx = cv2.Sobel(gray_img, cv2.CV_8UC1, 1, 0)
sobely = cv2.Sobel(gray_img, cv2.CV_8UC1, 0, 1)
blended = cv2.addWeighted(src1=sobelx, alpha=0.5, src2=sobely, beta=0.5, gamma=0)
_, threshold = cv2.threshold(blended, config.min_threshold, config.max_threshold,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)
p1 = find_lines(threshold, rgb_img)
p2 = find_lines(blended, rgb_img)
p3 = find_lines(gray_img, rgb_img)
plots = [rgb_img, p1, p2, p3]
cmaps = [None] + ['gray'] * (len(plots) - 1)
img_show(plots, cmaps)
I am assuming I need to do much better filtring. However, I also tried image segmentation, but the results weren't promising at all.
Any ideas on how to improve this?
Thanks
Here is one way to do that in Python/OpenCV. I threshold, then optionally clean with morphology. Then get the contours and for each contour compute its rotated rectangle. Then get the dimensions of the rotated rectangle and compute the aspect ratio (largest dimension / smallest dimension) and optionally the area. Then I threshold on the aspect ratio (and optionally the area) and keep only those contours that pass)
Input:
import cv2
import numpy as np
image = cv2.imread("DCIM-100-MEDIA-DJI-0009-JPG.jpg")
hh, ww = image.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# create a binary thresholded image
thresh = cv2.threshold(gray, 64, 255, cv2.THRESH_BINARY)[1]
# invert so line is white on black background
thresh = 255 - thresh
# apply morphology
kernel = np.ones((11,11), np.uint8)
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
area_thresh = ww / 2
aspect_thresh = ww / 30
print(area_thresh,aspect_thresh)
print('')
result = image.copy()
for c in contours:
# get rotated rectangle from contour
# get its dimensions
rotrect = cv2.minAreaRect(c)
(center), (dim1,dim2), angle = rotrect
maxdim = max(dim1,dim2)
mindim = min(dim1,dim2)
area = dim1 * dim2
if mindim != 0:
aspect = maxdim / mindim
#print(area, aspect)
#if area > area_thresh and aspect > aspect_thresh:
if aspect > aspect_thresh:
# draw contour on input
cv2.drawContours(result,[c],0,(0,0,255),3)
print(area, aspect)
# save result
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_thresh.jpg",thresh)
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_clean.jpg",clean)
cv2.imwrite("DCIM-100-MEDIA-DJI-0009-JPG_result.jpg",result)
# display result
cv2.imshow("thresh", thresh)
cv2.imshow("clean", clean)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded image:
Morphology cleaned image:
Result image:

Categories