I am very new to Python, so what I am trying to do is this:
I already made a mask of the floor of a given image:
https://i.ibb.co/0r17SnT/www.png
What I want to do is connect all the points with straight line segments and find the length of each edge, like I drew in this example:
https://i.ibb.co/tbgyYF5/www.png
I am using OpenCV, and so far I have tried to find some extreme points, but that is not what I need, I guess.
import cv2

def getCalculatedImg():
    # Load image and convert to grayscale
    img = cv2.imread('img.jpg')
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply cv2.threshold() to get a binary image
    ret, thresh = cv2.threshold(gray_image, 50, 255, cv2.THRESH_BINARY)
    # Find contours and keep the largest one
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    c = max(contours, key=cv2.contourArea)
    # Obtain outer (extreme) coordinates
    left = tuple(c[c[:, :, 0].argmin()][0])
    right = tuple(c[c[:, :, 0].argmax()][0])
    top = tuple(c[c[:, :, 1].argmin()][0])
    bottom = tuple(c[c[:, :, 1].argmax()][0])
    # Draw contour and extreme points on the image
    cv2.drawContours(img, [c], -1, (36, 255, 12), 2)
    cv2.circle(img, left, 8, (0, 50, 255), -1)
    cv2.circle(img, right, 8, (0, 255, 255), -1)
    cv2.circle(img, top, 8, (255, 50, 0), -1)
    cv2.circle(img, bottom, 8, (255, 255, 0), -1)
    print('Left point: {}'.format(left))
    print('Right point: {}'.format(right))
    print('Top point: {}'.format(top))
    print('Bottom point: {}'.format(bottom))
    # Output
    cv2.imwrite('outlined_boundered_image.jpg', img)
    return img
It seems what you want is a straight-line interpolation of the boundary and the lengths of those lines. One way of getting one is to use splines of degree one. If you're more interested in the actual length of the boundary, it would be more appropriate to use degree-3 interpolation and an integral; let me know if you want that.
First, I used a blur and a Sobel filter to obtain a clear picture of the boundary.
import cv2
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import UnivariateSpline
picture = cv2.imread('area.png', cv2.IMREAD_GRAYSCALE)
picture2 = cv2.GaussianBlur(picture, (3, 3), 0, 0, cv2.BORDER_DEFAULT)
# Gradient magnitude approximated as |d/dx| + |d/dy|
picture3 = np.abs(cv2.Sobel(picture2, cv2.CV_16S, 1, 0)) + np.abs(cv2.Sobel(picture2, cv2.CV_16S, 0, 1))
picture4 = (picture3 > 0.3 * picture3.max()).T
plt.imshow(picture4.T, cmap='gray')
I can't give the image to UnivariateSpline to interpolate yet, since there are multiple y pixels for each x. I solved that by taking the average of the y values for each x as the value of the function at that point.
# Coordinates of all boundary pixels; since picture4 was transposed,
# x runs along the image columns
x, y = np.where(picture4)
# Index of the first occurrence of each distinct x value
idx = np.unique(x, return_index=True)[1]
# Average the y values belonging to each x, so there is one y per x
y = np.array(tuple(map(np.mean, np.split(y, idx)[1:])))
# Degree-1 spline, i.e. a piecewise linear fit; s controls the smoothing
f = UnivariateSpline(x[idx], y, k=1, s=10**4)
knots = f.get_knots()
print(len(knots))
x2 = np.linspace(0, picture.shape[1])
plt.plot(x2, -f(x2))
And it gives me a straight-line interpolation of the original boundary with 33 lines. If you think 33 lines is too much, you can just increase s; then it uses fewer lines.
And finally, I calculate the lengths of the line segments:
dx = knots[1:]-knots[:-1]
dy = f(knots[1:])-f(knots[:-1])
lengths = (dx**2+dy**2)**(1/2)
lengths
They are
array([ 48.00108553, 24.00049131, 48.59189432, 48.00784009,
24.00411983, 12.0353518 , 7.45847594, 76.56726501,
50.2062363 , 62.97882834, 55.66753909, 59.85823117,
192.18816002, 24.0756812 , 12.0380576 , 6.30271185,
68.6638399 , 8.91837947, 4.92632077, 11.31860656,
68.12574347, 18.55007016, 24.08123637, 48.59346072,
12.23820314, 18.36509318, 93.8749539 , 8.61755003,
15.5131074 , 43.85394011, 56.05155661, 9.3525071 ,
11.54485654])
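As mentioned at the top of this answer, if you want the actual arc length of the boundary rather than a list of segment lengths, a degree-3 spline plus a numerical integral is the better fit. A minimal sketch, assuming the same x[idx] and y arrays from above:
from scipy.integrate import quad

# Cubic spline through the same averaged boundary points
f3 = UnivariateSpline(x[idx], y, k=3, s=10**4)
df3 = f3.derivative()

# Arc length = integral of sqrt(1 + f'(t)^2) over the fitted x range
arc_length, err = quad(lambda t: np.sqrt(1 + df3(t)**2), x[idx].min(), x[idx].max())
print(arc_length)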
Edit: I thought it would be interesting to see a few pictures with different numbers of lines. If you're not happy with the results, please try to describe what's wrong.
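The pictures themselves are not reproduced here, but a sketch of how to generate them, assuming the x[idx] and y arrays from above: refit the degree-1 spline with different smoothing factors and plot each result.
# Larger s means fewer knots, hence fewer line segments
for s in (10**3, 10**4, 10**5):
    f_s = UnivariateSpline(x[idx], y, k=1, s=s)
    x2 = np.linspace(0, picture.shape[1])
    plt.plot(x2, -f_s(x2), label='s={}: {} knots'.format(s, len(f_s.get_knots())))
plt.legend()
plt.show()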
I'm trying to detect the numbers found in my squares, and I thought I could use the library pytesseract, but for some reason it reads the wrong values.
This is the console output:
And here I have all my pictures (they are separated; this is just to show them all):
import numpy as np
import cv2
import re
from PIL import Image
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
lower = (0, 240, 160)
upper = (30, 255, 255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result1 = img.copy()
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

ROI_number = 0
for c in contours:
    cv2.drawContours(result1, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        # LOOK AT THIS PART
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
        print("Number ", Number)
        ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png", result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
cv2.waitKey(0)
cv2.destroyAllWindows()
I thought I could write Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') followed by print(Number)
and then get the number from the image, but I don't. How can that be?
EDIT: NEW ERROR
How do I solve it with this picture?
from PIL import Image
from operator import itemgetter
import numpy as np
import easyocr
import cv2
import re
import imutils
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim', 'en'])  # need to run only once to load model into memory

# Define empty array
Cubes = []

def getNumber(ROI):
    img = cv2.imread(ROI)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, 0)
    #cv2.imshow(thresh)
    #cv2.imshow('Thresholded original', thresh)
    #cv2.waitKey(0)

    ## Get contours
    contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    ## only draw contour that have big areas
    imx = img.shape[0]
    imy = img.shape[1]
    lp_area = (imx * imy) / 10

    tmp_img = img.copy()
    for cnt in contours:
        approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
        if cv2.contourArea(cnt) > lp_area:
            # Draw box corners and minimum area rectangle
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            #cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
            #cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
            #cv2.imshow(tmp_img)
            #cv2.imshow('Minimum Area Rectangle', tmp_img)
            #cv2.waitKey(0)

            ## Correct orientation and crop
            # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
            width = int(rect[1][0])
            height = int(rect[1][1])
            src_pts = box.astype("float32")
            dst_pts = np.array([[0, height-1],
                                [0, 0],
                                [width-1, 0],
                                [width-1, height-1]], dtype="float32")
            M = cv2.getPerspectiveTransform(src_pts, dst_pts)
            warped = cv2.warpPerspective(img, M, (width, height))

            # Run OCR on cropped image
            # If the predicted value is digit print else rotate first
            result = reader.readtext(warped)
            print(result)
            predicted_digit = result[0][1]
            if np.char.isdigit(predicted_digit) == True:
                cv2.imshow("warped " + ROI, warped)
            else:
                rot_img = warped.copy()
                for i in range(0, 3):
                    rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
                    result = reader.readtext(rotated_image)
                    #if np.array(result).size == 0:
                    #    continue
                    if not result:
                        rot_img = rotated_image
                        continue
                    #if len(result) == 0:
                    #    continue
                    predicted_digit = result[0][1]
                    #print(result)
                    #print(predicted_digit)
                    #cv2.imshow(rotated_image)
                    if np.char.isdigit(predicted_digit) == True:
                        cv2.imshow("Image " + ROI, rotated_image)
                        break
                    rot_img = rotated_image
    return predicted_digit

def sortNumbers(Cubes):
    Cubes = sorted(Cubes, key=lambda x: int(x[2]))
    #Cubes.sort(key=itemgetter(2))  # In-place sorting
    #Cubes = sorted(Cubes, key=itemgetter(2))  # Create a new list
    return Cubes

#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
# Change these if the cube colours change?
lower = (20, 100, 100)
upper = (30, 255, 255)
#lower = (0, 240, 160)
#upper = (30, 255, 255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

ROI_number = 0
for c in contours:
    cv2.drawContours(result2, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        # Read saved image (number)
        result = getNumber('ROI_{}.png'.format(ROI_number))
        print("ROI_number: ", result)
        Cubes.append([cx, cy, result])
        ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png", result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()
I get the following error (it can't detect a number):
Traceback (most recent call last):
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module>
    result = getNumber('ROI_{}.png'.format(ROI_number))
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber
    predicted_digit = result[0][1]
IndexError: list index out of range
This is an implementation of my comment. Since I do not have the individual images, this code works with the given grid-like processed image.
For OCR I used EasyOCR instead of Tesseract. You could also try pytesseract on each of the cropped output images. Instead of rotating 4 times by 90 degrees and choosing by confidence, I went with digit detection on the OCR result: if a detection is not a number, only then rotate and retry.
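For reference, a minimal sketch of that pytesseract alternative on one cropped cell; it assumes the warped crop produced inside the loop in the code below and reuses the whitelist config from the question:
import pytesseract

# '--psm 13' treats the crop as a single raw text line
digit = pytesseract.image_to_string(
    warped, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
print(digit.strip())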
Tested on Google Colab. Replace cv2_imshow(...) with cv2.imshow(...) when working locally, and remove the from google.colab.patches import cv2_imshow import.
This is a modified version of my answer on card orientation correction here: OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card. All previous code is left as comments.
Code
!pip install easyocr

import easyocr
reader = easyocr.Reader(['ch_sim', 'en'])  # need to run only once to load model into memory

"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

img = cv2.imread('1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, 0)
#cv2_imshow(thresh)
#cv2.imshow('Thresholded original', thresh)
#cv2.waitKey(0)

## Get contours
contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10

#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################

def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")

    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # now, compute the difference between the points, the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # return the ordered coordinates
    return rect

def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view",
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # return the warped image
    return warped

#################################################################

#print(len(contours))

tmp_img = img.copy()

for cnt in contours:
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)

    ## calculate number of vertices
    #print(len(approx))

    ## Get the largest contours only
    ## Side count cannot be used since contours are not all rectangular
    if cv2.contourArea(cnt) > lp_area:
    #if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:

        # print("\n\n")
        # print("#################################################")
        # print("rectangle")
        # print("#################################################")
        # print("\n\n")

        #tmp_img = img.copy()
        #cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Contour Borders', tmp_img)
        #cv2.waitKey(0)

        # tmp_img = img.copy()
        # cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('Contour Filled', tmp_img)
        # #cv2.waitKey(0)

        # # Make a hull around the contour and draw it on the original image
        # tmp_img = img.copy()
        # mask = np.zeros((img.shape[:2]), np.uint8)
        # hull = cv2.convexHull(cnt)
        # cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
        # cv2_imshow(mask)
        # #cv2.imshow('Convex Hull Mask', mask)
        # #cv2.waitKey(0)

        # # Draw minimum area rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Minimum Area Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # Draw box corners and minimum area rectangle
        #tmp_img = img.copy()
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        #print(rect)
        #print(box)
        cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
        cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Minimum Area Rectangle', tmp_img)
        #cv2.waitKey(0)

        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))
        #cv2_imshow(warped)

        # Run OCR on cropped image
        # If the predicted value is digit print else rotate first
        result = reader.readtext(warped)
        predicted_digit = result[0][1]
        print("Detected Text:")

        if np.char.isdigit(predicted_digit) == True:
            print(result)
            print(predicted_digit)
            cv2_imshow(warped)
        else:
            rot_img = warped.copy()
            for i in range(0, 3):
                rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
                result = reader.readtext(rotated_image)
                #if np.array(result).size == 0:
                #    continue
                if not result:
                    rot_img = rotated_image
                    continue
                #if len(result) == 0:
                #    continue
                predicted_digit = result[0][1]
                #print(result)
                #print(predicted_digit)
                #cv2_imshow(rotated_image)
                if np.char.isdigit(predicted_digit) == True:
                    print(result)
                    print(predicted_digit)
                    cv2_imshow(rotated_image)
                    break
                rot_img = rotated_image

        # # Draw bounding rectangle
        # #tmp_img = img.copy()
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # Bounding Rectangle and Minimum Area Rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # determine the most extreme points along the contour
        # # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
        # tmp_img = img.copy()
        # extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
        # extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
        # extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
        # extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
        # cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
        # cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
        # cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
        # cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
        # cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)
        # print("Corner Points: ", extLeft, extRight, extTop, extBot)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('img contour drawn', tmp_img)
        # #cv2.waitKey(0)
        # #cv2.destroyAllWindows()

        # ## Perspective Transform
        # tmp_img = img.copy()
        # pts = np.array([extLeft, extRight, extTop, extBot])
        # warped = four_point_transform(tmp_img, pts)
        # cv2_imshow(tmp_img)
        # #cv2.imshow("Warped", warped)
        # #cv2.waitKey(0)

cv2_imshow(tmp_img)
#cv2.destroyAllWindows()
Output Prediction
Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1
Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2
Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4
Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3
White Region Detection With Corners
Alternate methods:
Try a pretrained digit classification model (trained on MNIST and similar) on each large contour exceeding a certain area.
Use multitask object detection with rotation: one output of the network predicts the detections, another regresses the angle to predict orientation.
Use a text detector like EAST and run OCR on each detected text region; see the sketch below.
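For the last option, a rough sketch of wiring up the EAST detector via OpenCV's dnn module; the model file frozen_east_text_detection.pb is an assumption here (the pretrained graph is not bundled with OpenCV and must be downloaded separately), and decoding the geometry into boxes is omitted:
import cv2
import numpy as np

# Load the pretrained EAST graph (path is an assumption, see above)
net = cv2.dnn.readNet('frozen_east_text_detection.pb')

img = cv2.imread('1.jpg')
# EAST expects input dimensions that are multiples of 32
blob = cv2.dnn.blobFromImage(img, 1.0, (320, 320),
                             (123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)

# Confidence scores and text-box geometry; decode these into rotated
# rectangles, then run OCR on each cropped region
scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid',
                                'feature_fusion/concat_3'])
print(scores.shape, geometry.shape)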
I am trying to extract the cell locations in the table below.
I was able to get contours around the cell positions after applying adaptive thresholding, extracting vertical and horizontal lines with structuring elements, and running HoughLinesP.
Here's my code:
import os
import cv2
import numpy as np

img = cv2.imread(os.path.join(img_path, file))
img1 = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 17, 1)
bw = cv2.bitwise_not(bw)

# detect horizontal lines
horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
horizontal = cv2.erode(bw, horizontalStructure)
horizontal = cv2.dilate(horizontal, horizontalStructure)
horizontal = cv2.dilate(horizontal, (1, 1), iterations=5)
horizontal = cv2.erode(horizontal, (1, 1), iterations=5)

hlines = cv2.HoughLinesP(horizontal, 1, np.pi/180, 20, np.array([]), 20, 2)
for line in hlines:
    for x1, y1, x2, y2 in line:
        if abs(x1 - x2) > img.shape[1]/4:
            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# detect vertical lines
verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 15))
vertical = cv2.erode(bw, verticalStructure)
vertical = cv2.dilate(vertical, verticalStructure)
vertical = cv2.dilate(vertical, (1, 1), iterations=5)
#vertical = cv2.erode(vertical, (1, 1), iterations=5)

vlines = cv2.HoughLinesP(vertical, 1, np.pi/180, 20, np.array([]), 20, 2)
for line in vlines:
    for x1, y1, x2, y2 in line:
        #if abs(y1 - y2) > img.shape[0]/2:
        cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# green color boundaries [B, G, R] for the lines drawn above
lower = [0, 240, 0]
upper = [20, 255, 20]

# create NumPy arrays from the boundaries
lower = np.array(lower, dtype="uint8")
upper = np.array(upper, dtype="uint8")

# find the colors within the specified boundaries and apply the mask
mask = cv2.inRange(img, lower, upper)
output = cv2.bitwise_and(img1, img, mask=mask)
ret, thresh = cv2.threshold(mask, 40, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

img_area = img.shape[0] * img.shape[1]
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    if w * h > 0.005 * img_area:
        cv2.rectangle(img1, (x, y), (x+w, y+h), (0, 0, 255), 2)
How can I improve this solution? What other approaches could extract the table cell information more accurately and more robustly?
For each box detected, take a slightly wider area, padding by an arbitrary error threshold (n pixels, e.g. 5 pixels); you should then be able to detect every cell's text content.
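A minimal sketch of that idea, assuming the contours and img_area from the question's code; crop_with_margin is a hypothetical helper that widens each detected box by a few pixels before any further text extraction:
def crop_with_margin(img, x, y, w, h, margin=5):
    # Hypothetical helper: pad the box by `margin` pixels on every side,
    # clamped to the image borders
    x0, y0 = max(x - margin, 0), max(y - margin, 0)
    x1 = min(x + w + margin, img.shape[1])
    y1 = min(y + h + margin, img.shape[0])
    return img[y0:y1, x0:x1]

for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    if w * h > 0.005 * img_area:
        cell = crop_with_margin(img1, x, y, w, h)
        # run OCR / text extraction on `cell` here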
I want to compare an image against a specific region using
(score, diff) = compare_ssim(grayA[y:y+h, x:x+w], grayB[y:y+h, x:x+w], full=True)
But that function only supports a rectangular ROI, and my ROI is a contour.
To compare them, I need the largest rectangle inside the contour. How do I find the largest rectangle inside the contour area?
Sample image
According to your OP, I suggest using warpAffine to rotate the ROI to a rectangular shape, because the ROI is already a rectangle, just rotated. Here is a simple sample:
import cv2
import numpy as np
img = cv2.imread("1.png")
(H,W,c) = img.shape
print("shape = {},{}".format(H,W))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)
# Note: three return values are the OpenCV 3.x API; in OpenCV 4.x,
# findContours returns only (contours, hierarchy)
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
res = np.zeros_like(img)
c = np.squeeze(contours[0])
# find rectangle's conner points
x = sorted(c, key=lambda a:a[0])
left = x[0]
right = x[-1]
y= sorted(c, key=lambda a:a[1])
top = y[0]
bottom = y[-1]
cv2.circle(img, (left[0],left[1]), 4, (0, 0, 255), -1)
cv2.circle(img, (right[0],right[1]), 4, (0, 0, 255), -1)
cv2.circle(img, (top[0],top[1]), 4, (0, 0, 255), -1)
cv2.circle(img, (bottom[0],bottom[1]), 4, (0, 0, 255), -1)
#calculate rectangle's shape
roi_w = int(np.sqrt((top[0]-right[0])*(top[0]-right[0])+(top[1]-right[1])*(top[1]-right[1])))
roi_h = int(np.sqrt((top[0]-left[0])*(top[0]-left[0])+(top[1]-left[1])*(top[1]-left[1])))
pts1 = np.float32([top,right,left])
# keep the top coords and calculate new coords for left and right
new_top = top
new_right = [top[0] + roi_w, top[1]]
new_left = [top[0], top[1] + roi_h]
pts2 = np.float32([new_top,new_right,new_left])
#rotate
matrix = cv2.getAffineTransform(pts1, pts2)
result = cv2.warpAffine(img, matrix, (W,H))
cv2.drawContours(res, [contours[0]], 0, (0,255,0), 3)
# extract roi
roi = result[new_top[1]:new_left[1],new_top[0]:new_right[0]]
cv2.imshow("img",img)
cv2.imshow("result",result)
cv2.waitKey(0)
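Once the ROI has been straightened like this, the SSIM comparison from your question becomes a plain rectangle comparison. A sketch, where img2 is an assumed second image warped with the same matrix so both crops are aligned and equal in size (the compare_ssim import path depends on your scikit-image version):
from skimage.metrics import structural_similarity as compare_ssim

# Warp the assumed second image with the same affine matrix, then crop the same region
result2 = cv2.warpAffine(img2, matrix, (W, H))
roi2 = result2[new_top[1]:new_left[1], new_top[0]:new_right[0]]

grayA = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
grayB = cv2.cvtColor(roi2, cv2.COLOR_BGR2GRAY)
score, diff = compare_ssim(grayA, grayB, full=True)
print("SSIM: {:.3f}".format(score))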
I have an image with two contours, where one contour is always 'inside' another. I want to find the distance between the two contours for 90 different angles (meaning, distance at every 4 degrees). How do I go about doing it?
Here's an example image:
Thank you!
Take this image of two sets of two shapes:
We want to find the distance between the edges of each set of shapes, including where the edges overlap.
First things first, we import the necessary modules:
import cv2
import numpy as np
To do that, we will first need to retrieve every shape in the image as lists of contours. In the above particular example, there are 4 shapes that need to be detected. To retrieve each shape, we will need to use a mask to mask out every color besides the color of the shape of interest:
def get_masked(img, lower, upper):
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(img_hsv, np.array(lower), np.array(upper))
    img_mask = cv2.bitwise_and(img, img, mask=mask)
    return img_mask
The lower and upper parameters determine the minimum and maximum HSV values that will not be masked out of the image. Given the right lower and upper parameters, you will be able to extract one image with only the green shapes and one image with only the blue shapes:
With the masked images, you can then proceed to process them into more clean contours. Here is the preprocess function, with values that can be tweaked whenever necessary:
def get_processed(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (7, 7), 7)
    img_canny = cv2.Canny(img_blur, 50, 50)
    kernel = np.ones((7, 7))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    img_erode = cv2.erode(img_dilate, kernel, iterations=2)
    return img_erode
Passing in the masked images will give you
With the images masked and processed, they will be ready for opencv to detect their contours:
def get_contours(img):
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [cnt for cnt in contours if cv2.contourArea(cnt) > 500]
The list comprehension at the return statement is there to filter out noise by specifying that every contour must have an area that is greater than 500.
Now, we will define some basic functions that we will later use:
def get_centeroid(cnt):
    length = len(cnt)
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return int(sum_x / length), int(sum_y / length)

def get_pt_at_angle(pts, pt, ang):
    angles = np.rad2deg(np.arctan2(*(pt - pts).T))
    angles = np.where(angles < -90, angles + 450, angles + 90)
    found = np.rint(angles) == ang
    if np.any(found):
        return pts[found][0]
The names of the functions are pretty self-explanatory: the first one returns the center point of a contour, and the second one returns a point in a given array of points, pts, that is at a given angle, ang, relative to a given point, pt. The np.where in the get_pt_at_angle function shifts the starting angle, 0, to the positive x axis, as by default it would be at the positive y axis.
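A quick standalone sanity check of that convention (hypothetical, not part of the answer's pipeline): a point directly to the right of the origin should come out at 0 degrees after the shift.
import numpy as np

pt = np.array([0, 0])        # origin
pts = np.array([[10, 0]])    # one point directly to the right of the origin
angles = np.rad2deg(np.arctan2(*(pt - pts).T))
print(np.where(angles < -90, angles + 450, angles + 90))  # [0.]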
Time to define the function that will return the distances. First, define it so that these five parameters can be passed in:
def get_distances(img, cnt1, cnt2, center, step):
A brief explanation on each parameter:
img, the image array
cnt1, the first shape
cnt2, the second shape
center, the origin for the distance calculations
step, the number of degrees to be jumped per value
Define a dictionary to store the distances, with the angles as key and the distances as values:
angles = dict()
Loop through each angle at which you want to retrieve the distance between the edges of the two shapes, and find the coordinate of each of the two contours at the current angle of the iteration, angle, relative to the origin point, center, using the get_pt_at_angle function we defined earlier.
for angle in range(0, 360, step):
    pt1 = get_pt_at_angle(cnt1, center, angle)
    pt2 = get_pt_at_angle(cnt2, center, angle)
Check if a point exists in both contours that is at the specific angle relative to the origin:
if np.any(pt1) and np.any(pt2):
You can use the np.linalg.norm method to get the distance between the two points. I also made it draw the text and connecting lines for visualization. Don't forget to add the angle and value to the angles dictionary, and you can then break out of the inner for loop. At the end of the function, return the image that has the text and lines drawn on it:
            d = round(np.linalg.norm(pt1 - pt2))
            cv2.putText(img, str(d), tuple(pt1), cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 0))
            cv2.drawContours(img, np.array([[center, pt1]]), -1, (255, 0, 255), 1)
            angles[angle] = d
    return img, angles
Finally, you can utilize the function defined on an image:
img = cv2.imread("shapes1.png")
img_green = get_masked(img, [10, 0, 0], [70, 255, 255])
img_blue = get_masked(img, [70, 0, 0], [179, 255, 255])
img_green_processed = get_processed(img_green)
img_blue_processed = get_processed(img_blue)
img_green_contours = get_contours(img_green_processed)
img_blue_contours = get_contours(img_blue_processed)
Using the image of four shapes, you can tell that img_green_contours and img_blue_contours will each contain two contours. But you might be wondering: how did I choose the minimum and maximum HSV values? Well, I used a small trackbar script. You can run the code below, adjusting the HSV values using the trackbars until you find a range where everything in the image is masked out (in black) except for the shape you want to retrieve:
import cv2
import numpy as np

def empty(a):
    pass

cv2.namedWindow("TrackBars")
cv2.createTrackbar("Hue Min", "TrackBars", 0, 179, empty)
cv2.createTrackbar("Hue Max", "TrackBars", 179, 179, empty)
cv2.createTrackbar("Sat Min", "TrackBars", 0, 255, empty)
cv2.createTrackbar("Sat Max", "TrackBars", 255, 255, empty)
cv2.createTrackbar("Val Min", "TrackBars", 0, 255, empty)
cv2.createTrackbar("Val Max", "TrackBars", 255, 255, empty)

img = cv2.imread("shapes0.png")

while True:
    h_min = cv2.getTrackbarPos("Hue Min", "TrackBars")
    h_max = cv2.getTrackbarPos("Hue Max", "TrackBars")
    s_min = cv2.getTrackbarPos("Sat Min", "TrackBars")
    s_max = cv2.getTrackbarPos("Sat Max", "TrackBars")
    v_min = cv2.getTrackbarPos("Val Min", "TrackBars")
    v_max = cv2.getTrackbarPos("Val Max", "TrackBars")
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower = np.array([h_min, s_min, v_min])
    upper = np.array([h_max, s_max, v_max])
    mask = cv2.inRange(img_hsv, lower, upper)
    img_masked = cv2.bitwise_and(img, img, mask=mask)
    cv2.imshow("Image", img_masked)
    if cv2.waitKey(1) & 0xFF == ord("q"):  # If you press the q key
        break
With the values I chose, I got:
Loop through the blue shape contours and green shape contours in parallel, and depending on which color shape you want the origin to be at the center of, you can pass that color contour into the get_centeroid function we defined earlier:
for cnt_blue, cnt_green in zip(img_blue_contours, img_green_contours[::-1]):
    center = get_centeroid(cnt_blue)
    img, angles = get_distances(img, cnt_green.squeeze(), cnt_blue.squeeze(), center, 30)
    print(angles)
Notice that I used 30 as the step; that number can be changed to 4, I used 30 so the visualization would be more clear.
Finally, we can display the image:
cv2.imshow("Image", img)
cv2.waitKey(0)
Altogether:
import cv2
import numpy as np

def get_masked(img, lower, upper):
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(img_hsv, np.array(lower), np.array(upper))
    img_mask = cv2.bitwise_and(img, img, mask=mask)
    return img_mask

def get_processed(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (7, 7), 7)
    img_canny = cv2.Canny(img_blur, 50, 50)
    kernel = np.ones((7, 7))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    img_erode = cv2.erode(img_dilate, kernel, iterations=2)
    return img_erode

def get_contours(img):
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [cnt for cnt in contours if cv2.contourArea(cnt) > 500]

def get_centeroid(cnt):
    length = len(cnt)
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return int(sum_x / length), int(sum_y / length)

def get_pt_at_angle(pts, pt, ang):
    angles = np.rad2deg(np.arctan2(*(pt - pts).T))
    angles = np.where(angles < -90, angles + 450, angles + 90)
    found = np.rint(angles) == ang
    if np.any(found):
        return pts[found][0]

def get_distances(img, cnt1, cnt2, center, step):
    angles = dict()
    for angle in range(0, 360, step):
        pt1 = get_pt_at_angle(cnt1, center, angle)
        pt2 = get_pt_at_angle(cnt2, center, angle)
        if np.any(pt1) and np.any(pt2):
            d = round(np.linalg.norm(pt1 - pt2))
            cv2.putText(img, str(d), tuple(pt1), cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 0))
            cv2.drawContours(img, np.array([[center, pt1]]), -1, (255, 0, 255), 1)
            angles[angle] = d
    return img, angles

img = cv2.imread("shapes1.png")

img_green = get_masked(img, [10, 0, 0], [70, 255, 255])
img_blue = get_masked(img, [70, 0, 0], [179, 255, 255])

img_green_processed = get_processed(img_green)
img_blue_processed = get_processed(img_blue)

img_green_contours = get_contours(img_green_processed)
img_blue_contours = get_contours(img_blue_processed)

for cnt_blue, cnt_green in zip(img_blue_contours, img_green_contours[::-1]):
    center = get_centeroid(cnt_blue)
    img, angles = get_distances(img, cnt_green.squeeze(), cnt_blue.squeeze(), center, 30)
    print(angles)

cv2.imshow("Image", img)
cv2.waitKey(0)
Output:
{0: 5, 30: 4, 60: 29, 90: 25, 120: 31, 150: 8, 180: 5, 210: 7, 240: 14, 270: 12, 300: 14, 330: 21}
{0: 10, 30: 9, 60: 6, 90: 0, 120: 11, 150: 7, 180: 5, 210: 6, 240: 6, 270: 4, 300: 0, 330: 16}
Note: For certain shapes, some angles might be absent in the dictionary. That would be caused by the process function; you would get more accurate results if you turned down some of its values, like the blur sigma.
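For example, a gentler preprocess, assuming the rest of the pipeline stays the same, could be:
# Smaller kernel and sigma keep more edge detail, so fewer angles go missing
img_blur = cv2.GaussianBlur(img_gray, (3, 3), 2)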
In the following code, I have just given you the example for a vertical line; the rest can be obtained by rotating the line. The result looks like this. Instead of drawing, you can use the coordinates for the distance calculation.
import shapely.geometry as shapgeo
import numpy as np
import cv2

img = cv2.imread('image.jpg', 0)
ret, img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)

# Fit the ellipses (three return values are the OpenCV 3.x API)
_, contours0, hierarchy = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
outer_ellipse = [cv2.approxPolyDP(contours0[0], 0.1, True)]
inner_ellipse = [cv2.approxPolyDP(contours0[2], 0.1, True)]

h, w = img.shape[:2]
vis = np.zeros((h, w, 3), np.uint8)
cv2.drawContours(vis, outer_ellipse, -1, (255, 0, 0), 1)
cv2.drawContours(vis, inner_ellipse, -1, (0, 0, 255), 1)

## Extract contour of ellipses
cnt_outer = np.vstack(outer_ellipse).squeeze()
cnt_inner = np.vstack(inner_ellipse).squeeze()

# Determine centroid
M = cv2.moments(cnt_inner)
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
print(cx, cy)

# Draw full segment lines
cv2.line(vis, (cx, 0), (cx, w), (150, 0, 0), 1)

# Calculate intersections using Shapely
# http://toblerity.org/shapely/manual.html
PolygonEllipse_outer = shapgeo.asLineString(cnt_outer)
PolygonEllipse_inner = shapgeo.asLineString(cnt_inner)
PolygonVerticalLine = shapgeo.LineString([(cx, 0), (cx, w)])

insecouter = np.array(PolygonEllipse_outer.intersection(PolygonVerticalLine)).astype(np.int)
insecinner = np.array(PolygonEllipse_inner.intersection(PolygonVerticalLine)).astype(np.int)
cv2.line(vis, (insecouter[0, 0], insecinner[1, 1]), (insecouter[1, 0], insecouter[1, 1]), (0, 255, 0), 2)
cv2.line(vis, (insecouter[0, 0], insecinner[0, 1]), (insecouter[1, 0], insecouter[0, 1]), (0, 255, 0), 2)

cv2.imshow('contours', vis)
0xFF & cv2.waitKey()
cv2.destroyAllWindows()
I borrowed the general idea of using Shapely, and the basic code, from tfv's answer. Nevertheless, iterating over the desired angles, calculating the end points of the lines to be intersected with the shapes, and calculating and storing the distances were missing, so I added all of that.
That'd be my full code:
import cv2
import numpy as np
import shapely.geometry as shapgeo
# Read image, and binarize
img = cv2.imread('G48xu.jpg', cv2.IMREAD_GRAYSCALE)
img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)[1]
# Find (approximated) contours of inner and outer shape
cnts, hier = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
outer = [cv2.approxPolyDP(cnts[0], 0.1, True)]
inner = [cv2.approxPolyDP(cnts[2], 0.1, True)]
# Just for visualization purposes: Draw contours of inner and outer shape
h, w = img.shape[:2]
vis = np.zeros((h, w, 3), np.uint8)
cv2.drawContours(vis, outer, -1, (255, 0, 0), 1)
cv2.drawContours(vis, inner, -1, (0, 0, 255), 1)
# Squeeze contours for further processing
outer = np.vstack(outer).squeeze()
inner = np.vstack(inner).squeeze()
# Calculate centroid of inner contour
M = cv2.moments(inner)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
# Calculate maximum needed radius for later line intersections
r_max = np.min([cx, w - cx, cy, h - cy])
# Set up angles (in degrees)
angles = np.arange(0, 360, 4)
# Initialize distances
dists = np.zeros_like(angles)
# Prepare calculating the intersections using Shapely
poly_outer = shapgeo.asLineString(outer)
poly_inner = shapgeo.asLineString(inner)
# Iterate angles and calculate distances between inner and outer shape
for i, angle in enumerate(angles):
    # Convert angle from degrees to radians
    angle = angle / 180 * np.pi

    # Calculate end points of line from centroid in angle's direction
    x = np.cos(angle) * r_max + cx
    y = np.sin(angle) * r_max + cy
    points = [(cx, cy), (x, y)]

    # Calculate intersections using Shapely
    poly_line = shapgeo.LineString(points)
    insec_outer = np.array(poly_outer.intersection(poly_line))
    insec_inner = np.array(poly_inner.intersection(poly_line))

    # Calculate distance between intersections using L2 norm
    dists[i] = np.linalg.norm(insec_outer - insec_inner)

    # Just for visualization purposes: Draw lines for some examples
    if (i == 10) or (i == 40) or (i == 75):
        # Line from centroid to end points
        cv2.line(vis, (cx, cy), (int(x), int(y)), (128, 128, 128), 1)

        # Line between both shapes
        cv2.line(vis,
                 (int(insec_inner[0]), int(insec_inner[1])),
                 (int(insec_outer[0]), int(insec_outer[1])), (0, 255, 0), 2)

        # Distance
        cv2.putText(vis, str(dists[i]), (int(x), int(y)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.75, (0, 255, 0), 2)
# Output angles and distances
print(np.vstack([angles, dists]).T)
# Just for visualization purposes: Output image
cv2.imshow('Output', vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
I generated some exemplary output for visualization purposes:
And here's an excerpt from the output, showing the angle and the corresponding distance:
[[ 0 70]
[ 4 71]
[ 8 73]
[ 12 76]
[ 16 77]
...
[340 56]
[344 59]
[348 62]
[352 65]
[356 67]]
Hopefully, the code is self-explanatory. If not, please don't hesitate to ask questions. I'll gladly provide further information.
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
NumPy: 1.20.2
OpenCV: 4.5.1
Shapely: 1.7.1
----------------------------------------