Extract table structure from an image containing tables with borders - python

I am trying to extract the cell locations in the table below.
I was able to get the contours around the cell positions after applying adaptive thresholding, extracting horizontal and vertical structuring elements, and running HoughLinesP.
Here's my code:
import os
import cv2
import numpy as np

img = cv2.imread(os.path.join(img_path, file))
img1 = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 17, 1)
bw = cv2.bitwise_not(bw)

# detect horizontal lines
horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
horizontal = cv2.erode(bw, horizontalStructure)
horizontal = cv2.dilate(horizontal, horizontalStructure)
horizontal = cv2.dilate(horizontal, (1, 1), iterations=5)
horizontal = cv2.erode(horizontal, (1, 1), iterations=5)
hlines = cv2.HoughLinesP(horizontal, 1, np.pi/180, 20, np.array([]), 20, 2)
for line in hlines:
    for x1, y1, x2, y2 in line:
        if abs(x1 - x2) > img.shape[1] / 4:
            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# detect vertical lines
verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 15))
vertical = cv2.erode(bw, verticalStructure)
vertical = cv2.dilate(vertical, verticalStructure)
vertical = cv2.dilate(vertical, (1, 1), iterations=5)
#vertical = cv2.erode(vertical, (1, 1), iterations=5)
vlines = cv2.HoughLinesP(vertical, 1, np.pi/180, 20, np.array([]), 20, 2)
for line in vlines:
    for x1, y1, x2, y2 in line:
        #if abs(y1 - y2) > img.shape[0] / 2:
        cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# green color boundaries [B, G, R] (the drawn lines are green)
lower = [0, 240, 0]
upper = [20, 255, 20]
# create NumPy arrays from the boundaries
lower = np.array(lower, dtype="uint8")
upper = np.array(upper, dtype="uint8")
# find the colors within the specified boundaries and apply the mask
mask = cv2.inRange(img, lower, upper)
output = cv2.bitwise_and(img1, img, mask=mask)
ret, thresh = cv2.threshold(mask, 40, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
img_area = img.shape[0] * img.shape[1]
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    if w * h > 0.005 * img_area:
        cv2.rectangle(img1, (x, y), (x+w, y+h), (0, 0, 255), 2)
How can I improve this solution? What other approaches could I implement to extract the table cell information more accurately and robustly?

For each box detected, take a slightly wider area to allow for an arbitrary error threshold (a few pixels, e.g. 5 px on each side); you should then be able to detect every cell's text content.
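A minimal sketch of that suggestion, assuming the contours and img1 from the question's code are in scope; the 5-pixel padding is an arbitrary threshold and the pytesseract call is just one possible way to read the cell text:
import cv2
import pytesseract  # assumption: Tesseract is installed and on the path

PAD = 5  # arbitrary error threshold in pixels

for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    # widen the detected cell box by PAD on every side, clamped to the image
    x0 = max(x - PAD, 0)
    y0 = max(y - PAD, 0)
    x1 = min(x + w + PAD, img1.shape[1])
    y1 = min(y + h + PAD, img1.shape[0])
    cell = img1[y0:y1, x0:x1]
    text = pytesseract.image_to_string(cell, config='--psm 7').strip()
    print((x0, y0, x1, y1), text)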

Related

I am using OpenCV in Python to convert an analog clock image to digital data for the hours and minutes, but I need it to show the seconds too

I have used OpenCV to read the image, convert it to grayscale, and find edges using Canny, kernel, threshold, erode and so on. I have detected all the lines in the image using HoughLinesP() and I have detected the hour and minute hands, but I also need to find the seconds hand. Here is the code which I have used:
import cv2
import math
import numpy as np
from matplotlib import pyplot as plt
from math import sqrt
from math import acos, degrees

kernel = np.ones((5, 5), np.uint8)
img1 = cv2.imread('input1.jpg')
img = cv2.imread('input1.jpg', 0)
gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)

# Create mask
height, width = img.shape
#height = height - 10
#width = width - 10
mask = np.zeros((height, width), np.uint8)
edges = cv2.Canny(thresh, 100, 200)
#cv2.imshow('detected ', gray)
cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.2, 100)
#circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 1.2, 1000, param1=50, param2=30, minRadius=20, maxRadius=0)
for i in circles[0, :]:
    i[2] = i[2] + 4
    # Draw on mask
    cv2.circle(mask, (i[0], i[1]), i[2], (255, 255, 255), thickness=-1)

# Copy that image using that mask
masked_data = cv2.bitwise_and(img1, img1, mask=mask)

# Apply Threshold
_, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)

# Find Contour
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(contours[0])

# Crop masked_data
crop = masked_data[y+30:y+h-30, x+30:x+w-30]
i = crop
height, width, channels = i.shape
print(width, height, channels)

#########################################################################
ret, mask = cv2.threshold(i, 10, 255, cv2.THRESH_BINARY)
edges = cv2.Canny(i, 100, 200)
kernel = np.ones((11, 11), np.uint8)
kernel2 = np.ones((13, 13), np.uint8)
edges = cv2.dilate(edges, kernel, iterations=1)
edges = cv2.erode(edges, kernel2, iterations=1)
minLineLength = 1000
maxLineGap = 10
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 15, minLineLength, maxLineGap)

h = []
xmax1 = 0
xmax2 = 0
ymax1 = 0
ymax2 = 0
xs1 = 0
xs2 = 0
ys1 = 0
ys2 = 0
for line in lines:
    x1, y1, x2, y2 = line[0]
    #cv2.line(i, (x1, y1), (x2, y2), (0, 255, 0), 1)
    dx = x2 - x1
    if dx < 0:
        dx = dx * -1
    dy = y2 - y1
    if dy < 0:
        dy = dy * -1
    hypo = sqrt(dx**2 + dy**2)
    #print("dx=", dx, " dy=", dy)
    h.append(hypo)
#print(h)
print(len(h))
a = len(h)
h.sort(reverse=True)
#print(h)
m = 0
k = 0
for f in range(a):
    for line in lines:
        x1, y1, x2, y2 = line[0]
        #cv2.line(i, (x1, y1), (x2, y2), (0, 255, 0), 3)
        dx = x2 - x1
        if dx < 0:
            dx = dx * -1
        dy = y2 - y1
        if dy < 0:
            dy = dy * -1
        hypo2 = sqrt(dx**2 + dy**2)
        if hypo2 == h[0]:
            m = hypo2
            xmax1 = x1
            xmax2 = x2
            ymax1 = y1
            ymax2 = y2
            cv2.line(crop, (xmax1, ymax1), (xmax2, ymax2), (255, 0, 0), 3)
            #print("xmax1=", xmax1, " ymax1=", ymax1, " xmax2=", xmax2, " ymax2=", ymax2)
        if m == h[0]:
            if hypo2 == h[f]:
                if sqrt((xmax2 - x2)**2 + (ymax2 - y2)**2) > 20:
                    if sqrt((xmax1 - x1)**2 + (ymax1 - y1)**2) > 20:
                        xs1 = x1
                        xs2 = x2
                        ys1 = y1
                        ys2 = y2
                        cv2.line(crop, (xs1, ys1), (xs2, ys2), (0, 255, 0), 3)
                        print("xs1=", xs1, " ys1=", ys1, " xs2=", xs2, " ys2=", ys2)
                        k = 1
                        break
    if k == 1:
        break
print("xmax1=", xmax1, " ymax1=", ymax1, " xmax2=", xmax2, " ymax2=", ymax2)
I have separated the minute hand and the hour hand in the above code, but I also need to separate the seconds hand. Can anyone kindly help me with it?
Based on this post: How to detect lines in OpenCV?, I have adapted it to your image and your crop method; it gives a valid output for the given image:
import cv2
import numpy as np
from matplotlib import pyplot as plt

kernel = np.ones((5, 5), np.uint8)
img1 = cv2.imread('clock.jpg')
img = cv2.imread('clock.jpg', 0)
gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)

# Create mask
height, width = img.shape
mask = np.zeros((height, width), np.uint8)
edges = cv2.Canny(thresh, 100, 200)
#cv2.imshow('detected ', gray)
cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.2, 100)
for i in circles[0, :]:
    i[2] = i[2] + 4
    # Draw on mask
    cv2.circle(mask, (i[0], i[1]), i[2], (255, 255, 255), thickness=-1)

# Copy that image using that mask
masked_data = cv2.bitwise_and(img1, img1, mask=mask)

# Apply Threshold
_, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)

# Find Contour
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(contours[0])

# Crop masked_data
crop = masked_data[y+30:y+h-30, x+30:x+w-30]

################################
kernel_size = 5
blur_crop = cv2.GaussianBlur(crop, (kernel_size, kernel_size), 0)
low_threshold = 50
high_threshold = 150
edges = cv2.Canny(blur_crop, low_threshold, high_threshold)

rho = 1                # distance resolution in pixels
theta = np.pi / 180    # angular resolution in radians
threshold = 15         # minimum number of votes
min_line_length = 100  # minimum number of pixels making up a line
max_line_gap = 10      # maximum gap in pixels between connectable line segments
line_image = np.copy(crop) * 0

# Run Hough on the edge-detected image
# Output "lines" is an array containing endpoints of detected line segments
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
                        min_line_length, max_line_gap)
for line in lines:
    for x1, y1, x2, y2 in line:
        cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)

# Draw the lines on the image
lines_edges = cv2.addWeighted(crop, 0.8, line_image, 1, 0)
cv2.imshow('line_image', line_image)
cv2.imshow('crop', crop)
With some parameter tweaking on the Hough detection you should be able to reduce the results to 3 nice lines.
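One possible form of that tweaking (a sketch, not part of the original answer): bucket the segments returned by HoughLinesP by angle and keep only the longest segment per bucket, so that each clock hand contributes a single line. The 5-degree bucket width is an arbitrary choice:
import numpy as np

def merge_by_angle(lines, tol_deg=5):
    # keep only the longest segment among segments with similar angles
    best = {}  # angle bucket -> (length, segment)
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angle = np.degrees(np.arctan2(y2 - y1, x2 - x1)) % 180
        length = np.hypot(x2 - x1, y2 - y1)
        bucket = int(angle // tol_deg)
        if bucket not in best or length > best[bucket][0]:
            best[bucket] = (length, (x1, y1, x2, y2))
    return [seg for _, seg in best.values()]

hands = merge_by_angle(lines)  # ideally ~3 segments: hour, minute, second hand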

pytesseract detects the wrong integer values

I'm trying to detect the numbers found in my squares, and I thought I could use the library pytesseract, but for some reason I read the wrong values.
This is the console output:
And here I have all my pictures (they are separated, this is just to show them all)
import numpy as np
import cv2
import re
from PIL import Image
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
lower = (0, 240, 160)
upper = (30, 255, 255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result1 = img.copy()
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
    cv2.drawContours(result1, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        # LOOK AT THIS PART
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
        print("Number ", Number)
        ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png", result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
cv2.waitKey(0)
cv2.destroyAllWindows()
I thought I could write Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') and then get the number from the image, but I don't. How can that be?
EDIT: NEW ERROR
How do I solve it with this picture?
from PIL import Image
from operator import itemgetter
import numpy as np
import easyocr
import cv2
import re
import imutils
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

# Define empty array
Cubes = []

def getNumber(ROI):
    img = cv2.imread(ROI)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, 0)
    #cv2.imshow(thresh)
    #cv2.imshow('Thresholded original', thresh)
    #cv2.waitKey(0)

    ## Get contours
    contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    ## only draw contours that have big areas
    imx = img.shape[0]
    imy = img.shape[1]
    lp_area = (imx * imy) / 10

    tmp_img = img.copy()
    for cnt in contours:
        approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
        if cv2.contourArea(cnt) > lp_area:
            # Draw box corners and minimum area rectangle
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            #cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
            #cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
            #cv2.imshow(tmp_img)
            #cv2.imshow('Minimum Area Rectangle', tmp_img)
            #cv2.waitKey(0)

            ## Correct orientation and crop
            # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
            width = int(rect[1][0])
            height = int(rect[1][1])
            src_pts = box.astype("float32")
            dst_pts = np.array([[0, height-1],
                                [0, 0],
                                [width-1, 0],
                                [width-1, height-1]], dtype="float32")
            M = cv2.getPerspectiveTransform(src_pts, dst_pts)
            warped = cv2.warpPerspective(img, M, (width, height))

            # Run OCR on cropped image
            # If the predicted value is a digit print, else rotate first
            result = reader.readtext(warped)
            print(result)
            predicted_digit = result[0][1]
            if np.char.isdigit(predicted_digit) == True:
                cv2.imshow("warped " + ROI, warped)
            else:
                rot_img = warped.copy()
                for i in range(0, 3):
                    rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
                    result = reader.readtext(rotated_image)
                    #if np.array(result).size == 0:
                    #    continue
                    if not result:
                        rot_img = rotated_image
                        continue
                    #if len(result) == 0:
                    #    continue
                    predicted_digit = result[0][1]
                    #print(result)
                    #print(predicted_digit)
                    #cv2.imshow(rotated_image)
                    if np.char.isdigit(predicted_digit) == True:
                        cv2.imshow("Image " + ROI, rotated_image)
                        break
                    rot_img = rotated_image
    return predicted_digit

def sortNumbers(Cubes):
    Cubes = sorted(Cubes, key=lambda x: int(x[2]))
    #Cubes.sort(key=itemgetter(2)) # In-place sorting
    #Cubes = sorted(Cubes, key=itemgetter(2)) # Create a new list
    return Cubes

#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
# Change these if cube colours change?
lower = (20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
    cv2.drawContours(result2, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        # Read saved image (number)
        result = getNumber('ROI_{}.png'.format(ROI_number))
        print("ROI_number: ", result)
        Cubes.append([cx, cy, result])
        ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png", result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()
I get the following error (it can't detect a number)
Traceback (most recent call last):
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module>
    result = getNumber('ROI_{}.png'.format(ROI_number))
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber
    predicted_digit = result[0][1]
IndexError: list index out of range
This is an implementation of my comment. Since I do not have the individual images, this code works with the given grid-like processed image.
For OCR I used EasyOCR instead of Tesseract. You could also try pytesseract on each output cropped image. Instead of rotating 4 times by 90 degrees and judging by confidence, I went with digit detection on the OCR result: if a detection is not a number, only then rotate and retry.
Tested on Google Colab. Replace cv2_imshow(...) with cv2.imshow(...) for working locally. Also remove the from google.colab.patches import cv2_imshow import.
This is a modified version of my answer on card orientation correction here: OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card. All previous code is left as comments.
Code
!pip install easyocr

import easyocr
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

img = cv2.imread('1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, 0)
#cv2_imshow(thresh)
#cv2.imshow('Thresholded original', thresh)
#cv2.waitKey(0)

## Get contours
contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

## only draw contours that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10

#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################

def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")
    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # now, compute the difference between the points, the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    # return the ordered coordinates
    return rect

def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect
    # compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view",
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    # return the warped image
    return warped

#################################################################

#print(len(contours))
tmp_img = img.copy()

for cnt in contours:
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    ## calculate number of vertices
    #print(len(approx))

    ## Get the largest contours only
    ## Side count cannot be used since contours are not all rectangular
    if cv2.contourArea(cnt) > lp_area:
    #if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:

        # print("\n\n")
        # print("#################################################")
        # print("rectangle")
        # print("#################################################")
        # print("\n\n")

        #tmp_img = img.copy()
        #cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Contour Borders', tmp_img)
        #cv2.waitKey(0)

        # tmp_img = img.copy()
        # cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('Contour Filled', tmp_img)
        # #cv2.waitKey(0)

        # # Make a hull around the contour and draw it on the original image
        # tmp_img = img.copy()
        # mask = np.zeros((img.shape[:2]), np.uint8)
        # hull = cv2.convexHull(cnt)
        # cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
        # cv2_imshow(mask)
        # #cv2.imshow('Convex Hull Mask', mask)
        # #cv2.waitKey(0)

        # # Draw minimum area rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Minimum Area Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # Draw box corners and minimum area rectangle
        #tmp_img = img.copy()
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        #print(rect)
        #print(box)
        cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
        cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Minimum Area Rectangle', tmp_img)
        #cv2.waitKey(0)

        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))
        #cv2_imshow(warped)

        # Run OCR on cropped image
        # If the predicted value is a digit print, else rotate first
        result = reader.readtext(warped)
        predicted_digit = result[0][1]
        print("Detected Text:")
        if np.char.isdigit(predicted_digit) == True:
            print(result)
            print(predicted_digit)
            cv2_imshow(warped)
        else:
            rot_img = warped.copy()
            for i in range(0, 3):
                rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
                result = reader.readtext(rotated_image)
                #if np.array(result).size == 0:
                #    continue
                if not result:
                    rot_img = rotated_image
                    continue
                #if len(result) == 0:
                #    continue
                predicted_digit = result[0][1]
                #print(result)
                #print(predicted_digit)
                #cv2_imshow(rotated_image)
                if np.char.isdigit(predicted_digit) == True:
                    print(result)
                    print(predicted_digit)
                    cv2_imshow(rotated_image)
                    break
                rot_img = rotated_image

        # # Draw bounding rectangle
        # #tmp_img = img.copy()
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # Bounding Rectangle and Minimum Area Rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # determine the most extreme points along the contour
        # # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
        # tmp_img = img.copy()
        # extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
        # extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
        # extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
        # extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
        # cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
        # cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
        # cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
        # cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
        # cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)
        # print("Corner Points: ", extLeft, extRight, extTop, extBot)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('img contour drawn', tmp_img)
        # #cv2.waitKey(0)
        # #cv2.destroyAllWindows()

        # ## Perspective Transform
        # tmp_img = img.copy()
        # pts = np.array([extLeft, extRight, extTop, extBot])
        # warped = four_point_transform(tmp_img, pts)
        # cv2_imshow(tmp_img)
        # #cv2.imshow("Warped", warped)
        # #cv2.waitKey(0)

cv2_imshow(tmp_img)
#cv2.destroyAllWindows()
Output Prediction
Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1
Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2
Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4
Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3
White Region Detection With Corners
Alternate methods:
Try a pretrained digit classification model (trained on MNIST and others) on each large contour exceeding a certain area; a sketch follows after this list.
Use multitask object detection with rotation: one output of the network would be the detections, another an angle regression to predict orientation.
Use a text detector like EAST, and run OCR on each detected text region.
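As a rough sketch of the first alternative (my addition, not the original answer's code): a digit classifier trained on MNIST could score each rectified crop. The model file mnist_cnn.h5 and the Keras usage are assumptions; warped is one of the cropped cube images produced by the code above:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

model = load_model('mnist_cnn.h5')  # hypothetical 28x28 grayscale digit classifier

digit = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
digit = cv2.resize(digit, (28, 28))
digit = 255 - digit                      # MNIST digits are white on black
digit = digit.astype('float32') / 255.0
probs = model.predict(digit.reshape(1, 28, 28, 1))[0]
print('predicted digit:', np.argmax(probs), 'confidence:', probs.max())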

Detect the longest horizontal and vertical line in an image

I have a PDF from which I want to extract text. I use tesseract for OCR, which does a good job. But my problem is that it does not recognize the 2-column format of the document and hence merges the 2 columns together.
I want to split the document on the vertical (in the middle of the page) and horizontal (at the top of the page) lines and then feed it to tesseract. So I do the following:
Preprocessing steps:
# color to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# edge detection
edges = cv2.Canny(gray, 500, 1000, apertureSize=7)
# dilate
kernel = np.ones((5,5),np.float32)/25
edges = cv2.dilate(edges, kernel, iterations=1)
# blur
blur = cv2.GaussianBlur(edges, (7, 7), 0)
These steps produce:
Now, I do line detection:
minLineLength = 1000
maxLineGap = 500
lines = cv2.HoughLinesP(processed_img, 1, np.pi, 2, minLineLength, maxLineGap)
for line in lines:
    x1, y1, x2, y2 = line[0]
    cv2.line(img, (x1, y1), (x2, y2), (0, 0, 0), 1)
The final result (after stitching all the images back into a pdf) looks like this.
I have tried various combinations for theta, minLineLength and maxLineGap and this was the best result I could get. Any help/pointers would be greatly appreciated!
One of the possible solutions is described below:
1) Detect the horizontal line. Below is one way to do this:
import cv2
import numpy as np

def discard(image):
    image = np.uint8(image)
    _, im_label, stts, _ = cv2.connectedComponentsWithStats(image, connectivity=4)
    msk1 = np.isin(im_label, np.where(stts[:, cv2.CC_STAT_WIDTH] > 500)[0])
    msk2 = np.isin(im_label, np.where(stts[:, cv2.CC_STAT_HEIGHT] > 500)[0])
    image[(msk1 | msk2)] = 0
    return image

img = cv2.imread("page_1.jpg", 0)
img = cv2.resize(img, None, fx=0.35, fy=0.35, interpolation=cv2.INTER_LINEAR)
height, width = img.shape[:2]

# Binarization
thresh = 255 - img
ret, thresh = cv2.threshold(thresh, 5, 255, cv2.THRESH_BINARY)

# Discarding long connected components
without_lines = discard(thresh.copy())
just_lines = cv2.bitwise_xor(thresh, without_lines)

horizontal = just_lines.copy()

# separating horizontal line
h_kernel_large = np.array([[0, 0, 0, 0, 0],
                           [0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1],
                           [0, 0, 0, 0, 0],
                           [0, 0, 0, 0, 0]], np.uint8)
horizontal = cv2.morphologyEx(horizontal, cv2.MORPH_OPEN, h_kernel_large, iterations=2)
cv2.imshow("horizontal_line", horizontal)
This is what we get in the horizontal matrix:
2) Use findContours and boundingRect to get the coordinates of that horizontal line. Then use that coordinate to crop the image horizontally.
upper_portion = img
lower_portion = img
contours, hierarchy = cv2.findContours(horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    upper_portion = img[0:y, 0:width]
    lower_portion = img[y+h:height, 0:width]
cv2.imshow("upper_portion", upper_portion)
cv2.imshow("lower_portion", lower_portion)
cv2.waitKey(0)
Below are images after cropping.
upper_portion:
lower_portion:
3) Detect the vertical line and crop the lower_portion image using the same procedure described in step 1; a sketch of that step follows below.
In step 1, I basically used connected component analysis followed by a morphological opening operation.
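A minimal sketch of step 3, assuming the discard() helper and the variables from the snippets above are in scope (transposing the kernel and the crop bookkeeping are my additions, not code from the original answer):
# repeat step 1 on lower_portion, this time keeping the vertical line
v_kernel_large = h_kernel_large.T  # transpose: a single vertical line of ones

thresh_lower = cv2.threshold(255 - lower_portion, 5, 255, cv2.THRESH_BINARY)[1]
just_lines_lower = cv2.bitwise_xor(thresh_lower, discard(thresh_lower.copy()))
vertical = cv2.morphologyEx(just_lines_lower, cv2.MORPH_OPEN, v_kernel_large, iterations=2)

# crop on the vertical line's bounding box to get the two columns
contours, hierarchy = cv2.findContours(vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    left_column = lower_portion[:, 0:x]
    right_column = lower_portion[:, x+w:]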

Normalizing curved lines to form a rectangle using opencv

I have an underwater camera that detects PVC frameworks, as in Image 1. I've added random water effects found online so the image approximates the expected rough conditions.
I've tried two approaches:
Canny Edge Algorithm.
Multiple color conversions, smoothing and thresholding.
The most efficient result was the latter's.
My problem is that I'm having trouble preparing this result for further processing.
For easier processing, the result needs to be line-shaped with a constant width, as in this one for the rightmost part.
I tried the Probabilistic Hough Line Transform to detect lines, but they are all too curved to be detected.
To extract the lines from the image, you could filter the horizontal and vertical lines after thresholding and draw rectangles with a constant width through the centers, then remove the small objects around the intersections:
import cv2
import numpy as np
from skimage.io import imread
from skimage.morphology import remove_small_objects

rgb = imread('https://i.stack.imgur.com/QPz8W.jpg')

# convert to HSV for thresholding
hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)

# threshold hue channel for purple tubes, value channel for blue tubes
thresh_hue = cv2.threshold(hsv[..., 0], 127, 255, cv2.THRESH_BINARY)[1]
thresh_val = cv2.threshold(hsv[..., 2], 200, 255, cv2.THRESH_BINARY)[1]

# combine purple tubes with blue tubes
thresh = thresh_hue | thresh_val
cv2.imwrite('threshold_result.png', thresh)

# morphologically close the gaps between purple and blue tubes
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
cv2.imwrite('closing_result.png', thresh)

# morphological opening with horizontal and vertical kernels
h_kernel = np.zeros((11, 11), dtype=np.uint8)
h_kernel[5, :] = 1
v_kernel = np.zeros((11, 11), dtype=np.uint8)
v_kernel[:, 5] = 1

h_tubes = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, h_kernel, iterations=6)
v_tubes = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, v_kernel, iterations=7)
cv2.imwrite('horizontal_tubes.png', h_tubes)
cv2.imwrite('vertical_tubes.png', v_tubes)

# find contours and draw rectangles with constant widths through centers
h_contours = cv2.findContours(h_tubes, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[0]
h_lines = np.zeros(thresh.shape, np.uint8)
for cnt in h_contours:
    x, y, w, h = cv2.boundingRect(cnt)
    y += int(np.floor(h / 2) - 4)
    cv2.rectangle(h_lines, (x, y), (x + w, y + 8), 255, -1)

v_contours = cv2.findContours(v_tubes, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[0]
v_lines = np.zeros(thresh.shape, np.uint8)
for cnt in v_contours:
    x, y, w, h = cv2.boundingRect(cnt)
    x += int(np.floor(w / 2) - 4)
    cv2.rectangle(v_lines, (x, y), (x + 8, y + h), 255, -1)

# combine horizontal and vertical lines
all_lines = h_lines | v_lines
cv2.imwrite('all_lines.png', all_lines)

# remove small objects around the intersections
xor = np.bool8(h_lines ^ v_lines)
removed = xor ^ remove_small_objects(xor, 350)
result = all_lines & ~removed * 255
cv2.imwrite('result.png', result)
threshold_result.png
closing_result.png
horizontal_tubes.png
vertical_tubes.png
all_lines.png
result.png

How to find the distance between two concentric contours, for different angles?

I have an image with two contours, where one contour is always 'inside' another. I want to find the distance between the two contours for 90 different angles (meaning, distance at every 4 degrees). How do I go about doing it?
Here's an example image:
Thank you!
Take this image of two sets of two shapes:
We want to find the distance between the edges of each set of shapes, including where the edges overlap.
First things first, we import the necessary modules:
import cv2
import numpy as np
To do that, we will first need to retrieve every shape in the image as lists of contours. In the above particular example, there are 4 shapes that need to be detected. To retrieve each shape, we will need to use a mask to mask out every color besides the color of the shape of interest:
def get_masked(img, lower, upper):
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(img_hsv, np.array(lower), np.array(upper))
    img_mask = cv2.bitwise_and(img, img, mask=mask)
    return img_mask
The lower and upper parameters will determine the minimum and maximum HSV values that will not be masked out of the image. Given the right lower and upper parameters, you will be able to extract one image with only the green shapes, and one image with only the blue shapes:
With the masked images, you can then proceed to process them into more clean contours. Here is the preprocess function, with values that can be tweaked whenever necessary:
def get_processed(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (7, 7), 7)
    img_canny = cv2.Canny(img_blur, 50, 50)
    kernel = np.ones((7, 7))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    img_erode = cv2.erode(img_dilate, kernel, iterations=2)
    return img_erode
Passing in the masked images will give you
With the images masked and processed, they will be ready for opencv to detect their contours:
def get_contours(img):
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [cnt for cnt in contours if cv2.contourArea(cnt) > 500]
The list comprehension at the return statement is there to filter out noise by specifying that every contour must have an area that is greater than 500.
Now, we will define some basic functions that we will later use:
def get_centeroid(cnt):
    length = len(cnt)
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return int(sum_x / length), int(sum_y / length)

def get_pt_at_angle(pts, pt, ang):
    angles = np.rad2deg(np.arctan2(*(pt - pts).T))
    angles = np.where(angles < -90, angles + 450, angles + 90)
    found = np.rint(angles) == ang
    if np.any(found):
        return pts[found][0]
The names of the functions are pretty self-explanatory; the first one returns the center point of a contour, and the second one returns a point in a given array of points, pts, that is at a given angle, ang, relative to a given point, pt. The np.where in the get_pt_at_angle function is there to shift the starting angle, 0, to the positive x axis, as it by default will be at the positive y axis.
Time to define the function that will return the distances. First, define it so that these five parameters can be passed in:
def get_distances(img, cnt1, cnt2, center, step):
A brief explanation on each parameter:
img, the image array
cnt1, the first shape
cnt2, the second shape
center, the origin for the distance calculations
step, the number of degrees to be jumped per value
Define a dictionary to store the distances, with the angles as key and the distances as values:
angles = dict()
Loop through each angle at which you want to retrieve the distance between the edges of the two shapes, and find the coordinate on each of the two contours that lies at the current angle of the iteration, angle, relative to the origin point, center, using the get_pt_at_angle function we defined earlier.
for angle in range(0, 360, step):
    pt1 = get_pt_at_angle(cnt1, center, angle)
    pt2 = get_pt_at_angle(cnt2, center, angle)
Check if a point exists in both contours that is at the specific angle relative to the origin:
if np.any(pt1) and np.any(pt2):
You can use the np.linalg.norm method to get the distance between the two points. I also made it draw the text and connecting lines for visualization. Don't forget to add the angle and value to the angles dictionary, and you can then break out of the inner for loop. At the end of the function, return the image that has the text and lines drawn on it:
d = round(np.linalg.norm(pt1 - pt2))
cv2.putText(img, str(d), tuple(pt1), cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 0))
cv2.drawContours(img, np.array([[center, pt1]]), -1, (255, 0, 255), 1)
angles[angle] = d
return img, angles
Finally, you can utilize the function defined on an image:
img = cv2.imread("shapes1.png")
img_green = get_masked(img, [10, 0, 0], [70, 255, 255])
img_blue = get_masked(img, [70, 0, 0], [179, 255, 255])
img_green_processed = get_processed(img_green)
img_blue_processed = get_processed(img_blue)
img_green_contours = get_contours(img_green_processed)
img_blue_contours = get_contours(img_blue_processed)
Using the image of four shapes, you can tell that the img_green_contours and img_blue_contours will each contain two contours. But you might be wondering: how did I choose the minimum and maximum HSV values? Well, I used a trackbar code. You can run the below code, adjusting the HSV values using the trackbars until you find a range where everything in the image is masked out (in black) except for the shape you want to retrieve:
import cv2
import numpy as np

def empty(a):
    pass

cv2.namedWindow("TrackBars")
cv2.createTrackbar("Hue Min", "TrackBars", 0, 179, empty)
cv2.createTrackbar("Hue Max", "TrackBars", 179, 179, empty)
cv2.createTrackbar("Sat Min", "TrackBars", 0, 255, empty)
cv2.createTrackbar("Sat Max", "TrackBars", 255, 255, empty)
cv2.createTrackbar("Val Min", "TrackBars", 0, 255, empty)
cv2.createTrackbar("Val Max", "TrackBars", 255, 255, empty)

img = cv2.imread("shapes0.png")

while True:
    h_min = cv2.getTrackbarPos("Hue Min", "TrackBars")
    h_max = cv2.getTrackbarPos("Hue Max", "TrackBars")
    s_min = cv2.getTrackbarPos("Sat Min", "TrackBars")
    s_max = cv2.getTrackbarPos("Sat Max", "TrackBars")
    v_min = cv2.getTrackbarPos("Val Min", "TrackBars")
    v_max = cv2.getTrackbarPos("Val Max", "TrackBars")
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower = np.array([h_min, s_min, v_min])
    upper = np.array([h_max, s_max, v_max])
    mask = cv2.inRange(img_hsv, lower, upper)
    img_masked = cv2.bitwise_and(img, img, mask=mask)
    cv2.imshow("Image", img_masked)
    if cv2.waitKey(1) & 0xFF == ord("q"):  # If you press the q key
        break
With the values I chose, I got:
Loop through the blue shape contours and green shape contours in parallel, and depending on which color shape you want the origin to be at the center of, you can pass that color contour into the get_centeroid function we defined earlier:
for cnt_blue, cnt_green in zip(img_blue_contours, img_green_contours[::-1]):
    center = get_centeroid(cnt_blue)
    img, angles = get_distances(img, cnt_green.squeeze(), cnt_blue.squeeze(), center, 30)
    print(angles)
Notice that I used 30 as the step; that number can be changed to 4, I used 30 so the visualization would be more clear.
Finally, we can display the image:
cv2.imshow("Image", img)
cv2.waitKey(0)
Altogether:
import cv2
import numpy as np

def get_masked(img, lower, upper):
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(img_hsv, np.array(lower), np.array(upper))
    img_mask = cv2.bitwise_and(img, img, mask=mask)
    return img_mask

def get_processed(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (7, 7), 7)
    img_canny = cv2.Canny(img_blur, 50, 50)
    kernel = np.ones((7, 7))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    img_erode = cv2.erode(img_dilate, kernel, iterations=2)
    return img_erode

def get_contours(img):
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [cnt for cnt in contours if cv2.contourArea(cnt) > 500]

def get_centeroid(cnt):
    length = len(cnt)
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return int(sum_x / length), int(sum_y / length)

def get_pt_at_angle(pts, pt, ang):
    angles = np.rad2deg(np.arctan2(*(pt - pts).T))
    angles = np.where(angles < -90, angles + 450, angles + 90)
    found = np.rint(angles) == ang
    if np.any(found):
        return pts[found][0]

def get_distances(img, cnt1, cnt2, center, step):
    angles = dict()
    for angle in range(0, 360, step):
        pt1 = get_pt_at_angle(cnt1, center, angle)
        pt2 = get_pt_at_angle(cnt2, center, angle)
        if np.any(pt1) and np.any(pt2):
            d = round(np.linalg.norm(pt1 - pt2))
            cv2.putText(img, str(d), tuple(pt1), cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 0))
            cv2.drawContours(img, np.array([[center, pt1]]), -1, (255, 0, 255), 1)
            angles[angle] = d
    return img, angles

img = cv2.imread("shapes1.png")

img_green = get_masked(img, [10, 0, 0], [70, 255, 255])
img_blue = get_masked(img, [70, 0, 0], [179, 255, 255])

img_green_processed = get_processed(img_green)
img_blue_processed = get_processed(img_blue)

img_green_contours = get_contours(img_green_processed)
img_blue_contours = get_contours(img_blue_processed)

for cnt_blue, cnt_green in zip(img_blue_contours, img_green_contours[::-1]):
    center = get_centeroid(cnt_blue)
    img, angles = get_distances(img, cnt_green.squeeze(), cnt_blue.squeeze(), center, 30)
    print(angles)

cv2.imshow("Image", img)
cv2.waitKey(0)
Output:
{0: 5, 30: 4, 60: 29, 90: 25, 120: 31, 150: 8, 180: 5, 210: 7, 240: 14, 270: 12, 300: 14, 330: 21}
{0: 10, 30: 9, 60: 6, 90: 0, 120: 11, 150: 7, 180: 5, 210: 6, 240: 6, 270: 4, 300: 0, 330: 16}
Note: For certain shapes, some angles might be absent in the dictionary. That would be caused by the get_processed function; you would get more accurate results if you turn down some of the values, like the blur sigma.
In the following code, I have just given you the example for the vertical line; the rest can be obtained by rotating the line. The result looks like this. Instead of drawing, you can use the coordinates for the distance calculation.
import shapely.geometry as shapgeo
import numpy as np
import cv2

img = cv2.imread('image.jpg', 0)
ret, img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)

# Fit the ellipses
_, contours0, hierarchy = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
outer_ellipse = [cv2.approxPolyDP(contours0[0], 0.1, True)]
inner_ellipse = [cv2.approxPolyDP(contours0[2], 0.1, True)]

h, w = img.shape[:2]
vis = np.zeros((h, w, 3), np.uint8)
cv2.drawContours(vis, outer_ellipse, -1, (255, 0, 0), 1)
cv2.drawContours(vis, inner_ellipse, -1, (0, 0, 255), 1)

## Extract contour of ellipses
cnt_outer = np.vstack(outer_ellipse).squeeze()
cnt_inner = np.vstack(inner_ellipse).squeeze()

# Determine centroid
M = cv2.moments(cnt_inner)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
print(cx, cy)

# Draw full segment lines
cv2.line(vis, (cx, 0), (cx, w), (150, 0, 0), 1)

# Calculate intersections using Shapely
# http://toblerity.org/shapely/manual.html
PolygonEllipse_outer = shapgeo.asLineString(cnt_outer)
PolygonEllipse_inner = shapgeo.asLineString(cnt_inner)
PolygonVerticalLine = shapgeo.LineString([(cx, 0), (cx, w)])

insecouter = np.array(PolygonEllipse_outer.intersection(PolygonVerticalLine)).astype(np.int)
insecinner = np.array(PolygonEllipse_inner.intersection(PolygonVerticalLine)).astype(np.int)
cv2.line(vis, (insecouter[0, 0], insecinner[1, 1]), (insecouter[1, 0], insecouter[1, 1]), (0, 255, 0), 2)
cv2.line(vis, (insecouter[0, 0], insecinner[0, 1]), (insecouter[1, 0], insecouter[0, 1]), (0, 255, 0), 2)

cv2.imshow('contours', vis)
0xFF & cv2.waitKey()
cv2.destroyAllWindows()
I borrowed the general idea using Shapely and the basic code from tfv's answer. Nevertheless, iterating the desired angles, calculating the needed end points for the correct lines to be intersected with the shapes, calculating and storing the distances, etc. were missing, so I added all that.
That'd be my full code:
import cv2
import numpy as np
import shapely.geometry as shapgeo

# Read image, and binarize
img = cv2.imread('G48xu.jpg', cv2.IMREAD_GRAYSCALE)
img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)[1]

# Find (approximated) contours of inner and outer shape
cnts, hier = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
outer = [cv2.approxPolyDP(cnts[0], 0.1, True)]
inner = [cv2.approxPolyDP(cnts[2], 0.1, True)]

# Just for visualization purposes: Draw contours of inner and outer shape
h, w = img.shape[:2]
vis = np.zeros((h, w, 3), np.uint8)
cv2.drawContours(vis, outer, -1, (255, 0, 0), 1)
cv2.drawContours(vis, inner, -1, (0, 0, 255), 1)

# Squeeze contours for further processing
outer = np.vstack(outer).squeeze()
inner = np.vstack(inner).squeeze()

# Calculate centroid of inner contour
M = cv2.moments(inner)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])

# Calculate maximum needed radius for later line intersections
r_max = np.min([cx, w - cx, cy, h - cy])

# Set up angles (in degrees)
angles = np.arange(0, 360, 4)

# Initialize distances
dists = np.zeros_like(angles)

# Prepare calculating the intersections using Shapely
poly_outer = shapgeo.asLineString(outer)
poly_inner = shapgeo.asLineString(inner)

# Iterate angles and calculate distances between inner and outer shape
for i, angle in enumerate(angles):

    # Convert angle from degrees to radians
    angle = angle / 180 * np.pi

    # Calculate end points of line from centroid in angle's direction
    x = np.cos(angle) * r_max + cx
    y = np.sin(angle) * r_max + cy
    points = [(cx, cy), (x, y)]

    # Calculate intersections using Shapely
    poly_line = shapgeo.LineString(points)
    insec_outer = np.array(poly_outer.intersection(poly_line))
    insec_inner = np.array(poly_inner.intersection(poly_line))

    # Calculate distance between intersections using L2 norm
    dists[i] = np.linalg.norm(insec_outer - insec_inner)

    # Just for visualization purposes: Draw lines for some examples
    if (i == 10) or (i == 40) or (i == 75):

        # Line from centroid to end points
        cv2.line(vis, (cx, cy), (int(x), int(y)), (128, 128, 128), 1)

        # Line between both shapes
        cv2.line(vis,
                 (int(insec_inner[0]), int(insec_inner[1])),
                 (int(insec_outer[0]), int(insec_outer[1])), (0, 255, 0), 2)

        # Distance
        cv2.putText(vis, str(dists[i]), (int(x), int(y)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.75, (0, 255, 0), 2)

# Output angles and distances
print(np.vstack([angles, dists]).T)

# Just for visualization purposes: Output image
cv2.imshow('Output', vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
I generated some exemplary output for visualization purposes:
And, here's an excerpt from the output, showing angle and the corresponding distance:
[[ 0 70]
[ 4 71]
[ 8 73]
[ 12 76]
[ 16 77]
...
[340 56]
[344 59]
[348 62]
[352 65]
[356 67]]
Hopefully, the code is self-explanatory. If not, please don't hesitate to ask questions. I'll gladly provide further information.
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
NumPy: 1.20.2
OpenCV: 4.5.1
Shapely: 1.7.1
----------------------------------------
