I have an image that contains a table and some other data. I need to draw borders around the table so that each cell is separated.
My image looks like this
What i am trying:
1) dilating the image to create continuous spots, which looks like
2) finding contours and drawing
Issue: I am not able to draw the borders correctly because my table cells are very close together, so dilation merges them into one continuous spot.
I took this code from the Internet and tried to modify it, but it did not work well for this image.
code :
import os
import cv2
import imutils
# This only works if there's only one table on a page
# Important parameters:
# - morph_size
# - min_text_height_limit
# - max_text_height_limit
# - cell_threshold
# - min_columns
def pre_process_image(img, save_in_file, morph_size=(7, 7)):
    """Binarise a BGR image and dilate its dark content into solid blobs.

    Args:
        img: BGR image as accepted by ``cv2.cvtColor``.
        save_in_file: Path to write the pre-processed image to, or ``None``
            to skip writing.
        morph_size: (width, height) of the rectangular structuring element
            used for the dilation.

    Returns:
        The binarised image with the dilated content restored to its
        original polarity (the dilation itself runs on the inverted image).
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # NOTE(review): with THRESH_OTSU set, OpenCV is documented to pick the
    # threshold itself, so the 250 passed here is presumably ignored.
    _, binary = cv2.threshold(grayscale, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Dilation grows bright regions, so invert first, dilate, then invert
    # back to the original polarity.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    dilated = cv2.dilate(~binary, kernel, anchor=(-1, -1), iterations=1)
    result = ~dilated

    if save_in_file is not None:
        cv2.imwrite(save_in_file, result)
    return result
def find_text_boxes(pre, min_text_height_limit=3, max_text_height_limit=30):
    """Return bounding boxes of contours whose height looks like text.

    Args:
        pre: Pre-processed single-channel image passed to
            ``cv2.findContours``.
        min_text_height_limit: Exclusive lower bound on the box height.
        max_text_height_limit: Exclusive upper bound on the box height.

    Returns:
        A list of ``(x, y, w, h)`` boxes as returned by ``cv2.boundingRect``.
    """
    # findContours returns (contours, hierarchy) in OpenCV 2/4 but
    # (image, contours, hierarchy) in OpenCV 3; the contour list is always
    # the second element from the end.
    contours = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Keep only the boxes that satisfy the text-height assumption.
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        if min_text_height_limit < box[3] < max_text_height_limit:
            boxes.append(box)
    return boxes
def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    """Group text boxes into table rows.

    Boxes are clustered by ``y // cell_threshold``; clusters with fewer than
    ``min_columns`` boxes are discarded.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` boxes.
        cell_threshold: Height in pixels of one row bucket.
        min_columns: Minimum number of boxes a row needs to be kept.

    Returns:
        A list of rows ordered by the y coordinate of each row's first box;
        every row is a list of boxes sorted in ascending tuple order (x
        first).
    """
    # Only the row clusters feed the result. The column dict of the earlier
    # version was keyed inconsistently and never read, so it is dropped.
    rows = {}
    for box in boxes:
        (_, y, _, _) = box
        rows.setdefault(y // cell_threshold, []).append(box)

    # Keep the rows that have enough cells, each sorted by x coordinate.
    table_cells = [sorted(row) for row in rows.values() if len(row) >= min_columns]

    # Order the rows top to bottom.
    table_cells.sort(key=lambda row: row[0][1])
    return table_cells
def build_lines(table_cells):
    """Build the horizontal and vertical grid lines of a table.

    Args:
        table_cells: Rows of ``(x, y, w, h)`` boxes, rows ordered top to
            bottom and boxes ordered left to right. May be ``None`` or empty.

    Returns:
        ``(hor_lines, ver_lines)``, each a list of ``(x1, y1, x2, y2)``
        tuples. Both lists are empty when there are no cells.
    """
    if not table_cells:
        return [], []

    # Right edge of the table: the furthest right edge of any cell. Taking
    # the row whose last cell is merely the *widest* can stop short of a
    # narrower cell that starts further right.
    max_x = max(x + w for row in table_cells for (x, _, w, _) in row)
    # Bottom edge of the table: the lowest bottom edge in the last row.
    max_y = max(y + h for (_, y, _, h) in table_cells[-1])

    first_row = table_cells[0]

    # One horizontal line along the top of each row, starting at the row's
    # first cell.
    hor_lines = [(row[0][0], row[0][1], max_x, row[0][1]) for row in table_cells]
    # One vertical line along the left side of each cell in the first row.
    ver_lines = [(x, y, x, max_y) for (x, y, _, _) in first_row]

    # Close the grid on the right and at the bottom.
    ver_lines.append((max_x, first_row[-1][1], max_x, max_y))
    hor_lines.append((first_row[0][0], max_y, max_x, max_y))
    return hor_lines, ver_lines
if __name__ == "__main__":
    # Read data/page1.jpg, detect the table grid and write the annotated
    # image to data/out.png (the pre-processed image goes to data/pre.png).
    in_file = os.path.join("data", "page1.jpg")
    pre_file = os.path.join("data", "pre.png")
    out_file = os.path.join("data", "out.png")

    img = cv2.imread(in_file)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise SystemExit(f"Could not read input image: {in_file}")

    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)

    # Visualize the result: draw every grid line in red on a copy.
    vis = img.copy()
    for x1, y1, x2, y2 in hor_lines + ver_lines:
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(out_file, vis)
Very interesting application.
Raw dilation may not be the best way to do it.
I recommend using an OCR routine instead, as shown below.
The output is something like this:
If two text rows are close to each other, e.g. row2 - row1 < n pixels, treat them as adjacent rows and draw the separator midway between the bottom of the first row (row1 + height1) and the top of the second row (row2). The resulting line should be quite accurate.
In my sample, |292 - 335| < 50, so draw a line at y = (292 + 27 + 335) / 2,
i.e. between the asset line and the property line.
For the OCR package, you can try with tesseract if you insist with python.
See here for python text coordinate Tesseract OCR Text Position
// Walks the OCR result at granularity `myLevel`, reading each element's
// bounding box and text.
// NOTE(review): this is a fragment; the opening `do {` and the braces around
// the `using`/`if` bodies are not visible here, so it does not compile as shown.
Tesseract.PageIteratorLevel myLevel = /*TODO*/;
using (var page = Engine.Process(img))
using (var iter = page.GetIterator())
if (iter.TryGetBoundingBox(myLevel, out var rect))
var curText = iter.GetText(myLevel);
// Your code here, 'rect' should containt the location of the text, 'curText' contains the actual text itself
} while (iter.Next(myLevel));
rect contains the part you wanted x y height width
The demo I show here actually uses something similar to the Windows OCR sample.
Feel free to try any of the methods to get the table line you wanted.
I am trying to implement the conversion discussed in the blog post https://medium.com/@alexppppp/how-to-annotate-keypoints-using-roboflow-9bc2aa8915cd.
In my dataset I need 4 keypoints and 1 box. I want the edited code for it.
My code and error are shown below.
keypoint_names = ['Head', 'Tail']
# Maps the class id of a keypoint rectangle in the label file to the index of
# that keypoint in keypoint_names. Class 0 is reserved for bounding boxes.
# To annotate more keypoints, extend both structures together: every keypoint
# class id used in the label files needs an entry here (otherwise converter
# raises KeyError) and a matching name in keypoint_names.
rectangles2keypoints = {1:0, 2:1}


def converter(file_labels, file_image, keypoint_names):
    """Convert a YOLO-style label file into boxes and per-box keypoints.

    Each non-empty label line is ``<class> <x_c> <y_c> <w> <h>`` with
    coordinates normalised to the image size. Class 0 lines are bounding
    boxes; any other class is a small rectangle whose centre is a keypoint.

    Args:
        file_labels: Path of the label text file.
        file_image: Path of the image the labels belong to (only its width
            and height are used).
        keypoint_names: Keypoint names; its length fixes the number of
            keypoint slots per bounding box.

    Returns:
        ``(bboxes, keypoints_sorted)`` where ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` in pixels and ``keypoints_sorted[i][k]`` is
        ``[x, y, 1]`` for keypoint ``k`` of box ``i`` (or ``[]`` when no such
        keypoint lies strictly inside the box).

    Raises:
        FileNotFoundError: If the image cannot be read.
        KeyError: If a keypoint class id is missing from
            ``rectangles2keypoints``.
    """
    img = cv2.imread(file_image)
    # cv2.imread returns None instead of raising when the file is unreadable.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {file_image}")
    img_h, img_w = img.shape[0], img.shape[1]

    bboxes = []
    keypoints = []
    # Convert normalised coordinates to absolute pixel coordinates.
    with open(file_labels) as f:
        for raw_line in f:
            fields = raw_line.split()
            # Blank lines (e.g. a trailing newline) carry no annotation.
            if not fields:
                continue
            class_id = int(fields[0])
            values = [round(float(el), 5) for el in fields[1:]]
            x_c, y_c = round(values[0] * img_w), round(values[1] * img_h)
            if class_id == 0:
                w, h = round(values[2] * img_w), round(values[3] * img_h)
                bboxes.append([round(x_c - w/2), round(y_c - h/2), round(x_c + w/2), round(y_c + h/2)])
            else:
                # Keypoint ids must start at 0, hence the remapping.
                keypoints.append([rectangles2keypoints[class_id], x_c, y_c])

    # Assign every keypoint to each bounding box that strictly contains it.
    keypoints_sorted = [[[] for _ in keypoint_names] for _ in bboxes]
    for kp_id, kp_x, kp_y in keypoints:
        for bbox_idx, (x1, y1, x2, y2) in enumerate(bboxes):
            if x1 < kp_x < x2 and y1 < kp_y < y2:
                keypoints_sorted[bbox_idx][kp_id] = [kp_x, kp_y, 1]  # All keypoints are visible
    return bboxes, keypoints_sorted
I am trying to make an automatic annotation tool for YOLO object detection which uses a previously trained model to find the detections. I managed to put together some code, but I am a little stuck. As far as I know, this needs to be the annotation format for YOLO:
18 0.154167 0.431250 0.091667 0.612500
And with my code i get
0.5576068858305613, 0.5410404056310654, -0.7516528169314066, 0.33822181820869446
I am not sure why I get the minus sign on the third number, or whether I need to shorten my float numbers.
I will post the code below in case someone can help me; after completing this project I will post the whole code if someone wants to use it.
def convert(size, box):
    """Convert a corner-style box into normalised YOLO (x, y, w, h).

    Args:
        size: ``(image_width, image_height)``.
        box: ``(x_min, x_max, y_min, y_max)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each scaled by the
        reciprocal of the matching image dimension.
    """
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    # Multiply by the reciprocal (not divide) to keep the float results
    # bit-identical to the earlier formulation.
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    return (
        (x_min + x_max) / 2.0 * inv_w,
        (y_min + y_max) / 2.0 * inv_h,
        (x_max - x_min) * inv_w,
        (y_max - y_min) * inv_h,
    )
The above code is the function that converts the coordinates to YOLO format. For `size` you need to pass (w, h), and for `box` you need to pass (x, x+w, y, y+h).
# Runs a Darknet YOLO model on one image, collects detections above CONFIDENCE
# and draws the ones kept by non-maximum suppression.
# NOTE(review): this snippet is incomplete as shown; indentation is missing,
# the NMSBoxes call is cut off, and config_path, weights_path, CONFIDENCE,
# SCORE_THRESHOLD, colors, labels and b are not defined in the visible code.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
# path_name = "images/city_scene.jpg"
# NOTE(review): `image` is read here before the assignment on the next
# statement; presumably it is a path supplied by an enclosing scope.
path_name = image
image = cv2.imread(path_name)
file_name = os.path.basename(path_name)
# Unpacking into two names only works when the file name contains exactly one dot.
filename, ext = file_name.split(".")
h, w = image.shape[:2]
# create 4D blob
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
# sets the blob as the input of the network
# NOTE(review): no net.setInput(blob) call is visible after the comment above.
# get all the layer names
ln = net.getLayerNames()
# NOTE(review): indexing i[0] assumes getUnconnectedOutLayers() yields
# 1-element sequences; confirm against the installed OpenCV version.
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# feed forward (inference) and get the network output
# measure how much it took in seconds
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")
boxes, confidences, class_ids = [], [], []
# loop over each of the layer outputs
for output in layer_outputs:
# loop over each of the object detections
for detection in output:
# extract the class id (label) and confidence (as a probability) of
# the current object detection
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
# discard weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > CONFIDENCE:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[0:4] * np.array([w, h, w, h])
(centerX, centerY, width, height) = box.astype("float")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
a = w, h
# NOTE(review): `box` here is (centerX, centerY, width, height) in pixels,
# while the convert() posted with this script reads its box argument as
# (x_min, x_max, y_min, y_max); that mismatch would explain the negative
# width. The return value is also discarded.
convert(a, box)
boxes.append([x, y, int(width), int(height)])
# NOTE(review): nothing visible appends to confidences or class_ids, yet both
# are indexed below.
# NOTE(review): the call below is not closed in the visible code.
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD,
font_scale = 1
thickness = 1
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
# w and h now hold the box size and overwrite the image size read above.
w, h = boxes[i][2], boxes[i][3]
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_ids[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color=color, thickness=thickness)
text = "{}".format(labels[class_ids[i]])
# The format string has a single placeholder, so x and y are ignored here.
conf = "{:.3f}".format(confidences[i], x, y)
int1, int2 = (x, y)
#print(convert(ba, box))
#print(convert(b, boxes))
#print(convert(a, box)) #coordinates
ivan = str(int1)
b.append([text, ivan])
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = \
cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = x
text_offset_y = y - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
# As above, only the first format argument is used.
text = "{}".format(labels[class_ids[i]],x,y)
conf = "{:.3f}".format(confidences[i])
The problem is the indexes in your function. The `box` you pass in is laid out like this:
box[0]=>center x
box[1]=>center y
box[2]=>width of your bbox
box[3]=>height of your bbox
and according to the document, yolo labels are like this :
<object-class> <x> <y> <width> <height>
where x and y are the center of the bounding box. So your code should be like this:
def convert(size, box):
    """Normalise a centre-style box to YOLO (x, y, w, h).

    Args:
        size: ``(image_width, image_height)``.
        box: ``(center_x, center_y, width, height)`` in pixels.

    Returns:
        The same four values, the x-related ones scaled by ``1/size[0]`` and
        the y-related ones by ``1/size[1]``.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    center_x, center_y, width, height = box[0], box[1], box[2], box[3]
    return (center_x * inv_w, center_y * inv_h, width * inv_w, height * inv_h)
Maybe this can help you
def bounding_box_2_yolo(obj_detections, frame, index):
    """Convert corner-style detections into YOLO label tuples.

    Args:
        obj_detections: Iterable of objects whose ``boxes`` attribute unpacks
            to ``(left_x, top_y, right_x, bottom_y)``.
        frame: Object with a ``shape`` whose first two entries are
            ``(height, width)``; used to normalise the coordinates.
        index: Class index stored in every returned tuple.

    Returns:
        A list of ``(index, x_center, y_center, width, height)`` tuples with
        coordinates normalised by the frame size.
    """
    def to_yolo(detection):
        left, top, right, bottom = detection.boxes
        # frame.shape is read per detection so that an empty detection list
        # never touches the frame.
        frame_h, frame_w = frame.shape[0], frame.shape[1]
        return (
            index,
            float(left + right) / 2 / frame_w,
            float(top + bottom) / 2 / frame_h,
            float(right - left) / frame_w,
            float(bottom - top) / frame_h,
        )

    return [to_yolo(detection) for detection in obj_detections]
The labelimg has a lot of things that you can use too
I wrote a small script in python where I'm trying to extract or crop the part of the playing card that represents the artwork only, removing all the rest. I've been trying various methods of thresholding but couldn't get there. Also note that I can't simply record manually the position of the artwork because it's not always in the same position or size, but always in a rectangular shape where everything else is just text and borders.
from matplotlib import pyplot as plt
import cv2
# Binarise the card with Otsu's threshold, invert it, and apply a 15x15
# morphological opening.
# NOTE(review): `filename` and `np` are not defined/imported in the visible snippet.
img = cv2.imread(filename)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
binary = cv2.bitwise_not(binary)
kernel = np.ones((15, 15), np.uint8)
# Despite its name, `closing` holds the result of an opening (MORPH_OPEN).
closing = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
The current output is the closest thing I could get. I could be on the right way and try some further wrangling to draw a rectangle around the white parts, but I don't think it's a sustainable method :
As a last note, see the cards below, not all frames are exactly the same sizes or positions, but there's always a piece of artwork with only text and borders around it. It doesn't have to be super precisely cut, but clearly the art is a "region" of the card, surrounded by other regions containing some text. My goal is to try to capture the region of the artwork as well as I can.
I used Hough line transform to detect linear parts of the image.
The crossings of all lines were used to construct all possible rectangles, which do not contain other crossing points.
Since the part of the card you are looking for is always the biggest of those rectangles (at least in the samples you provided), i simply chose the biggest of those rectangles as winner.
The script works without user interaction.
import cv2
import numpy as np
from collections import defaultdict
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group Hough lines by angle using k-means.

    The angles are doubled and mapped onto the unit circle before
    clustering, and the points on the circle are split into ``k`` clusters.

    Args:
        lines: Lines where ``line[0]`` is ``(rho, theta)``.
        k: Number of angle clusters.
        **kwargs: Optional ``criteria``, ``flags`` and ``attempts`` forwarded
            to ``cv2.kmeans``.

    Returns:
        A list of line groups, one per cluster that received a line.
    """
    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Angles in radians, taken from the theta component of each line.
    angles = np.array([line[0][1] for line in lines])
    # Multiply the angles by two and find the coordinates of that angle.
    pts = np.array([[np.cos(2*angle), np.sin(2*angle)]
                    for angle in angles], dtype=np.float32)

    # Run k-means on the coordinates; only the labels are needed.
    labels = cv2.kmeans(pts, k, None, criteria, attempts, flags)[1]
    labels = labels.reshape(-1)  # flatten to one label per line

    # Segment the lines by their k-means label. Without this assignment the
    # result would always be empty.
    segmented = defaultdict(list)
    for label, line in zip(labels, lines):
        segmented[label].append(line)
    return list(segmented.values())
def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    See https://stackoverflow.com/a/383527/5087436

    Args:
        line1: Line where ``line1[0]`` is ``(rho, theta)``.
        line2: Line where ``line2[0]`` is ``(rho, theta)``.

    Returns:
        ``[[x, y]]`` with the closest integer pixel location.

    Raises:
        numpy.linalg.LinAlgError: If the lines are parallel (singular system).
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    # Each line satisfies x*cos(theta) + y*sin(theta) = rho.
    A = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)]
    ])
    # A 1-D right-hand side makes solve() return scalars, avoiding the
    # deprecated int() conversion of 1-element arrays.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    return [[int(np.round(x0)), int(np.round(y0))]]
def segmented_intersections(lines):
    """Find the intersections between groups of lines.

    Every line of a group is intersected with every line of each later
    group; lines within the same group are never intersected.

    Args:
        lines: List of line groups.

    Returns:
        A list with one ``intersection(...)`` result per line pair.
    """
    return [
        intersection(first, second)
        for position, group in enumerate(lines[:-1])
        for later_group in lines[position + 1:]
        for first in group
        for second in later_group
    ]
def rect_from_crossings(crossings):
    """Find all axis-aligned rectangles spanned by two crossings that
    contain no other crossing.

    Args:
        crossings: Sequence of ``[[x, y]]`` points (the format returned by
            ``intersection``).

    Returns:
        A list of ``[[x1, y1], [x2, y2]]`` with ``x1 <= x2`` and
        ``y1 <= y2``. Zero-area rectangles (a point paired with itself or
        with a point on the same axis) are included.
    """
    points = [(int(c[0][0]), int(c[0][1])) for c in crossings]
    rectangles = []

    # Search all possible rectangles.
    for i, (x1, y1) in enumerate(points):
        for j, (x2, y2) in enumerate(points):
            # Don't count reversed rectangles twice. This test does not
            # depend on the third point, so it is done once, up front.
            if x1 > x2 or y1 > y2:
                continue

            # Don't count rectangles with another crossing inside. A point
            # counts as inside when it is strictly inside on at least one
            # axis and within bounds on the other.
            has_point_inside = any(
                k != i and k != j and (
                    (x1 <= x3 <= x2 and y1 < y3 < y2)
                    or (x1 < x3 < x2 and y1 <= y3 <= y2)
                )
                for k, (x3, y3) in enumerate(points)
            )
            if not has_point_inside:
                # Without this append the function always returned [].
                rectangles.append([[x1, y1], [x2, y2]])
    return rectangles
if __name__ == '__main__':
    # Crop the largest crossing-free rectangle found between Hough lines and
    # write it to Output.png.
    # Other sample images: 'TAJFp.jpg', 'Bj2uu.jpg'
    image_path = 'yi8db.png'
    img = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is unreadable.
    if img is None:
        raise SystemExit(f"Could not read input image: {image_path}")

    # Resize the image to a fixed width of 380 px, keeping the aspect ratio.
    width = int(img.shape[1])
    height = int(img.shape[0])
    scale = 380/width
    dim = (int(width*scale), int(height*scale))
    img = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    img2 = img.copy()

    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray,(5,5),cv2.BORDER_DEFAULT)

    # Parameters of Canny and Hough may have to be tweaked to work for as many cards as possible
    edges = cv2.Canny(gray,10,45,apertureSize = 7)
    lines = cv2.HoughLines(edges,1,np.pi/90,160)
    # NOTE(review): HoughLines is expected to return None when nothing is found.
    if lines is None:
        raise SystemExit("No lines detected; try different Canny/Hough parameters")

    segmented = segment_by_angle_kmeans(lines)
    crossings = segmented_intersections(segmented)
    rectangles = rect_from_crossings(crossings)

    def _area(rect):
        (ax, ay), (bx, by) = rect
        return abs(ax - bx) * abs(ay - by)

    # Find the biggest remaining rectangle (the first one wins on ties).
    biggest = max(rectangles, key=_area, default=None)
    if biggest is None or _area(biggest) == 0:
        raise SystemExit("No rectangle with a non-zero area was found")
    (x1_rect, y1_rect), (x2_rect, y2_rect) = biggest

    # The outline goes on the copy so that the crop stays clean.
    cv2.rectangle(img2, (x1_rect,y1_rect), (x2_rect,y2_rect), (0,0,255), 2)
    roi = img[y1_rect:y2_rect, x1_rect:x2_rect]
    cv2.imwrite("Output.png", roi)
These are the results with the samples you provided:
The code for finding line crossings can be found here: find intersection point of two lines drawn using houghlines opencv
You can read more about Hough Lines here.
We know that cards have straight boundaries along the x and y axes. We can use this to extract parts of the image. The following code implements detecting horizontal and vertical lines in the image.
import cv2
import numpy as np
def mouse_callback(event, x, y, flags, params):
    """Record the detected-line bounds around each of the first two clicks.

    For a left-button click, the nearest detected horizontal lines above and
    below ``y`` and the nearest vertical lines left and right of ``x`` are
    appended (shrunk inward by one pixel) to the module-level bound lists.
    Other events, and clicks after the second one, are ignored.

    Raises:
        ValueError: If there is no detected line on one side of the click
            (``max``/``min`` of an empty sequence).
    """
    global num_click
    global upper_bound, lower_bound, left_bound, right_bound

    if event != cv2.EVENT_LBUTTONDOWN or num_click >= 2:
        return

    num_click += 1
    # The appends stay sequential so that a failure part-way through leaves
    # the same partial state as before.
    upper_bound.append(max(row for row in hor if row < y) + 1)
    lower_bound.append(min(row for row in hor if row > y) - 1)
    left_bound.append(max(col for col in ver if col < x) + 1)
    right_bound.append(min(col for col in ver if col > x) - 1)
# Detects long horizontal/vertical edge runs, overlays them on the image and
# crops the region bounded by the lines around two user clicks.
# NOTE(review): this snippet is incomplete as shown; indentation is missing
# and several loop bodies appear to have been lost (see notes below).
filename = 'image.png'
thr = 100 # edge detection threshold
lined = 50 # number of consecutive True pixels required for a row/column to be counted as a line
num_click = 0 # select only twice
upper_bound, lower_bound, left_bound, right_bound = [], [], [], []
winname = 'img'
# NOTE(review): no window named `winname` is created in the visible code
# before the callback is registered; a cv2.namedWindow call is presumably missing.
cv2.setMouseCallback(winname, mouse_callback)
img = cv2.imread(filename, 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
bw = cv2.Canny(gray, thr, 3*thr)
height, width, _ = img.shape
# find horizontal lines
hor = []
for i in range (0, height-1):
count = 0
for j in range (0, width-1):
if bw[i,j]:
count = count + 1
# NOTE(review): as visible, the counter is reset unconditionally; an `else:`
# presumably preceded this line.
count = 0
# NOTE(review): this `if` has no visible body; presumably it recorded row i in `hor`.
if count >= lined:
# find vertical lines
ver = []
for j in range (0, width-1):
count = 0
for i in range (0, height-1):
if bw[i,j]:
count = count + 1
# NOTE(review): same pattern as above; the `else:` and the body that records
# column j in `ver` are not visible.
count = 0
if count >= lined:
# draw lines
disp_img = np.copy(img)
for i in hor:
cv2.line(disp_img, (0, i), (width-1, i), (0,0,255), 1)
for i in ver:
cv2.line(disp_img, (i, 0), (i, height-1), (0,0,255), 1)
# NOTE(review): no cv2.waitKey call is visible inside this loop, so GUI events
# (including the clicks that end it) would presumably never be processed.
while num_click < 2:
cv2.imshow(winname, disp_img)
# Crop to the outermost bounds collected from the two clicks.
disp_img = img[min(upper_bound):max(lower_bound), min(left_bound):max(right_bound)]
cv2.imshow(winname, disp_img)
cv2.waitKey() # Press any key to exit
You just need to click two areas to include. A sample click area and the corresponding result are as follows:
Results from other images:
I don't think it is possible to automatically crop the artwork ROI using traditional image processing techniques due to the dynamic nature of the colors, dimensions, locations, and textures for each card. You would have to look into machine/deep learning and train your own classifier if you want to do it automatically. Instead, here's a manual approach to select and crop a static ROI from an image.
The idea is to use cv2.setMouseCallback() and event handlers to detect if the mouse has been clicked or released. For this implementation, you can extract the artwork ROI by holding down the left mouse button and dragging to select the desired ROI. Once you have selected the desired ROI, press c to crop and save the ROI. You can reset the ROI using the right mouse button.
Saved artwork ROIs
import cv2
class ExtractArtworkROI(object):
    """Interactive tool to select and crop a rectangular ROI from '1.png'.

    Hold the left mouse button and drag to select a region; the right mouse
    button clears the selection. ``crop_ROI`` shows the selection and saves
    it to 'ROI.png'.
    """

    def __init__(self):
        # Load image
        self.original_image = cv2.imread('1.png')
        # cv2.imread returns None instead of raising on failure.
        if self.original_image is None:
            raise FileNotFoundError("Could not read image: '1.png'")
        self.clone = self.original_image.copy()

        # The window must exist before a mouse callback can be attached.
        cv2.namedWindow('image')
        cv2.setMouseCallback('image', self.extractROI)
        self.selected_ROI = False

        # ROI bounding box reference points: [start] or [start, end]
        self.image_coordinates = []

    def extractROI(self, event, x, y, flags, parameters):
        """Mouse callback that tracks the drag rectangle on the 'image' window."""
        # Record starting (x,y) coordinates on left mouse button click
        if event == cv2.EVENT_LBUTTONDOWN:
            self.image_coordinates = [(x,y)]

        # Record ending (x,y) coordinates on left mouse button release
        elif event == cv2.EVENT_LBUTTONUP:
            # Ignore a release that was not preceded by a press.
            if not self.image_coordinates:
                return

            # Remove old bounding box
            if self.selected_ROI:
                self.clone = self.original_image.copy()

            # Store the end point; without it image_coordinates[1] below
            # raises IndexError.
            self.image_coordinates = [self.image_coordinates[0], (x,y)]

            # Draw rectangle
            self.selected_ROI = True
            cv2.rectangle(self.clone, self.image_coordinates[0], self.image_coordinates[1], (36,255,12), 2)

            print('top left: {}, bottom right: {}'.format(self.image_coordinates[0], self.image_coordinates[1]))
            print('x,y,w,h : ({}, {}, {}, {})'.format(self.image_coordinates[0][0], self.image_coordinates[0][1], self.image_coordinates[1][0] - self.image_coordinates[0][0], self.image_coordinates[1][1] - self.image_coordinates[0][1]))

        # Clear drawing boxes on right mouse button click
        elif event == cv2.EVENT_RBUTTONDOWN:
            self.selected_ROI = False
            self.clone = self.original_image.copy()

    def show_image(self):
        """Return the image to display (the original plus the current selection)."""
        return self.clone

    def crop_ROI(self):
        """Show the selected ROI and save it to 'ROI.png'.

        Prints a hint and does nothing when no non-empty ROI is selected.
        """
        if not self.selected_ROI:
            print('Select ROI before cropping!')
            return

        (x1, y1), (x2, y2) = self.image_coordinates
        # Normalise the corners so that dragging right-to-left or
        # bottom-to-top does not produce an empty slice.
        x1, x2 = sorted((x1, x2))
        y1, y2 = sorted((y1, y2))
        if x1 == x2 or y1 == y2:
            print('Select ROI before cropping!')
            return

        # Extract ROI
        self.cropped_image = self.original_image[y1:y2, x1:x2].copy()

        # Display and save image
        cv2.imshow('Cropped Image', self.cropped_image)
        cv2.imwrite('ROI.png', self.cropped_image)
if __name__ == '__main__':
    # Event loop of the ROI tool: 'c' crops and saves the current selection,
    # 'q' closes the windows and quits.
    extractArtworkROI = ExtractArtworkROI()
    while True:
        cv2.imshow('image', extractArtworkROI.show_image())
        key = cv2.waitKey(1)

        # Close program with keyboard 'q'
        if key == ord('q'):
            cv2.destroyAllWindows()
            break

        # Crop ROI
        if key == ord('c'):
            extractArtworkROI.crop_ROI()
I have a set of images, each containing a table. Some images have the tables in them already aligned and the borders are drawn, it is not hard to identify the main table on those images using Canny edge detection. However, some images have their tables without any borders, so I am trying to identify the table in an image and plot its border's contours as well as columns.
I am using openCV version 3.4 and the approach i'm generally taking is as follows:
dilate the grayscale image to identify the text spots
apply cv2.findContours function to get text's bounding boxes.
cluster the bounding boxes in case smaller tables were identified instead of the main table.
try to draw the contours in hopes to identify the borders of the table.
This approach seems to work to a certain extent but the drawn contours are not at all accurate.
# Attempts to find the borders of a borderless table: text bounding boxes are
# clustered into rows and grid lines are derived from them.
# NOTE(review): this snippet is incomplete as shown; indentation is missing,
# and gray_matrix, hor_lines, max_y and output_image are not defined in the
# visible code.
# NOTE(review): the call below is not closed in the visible code.
img, contours, hierarchy = cv2.findContours(gray_matrix, cv2.RETR_LIST,
# get bounding boxes around any text
boxes = []
for contour in contours:
box = cv2.boundingRect(contour)
h = box[3]
# NOTE(review): nothing visible appends `box` to `boxes`.
rows = {}
cols = {}
# Clustering the bounding boxes by their positions
for box in boxes:
(x, y, w, h) = box
# NOTE(review): both keys are the constant 10, so every box lands in the same
# cluster; presumably x // 10 and y // 10 were intended.
col_key = 10 # cell threshold
row_key = 10 # cell threshold
# NOTE(review): `cols` is written under row_key but tested and read under col_key.
cols[row_key] = [box] if col_key not in cols else cols[col_key] + [box]
rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
# Filtering out the clusters having less than 4 cols
table_cells = list(filter(lambda r: len(r) >= 4, rows.values()))
# Sorting the row cells by x coord
table_cells = [list(sorted(tb)) for tb in table_cells]
# Sorting the rows by the y coordinate of their first cell
table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
#attempt to identify columns
# max() raises ValueError here when table_cells is empty.
max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
ver_lines = []
for box in table_cells:
x = box[0][0]
y = box[0][1]
hor_lines.append((x, y, max_x, y))
for box in table_cells[0]:
x = box[0]
y = box[1]
ver_lines.append((x, y, x, max_y))
(x, y, w, h) = table_cells[0][-1]
ver_lines.append((max_x, y, max_x, max_y))
(x, y, w, h) = table_cells[0][0]
hor_lines.append((x, max_y, max_x, max_y))
# Only the vertical lines are drawn in the visible code.
for line in ver_lines:
[x1, y1, x2, y2] = line
cv2.line(output_image, (x1, y1), (x2, y2), (0, 0, 255), 1)
cv2.imshow('Proper Table Borders', output_image)
I am trying to achieve something like the below image.
In short, how can I find the invisible borders of a table-structure in an image as well as identify the x coordinates of the identified table's columns?
I know the above code is not at all optimal to produce the required outcome, but I am still learning openCV so I'm trying various approaches but still did not reach the desired result.
Try vertical profile, which is count of text (black) pixels with the same X coordinate in certain (Y0, Y1) range (table vertical span). Zero or near zero regions will indicate table column borders. Here is a hand drawn, approximate profile for your example:
I got two detected contours in an image and need the diameter between the two vertical-edges of the top contour and the diameter between the vertical-edges of the lower contour. I achieved this with this code.
import cv2
import numpy as np
import math, os
import imutils
def _horizontal_extent(contour):
    """Return (left, right) end points of a contour's horizontal extent.

    ``right`` is the contour point with the largest x. ``left`` takes the
    smallest x of the contour and the y of ``right``, so the two points form
    a horizontal line. Both are tuples of plain Python ints, because cv2
    drawing functions may reject NumPy integer scalars as point coordinates.
    """
    right = contour[contour[:, :, 0].argmax()][0]
    right_point = (int(right[0]), int(right[1]))
    left_point = (int(contour[:, :, 0].min()), right_point[1])
    return left_point, right_point


img = cv2.imread("1.jpg")
# cv2.imread returns None instead of raising when the file is unreadable.
if img is None:
    raise SystemExit("Could not read input image: 1.jpg")

# cv2.imread returns BGR data, so the BGR conversion code is the matching one.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
edges = cv2.Canny(gray, 200, 100)
# Dilate then erode to close small gaps in the edges.
edges = cv2.dilate(edges, None, iterations=1)
edges = cv2.erode(edges, None, iterations=1)
cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
if not cnts:
    raise SystemExit("No contours found in 1.jpg")

# pick the largest and the smallest contour by area
c1 = max(cnts, key=cv2.contourArea)
c2 = min(cnts, key=cv2.contourArea)

# determine the most extreme points along the contours, with the y of the
# left point set to the y of the right point
extLeft1, extRight1 = _horizontal_extent(c1)
extLeft2, extRight2 = _horizontal_extent(c2)

# show contour (drawContours draws in place, so cimg and img are the same image)
cimg = cv2.drawContours(img, cnts, -1, (0,200,0), 2)

# Both end points share the same y, so the Euclidean distance reduces to
# the difference in x.
dist1 = float(extRight1[0] - extLeft1[0])
dist2 = float(extRight2[0] - extLeft2[0])

# draw lines
cv2.line(cimg, extLeft1, extRight1, (255,0,0), 1)
cv2.line(cimg, extLeft2, extRight2, (255,0,0), 1)

# draw the distance text
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
fontColor = (255,0,0)
lineType = 1
cv2.putText(cimg,str(dist1),(155,100),font, fontScale, fontColor, lineType)
cv2.putText(cimg,str(dist2),(155,280),font, fontScale, fontColor, lineType)

# show image; imshow only renders once the GUI event loop runs in waitKey
cv2.imshow("Image", img)
cv2.waitKey(0)
Now I would also need the angle of the slope lines on the bottom side of the upper contour.
Any ideas how I can get this? Is it possible using contours?
Or is it necessary to use HoughLinesP and sort the regarding lines somehow?
And continued question: Maybe its also possible to get function which describes parabola slope of that sides ?
Thanks alot for any help!
There are several ways to obtain just the slopes. To find the slope, we can use cv2.HoughLines to detect the bottom horizontal line, detect the end points of that line and, from those, obtain the slopes. As an illustration,
# Detect straight lines in `edges`; the accumulator threshold is 66% of dist2,
# which must already be defined when this runs.
lines = cv2.HoughLines(edges, rho=1, theta=np.pi/180, threshold=int(dist2*0.66) )
on edges in your code gives 4 lines, and if we force the angle to be horizontal
# Convert each (rho, theta) line into two far-apart end points for drawing.
# NOTE(review): indentation is missing; the body of the `if` (presumably a
# `continue`) and the drawing call that uses (x1, y1)-(x2, y2) are not visible.
for line in lines:
rho, theta = line[0]
# here we filter out non-horizontal lines
if abs(theta - np.pi/2) > np.pi/180:
# (a, b) is the unit normal of the line; (x0, y0) is its point closest to the origin.
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
# Step 1000 px from (x0, y0) in both directions along the line direction (-b, a).
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
we get:
For the extended question concerns with the parabolas, we first compose a function that returns the left and right points:
def horizontal_scan(gray_img, thresh=50, start=50):
    """Scan rows for the left and right ends of the first foreground run.

    Scanning starts at row ``start`` and stops at the first row that
    contains only background pixels.

    Args:
        gray_img: 2-D image (sequence of rows of intensities).
        thresh: Pixels below this value count as background.
        start: y coordinate of the first row to scan.

    Returns:
        A list of ``(y, left, right)`` tuples where ``left`` is the index of
        the first foreground pixel in row ``y`` and ``right`` is the index
        just past the end of that foreground run.
    """
    ret = []
    thickness = 0
    for i in range(start, len(gray_img)):
        row = gray_img[i]

        # scan for left: skip the leading background pixels
        left = 0
        while left < len(row) and row[left] < thresh:
            left += 1

        # an all-background row ends the scan
        if left == len(row):
            break

        # scan for right: walk to the end of the foreground run
        right = left
        while right < len(row) and row[right] >= thresh:
            right += 1

        if thickness == 0:
            thickness = right - left

        # prevent sudden drop, error/noise: skip rows whose run is less than
        # a fifth of the previously accepted width
        if (right - left) < thickness // 5:
            continue
        thickness = right - left

        ret.append((i, left, right))
    return ret
# we start scanning from extLeft1 down until we see a blank line
# with some tweaks, we can make horizontal_scan run on edges,
# which would be simpler and faster
# Scan downwards from the row of extLeft1 (its y coordinate) until a blank row.
horizontal_lines = horizontal_scan(gray, start = extLeft1[1])
# check that horizontal_lines[0] is close to extLeft1 and extRight1
# (prints the first scanned row, the full extLeft1 point, and only the x of extRight1)
print(horizontal_lines[0], extLeft1, extRight1[0])
Note that we can use this function to find the end points of the horizontal line returned by HoughLines.
# last line of horizontal_lines would be the points we need:
# The last scanned row gives the lowest left/right points of the upper contour.
# NOTE: indexing [-1] raises IndexError when horizontal_lines is empty.
upper_lowest_y, upper_lowest_left, upper_lowest_right = horizontal_lines[-1]
# Draw on a copy so that `img` itself is left untouched.
img_lines = img.copy()
# Connect each lowest point with the matching extreme point of the contour.
cv2.line(img_lines, (upper_lowest_left, upper_lowest_y), extLeft1, (0,0,255), 1)
cv2.line(img_lines, (upper_lowest_right, upper_lowest_y), extRight1, (0,0,255),1)
and that gives:
Let's return to the extended question, where we have those left and right points:
# Split the (y, left, right) scan results into (x, y) points for the left and
# the right side.
left_points = [(x,y) for y,x,_ in horizontal_lines]
right_points = [(x,y) for y,_,x in horizontal_lines]
Obviously, they would not fit perfectly in a parabola, so we need some sort of approximation/fitting here. For that, we can build a LinearRegression model:
from sklearn.linear_model import LinearRegression
class BestParabola:
    """Least-squares parabola ``y = a*x**2 + b*x + c`` through a set of points.

    Attributes:
        lr: The fitted ``LinearRegression`` model on the features ``(x**2, x)``.
        a, b, c: The coefficients of the parabola.
        coef_: ``(c, b, a)``, i.e. the coefficients in ascending power order.
    """

    def __init__(self, points):
        """Fit the parabola to ``points``, an iterable of ``(x, y)`` pairs."""
        x_x2 = np.array([(x**2,x) for x,_ in points])
        ys = np.array([y for _,y in points])
        self.lr = LinearRegression()
        # The model must be fitted before coef_ and intercept_ exist.
        self.lr.fit(x_x2, ys)
        self.a, self.b = self.lr.coef_
        self.c = self.lr.intercept_
        self.coef_ = (self.c,self.b,self.a)

    def transform(self,points):
        """Return an array of ``(x, predicted_y)`` for the x of each point.

        The y values of ``points`` are ignored.
        """
        x_x2 = np.array([(x**2,x) for x,_ in points])
        ys = self.lr.predict(x_x2)
        return np.array([(x,y) for (_,x),y in zip(x_x2,ys)])
And then, we can fit the given left_points, right_points to get the desired parabolas:
# construct the approximate parabola
# the parabollas' coefficients are accessible by BestParabola.coef_
# Fit one parabola per side; the coefficients are available as .coef_ = (c, b, a).
left_parabola = BestParabola(left_points)
right_parabola = BestParabola(right_points)
# get points for rendering
left_parabola_points = left_parabola.transform(left_points)
right_parabola_points = right_parabola.transform(right_points)
# render with matplotlib, cv2.drawContours would work
# NOTE(review): `plt` is not imported in this snippet; it presumably refers to matplotlib.pyplot.
plt.plot(left_parabola_points[:,0], left_parabola_points[:,1], linewidth=3)
plt.plot(right_parabola_points[:,0], right_parabola_points[:,1], linewidth=3, color='r')
Which gives:
The left parabola is not perfect, but you should work out that if need be :-)