Extract artwork from table game card image with OpenCV - python

I wrote a small script in python where I'm trying to extract or crop the part of the playing card that represents the artwork only, removing all the rest. I've been trying various methods of thresholding but couldn't get there. Also note that I can't simply record manually the position of the artwork because it's not always in the same position or size, but always in a rectangular shape where everything else is just text and borders.
from matplotlib import pyplot as plt
import cv2
import numpy as np  # required for the structuring element built below

# NOTE: `filename` is not defined in this snippet; set it to the path of the
# card image before running.
img = cv2.imread(filename)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Otsu picks the global threshold automatically (the explicit 0 is ignored).
ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
# Invert so that the originally dark pixels become the white foreground.
binary = cv2.bitwise_not(binary)
kernel = np.ones((15, 15), np.uint8)
# Despite the variable name, this is a morphological *opening* (MORPH_OPEN).
closing = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
plt.imshow(closing)
plt.show()
The current output is the closest thing I could get. I could be on the right track and try some further wrangling to draw a rectangle around the white parts, but I don't think it's a sustainable method:
As a last note, see the cards below, not all frames are exactly the same sizes or positions, but there's always a piece of artwork with only text and borders around it. It doesn't have to be super precisely cut, but clearly the art is a "region" of the card, surrounded by other regions containing some text. My goal is to try to capture the region of the artwork as well as I can.

I used Hough line transform to detect linear parts of the image.
The crossings of all lines were used to construct all possible rectangles, which do not contain other crossing points.
Since the part of the card you are looking for is always the biggest of those rectangles (at least in the samples you provided), I simply chose the biggest of those rectangles as the winner.
The script works without user interaction.
import cv2
import numpy as np
from collections import defaultdict
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group lines by their angle using k-means.

    Each line is expected in the form ``[[rho, theta]]``. The angle is
    doubled and mapped onto the unit circle before clustering, so that
    angles 0 and pi land on the same point.

    Optional keyword arguments ``criteria``, ``flags`` and ``attempts`` are
    forwarded to ``cv2.kmeans``.

    Returns a list with one list of lines per cluster label encountered.
    """
    # Termination criteria are given as (type, max_iter, epsilon).
    fallback_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (fallback_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Collect the theta component of every line.
    thetas = np.array([entry[0][1] for entry in lines])

    # Unit-circle coordinates of the doubled angle.
    circle_pts = np.array(
        [[np.cos(2*theta), np.sin(2*theta)] for theta in thetas],
        dtype=np.float32,
    )

    # cv2.kmeans returns three values; the first one is not needed here.
    _, labels, centers = cv2.kmeans(circle_pts, k, None, criteria, attempts, flags)
    flat_labels = labels.reshape(-1)

    # Bucket the lines by their cluster label.
    buckets = defaultdict(list)
    for label, entry in zip(flat_labels, lines):
        buckets[label].append(entry)
    return list(buckets.values())
def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    Each line is passed as ``[[rho, theta]]`` (or any sequence whose first
    element is a ``(rho, theta)`` pair). The point (x, y) satisfies
    ``x*cos(theta) + y*sin(theta) = rho`` for both lines.

    Returns the closest integer pixel location as ``[[x, y]]``.

    Raises ``numpy.linalg.LinAlgError`` when the two lines are parallel,
    because the 2x2 system is then singular.

    See https://stackoverflow.com/a/383527/5087436
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    A = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)]
    ])
    # A 1-D right-hand side makes solve() return two scalars rather than two
    # size-1 arrays; converting size-1 arrays with int() is deprecated.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    return [[int(np.round(x0)), int(np.round(y0))]]
def segmented_intersections(lines):
    """Find the intersections between groups of lines.

    ``lines`` is a sequence of groups. Every line of a group is intersected
    with every line of each later group; lines within one group are never
    intersected with each other.

    Returns the list of results produced by ``intersection``.
    """
    points = []
    group_count = len(lines)
    for first in range(group_count - 1):
        for second in range(first + 1, group_count):
            points.extend(
                intersection(line_a, line_b)
                for line_a in lines[first]
                for line_b in lines[second]
            )
    return points
def rect_from_crossings(crossings):
    """Find all axis-aligned rectangles spanned by two crossings that
    contain no other crossing.

    ``crossings`` is a sequence of ``[[x, y]]`` entries. Two crossings form
    a candidate when the first is the top-left and the second the
    bottom-right corner (``x1 <= x2`` and ``y1 <= y2``), which also avoids
    listing each rectangle twice. A candidate is rejected when any third
    crossing lies strictly inside it or on one of its edges away from the
    corners.

    Returns a list of ``[[x1, y1], [x2, y2]]``. Zero-area entries (a
    crossing paired with itself, or two crossings on one axis-aligned line)
    are included, exactly as before; callers pick by area.
    """
    # Convert once up front instead of on every inner-loop iteration.
    points = [(int(c[0][0]), int(c[0][1])) for c in crossings]

    rectangles = []
    for i, (x1, y1) in enumerate(points):
        for j, (x2, y2) in enumerate(points):
            # Orientation test does not depend on the third point, so it is
            # hoisted out of the innermost scan.
            if x1 > x2 or y1 > y2:
                continue
            # any() stops at the first blocking point.
            blocked = any(
                k != i and k != j and (
                    (x1 <= x3 <= x2 and y1 < y3 < y2)
                    or (x1 < x3 < x2 and y1 <= y3 <= y2)
                )
                for k, (x3, y3) in enumerate(points)
            )
            if not blocked:
                rectangles.append([[x1, y1], [x2, y2]])
    return rectangles
if __name__ == '__main__':
    # Other sample inputs: 'TAJFp.jpg', 'Bj2uu.jpg'
    image_path = 'yi8db.png'
    img = cv2.imread(image_path)
    if img is None:
        raise SystemExit("Could not read image '%s'" % image_path)

    # Rescale every card to a width of 380 px so the Canny/Hough parameters
    # below apply to all inputs alike.
    height, width = img.shape[:2]
    scale = 380/width
    dim = (int(width*scale), int(height*scale))
    img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    img2 = img.copy()

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
    # so the numeric value of the border constant is used as sigma here.
    # Left unchanged because the thresholds below were tuned with it.
    gray = cv2.GaussianBlur(gray, (5, 5), cv2.BORDER_DEFAULT)

    # Parameters of Canny and Hough may have to be tweaked to work for as many cards as possible
    edges = cv2.Canny(gray, 10, 45, apertureSize=7)
    lines = cv2.HoughLines(edges, 1, np.pi/90, 160)
    # HoughLines returns None when nothing is found, and k-means with k=2
    # needs at least two samples.
    if lines is None or len(lines) < 2:
        raise SystemExit('Not enough lines detected; adjust the Canny/Hough parameters')

    segmented = segment_by_angle_kmeans(lines)
    crossings = segmented_intersections(segmented)
    rectangles = rect_from_crossings(crossings)

    def area(rect):
        """Area of a [[x1, y1], [x2, y2]] rectangle."""
        (xa, ya), (xb, yb) = rect
        return abs(xa - xb) * abs(ya - yb)

    # Find biggest remaining rectangle; zero-area candidates cannot be cropped.
    candidates = [rect for rect in rectangles if area(rect) > 0]
    if not candidates:
        raise SystemExit('No rectangle found between the detected lines')
    (x1_rect, y1_rect), (x2_rect, y2_rect) = max(candidates, key=area)

    cv2.rectangle(img2, (x1_rect, y1_rect), (x2_rect, y2_rect), (0, 0, 255), 2)
    roi = img[y1_rect:y2_rect, x1_rect:x2_rect]
    cv2.imshow("Output", roi)
    cv2.imwrite("Output.png", roi)
    cv2.waitKey()
These are the results with the samples you provided:
The code for finding line crossings can be found here: find intersection point of two lines drawn using houghlines opencv
You can read more about Hough Lines here.

We know that cards have straight boundaries along the x and y axes. We can use this to extract parts of the image. The following code implements detecting horizontal and vertical lines in the image.
import cv2
import numpy as np
def mouse_callback(event, x, y, flags, params):
    """Record the cell of the line grid that surrounds a left click.

    Only the first two left clicks are handled. For each click, the nearest
    detected horizontal lines above and below (from the global ``hor``) and
    the nearest vertical lines left and right (from the global ``ver``) are
    looked up, and the bounds one pixel inside those lines are appended to
    the global bound lists.

    When no detected line exists on one side of the click, the image border
    is used instead (the globals ``height`` and ``width``); previously that
    case raised ValueError from max()/min() on an empty sequence.
    """
    global num_click
    if num_click < 2 and event == cv2.EVENT_LBUTTONDOWN:
        num_click = num_click + 1
        print(num_click)
        # The defaults are chosen so that the +1 / -1 adjustments land on
        # the first and last pixel of the image.
        upper_bound.append(max((i for i in hor if i < y), default=-1) + 1)
        lower_bound.append(min((i for i in hor if i > y), default=height) - 1)
        left_bound.append(max((i for i in ver if i < x), default=-1) + 1)
        right_bound.append(min((i for i in ver if i > x), default=width) - 1)
def _has_run(pixels, run_length):
    """Return True if `pixels` holds `run_length` consecutive truthy values."""
    streak = 0
    for value in pixels:
        streak = streak + 1 if value else 0
        if streak >= run_length:
            return True
    return False


filename = 'image.png'
thr = 100  # edge detection threshold
lined = 50  # number of consecutive True pixels required for an axis to be counted as a line
num_click = 0  # select only twice
upper_bound, lower_bound, left_bound, right_bound = [], [], [], []
winname = 'img'
cv2.namedWindow(winname)
cv2.setMouseCallback(winname, mouse_callback)

img = cv2.imread(filename, 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
bw = cv2.Canny(gray, thr, 3*thr)
height, width, _ = img.shape

# Rows containing a long enough run of edge pixels count as horizontal lines.
# As in the original loops, the last row and last column are not scanned.
hor = [row for row in range(height - 1)
       if _has_run(bw[row, :width - 1], lined)]

# Columns containing a long enough run count as vertical lines.
ver = [col for col in range(width - 1)
       if _has_run(bw[:height - 1, col], lined)]

# Overlay the detected lines in red on a copy of the input.
disp_img = np.copy(img)
red = (0, 0, 255)
for row in hor:
    cv2.line(disp_img, (0, row), (width-1, row), red, 1)
for col in ver:
    cv2.line(disp_img, (col, 0), (col, height-1), red, 1)

# Keep showing the overlay until the callback has handled two clicks.
while num_click < 2:
    cv2.imshow(winname, disp_img)
    cv2.waitKey(10)

# Crop to the union of the two clicked cells.
disp_img = img[min(upper_bound):max(lower_bound), min(left_bound):max(right_bound)]
cv2.imshow(winname, disp_img)
cv2.waitKey()  # Press any key to exit
cv2.destroyAllWindows()
You just need to click two areas to include. A sample click area and the corresponding result are as follows:
Results from other images:

I don't think it is possible to automatically crop the artwork ROI using traditional image processing techniques due to the dynamic nature of the colors, dimensions, locations, and textures for each card. You would have to look into machine/deep learning and train your own classifier if you want to do it automatically. Instead, here's a manual approach to select and crop a static ROI from an image.
The idea is to use cv2.setMouseCallback() and event handlers to detect if the mouse has been clicked or released. For this implementation, you can extract the artwork ROI by holding down the left mouse button and dragging to select the desired ROI. Once you have selected the desired ROI, press c to crop and save the ROI. You can reset the ROI using the right mouse button.
Saved artwork ROIs
Code
import cv2
class ExtractArtworkROI(object):
    """Interactive ROI selector: drag with the left mouse button to mark a
    rectangle, right-click to clear it, then call ``crop_ROI`` to show and
    save the selection.
    """

    def __init__(self):
        # Load image
        self.original_image = cv2.imread('1.png')
        if self.original_image is None:
            # imread returns None instead of raising for unreadable files.
            raise FileNotFoundError("Could not read image '1.png'")
        self.clone = self.original_image.copy()

        cv2.namedWindow('image')
        cv2.setMouseCallback('image', self.extractROI)

        self.selected_ROI = False

        # ROI bounding box reference points
        self.image_coordinates = []

    def extractROI(self, event, x, y, flags, parameters):
        """Mouse callback that tracks the drag defining the ROI."""
        # Record starting (x,y) coordinates on left mouse button click
        if event == cv2.EVENT_LBUTTONDOWN:
            self.image_coordinates = [(x, y)]

        # Record ending (x,y) coordinates on left mouse button release
        elif event == cv2.EVENT_LBUTTONUP:
            # Ignore a release that has no matching press; otherwise the
            # indexing below would fail or use a stale start point.
            if len(self.image_coordinates) != 1:
                return

            # Remove old bounding box
            if self.selected_ROI:
                self.clone = self.original_image.copy()

            # Draw rectangle
            self.selected_ROI = True
            self.image_coordinates.append((x, y))
            cv2.rectangle(self.clone, self.image_coordinates[0], self.image_coordinates[1], (36,255,12), 2)

            print('top left: {}, bottom right: {}'.format(self.image_coordinates[0], self.image_coordinates[1]))
            print('x,y,w,h : ({}, {}, {}, {})'.format(self.image_coordinates[0][0], self.image_coordinates[0][1], self.image_coordinates[1][0] - self.image_coordinates[0][0], self.image_coordinates[1][1] - self.image_coordinates[0][1]))

        # Clear drawing boxes on right mouse button click
        elif event == cv2.EVENT_RBUTTONDOWN:
            self.selected_ROI = False
            self.image_coordinates = []
            self.clone = self.original_image.copy()

    def show_image(self):
        """Return the image with the current selection drawn on it."""
        return self.clone

    def crop_ROI(self):
        """Show the selected region and save it to 'ROI.png'.

        Prints a hint instead when no complete, non-empty selection exists.
        """
        # A new drag may be in progress (only one point stored) while
        # selected_ROI is still True from the previous selection.
        if not (self.selected_ROI and len(self.image_coordinates) == 2):
            print('Select ROI before cropping!')
            return

        (xa, ya), (xb, yb) = self.image_coordinates
        # Sort so the crop works whichever direction the user dragged, and
        # clamp at 0 so a negative coordinate cannot wrap around in slicing.
        x1, x2 = sorted((max(xa, 0), max(xb, 0)))
        y1, y2 = sorted((max(ya, 0), max(yb, 0)))
        if x1 == x2 or y1 == y2:
            print('Select ROI before cropping!')
            return

        # Extract ROI
        self.cropped_image = self.original_image.copy()[y1:y2, x1:x2]

        # Display and save image
        cv2.imshow('Cropped Image', self.cropped_image)
        cv2.imwrite('ROI.png', self.cropped_image)
if __name__ == '__main__':
    extractArtworkROI = ExtractArtworkROI()
    while True:
        cv2.imshow('image', extractArtworkROI.show_image())
        # Mask to the low byte so the comparison with ord() holds even when
        # the backend returns extra high bits.
        key = cv2.waitKey(1) & 0xFF

        # Close program with keyboard 'q'. Leaving the loop lets the script
        # end with exit status 0; exit(1) reported a failure on normal quit.
        if key == ord('q'):
            cv2.destroyAllWindows()
            break

        # Crop ROI
        if key == ord('c'):
            extractArtworkROI.crop_ROI()

Related

How to crop square inscribed in partial circle?

I have frames of a video taken from a microscope. I need to crop them to a square inscribed to the circle but the issue is that the circle isn't whole (like in the following image). How can I do it?
My idea was to use contour finding to get the center of the circle and then find the distance from each point over the whole array of coordinates to the center, take the maximum distance as the radius and find the corners of the square analytically but there must be a better way to do it (also I don't really have a formula to find the corners).
This may not be adequate in terms of centered at center of circle, but using my iterative processing, one can crop to an approximation of the largest rectangle inside your circle area.
Input:
import cv2
import numpy as np
def side_means(mask, top, bottom, left, right):
    """Return the mean mask value along the (top, left, bottom, right) edges
    of the region ``mask[top:bottom, left:right]``.
    """
    return (
        np.mean(mask[top:top+1, left:right]),
        np.mean(mask[top:bottom, left:left+1]),
        np.mean(mask[bottom-1:bottom, left:right]),
        np.mean(mask[top:bottom, right-1:right]),
    )


def largest_white_rect(mask):
    """Shrink the full-image rectangle until all four edges are white.

    ``mask`` is a 2-D array holding 255 for wanted pixels and 0 otherwise.
    On every step the edges are visited in the order top, left, bottom,
    right: an edge whose mean is 255 is frozen, and the first unfrozen edge
    that is currently the darkest moves one pixel inward.

    Returns ``(top, bottom, left, right)`` for use as
    ``img[top:bottom, left:right]``. When the mask has no suitable white
    area, the region collapses and an empty rectangle is returned (the
    original loop never terminated in that case because the edge means
    became NaN).
    """
    height, width = mask.shape[:2]
    bounds = {"top": 0, "left": 0, "bottom": height, "right": width}
    inward = {"top": 1, "left": 1, "bottom": -1, "right": -1}
    sides = ("top", "left", "bottom", "right")
    active = set(sides)

    while (active
           and bounds["top"] < bounds["bottom"]
           and bounds["left"] < bounds["right"]):
        means = dict(zip(sides, side_means(
            mask, bounds["top"], bounds["bottom"],
            bounds["left"], bounds["right"])))
        darkest = min(means.values())
        for side in sides:
            if side not in active:
                continue
            if means[side] == 255:
                # Edge is entirely white: stop moving it.
                active.discard(side)
            elif means[side] == darkest:
                bounds[side] += inward[side]
                # Edge means changed; recompute before looking further.
                break

    return bounds["top"], bounds["bottom"], bounds["left"], bounds["right"]


if __name__ == '__main__':
    # read image
    img = cv2.imread('img.jpg')
    if img is None:
        raise SystemExit("Could not read image 'img.jpg'")

    # threshold so border is black and rest is white (invert as needed).
    # The upper threshold is 20 because the black border is not pure black.
    lower = (0,0,0)
    upper = (20,20,20)
    mask = cv2.inRange(img, lower, upper)
    mask = 255 - mask

    top, bottom, left, right = largest_white_rect(mask)
    if top >= bottom or left >= right:
        raise SystemExit('No bright region found to crop')

    # crop input
    result = img[top:bottom, left:right]

    # print crop values
    print("top: ",top)
    print("bottom: ",bottom)
    print("left: ",left)
    print("right: ",right)
    print("height:",result.shape[0])
    print("width:",result.shape[1])

    # save cropped image
    cv2.imwrite('img_cropped.png',result)
    cv2.imwrite('img_mask.png',mask)

    # show the images
    cv2.imshow("mask", mask)
    cv2.imshow("cropped", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Result:
Let's start with an illustration of the problem to help with the explanation.
Of course, we have to begin with loading the image. Let's also grab its width and height, since they will be useful later on.
# Load the input in colour and keep its size; width and height are used
# later to clamp the crop rectangle to the image.
img = cv2.imread('TUP74.jpg', cv2.IMREAD_COLOR)
height, width = img.shape[:2]
First, let's convert the image to grayscale and then apply threshold to make the circle all white, and the background black. I arbitrarily picked a threshold value of 31, which seems to give reasonable results.
# Binarise: grey values above 31 become 255, everything else becomes 0.
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 31, 255, cv2.THRESH_BINARY)
The result of those operations looks like this:
Now, we can determine the "top" and "bottom" of the circle (first_yd and last_yd), by finding the first and last row that contains at least one white pixel. I chose to use cv2.reduce to find the maximum of each row (since the thresholded image only contains 0's and 255's, a non-zero result means there is at least 1 white pixel), followed by cv2.findNonZero to get the row numbers.
# Collapse each row to its maximum, so a non-zero entry marks a row that
# contains at least one white pixel.
reduced = cv2.reduce(thresh, 1, cv2.REDUCE_MAX)
row_info = cv2.findNonZero(reduced)
# Index 1 of each point is its y coordinate: first and last white row.
first_yd, last_yd = row_info[0][0][1], row_info[-1][0][1]
This information allows us to determine the diameter of the circle d, its radius r (r = d/2), as well as the Y coordinate of the center of the circle center_y.
# Vertical extent of the white area is taken as the circle diameter.
diameter = last_yd - first_yd
radius = int(diameter / 2)  # truncated to whole pixels
center_y = first_yd + radius
Next, we need to determine the X coordinate of the center of the circle center_x.
Let's take advantage of the fact that the circle is cropped on the left-hand side. The white pixels in the first column of the threshold image represent a chord c of the circle (red in the diagram).
Again, we begin with finding the "top" and "bottom" of the chord (first_yc and last_yc), but since we're working with a single column, we only need cv2.findNonZero.
# White pixels in the first image column form the chord where the circle
# is cut off by the left image border.
row_info = cv2.findNonZero(thresh[:,0])
first_yc, last_yc = row_info[0][0][1], row_info[-1][0][1]
c = last_yc - first_yc  # chord length in pixels
Now we have a nice right-angled triangle with one side adjacent to the right angle being half of the chord c (red in the diagram), the other adjacent side being the unknown offset o, and the hypotenuse (green in the diagram) being the radius of the circle r. Let's apply Pythagoras' theorem:
$r^2 = (c/2)^2 + o^2$
$o^2 = r^2 - (c/2)^2$
$o = \sqrt{r^2 - (c/2)^2}$
And in Python:
# Offset of the centre from the chord; the chord lies in column 0, so the
# offset is also the x coordinate of the centre.
center_x = int(math.sqrt(radius**2 - (c/2)**2))
Now we're ready to determine the parameters of the inscribed square. Let's keep in mind that the center of the circle and center of its inscribed square are co-located. Here is another illustration:
We will again use Pythagoras' theorem. The hypotenuse of the right triangle is again the radius r. Both of the sides adjacent to the right angle are of equal length, which is half the length of the side of inscribed square s.
$r^2 = (s/2)^2 + (s/2)^2$
$r^2 = 2 \times (s/2)^2$
$r^2 = 2 \times s^2/2^2$
$r^2 = s^2/2$
$s^2 = 2 \times r^2$
$s = \sqrt{2} \times r$
And in Python:
# Side of the inscribed square, s = sqrt(2) * r, truncated to whole pixels.
s = int(math.sqrt(2) * radius)
Finally, we can determine the top-left and bottom-right corners of the inscribed square. Both of those points are offset by s/2 from the common center.
# Corners of the square, each half a side away from the shared centre.
half_s = int(s/2)
tl = (center_x - half_s, center_y - half_s)  # top-left (x, y)
br = (center_x + half_s, center_y + half_s)  # bottom-right (x, y)
We have determined all the parameters we need. Let's print them out...
Circle diameter = 1167 pixels
Circle radius = 583 pixels
Circle center = (404,1089)
Inscribed square side = 824 pixels
Inscribed square top-left = (-8,677)
Inscribed square bottom-right = (816,1501)
and visualize the center (green), the detected circle (red) and the inscribed square (blue) on a copy of the input image:
Now we can do the cropping, but first we have to make sure we don't go out of bounds of the source image.
# Clamp the square to the image so the slice below cannot use negative or
# out-of-range indices.
crop_left = max(tl[0], 0)
crop_top = max(tl[1], 0) # Kinda redundant, but why not
crop_right = min(br[0], width)
crop_bottom = min(br[1], height) # ditto
# NumPy images are indexed [row, column], i.e. [y, x].
cropped = img[crop_top:crop_bottom, crop_left:crop_right]
And that's it. Here's the cropped image (it's rectangular, since small part of the inscribed square falls outside the source image, and scaled down for embedding -- click to get the full-sized image):
Complete Script
import cv2
import numpy as np
import math
img = cv2.imread('TUP74.jpg', cv2.IMREAD_COLOR)
if img is None:
    raise SystemExit("Could not read image 'TUP74.jpg'")
height, width = img.shape[:2]

img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 31, 255, cv2.THRESH_BINARY)

# Find top/bottom of the circle, to determine radius and center
reduced = cv2.reduce(thresh, 1, cv2.REDUCE_MAX)
row_info = cv2.findNonZero(reduced)
if row_info is None:
    # findNonZero yields None when nothing exceeds the threshold.
    raise SystemExit('No pixels above the threshold; cannot locate the circle')
first_yd, last_yd = row_info[0][0][1], row_info[-1][0][1]

diameter = last_yd - first_yd
radius = int(diameter / 2)
center_y = first_yd + radius

# Repeat again, just on first column, to find length of a chord of the circle
row_info = cv2.findNonZero(thresh[:,0])
if row_info is None:
    raise SystemExit('Circle is not cut off by the left image edge; '
                     'this method needs a chord in the first column')
first_yc, last_yc = row_info[0][0][1], row_info[-1][0][1]

c = last_yc - first_yc

# Apply Pythagoras theorem to find the X offset of the center from the chord
# Since the chord is in column 0, this is also the X coordinate.
# max(..., 0) guards against a slightly negative value caused by rounding
# when the chord is about as long as the measured diameter.
center_x = int(math.sqrt(max(radius**2 - (c/2)**2, 0)))

# Find length of the side of the inscribed square (Pythagoras again)
s = int(math.sqrt(2) * radius)

# Now find the top-left and bottom-right corners of the square
half_s = int(s/2)
tl = (center_x - half_s, center_y - half_s)
br = (center_x + half_s, center_y + half_s)

# Let's print out what we found
print("Circle diameter = %d pixels" % diameter)
print("Circle radius = %d pixels" % radius)
print("Circle center = (%d,%d)" % (center_x, center_y))
print("Inscribed square side = %d pixels" % s)
print("Inscribed square top-left = (%d,%d)" % tl)
print("Inscribed square bottom-right = (%d,%d)" % br)

# And visualize it...
vis = img.copy()
cv2.line(vis, (center_x-5,center_y), (center_x+5,center_y), (0,255,0), 3)
cv2.line(vis, (center_x,center_y-5), (center_x,center_y+5), (0,255,0), 3)
cv2.circle(vis, (center_x,center_y), radius, (0,0,255), 3)
cv2.rectangle(vis, tl, br, (255,0,0), 3)

# Write some illustration images
cv2.imwrite('circ_thresh.png', thresh)
cv2.imwrite('circ_vis.png', vis)

# Time to do some cropping, but we need to make sure the coordinates are inside the bounds of the image
crop_left = max(tl[0], 0)
crop_top = max(tl[1], 0) # Kinda redundant, but why not
crop_right = min(br[0], width)
crop_bottom = min(br[1], height) # ditto

cropped = img[crop_top:crop_bottom, crop_left:crop_right]
cv2.imwrite('circ_cropped.png', cropped)
NB: The main focus of this was the explanation of the algorithm. I've been kinda blunt on rounding the values, and there may be some off-by-one errors. For the sake of brevity, error checking is minimal. It's left as an exercise to the reader to address those issues as necessary.
Furthermore, the assumption is that the left-hand side of the circle is cropped as in the sample image. It should be fairly trivial to extend this to handle other possible scenarios, using the techniques I've demonstrated.
Building on Dan Mašek's answer, here is an alternate method of computing the center and radius in Python/OpenCV/Numpy, in particular, the x-coordinate of the center.
The idea is simply find the coordinate of column that has the largest non-zero count in the thresholded image.
Input:
import cv2
import numpy as np
import math
img = cv2.imread('img_circle.jpg')
height, width = img.shape[:2]

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 31, 255, cv2.THRESH_BINARY)

# Vertical extent of the white area gives the radius and the y coordinate
# of the center.
reduced = cv2.reduce(thresh, 1, cv2.REDUCE_MAX)
row_info = cv2.findNonZero(reduced)
first_yd = row_info[0][0][1]
last_yd = row_info[-1][0][1]

diameter = last_yd - first_yd
radius = int(diameter / 2)
center_y = first_yd + radius

# The column holding the most white pixels gives the x coordinate of the
# center.
col_counts = np.count_nonzero(thresh, axis=0)
max_counts = np.amax(col_counts)

# All x coordinates whose count equals the maximum.
max_coords = np.argwhere(col_counts == max_counts)

# Several columns may share the maximum; step half their number to the right
# of the first one.
num_max = len(max_coords)
center_x = max_coords[0][0] + num_max//2

print("radius:", radius, "center_x:", center_x, "center_y:", center_y)
print('')
Result:
radius: 583 center_x: 388 center_y: 1089
The rest is the same as in Dan Mašek's answer.
Find the edge points of the image circle, and then fit a circle to the edge.
Or, you may be able to use minEnclosingCircle() instead of circle fitting.
(I omit the explanation of the subsequent steps for obtaining a square.)

why running the code with multiprocessing.pool() is 100x slower than running the code without it?

I am using windows 8.1 core I4 with python 3.7 with Opencv 4.1. I want to detect black objects in the frames and show them with a green square around them and the darkest one with red square around them. I wrote a code that is supposed to detect black spots in the image. and find the darkest one and mark it as a target. when I execute the code sequentially it works fine. I need to decrease running time using Multiprocessing.pool(). this is my original code without multiprocessing (I added a command line to each paragraph to describe the processing)
class Detection:
    """Finds dark blobs in a frame and marks the darkest candidates.

    Assign the frame to ``image`` and call ``first_filter`` before using
    the other methods.
    """

    def __init__(self):
        self.image = []
        self.image_filter_hvs = []

    def first_filter(self):
        """Blur a copy of the frame and store its HSV conversion."""
        # Blur image to remove noise
        blurred = cv2.GaussianBlur(self.image.copy(), (3, 3), 0)
        # Convert image from BGR to HSV
        self.image_filter_hvs = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)

    def filters(self, lower, higher):
        """Return a dilated binary mask of the HSV pixels within
        [lower, higher]."""
        # Pixels inside the range become white, all others black.
        in_range = cv2.inRange(self.image_filter_hvs, lower, higher)
        # Two dilation passes make the white blobs larger.
        return cv2.dilate(in_range, None, iterations=2)

    def find_contours(self, binary_image):
        """Return ``[value, [x, y, w, h]]`` for each external contour whose
        area is strictly between 15 and 200.

        ``value`` is channel 2 of ``self.image`` at the centre of the
        contour's bounding box.
        """
        contours = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
        candidates = []
        for contour in contours:
            # Skip contours outside the area window to keep the list short.
            if not 200 > cv2.contourArea(contour) > 15:
                continue
            xx, yy, ww, hh = cv2.boundingRect(contour)
            # Centre of the bounding box.
            cx = int(xx + ww / 2)
            cy = int(yy + hh / 2)
            value = self.image[cy][cx][2]
            candidates.append([value, [xx, yy, ww, hh]])
        return candidates

    def find_best_contours(self, target_position):
        """Draw the candidates onto ``self.image`` and return the best box.

        Candidates are sorted ascending by their first item. The first
        quarter (rounded) get a green rectangle and the very first one a red
        rectangle. Returns ``(x, y, w, h)`` of that first candidate, or
        None when the list is empty.
        """
        if len(target_position) == 0:
            return None

        # Lowest value first, so lighter candidates end up at the back.
        ranked = sorted(target_position, key=itemgetter(0))

        # Only a quarter of the candidates are drawn to limit the work.
        keep = round(len(ranked) / 4)
        for _, (xx, yy, ww, hh) in ranked[:keep]:
            cv2.rectangle(self.image, (xx, yy), (xx + ww, yy + hh), (0, 255, 0), 2)

        # Red rectangle around the first-ranked candidate.
        bx, by, bw, bh = ranked[0][1]
        cv2.rectangle(self.image, (bx, by), (bx + bw, by + bh), (0, 0, 255), 2)

        # Estimated location, intended for use by a tracker.
        return bx, by, bw, bh
# Main: run the detector on every frame of the video, one frame at a time.
target_estimated_location = []
cam = cv2.VideoCapture("data2.avi")
ob = Detection()
while True:
    ret_val, frame = cam.read()
    if not ret_val:
        # read() reports failure here (also once the stream has ended).
        print("can not open the video")
        break

    # Detection pipeline: filter, threshold, collect candidates, mark them.
    ob.image = frame
    ob.first_filter()
    binary_image_1 = ob.filters((0, 0, 0), (255, 100, 130))
    target_position = ob.find_contours(binary_image_1)
    target_locations = ob.find_best_contours(target_position)

    # The rectangles were drawn directly onto ob.image.
    Output_image = ob.image
    cv2.imshow('webcam', Output_image)
    cv2.waitKey(20)
cam.release()
I want to reduce running time. so I decided to use multiprocessing and I changed For loop to a multiprocessing.pool to use all of the CPU power.
class Detection:
    """Variant of the detector whose per-contour work lives in
    ``Green_contours`` so it can be mapped over the contours one by one.
    """

    def __init__(self):
        self.image = []
        self.image_filter_hvs = []

    def first_filter(self):
        """Blur a copy of the frame and store its HSV conversion."""
        # Blur image to remove noise
        blurred = cv2.GaussianBlur(self.image.copy(), (3, 3), 0)
        # Convert image from BGR to HSV
        self.image_filter_hvs = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)

    def filters(self, lower, higher):
        """Return a dilated binary mask of the HSV pixels within
        [lower, higher]."""
        # Pixels inside the range become white, all others black.
        in_range = cv2.inRange(self.image_filter_hvs, lower, higher)
        # Two dilation passes make the white blobs larger.
        return cv2.dilate(in_range, None, iterations=2)

    def find_contours(self, binary_image):
        """Return the external contours found in ``binary_image``."""
        return cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

    def Green_contours(self, ContFrame):
        """Evaluate a single contour.

        Returns ``[value, [x, y, w, h]]`` when the contour is non-empty and
        its area is strictly between 15 and 200, where ``value`` is channel
        2 of ``self.image`` at the centre of the bounding box. Returns None
        otherwise.
        """
        if len(ContFrame) == 0:
            return None
        # Drop contours outside the area window.
        if not 200 > cv2.contourArea(ContFrame) > 15:
            return None

        xx, yy, ww, hh = cv2.boundingRect(ContFrame)
        # Centre of the bounding box.
        cx = int(xx + ww / 2)
        cy = int(yy + hh / 2)
        value = self.image[cy][cx][2]
        return [value, [xx, yy, ww, hh]]

    def find_best_contours(self, target_position):
        """Draw the candidates onto ``self.image`` and return the best box.

        Candidates are sorted ascending by their first item. The first
        quarter (rounded) get a green rectangle and the very first one a red
        rectangle. Returns ``(x, y, w, h)`` of that first candidate, or
        None when the list is empty.
        """
        if len(target_position) == 0:
            return None

        ranked = sorted(target_position, key=itemgetter(0))

        keep = round(len(ranked) / 4)
        for _, (xx, yy, ww, hh) in ranked[:keep]:
            cv2.rectangle(self.image, (xx, yy), (xx + ww, yy + hh), (0, 255, 0), 2)

        # Red rectangle around the first-ranked candidate.
        bx, by, bw, bh = ranked[0][1]
        cv2.rectangle(self.image, (bx, by), (bx + bw, by + bh), (0, 0, 255), 2)

        # Estimated location, intended for use by a tracker.
        return bx, by, bw, bh
if __name__ == "__main__":
    # Main
    cam = cv2.VideoCapture("data1.mp4")
    ob = Detection()

    # Create the worker pool ONCE. The original built a new Pool for every
    # frame and never closed it, so each frame paid for starting three
    # processes and the old workers piled up. The context manager also shuts
    # the workers down when the loop ends.
    #
    # NOTE(review): every map() call still pickles `ob` (including the frame)
    # to send the bound method to the workers, while the per-contour work is
    # tiny. The plain loop of the sequential version may well remain faster.
    with Pool(processes=3) as p:
        while True:
            ret_val, frame = cam.read()
            if not ret_val:
                print("can not open the video")
                break

            # detection
            ob.image = frame
            ob.first_filter()
            binary_image_1 = ob.filters((0, 0, 0), (255, 100, 130))
            conts = ob.find_contours(binary_image_1)

            green_cont = p.map(ob.Green_contours, conts)
            # Green_contours returns None for rejected contours; drop those.
            green_cont = [x for x in green_cont if x is not None]
            target_position = green_cont
            target_locations = ob.find_best_contours(target_position)

            # Get image from camera
            Output_image = ob.image
            # Show image's windows
            cv2.imshow('webcam', Output_image)
            cv2.waitKey(1)
    cam.release()
in this code, I used Multiprocessing.pool() instead of for loop. so I think it should be more efficient. but its too slow. it updates each frame every 5 mins.
is there any issue with my code?
is there any idea to make my code faster?
how can I use multiprocessing to speed up the code.
Is my question clear? because I tried a lot to ask a clear question!
thanks
There is a known cv2 issue that breaks multiprocessing on OSX. The workaround is to configure new processes to be created with "spawn" rather than "fork":
# Start worker processes with "spawn" rather than "fork"; call this before
# the Pool is created.
multiprocessing.set_start_method('spawn')
Add the above line of code before creating the multiprocessing Pool.

Detect a certain Object in a video stream

I am trying to detect a white object on a black/white road so that an autonomous RC car can drive around it. Instead, I am detecting everything but the white box on the road.
What I tried can be seen in my code Example
#input= one video stream frame 320x240
# NOTE(review): `input` shadows the Python builtin of the same name.
# Work on a copy so the drawing calls below leave the source frame untouched.
frame = copy.deepcopy(input)
grayFrame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
# Adaptive mean threshold over 123x123 neighbourhoods with offset C=-19.
threshGray = cv2.adaptiveThreshold(
grayFrame,
255,
cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY,
blockSize=123,
C=-19,
)
# Two-value unpacking matches OpenCV versions where findContours returns
# (contours, hierarchy).
contours,_ = cv2.findContours(threshGray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
#some filtering needs to be done
#
#after filtering append contour
# NOTE(review): `filteredContours` and `x, y, w, h` are not defined in this
# snippet; presumably they come from the omitted filtering step — confirm.
filteredContours.append(cnt)
cv2.rectangle(frame, (x, y), (x + w, y + h), (3, 244, 244), 1)
cv2.drawContours(frame, filteredContours, -1, (255, 0, 255),1 )
cv2.imshow("with contours", frame)
cv2.imshow("adaptiveThreshhold", threshGray)
cv2.imshow("input", input)
I'm looking for a way to draw a bounding box around the obstacle.
Problem is I dont know how to extract this box from the rest.
This is probably because the contour of the box and the lines on the right are connected, which is why the bounding box is so big. It would be great if someone knows a way to separate them.
Click here to see the Result
First: Input image
Second: after adaptiveThreshold
third: with contours(pink) and bounding boxes(yellow)
At this point you have several candidate contours with white color values.
You need to add code in the "#some filtering needs to be done" section to remove from the candidate list everything that is NOT the bounding box you want to find.
I suggest comparing each candidate against a sufficiently large square box.
All contours other than the BOX (the one you want to find on the road) will fail the square-box condition mentioned above.
I think what you are looking for is triangular masking: as seen in the input image, you have lane markings as well. Did you try using a lane detector? With it, all the areas outside the lane can be masked so that only the space inside the lane is processed.
Below I have tried to use a lane detector based on HoughLinesP and added contours as well. Try this; I did not test the code, but I see no issues.
#! /usr/bin/env python 3
"""
Lane detector using the Hough transform method
"""
import cv2 as cv
import numpy as np
# import matplotlib.pyplot as plt
import random as rng
rng.seed(369)
def do_canny(frame, low_threshold=50, high_threshold=150):
    """Return the Canny edge map of a BGR frame.

    Args:
        frame: colour image in OpenCV's BGR channel order (as delivered by
            cv.VideoCapture / cv.imread).
        low_threshold: lower hysteresis threshold passed to cv.Canny.
        high_threshold: upper hysteresis threshold passed to cv.Canny.

    Returns:
        Single-channel edge image produced by cv.Canny.
    """
    # Only luminance is needed for edge detection. The frame is BGR, so use
    # the BGR conversion; the RGB one would swap the red and blue weights.
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # 5x5 Gaussian blur (sigma derived from the kernel size) to suppress
    # noise before the gradient is computed.
    blur = cv.GaussianBlur(gray, (5, 5), 0)
    return cv.Canny(blur, low_threshold, high_threshold)
def do_segment(frame, polygons=None):
    """Keep only the region of interest of `frame` and zero everything else.

    Args:
        frame: image to mask (the caller passes the Canny edge map).
        polygons: optional array of polygons, each a list of (x, y) points,
            describing the region to keep. Defaults to the triangle
            (0, height), (800, height), (380, 290), where height is the
            number of rows of `frame`.

    Returns:
        Copy of `frame` in which every pixel outside the polygons is 0.
    """
    if polygons is None:
        # frame.shape[0] is the row count; y grows downwards, so `height`
        # is the y-coordinate of the bottom edge of the frame.
        height = frame.shape[0]
        polygons = np.array(
            [[(0, height), (800, height), (380, 290)]], dtype=np.int32
        )
    # Start from an all-zero mask with the same shape and dtype as the frame.
    mask = np.zeros_like(frame)
    # Fill the region of interest with 255 (all bits set for 8-bit images);
    # everything else stays 0.
    cv.fillPoly(mask, polygons, 255)
    # The bitwise AND keeps the frame's pixels only where the mask is set.
    return cv.bitwise_and(frame, mask)
def calculate_lines(frame, lines):
    """Average the Hough segments into one left and one right lane line.

    Args:
        frame: image whose height is used to place the averaged lines.
        lines: output of cv.HoughLinesP (each entry reshapes to
            x1, y1, x2, y2), or None when no segment was detected.

    Returns:
        Array with one [x1, y1, x2, y2] row per detected side, left first
        and then right, or None when no usable segment is available.
    """
    if lines is None:
        return None
    left = []
    right = []
    for line in lines:
        x1, y1, x2, y2 = line.reshape(4)
        # Vertical segments have no finite slope, and horizontal ones cannot
        # be intersected with the bottom of the frame; neither can be a lane
        # border, so leave them out of the average.
        if x1 == x2 or y1 == y2:
            continue
        # Degree-1 fit through the two end points gives (slope, intercept).
        slope, y_intercept = np.polyfit((x1, x2), (y1, y2), 1)
        # y grows downwards, so a negative slope belongs to the left border
        # and a non-negative one to the right border.
        if slope < 0:
            left.append((slope, y_intercept))
        else:
            right.append((slope, y_intercept))
    # Average each side separately and skip sides without any segment;
    # averaging an empty list would yield nan.
    averaged = [
        calculate_coordinates(frame, np.average(side, axis=0))
        for side in (left, right)
        if side
    ]
    if not averaged:
        return None
    return np.array(averaged)
def calculate_coordinates(frame, parameters, segment_height=150):
    """Turn a (slope, intercept) pair into a drawable line segment.

    Args:
        frame: image; only its height (frame.shape[0]) is used.
        parameters: (slope, intercept) of the line y = slope * x + intercept.
            The slope must be non-zero.
        segment_height: vertical extent of the segment in pixels, measured
            upwards from the bottom of the frame.

    Returns:
        np.array([x1, y1, x2, y2]) with integer pixel coordinates.
    """
    slope, intercept = parameters
    # The segment starts at the bottom edge of the frame ...
    y1 = frame.shape[0]
    # ... and ends `segment_height` pixels above it.
    y2 = int(y1 - segment_height)
    # Invert y = m*x + b to get the x position for each y.
    x1 = int((y1 - intercept) / slope)
    x2 = int((y2 - intercept) / slope)
    return np.array([x1, y1, x2, y2])
def visualize_lines(frame, lines):
    """Draw `lines` in green on a black image the size of `frame`.

    Args:
        frame: image that defines the shape and dtype of the overlay.
        lines: iterable of (x1, y1, x2, y2) rows, or None for "no lines".

    Returns:
        Overlay image: zeros everywhere except the drawn lines.
    """
    lines_visualize = np.zeros_like(frame)
    if lines is None:
        return lines_visualize
    for x1, y1, x2, y2 in lines:
        # Pass plain Python ints as point coordinates; green, 5 px thick.
        cv.line(
            lines_visualize,
            (int(x1), int(y1)),
            (int(x2), int(y2)),
            (0, 255, 0),
            5,
        )
    return lines_visualize
# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture(1)
while cap.isOpened():
    # ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
    ret, frame = cap.read()
    if not ret:
        # No frame was delivered (stream ended or camera unavailable).
        break
    canny = do_canny(frame)
    cv.imshow("canny", canny)
    # Keep only the edges inside the lane region of interest.
    segment = do_segment(canny)
    hough = cv.HoughLinesP(segment, 2, np.pi / 180, 100, np.array([]), minLineLength=100, maxLineGap=50)
    # Averages multiple detected lines from hough into one line for left border of lane and one line for right border of lane.
    # HoughLinesP returns None when nothing was found, so skip the averaging then.
    lines = calculate_lines(frame, hough) if hough is not None else None
    # Visualizes the lines
    lines_visualize = visualize_lines(frame, lines)
    cv.imshow("hough", lines_visualize)
    # Overlays lines on frame by taking their weighted sums and adding an arbitrary scalar value of 1 as the gamma argument
    output = cv.addWeighted(frame, 0.9, lines_visualize, 1, 1)
    # findContours needs a single-channel 8-bit image, so run it on the
    # masked edge map instead of the 3-channel overlay.
    contours, _ = cv.findContours(segment, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    # Simplify each contour and take the bounding rectangle of the result.
    contours_poly = [cv.approxPolyDP(c, 3, True) for c in contours]
    boundRect = [cv.boundingRect(poly) for poly in contours_poly]
    drawing = np.zeros((output.shape[0], output.shape[1], 3), dtype=np.uint8)
    # Draw polygonal contour + bounding rects, one random colour per contour
    for i, (x, y, w, h) in enumerate(boundRect):
        # random.randint is inclusive on both ends, so 255 is the top value.
        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))
        cv.drawContours(drawing, contours_poly, i, color)
        cv.rectangle(drawing, (int(x), int(y)), (int(x + w), int(y + h)), color, 2)
    # Opens a new window and displays the output frame
    cv.imshow('Contours', drawing)
    # Frames are read by intervals of 10 milliseconds. The programs breaks out of the while loop when the user presses the 'q' key
    if cv.waitKey(10) & 0xFF == ord('q'):
        break
# The following frees up resources and closes all windows
cap.release()
cv.destroyAllWindows()
Try different threshold values for Canny.

Calculate slope, length and angle of a specific part / side / line on a contour?

I got two detected contours in an image and need the diameter between the two vertical-edges of the top contour and the diameter between the vertical-edges of the lower contour. I achieved this with this code.
import cv2
import numpy as np
import math, os
import imutils
img = cv2.imread("1.jpg")
# imread returns BGR data, so convert with the BGR code (the RGB code would
# swap the red and blue weights of the grey conversion).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
edges = cv2.Canny(gray, 200, 100)
# Dilate then erode to close small gaps in the edge map.
edges = cv2.dilate(edges, None, iterations=1)
edges = cv2.erode(edges, None, iterations=1)
cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
# pick the largest and the smallest contour by area
c1 = max(cnts, key=cv2.contourArea)
c2 = min(cnts, key=cv2.contourArea)
# determine the most extreme points along the contours
extLeft1 = tuple(c1[c1[:, :, 0].argmin()][0])
extRight1 = tuple(c1[c1[:, :, 0].argmax()][0])
extLeft2 = tuple(c2[c2[:, :, 0].argmin()][0])
extRight2 = tuple(c2[c2[:, :, 0].argmax()][0])
# show contour (drawContours draws into img itself, so cimg and img are the same image)
cimg = cv2.drawContours(img, cnts, -1, (0, 200, 0), 2)
# set y of left point to y of right point so each measured line is horizontal
extLeft1 = (extLeft1[0], extRight1[1])
extLeft2 = (extLeft2[0], extRight2[1])
# compute the distance between the points (x1, y1) and (x2, y2)
dist1 = math.hypot(extLeft1[0] - extRight1[0], extLeft1[1] - extRight1[1])
dist2 = math.hypot(extLeft2[0] - extRight2[0], extLeft2[1] - extRight2[1])
# draw lines
cv2.line(cimg, extLeft1, extRight1, (255, 0, 0), 1)
cv2.line(cimg, extLeft2, extRight2, (255, 0, 0), 1)
# draw the distance text
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
fontColor = (255, 0, 0)
lineType = 1
cv2.putText(cimg, str(dist1), (155, 100), font, fontScale, fontColor, lineType)
cv2.putText(cimg, str(dist2), (155, 280), font, fontScale, fontColor, lineType)
# show image
cv2.imshow("Image", img)
cv2.waitKey(0)
Now I would also need the angle of the slope lines on the bottom side of the upper contour.
Any ideas how I can get this? Is it possible using contours?
Or is it necessary to use HoughLinesP and sort the regarding lines somehow?
And continued question: Maybe its also possible to get function which describes parabola slope of that sides ?
Thanks alot for any help!
There are several ways to obtain just the slopes. To find them, we can use cv2.HoughLines to detect the bottom horizontal line, detect the end points of that line, and from those obtain the slopes. As an illustration,
# Standard Hough transform on the edge map; the accumulator threshold is about two thirds of dist2.
lines = cv2.HoughLines(edges, rho=1, theta=np.pi/180, threshold=int(dist2*0.66) )
on edges in your code gives 4 lines, and if we force the angle to be horizontal
# Draw on a copy so the original image stays untouched.
img_lines = img.copy()
# HoughLines returns None when no line reaches the threshold.
for line in (lines if lines is not None else []):
    rho, theta = line[0]
    # here we filter out non-horizontal lines
    # (theta is the angle of the line's normal, so pi/2 means horizontal;
    # allow one degree of tolerance)
    if abs(theta - np.pi/2) > np.pi/180:
        continue
    a = np.cos(theta)
    b = np.sin(theta)
    # (x0, y0) is the point of the line closest to the origin.
    x0 = a*rho
    y0 = b*rho
    # Extend 1000 px along the line direction (-b, a) in both directions.
    x1 = int(x0 + 1000*(-b))
    y1 = int(y0 + 1000*(a))
    x2 = int(x0 - 1000*(-b))
    y2 = int(y0 - 1000*(a))
    cv2.line(img_lines, (x1, y1), (x2, y2), (0, 0, 255), 1)
we get:
For the extended question concerns with the parabolas, we first compose a function that returns the left and right points:
def horizontal_scan(gray_img, thresh=50, start=50):
    '''
    Scan row by row for the left and right end of the first foreground run,
    stopping at the first row that is entirely background.

    @param gray_img: 2-D grey image (sequence of rows of intensities)
    @param thresh: pixels below this value count as background
    @param start: y coordinate (row index) to start scanning at
    @return: list of (row, left, right) tuples; `left` is the first
        foreground column and `right` is one past the last column of that
        run. Rows whose run is suddenly much narrower than the previous
        accepted one are skipped.
    '''
    ret = []
    thickness = 0
    for i in range(start, len(gray_img)):
        row = gray_img[i]
        width = len(row)
        # scan for left: first pixel that is not background
        left = 0
        while left < width and row[left] < thresh:
            left += 1
        if left == width:
            # the whole row is background: the shape has ended
            break
        # scan for right: end of the foreground run that starts at `left`
        right = left
        while right < width and row[right] >= thresh:
            right += 1
        span = right - left
        if thickness == 0:
            thickness = span
        # prevent sudden drop, error/noise: ignore rows whose run is less
        # than a fifth of the last accepted thickness
        if span < thickness // 5:
            continue
        thickness = span
        ret.append((i, left, right))
    return ret
# we start scanning at the row of extLeft1 and go down until we see a blank line
# with some tweaks, we can make horizontal_scan run on edges,
# which would be simpler and faster
horizontal_lines = horizontal_scan(gray, start = extLeft1[1])
# sanity check: the first scanned row (row, left, right) should be close to extLeft1 and extRight1
print(horizontal_lines[0], extLeft1, extRight1[0])
Note that we can use this function to find the end points of the horizontal line returned by HoughLines.
# the last entry of horizontal_lines holds the points we need:
# its row index and the left/right ends of the run found in that row
upper_lowest_y, upper_lowest_left, upper_lowest_right = horizontal_lines[-1]
# draw on a copy so img itself is not modified
img_lines = img.copy()
# connect each end of that row to the matching extreme point (red in BGR)
cv2.line(img_lines, (upper_lowest_left, upper_lowest_y), extLeft1, (0,0,255), 1)
cv2.line(img_lines, (upper_lowest_right, upper_lowest_y), extRight1, (0,0,255),1)
and that gives:
Let's return to the extended question, where we have those left and right points:
# Rearrange every (row, left, right) scan result into (x, y) points per side.
left_points = [(left, row) for row, left, _right in horizontal_lines]
right_points = [(right, row) for row, _left, right in horizontal_lines]
Obviously, they would not fit perfectly in a parabola, so we need some sort of approximation/fitting here. For that, we can build a LinearRegression model:
from sklearn.linear_model import LinearRegression
class BestParabola:
    """Least-squares parabola y = a*x**2 + b*x + c fitted to (x, y) points.

    Attributes:
        lr: the fitted LinearRegression model (features are x**2 and x).
        a, b, c: coefficients of the fitted parabola.
        coef_: the coefficients ordered as (c, b, a).
    """

    def __init__(self, points):
        """Fit the parabola to `points`, an iterable of (x, y) pairs."""
        ys = np.array([y for _, y in points])
        self.lr = LinearRegression()
        self.lr.fit(self._features(points), ys)
        # coef_ follows the feature order (x**2, x).
        self.a, self.b = self.lr.coef_
        self.c = self.lr.intercept_
        self.coef_ = (self.c, self.b, self.a)

    @staticmethod
    def _features(points):
        """Return one (x**2, x) feature row per point."""
        return np.array([(x**2, x) for x, _ in points])

    def transform(self, points):
        """Return an array of (x, y_fitted) rows for the x values of `points`.

        The y values of `points` are ignored.
        """
        x_x2 = self._features(points)
        ys = self.lr.predict(x_x2)
        return np.array([(x, y) for (_, x), y in zip(x_x2, ys)])
And then, we can fit the given left_points, right_points to get the desired parabolas:
# Fit one approximating parabola per side; the fitted coefficients are
# available afterwards as BestParabola.coef_
left_parabola = BestParabola(left_points)
left_parabola_points = left_parabola.transform(left_points)
right_parabola = BestParabola(right_points)
right_parabola_points = right_parabola.transform(right_points)
# Render the fitted curves over the image with matplotlib
# (cv2.drawContours would work as well)
plt.figure(figsize=(8, 8))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
left_xs, left_ys = left_parabola_points[:, 0], left_parabola_points[:, 1]
right_xs, right_ys = right_parabola_points[:, 0], right_parabola_points[:, 1]
plt.plot(left_xs, left_ys, linewidth=3)
plt.plot(right_xs, right_ys, linewidth=3, color='r')
plt.show()
Which gives:
The left parabola is not perfect, but you should be able to work that out if need be :-)

How can I extract image segment with specific color in OpenCV?

I work with logos and other simple graphics, in which there are no gradients or complex patterns. My task is to extract from the logo segments with letters and other elements.
To do this, I define the background color, and then I go through the picture in order to segment the images. Here is my code for more understanding:
MAXIMUM_COLOR_TRANSITION_DELTA = 100  # 0 - 765


def expand_segment_recursive(image, unexplored_foreground, segment, point, color):
    """Grow `segment` from `point` over 4-connected pixels of similar colour.

    Despite the name (kept for existing callers) the fill is iterative: an
    explicit stack replaces the recursion, so large segments no longer hit
    Python's recursion limit.

    Args:
        image: colour image; the channel values of a pixel are summed when
            colours are compared.
        unexplored_foreground: boolean mask of the pixels that may be
            claimed.
        segment: boolean mask of the pixels already in the segment. It is
            updated in place; the start point itself is expected to be set
            by the caller.
        point: (y, x) coordinates to start expanding from.
        color: reference colour. A pixel joins the segment when the sum of
            absolute channel differences to this colour is at most
            MAXIMUM_COLOR_TRANSITION_DELTA.

    Returns:
        The same `segment` array, with every reached pixel set to True.
    """
    height, width = image.shape[:2]
    # Signed integers keep the difference of unsigned pixels from wrapping.
    reference = np.asarray(color).astype(int)
    pending = [point]
    while pending:
        py, px = pending.pop()
        for y, x in ((py, px + 1), (py, px - 1), (py + 1, px), (py - 1, px)):
            if not (0 <= y < height and 0 <= x < width):
                continue
            # Only unexplored foreground that is not yet in the segment may
            # be claimed.
            if not unexplored_foreground[y, x].any() or segment[y, x].any():
                continue
            color_delta = np.sum(np.abs(image[y, x].astype(int) - reference))
            if color_delta <= MAXIMUM_COLOR_TRANSITION_DELTA:
                segment[y, x] = True
                pending.append((y, x))
    return segment
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Pass image as the argument to use the tool")
        sys.exit(-1)
    IMAGE_FILENAME = sys.argv[1]
    print(IMAGE_FILENAME)
    image = cv.imread(IMAGE_FILENAME)
    if image is None:
        # imread signals an unreadable or missing file by returning None.
        print("Could not read image: ", IMAGE_FILENAME)
        sys.exit(-1)
    height, width, _ = image.shape
    # To filter the background I use median value of the image, as background in most cases takes > 50% of image area.
    background_color = np.median(image, axis=(0, 1))
    print("Background color: ", background_color)
    # Foreground mask: True wherever any channel differs from the background
    # colour. keepdims keeps the (height, width, 1) shape of the mask.
    foreground = np.any(image != background_color, axis=2, keepdims=True)
    # Work on a copy so pixels can be ticked off without touching `foreground`.
    unexplored_foreground = foreground.copy()
    for y in range(height):
        for x in range(width):
            if unexplored_foreground[y, x]:
                segment = np.zeros(foreground.shape, foreground.dtype)
                segment[y, x] = True
                segment = expand_segment_recursive(image, unexplored_foreground, segment, (y, x), image[y, x])
                # Mark the extracted pixels as explored so each segment is
                # produced once instead of once per pixel it contains.
                unexplored_foreground &= ~segment
                cv.imshow("segment", segment.astype(np.uint8) * 255)
                # Wait for Esc (key code 27) before moving to the next segment.
                while cv.waitKey(0) != 27:
                    continue
Here is the desired result:
At the end of the run I expect 13 separate extracted segments (for this particular image). Instead I get RecursionError: maximum recursion depth exceeded, which is not surprising, as expand_segment_recursive() can be called for every pixel of the image: even with a small image resolution of 600x500 that is up to 300K calls.
My question is how can I get rid of recursion in this case and possibly optimize the algorithm with Numpy or OpenCV algorithms?
You can actually use a thresholded image (binary) and connectedComponents to do this job in a couple of steps. Also, you may use findContours or other methods.
Here is the code:
import numpy as np
import cv2
# load image as greyscale
img = cv2.imread("hp.png", 0)
# puts 0 to the white (background) and 255 in other places (greyscale value <= 250)
_, thresholded = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY_INV)
# gets the labels and the amount of labels, label 0 is the background
amount, labels = cv2.connectedComponents(thresholded)
# lets draw it for visualization purposes
# (a greyscale image has only two dimensions, so the width is shape[1])
preview = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
print(amount)  # should be 3 -> two components + background
# draw label 1 blue and label 2 green
preview[labels == 1] = (255, 0, 0)
preview[labels == 2] = (0, 255, 0)
cv2.imshow("frame", preview)
cv2.waitKey(0)
At the end, the thresholded image will look like this:
and the preview image (the one with the colored segments) will look like this:
With the mask you can always use numpy functions to get things like, coordinates of the segments you want or to color them (like I did with preview)
UPDATE
To get differently colored segments, you can try to create a "border" between the segments. Since they are plain colors and not gradients, you can run an edge detector such as Canny and then paint the detected edges black in the image:
import numpy as np
import cv2
img = cv2.imread("total.png", 0)
# background to black
img[img >= 200] = 0
# get edges
canny = cv2.Canny(img, 60, 180)
# make them thicker
kernel = np.ones((3, 3), np.uint8)
canny = cv2.morphologyEx(canny, cv2.MORPH_DILATE, kernel)
# apply edges as border in the image, so touching segments get separated
img[canny == 255] = 0
# same as before: every non-zero pixel counts as foreground
amount, labels = cv2.connectedComponents(img)
preview = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
print(amount)  # should be 14 -> 13 components + background
# color them randomly (label 0 is the background and stays black)
for i in range(1, amount):
    # the upper bound of randint is exclusive, so 256 makes 255 reachable
    preview[labels == i] = np.random.randint(0, 256, size=3, dtype=np.uint8)
cv2.imshow("frame", preview)
cv2.waitKey(0)
The result is:

Categories