As main data I have an image already converted into a list of lists of tuples (r, g, b), named start_img,
where each inner list is a line (y) of pixels and each element inside it is the actual pixel value (x) as a tuple (r, g, b).
The image is made up as follows:
Some rectangles: each one has a different color, width and height.
All other pixels are just rgb(0, 0, 0) pixels.
I CAN'T USE EXTERNAL LIBRARIES.
1st goal
I'm looking for a way to find each rectangle already in the image and store it as x, y, h, w, r, g, b,
where (x, y) is the position of the rectangle and refers to its top left pixel.
example
XXBBBBB
XXBBBXX
XXBBBXX
where X is a non black pixel and B stands for a black pixel
2nd goal
Given a list of other rectangles, I should check whether or not I can fit them into the image (one by one).
FIRST GOAL!!
In the example above there are 2 rectangles and my function should return
output: 0,0,3,2,color
5,1,2,2,color
where, for the first line, 0,0 stands for (x, y), 3,2 stands for height, width, and color, if the rectangle is red for example, should be the tuple (255, 0, 0).
So here I was thinking of using a nested for loop to iterate over each pixel: if it finds a color different than black, it should increase the width and store the starting pixel in a dict with
(x, y) as key and (w, h, r, g, b) as value.
Since it's a rectangle, I can assume that if I find red at (0, 0) and (1, 0), then while iterating on (1, 0), if it's red, (1, 1) must also be red and (1, 2) must be black (as must (0, 2)... of course the rectangles are always separated by at least one black pixel).
Of course this is just an idea; I'm not even sure it's a good method.
SECOND GOAL!!
For this task I'm just looking for a function that returns True or False.
One way to solve this is checking neighbours recursively. This way, one can extract regions of equal color. Afterwards, one can check by the number of elements whether the identified region is indeed a rectangle.
For this example I have used "X" and "Y" as colors and "B" as black, but those can easily be replaced by (r, g, b) tuples.
img_str = """
XXBBBBB
XXBBBYY
XXBBBYY
"""
# split into list of lists
img = [[letter for letter in line] for line in
       img_str.strip().splitlines()]
def get_region(img_list, x, y, color, black):
    """ recursive helper function to identify regions of same color """
    region = []
    try:  # to get a pixel of suitable color
        test_pixel = img_list[y][x]
        if test_pixel != color:
            return []  # color not suitable
    except IndexError:
        return []  # outside of image
    img_list[y][x] = black  # mask the found pixel (otherwise it would be found again later on)
    region.append((x, y))  # add found pixel to region
    region.extend(get_region(img_list, x + 1, y, color, black))  # look at right-side neighbour recursively
    region.extend(get_region(img_list, x, y + 1, color, black))  # look at down-side neighbour recursively
    return region
def get_rectangle_dim(region):
    """ check if identified region is a rectangle and return dimensionality """
    min_x = min(region, key=lambda t: t[0])[0]
    max_x = max(region, key=lambda t: t[0])[0]
    min_y = min(region, key=lambda t: t[1])[1]
    max_y = max(region, key=lambda t: t[1])[1]
    width = (max_x - min_x + 1)
    height = (max_y - min_y + 1)
    if width * height == len(region):
        return min_x, min_y, width, height
    # else this returns None implicitly
potential_rectangles = []
black = "B"

# loop over rows and columns
for y in range(len(img)):
    for x in range(len(img[0])):
        # identify target color
        if (color := img[y][x]) == black:
            continue  # skip black regions
        if region := get_region(img, x, y, color, black):
            potential_rectangles.append((region, color))  # add found regions

# filter all regions for rectangles
rectangles = []
for potential_rectangle, color in potential_rectangles:
    if rectangle := get_rectangle_dim(potential_rectangle):
        rectangles.append(rectangle + (color,))

# show end result
print(rectangles)
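The second goal is not covered by the code above. A minimal sketch of a fit check (a hypothetical helper; it assumes an unmodified copy of the image list, since get_region masks found pixels to black, and simply looks for an all-black w x h window):

def can_fit(img, w, h, black="B"):
    """Return True if a w x h block of background pixels exists somewhere in img."""
    rows, cols = len(img), len(img[0])
    for y in range(rows - h + 1):
        for x in range(cols - w + 1):
            # every pixel of the candidate window must be background
            if all(img[y + dy][x + dx] == black
                   for dy in range(h) for dx in range(w)):
                return True
    return False

With the real data, black would be the tuple (0, 0, 0), and the one-pixel gap the question mentions could be enforced by testing a (w + 2) x (h + 2) window instead.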
I have two co-ordinates stored in my variable points : [(100, 50)]
I'm trying to move my mouse with pyautogui.moveTo(points) and I get the error:
pyautogui.PyAutoGUIException: The supplied sequence must have exactly 2 or exactly 4 elements (0 were received).
I assume this means I'm passing a single list object rather than the coordinates.
What does the expression [(100, 50)] mean and how can I transform x and y into two elements?
The source code where I'm getting points from:
import cv2 as cv
import numpy as np


class Vision:
    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCOEFF_NORMED):
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        # run the OpenCV algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # Get all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        locations = list(zip(*locations[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single (non-overlapping) boxes
            rectangles.append(rect)
            rectangles.append(rect)
        # Apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # Determine the center position
                center_x = x + int(w/2)
                center_y = y + int(h/2)
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        if debug_mode:
            cv.imshow('Matches', haystack_img)

        return points
What does the expression [(100, 50)] mean and how can I transform x and y into two elements?
[...] creates a list containing whatever you put inside it. (100, 50) creates a tuple containing the integers 100 and 50. So you have a list that contains a tuple that contains two numbers.
I assume this means I'm passing a single list object rather than the coordinates.
You're right, kinda. The problem isn't that you're passing a single list object; you do need to pass a single list (or rather, sequence) object. The problem is that that list object contains only one element: the tuple.
You can check this by looking at the len of the list:
>>> l = [(100, 50)]
>>> len(l)
1
The way you intend to use it, pyautogui.moveTo(points) wants a sequence (a list or a tuple) that contains two elements. These elements are the coordinates of the point you want to move to.
The tuple that is inside the list is this two-element sequence, so that's what you need to pass:
pyautogui.moveTo(points[0])
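If the template matched in more than one place, points will hold several tuples; a small usage sketch for visiting each match in turn:

import pyautogui

for point in points:
    x, y = point          # each element is an (x, y) tuple
    pyautogui.moveTo(x, y)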
I have a stationary camera which rapidly takes photos of a continuously moving product, from a fixed position and always at the same angle (translation only). I need to stitch all the images into a panoramic picture. I've tried using the Stitcher class. It worked, but it took a long time to compute.
I also tried another method using the SIFT detector and the FLANN-based matcher, finding the homography and then warping the images. This method works fine if I only use two images. For multiple images it still doesn't stitch them properly. Does anyone know the best and fastest image stitching algorithm for this case?
This is my code which uses the Stitcher class.
import time
import cv2
import os
import numpy as np
import sys


def main():
    # read input images
    imgs = []
    path = 'pics_rotated/'
    i = 0
    for (root, dirs, files) in os.walk(path):
        images = [f for f in files]
        print(images)
        for i in range(0, len(images)):
            curImg = cv2.imread(path + images[i])
            imgs.append(curImg)

    stitcher = cv2.Stitcher.create(mode=0)
    status, result = stitcher.stitch(imgs)
    if status != cv2.Stitcher_OK:
        print("Can't stitch images, error code = %d" % status)
        sys.exit(-1)
    cv2.imwrite("imagesout/output.jpg", result)
    cv2.waitKey(0)


if __name__ == '__main__':
    start = time.time()
    main()
    end = time.time()
    print("Time --->>>>>", end - start)
    cv2.destroyAllWindows()
Briefing
Although the OpenCV Stitcher class provides lots of methods and options to perform stitching, I find it hard to use because of its complexity.
Therefore, I will try to provide the minimal and fastest way to perform stitching.
In case you are wondering about more sophisticated approaches such as exposure compensation, I highly recommend looking at the detailed sample code.
As a side note, I will be grateful if someone can convert the following functions to use Stitcher class.
Introduction
In order to combine multiple images into the same perspective, the following operations are needed:
Detect and match features.
Compute homography (perspective transform between frames).
Warp one image onto the other perspective.
Combine the base and warped images while keeping track of the shift in origin.
Given the combination pattern, stitch multiple images.
Feature detection and matching
What are features?
They are distinguishable parts, like corners of a square, that are preserved across images.
There are different algorithms proposed for obtaining these characteristic points, like Harris, ORB, SIFT, SURF, etc.
See cv::Feature2d for the full list.
I will use SIFT because it is accurate and sufficiently fast.
A feature consists of a KeyPoint, which is the location in the image, and a descriptor, which is a set of numbers (e.g. a 128-D vector) that represents the properties of the feature.
After finding distinct points in images, we need to match the corresponding point pairs.
See cv::DescriptorMatcher.
I will use the FLANN-based descriptor matcher.
First, we initialize the descriptor and matcher classes.
descriptor = cv.SIFT.create()
matcher = cv.DescriptorMatcher.create(cv.DescriptorMatcher.FLANNBASED)
Then, we find the features in each image.
(kps, desc) = descriptor.detectAndCompute(image, mask=None)
Now we find the corresponding point pairs.
if (desc1 is not None and desc2 is not None and len(desc1) >= 2 and len(desc2) >= 2):
    rawMatch = matcher.knnMatch(desc2, desc1, k=2)
matches = []
# ensure the distance is within a certain ratio of each other (i.e. Lowe's ratio test)
ratio = 0.75
for m in rawMatch:
    if len(m) == 2 and m[0].distance < m[1].distance * ratio:
        matches.append((m[0].trainIdx, m[0].queryIdx))
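Note that pts1 and pts2 used with cv.findHomography below are not built in the snippet above. A minimal sketch of that step, assuming kps1, desc1 and kps2, desc2 come from detectAndCompute on the first and second image respectively:

import numpy as np

# each match is a (trainIdx, queryIdx) pair; trainIdx indexes kps1/desc1, queryIdx indexes kps2/desc2
pts1 = np.float32([kps1[t].pt for (t, q) in matches])
pts2 = np.float32([kps2[q].pt for (t, q) in matches])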
Homography computation
Homography is the perspective transformation from one view to another.
Parallel lines in one view may not be parallel in another, like a road receding toward the sunset.
We need at least 4 corresponding point pairs; more pairs mean redundant data that has to be decomposed or eliminated.
The homography matrix transforms a point in the initial view to its warped position.
It is a 3x3 matrix that is computed by the Direct Linear Transform algorithm.
There are 8 DoF and the last element in the matrix is 1.
[pt2] = H * [pt1]
Now that we have corresponding point matches, we compute the homography.
The method we use to handle redundant data is RANSAC, which randomly selects 4 point pairs and uses the best fitting result.
See cv::findHomography for more options.
if len(matches) > 4:
    (H, status) = cv.findHomography(pts1, pts2, cv.RANSAC)
Warping to perspective
By computing homography, we know which point in the source image corresponds to which point in the destination image.
In order not to lose information from the source image, we need to pad the destination image by the amount by which the transformed points fall into negative coordinates.
At the same time, we need to keep track of the shift amount of the origin for stitching multiple images.
Auxiliary functions
# find the ROI of a transformation result
def warpRect(rect, H):
    x, y, w, h = rect
    corners = [[x, y], [x, y + h - 1], [x + w - 1, y], [x + w - 1, y + h - 1]]
    extremum = cv.transform(corners, H)
    minx, miny = np.min(extremum[:, 0]), np.min(extremum[:, 1])
    maxx, maxy = np.max(extremum[:, 0]), np.max(extremum[:, 1])
    xo = int(np.floor(minx))
    yo = int(np.floor(miny))
    wo = int(np.ceil(maxx - minx))
    ho = int(np.ceil(maxy - miny))
    outrect = (xo, yo, wo, ho)
    return outrect


# homography matrix is translated to fit in the screen
def coverH(rect, H):
    # obtain bounding box of the result
    x, y, _, _ = warpRect(rect, H)
    # shift amount to the first quadrant
    xpos = int(-x if x < 0 else 0)
    ypos = int(-y if y < 0 else 0)
    # correct the homography matrix so that no point is thrown out
    T = np.array([[1, 0, xpos], [0, 1, ypos], [0, 0, 1]])
    H_corr = T.dot(H)
    return (H_corr, (xpos, ypos))


# pad image to cover ROI, return the shift amount of origin
def addBorder(img, rect):
    x, y, w, h = rect
    tl = (x, y)
    br = (x + w, y + h)
    top = int(-tl[1] if tl[1] < 0 else 0)
    bottom = int(br[1] - img.shape[0] if br[1] > img.shape[0] else 0)
    left = int(-tl[0] if tl[0] < 0 else 0)
    right = int(br[0] - img.shape[1] if br[0] > img.shape[1] else 0)
    img = cv.copyMakeBorder(img, top, bottom, left, right, cv.BORDER_CONSTANT, value=[0, 0, 0])
    orig = (left, top)
    return img, orig


def size2rect(size):
    return (0, 0, size[1], size[0])
Warping function
def warpImage(img, H):
    # tweak the homography matrix to move the result to the first quadrant
    H_cover, pos = coverH(size2rect(img.shape), H)
    # find the bounding box of the output
    x, y, w, h = warpRect(size2rect(img.shape), H_cover)
    width, height = x + w, y + h
    # warp the image using the corrected homography matrix
    warped = cv.warpPerspective(img, H_cover, (width, height))
    # make the external boundary solid black, useful for masking
    warped = np.ascontiguousarray(warped, dtype=np.uint8)
    gray = cv.cvtColor(warped, cv.COLOR_RGB2GRAY)
    _, bw = cv.threshold(gray, 1, 255, cv.THRESH_BINARY)
    # https://stackoverflow.com/a/55806272/12447766
    major = cv.__version__.split('.')[0]
    if major == '3':
        _, cnts, _ = cv.findContours(bw, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
    else:
        cnts, _ = cv.findContours(bw, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
    warped = cv.drawContours(warped, cnts, 0, [0, 0, 0], lineType=cv.LINE_4)
    return (warped, pos)
Combining warped and destination images
This is the step where image enhancement such as exposure compensation becomes involved.
In order to keep things simple, we will use mean value blending.
The easiest solution would be to overwrite the existing data in the destination image, but the averaging operation is not a burden for us.
# only the non-zero pixels are weighted to the average
def mean_blend(img1, img2):
    assert(img1.shape == img2.shape)
    locs1 = np.where(cv.cvtColor(img1, cv.COLOR_RGB2GRAY) != 0)
    blended1 = np.copy(img2)
    blended1[locs1[0], locs1[1]] = img1[locs1[0], locs1[1]]
    locs2 = np.where(cv.cvtColor(img2, cv.COLOR_RGB2GRAY) != 0)
    blended2 = np.copy(img1)
    blended2[locs2[0], locs2[1]] = img2[locs2[0], locs2[1]]
    blended = cv.addWeighted(blended1, 0.5, blended2, 0.5, 0)
    return blended
def warpPano(prevPano, img, H, orig):
    # correct homography matrix
    T = np.array([[1, 0, -orig[0]], [0, 1, -orig[1]], [0, 0, 1]])
    H_corr = H.dot(T)
    # warp the image and obtain shift amount of origin
    result, pos = warpImage(prevPano, H_corr)
    xpos, ypos = pos
    # zero pad the result
    rect = (xpos, ypos, img.shape[1], img.shape[0])
    result, _ = addBorder(result, rect)
    # mean value blending
    idx = np.s_[ypos:ypos + img.shape[0], xpos:xpos + img.shape[1]]
    result[idx] = mean_blend(result[idx], img)
    # crop extra paddings
    x, y, w, h = cv.boundingRect(cv.cvtColor(result, cv.COLOR_RGB2GRAY))
    result = result[y:y + h, x:x + w]
    # return the resulting image with shift amount
    return (result, (xpos - x, ypos - y))
Stitching multiple images given combination pattern
# base image is the last image in each iteration
def blend_multiple_images(images, homographies):
    N = len(images)
    assert(N >= 2)
    assert(len(homographies) == N - 1)
    pano = np.copy(images[0])
    pos = (0, 0)
    for i in range(N - 1):
        img = images[i + 1]
        # get homography matrix
        H = homographies[i]
        # warp pano onto image
        pano, pos = warpPano(pano, img, H, pos)
    return (pano, pos)
The method above successively warps the previously combined image, called pano, onto the next image.
A pattern, however, may have conjunction points for the best stitching view.
For example
1 2 3
4 5 6
The best pattern to combine these images is
1 -> 2 <- 3
     |
     V
4 -> 5 <- 6
Therefore, we need one last function to combine 1 & 2 with 2 & 3, or 1235 with 456 at node 5.
from operator import sub


# no warping here, useful for combining two different stitched images
# the image at given origin coordinates must be the same
def patchPano(img1, img2, orig1=(0, 0), orig2=(0, 0)):
    # bottom right points
    br1 = (img1.shape[1] - 1, img1.shape[0] - 1)
    br2 = (img2.shape[1] - 1, img2.shape[0] - 1)
    # distance from orig to br
    diag2 = tuple(map(sub, br2, orig2))
    # possible pano corner coordinates based on img1
    extremum = np.array([(0, 0), br1,
                         tuple(map(sum, zip(orig1, diag2))),
                         tuple(map(sub, orig1, orig2))])
    bb = cv.boundingRect(extremum)
    # patch img1 to img2
    pano, shift = addBorder(img1, bb)
    orig = tuple(map(sum, zip(orig1, shift)))
    idx = np.s_[orig[1]:orig[1] + img2.shape[0] - orig2[1],
                orig[0]:orig[0] + img2.shape[1] - orig2[0]]
    subImg = img2[orig2[1]:img2.shape[0], orig2[0]:img2.shape[1]]
    pano[idx] = mean_blend(pano[idx], subImg)
    return (pano, orig)
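Putting the pieces together for two images, a rough usage sketch (hypothetical file names; it assumes import cv2 as cv and import numpy as np, and reuses the descriptor, matcher and functions defined above):

img1 = cv.imread('left.jpg')    # hypothetical input files
img2 = cv.imread('right.jpg')

# detect features and match them with the ratio test, as shown earlier
kps1, desc1 = descriptor.detectAndCompute(img1, mask=None)
kps2, desc2 = descriptor.detectAndCompute(img2, mask=None)
rawMatch = matcher.knnMatch(desc2, desc1, k=2)
matches = [(m[0].trainIdx, m[0].queryIdx) for m in rawMatch
           if len(m) == 2 and m[0].distance < 0.75 * m[1].distance]

# homography that maps points of img1 onto img2
pts1 = np.float32([kps1[t].pt for (t, q) in matches])
pts2 = np.float32([kps2[q].pt for (t, q) in matches])
H, status = cv.findHomography(pts1, pts2, cv.RANSAC)

# warp img1 onto img2 and blend them
pano, pos = blend_multiple_images([img1, img2], [H])
cv.imwrite('pano.jpg', pano)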
For a quick demo, you can run the Python code in GitHub.
If you want to use the above methods in C++, you can have a look at Stitch library.
Any PR or edit to this post is welcome.
As an alternative to the last step that @Burak gave, this is the way I used, since I knew the number of images in each row (chunks); multiStitching is nothing but a function that stitches images horizontally:
def stitchingImagesHV(img_list, size):
    """
    As our multi stitching algorithm works on the horizontal line, we will hack
    it to use also the vertical stitching by rotating each row "stitch_img" and
    apply the same technique, and after that, the final result is rotated back to the
    original direction.
    """
    # Generate row chunks of "size" length from image list
    chunks = [img_list[i:i + size] for i in range(0, len(img_list), size)]
    list_rotated_images = []
    for i in range(len(chunks)):
        stitch_img = multiStitching(chunks[i])
        stitch_img_rotated = cv2.rotate(stitch_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        list_rotated_images.append(stitch_img_rotated.astype('uint8'))
    stitch_img2 = multiStitching(list_rotated_images)
    return cv2.rotate(stitch_img2, cv2.ROTATE_90_CLOCKWISE)
I have a set of images, each containing a table. Some images already have the tables aligned and the borders drawn; it is not hard to identify the main table in those images using Canny edge detection. However, some images have their tables without any borders, so I am trying to identify the table in an image and plot its border contours as well as its columns.
I am using OpenCV version 3.4, and the approach I'm generally taking is as follows:
dilate the grayscale image to identify the text spots
apply cv2.findContours function to get text's bounding boxes.
cluster the bounding boxes in case smaller tables were identified instead of the main table.
try to draw the contours in hopes to identify the borders of the table.
This approach seems to work to a certain extent but the drawn contours are not at all accurate.
img, contours, hierarchy = cv2.findContours(gray_matrix, cv2.RETR_LIST,
                                            cv2.CHAIN_APPROX_SIMPLE)

# get bounding boxes around any text
boxes = []
for contour in contours:
    box = cv2.boundingRect(contour)
    h = box[3]
    boxes.append(box)

rows = {}
cols = {}
# Clustering the bounding boxes by their positions
for box in boxes:
    (x, y, w, h) = box
    col_key = 10  # cell threshold
    row_key = 10  # cell threshold
    cols[row_key] = [box] if col_key not in cols else cols[col_key] + [box]
    rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]

# Filtering out the clusters having less than 4 cols
table_cells = list(filter(lambda r: len(r) >= 4, rows.values()))
# Sorting the row cells by x coord
table_cells = [list(sorted(tb)) for tb in table_cells]
table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))

# attempt to identify columns
max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]

hor_lines = []
ver_lines = []
for box in table_cells:
    x = box[0][0]
    y = box[0][1]
    hor_lines.append((x, y, max_x, y))

for box in table_cells[0]:
    x = box[0]
    y = box[1]
    ver_lines.append((x, y, x, max_y))

(x, y, w, h) = table_cells[0][-1]
ver_lines.append((max_x, y, max_x, max_y))

(x, y, w, h) = table_cells[0][0]
hor_lines.append((x, max_y, max_x, max_y))

for line in ver_lines:
    [x1, y1, x2, y2] = line
    cv2.line(output_image, (x1, y1), (x2, y2), (0, 0, 255), 1)

cv2.imshow('Proper Table Borders', output_image)
I am trying to achieve something like the below image.
In short, how can I find the invisible borders of a table-structure in an image as well as identify the x coordinates of the identified table's columns?
I know the above code is not at all optimal for producing the required outcome, but I am still learning OpenCV, so I'm trying various approaches and still have not reached the desired result.
Try a vertical profile, which is the count of text (black) pixels sharing the same X coordinate within a certain (Y0, Y1) range (the table's vertical span). Zero or near-zero regions will indicate the table's column borders. Here is a hand-drawn, approximate profile for your example:
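In code, such a profile could be computed along these lines (a minimal sketch; gray is assumed to be the grayscale page image and y0, y1 the vertical span of the table, none of which come from the snippet above):

import cv2
import numpy as np

# binarize so that text pixels become 1 and background 0 (Otsu threshold on the grayscale image)
_, binary = cv2.threshold(gray, 0, 1, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# vertical profile: number of text pixels in each column within the table's vertical span
profile = binary[y0:y1, :].sum(axis=0)

# columns whose count is (near) zero are candidate column borders
border_xs = np.where(profile <= 1)[0]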
I need to divide an image into regions of pixels whose RGB values pass a certain test.
I'm OK with scanning the image and checking each pixel's value; however, the part about clustering them into regions and then getting those regions' coordinates (x, y, width, height) leaves me totally in the dark :)
here's the code I have so far
from PIL import Image


def detectRedRegions(PILImage):
    image = PILImage.load()
    width, height = PILImage.size
    reds = []
    h = 0
    while h < height:
        w = 0
        while w < width:
            px = image[w, h]
            if is_red(px):
                reds.append([w, h])
                # Here's where I'm being clueless
            w += 1
        h += 1
I read tons about clustering but just can't wrap my head around this subject. Any code example that fits my needs would be great (and hopefully enlightening).
Thanks!
[EDIT]
While the solution below works, it can be made better. Here is a version with better names and better performance:
from itertools import product
from PIL import Image, ImageDraw


def closed_regions(image, test):
    """
    Return all closed regions in image whose pixels satisfy test.
    """
    pixel = image.load()
    xs, ys = map(xrange, image.size)
    neighbors = dict((xy, set([xy])) for xy in product(xs, ys) if test(pixel[xy]))
    for a, b in neighbors:
        for cd in (a + 1, b), (a, b + 1):
            if cd in neighbors:
                neighbors[a, b].add(cd)
                neighbors[cd].add((a, b))
    seen = set()

    def component(node, neighbors=neighbors, seen=seen, see=seen.add):
        todo = set([node])
        next_todo = todo.pop
        while todo:
            node = next_todo()
            see(node)
            todo |= neighbors[node] - seen
            yield node

    return (set(component(node)) for node in neighbors if node not in seen)


def boundingbox(coordinates):
    """
    Return the bounding box that contains all coordinates.
    """
    xs, ys = zip(*coordinates)
    return min(xs), min(ys), max(xs), max(ys)


def is_black_enough(pixel):
    r, g, b = pixel
    return r < 10 and g < 10 and b < 10


if __name__ == '__main__':
    image = Image.open('some_image.jpg')
    draw = ImageDraw.Draw(image)
    for region in closed_regions(image, is_black_enough):
        draw.rectangle(boundingbox(region), outline=(255, 0, 0))
    image.show()
Unlike disjoint_areas() below, closed_regions() returns sets of pixel coordinates instead of their bounding boxes.
Also, if we use flooding instead of the connected components algorithm, we can make it even simpler and about twice as fast:
from itertools import chain, product
from PIL import Image, ImageDraw

flatten = chain.from_iterable


def closed_regions(image, test):
    """
    Return all closed regions in image whose pixels satisfy test.
    """
    pixel = image.load()
    xs, ys = map(xrange, image.size)
    todo = set(xy for xy in product(xs, ys) if test(pixel[xy]))
    while todo:
        region = set()
        edge = set([todo.pop()])
        while edge:
            region |= edge
            todo -= edge
            edge = todo.intersection(
                flatten(((x - 1, y), (x, y - 1), (x + 1, y), (x, y + 1)) for x, y in edge))
        yield region

# rest like above
It was inspired by Eric S. Raymond's version of floodfill.
[/EDIT]
One could probably use floodfill, but I like this:
from collections import defaultdict
from PIL import Image, ImageDraw


def connected_components(edges):
    """
    Given a graph represented by edges (i.e. pairs of nodes), generate its
    connected components as sets of nodes.

    Time complexity is linear with respect to the number of edges.
    """
    neighbors = defaultdict(set)
    for a, b in edges:
        neighbors[a].add(b)
        neighbors[b].add(a)
    seen = set()

    def component(node, neighbors=neighbors, seen=seen, see=seen.add):
        unseen = set([node])
        next_unseen = unseen.pop
        while unseen:
            node = next_unseen()
            see(node)
            unseen |= neighbors[node] - seen
            yield node

    return (set(component(node)) for node in neighbors if node not in seen)


def matching_pixels(image, test):
    """
    Generate all pixel coordinates where pixel satisfies test.
    """
    width, height = image.size
    pixels = image.load()
    for x in xrange(width):
        for y in xrange(height):
            if test(pixels[x, y]):
                yield x, y


def make_edges(coordinates):
    """
    Generate all pairs of neighboring pixel coordinates.
    """
    coordinates = set(coordinates)
    for x, y in coordinates:
        if (x - 1, y - 1) in coordinates:
            yield (x, y), (x - 1, y - 1)
        if (x, y - 1) in coordinates:
            yield (x, y), (x, y - 1)
        if (x + 1, y - 1) in coordinates:
            yield (x, y), (x + 1, y - 1)
        if (x - 1, y) in coordinates:
            yield (x, y), (x - 1, y)
        yield (x, y), (x, y)


def boundingbox(coordinates):
    """
    Return the bounding box of all coordinates.
    """
    xs, ys = zip(*coordinates)
    return min(xs), min(ys), max(xs), max(ys)


def disjoint_areas(image, test):
    """
    Return the bounding boxes of all non-consecutive areas
    whose pixels satisfy test.
    """
    for each in connected_components(make_edges(matching_pixels(image, test))):
        yield boundingbox(each)


def is_black_enough(pixel):
    r, g, b = pixel
    return r < 10 and g < 10 and b < 10


if __name__ == '__main__':
    image = Image.open('some_image.jpg')
    draw = ImageDraw.Draw(image)
    for rect in disjoint_areas(image, is_black_enough):
        draw.rectangle(rect, outline=(255, 0, 0))
    image.show()
Here, pairs of neighboring pixels that both satisfy is_black_enough() are interpreted as edges in a graph. Also, every pixel is viewed as its own neighbor. Due to this re-interpretation we can use the connected components algorithm for graphs, which is quite easy to implement. The result is the sequence of the bounding boxes of all areas whose pixels satisfy is_black_enough().
What you want is called area labeling or connected component detection in image processing.
There is an implementation provided in the scipy.ndimage package.
So the following should work provided you have numpy + scipy installed
import numpy as np
import scipy.ndimage as ndi
from PIL import Image

image = Image.open('some_image.jpg')
# convert to numpy array (no data copy done since both use buffer protocol)
image = np.asarray(image)
# generate a black and white image marking red pixels as 1
bw = is_red(image)
# labeling: each region is associated with an int
labels, n = ndi.label(bw)
# provide bounding box for each region in the form of tuples of slices
objects = ndi.find_objects(labels)
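The slices returned by find_objects can be turned into (x, y, width, height) boxes, and is_red has to be a vectorized test on the array. A small sketch of both (the thresholds are illustrative assumptions, not part of the answer above):

def is_red(arr):
    # arr is an H x W x 3 RGB array; call a pixel "red" when the red channel dominates
    return (arr[..., 0] > 100) & (arr[..., 1] < 50) & (arr[..., 2] < 50)


boxes = []
for ys, xs in objects:
    # each object is a (row slice, column slice) pair
    boxes.append((xs.start, ys.start, xs.stop - xs.start, ys.stop - ys.start))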