I need to divide an image into regions of pixels whose RGB values pass a certain test.
I'm OK with scanning the image and checking each pixel's value, but the part about clustering them into regions and then getting those regions' coordinates (x, y, width, height) leaves me totally in the dark :)
Here's the code I have so far:
from PIL import Image
def detectRedRegions(PILImage):
image = PILImage.load()
width, height = PILImage.size
reds = []
h = 0
while h < height:
w = 0
while w < width:
px = image[w, h]
if is_red(px):
reds.append([w, h])
# Here's where I'm being clueless
w +=1
h +=1
I have read tons about clustering, but I just can't wrap my head around the subject. Any code example that fits my needs would be great (and hopefully enlightening).
Thanks!
[EDIT]
While the solution below works, it can be made better. Here is a version with better names and better performance:
from itertools import product
from PIL import Image, ImageDraw
def closed_regions(image, test):
"""
Return all closed regions in image whose pixels satisfy test.
"""
pixel = image.load()
xs, ys = map(xrange, image.size)
neighbors = dict((xy, set([xy])) for xy in product(xs, ys) if test(pixel[xy]))
for a, b in neighbors:
for cd in (a + 1, b), (a, b + 1):
if cd in neighbors:
neighbors[a, b].add(cd)
neighbors[cd].add((a, b))
seen = set()
def component(node, neighbors=neighbors, seen=seen, see=seen.add):
todo = set([node])
next_todo = todo.pop
while todo:
node = next_todo()
see(node)
todo |= neighbors[node] - seen
yield node
return (set(component(node)) for node in neighbors if node not in seen)
def boundingbox(coordinates):
"""
Return the bounding box that contains all coordinates.
"""
xs, ys = zip(*coordinates)
return min(xs), min(ys), max(xs), max(ys)
def is_black_enough(pixel):
r, g, b = pixel
return r < 10 and g < 10 and b < 10
if __name__ == '__main__':
image = Image.open('some_image.jpg')
draw = ImageDraw.Draw(image)
for region in closed_regions(image, is_black_enough):
draw.rectangle(boundingbox(region), outline=(255, 0, 0))
image.show()
Unlike disjoint_areas() below, closed_regions() returns sets of pixel coordinates instead of their bounding boxes.
Also, if we use flooding instead of the connected components algorithm, we can make it even simpler and about twice as fast:
from itertools import chain, product
from PIL import Image, ImageDraw
flatten = chain.from_iterable
def closed_regions(image, test):
"""
Return all closed regions in image whose pixels satisfy test.
"""
pixel = image.load()
xs, ys = map(xrange, image.size)
todo = set(xy for xy in product(xs, ys) if test(pixel[xy]))
while todo:
region = set()
edge = set([todo.pop()])
while edge:
region |= edge
todo -= edge
edge = todo.intersection(
flatten(((x - 1, y), (x, y - 1), (x + 1, y), (x, y + 1)) for x, y in edge))
yield region
# rest like above
It was inspired by Eric S. Raymond's version of floodfill.
[/EDIT]
One could probably use floodfill, but I like this:
from collections import defaultdict
from PIL import Image, ImageDraw
def connected_components(edges):
"""
Given a graph represented by edges (i.e. pairs of nodes), generate its
connected components as sets of nodes.
Time complexity is linear with respect to the number of edges.
"""
neighbors = defaultdict(set)
for a, b in edges:
neighbors[a].add(b)
neighbors[b].add(a)
seen = set()
def component(node, neighbors=neighbors, seen=seen, see=seen.add):
unseen = set([node])
next_unseen = unseen.pop
while unseen:
node = next_unseen()
see(node)
unseen |= neighbors[node] - seen
yield node
return (set(component(node)) for node in neighbors if node not in seen)
def matching_pixels(image, test):
"""
Generate all pixel coordinates where pixel satisfies test.
"""
width, height = image.size
pixels = image.load()
for x in xrange(width):
for y in xrange(height):
if test(pixels[x, y]):
yield x, y
def make_edges(coordinates):
"""
Generate all pairs of neighboring pixel coordinates.
"""
coordinates = set(coordinates)
for x, y in coordinates:
if (x - 1, y - 1) in coordinates:
yield (x, y), (x - 1, y - 1)
if (x, y - 1) in coordinates:
yield (x, y), (x, y - 1)
if (x + 1, y - 1) in coordinates:
yield (x, y), (x + 1, y - 1)
if (x - 1, y) in coordinates:
yield (x, y), (x - 1, y)
yield (x, y), (x, y)
def boundingbox(coordinates):
"""
Return the bounding box of all coordinates.
"""
xs, ys = zip(*coordinates)
return min(xs), min(ys), max(xs), max(ys)
def disjoint_areas(image, test):
"""
Return the bounding boxes of all disjoint areas
whose pixels satisfy test.
"""
for each in connected_components(make_edges(matching_pixels(image, test))):
yield boundingbox(each)
def is_black_enough(pixel):
r, g, b = pixel
return r < 10 and g < 10 and b < 10
if __name__ == '__main__':
image = Image.open('some_image.jpg')
draw = ImageDraw.Draw(image)
for rect in disjoint_areas(image, is_black_enough):
draw.rectangle(rect, outline=(255, 0, 0))
image.show()
Here, pairs of neighboring pixels that both satisfy is_black_enough() are interpreted as edges in a graph. Also, every pixel is viewed as its own neighbor. Thanks to this re-interpretation we can use the connected components algorithm for graphs, which is quite easy to implement. The result is the sequence of bounding boxes of all areas whose pixels satisfy is_black_enough().
What you want is called area labeling or connected component detection in image processing.
There is an implementation provided in the scipy.ndimage package.
So the following should work, provided you have numpy + scipy installed:
import numpy as np
import scipy.ndimage as ndi
from PIL import Image
image = Image.open('some_image.jpg')
# convert to numpy array (no data copy done since both use buffer protocol)
image = np.asarray(image)
# generate a black and white image marking red pixels as 1
bw = is_red(image)
# labeling : each region is associated with an int
labels, n = ndi.label(bw)
# provide bounding box for each region in the form of tuples of slices
objects = ndi.find_objects(labels)
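For completeness, a hedged sketch of the two pieces the snippet leaves open: a vectorized is_red() (the thresholds below are made up; adjust them to your own test) and the conversion of the find_objects() slices into the (x, y, width, height) tuples the question asks for:
def is_red(arr):
    # arr is the whole (H, W, 3) array; return a boolean mask of "red enough" pixels
    r, g, b = arr[..., 0].astype(int), arr[..., 1].astype(int), arr[..., 2].astype(int)
    return (r > 150) & (g < 100) & (b < 100)

# objects is a list of (row_slice, col_slice) pairs, one per labeled region
boxes = [(sx.start, sy.start, sx.stop - sx.start, sy.stop - sy.start)
         for sy, sx in objects]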
Consider the following image:
and the following bounding contour (which is a smoothed version of the output of a text-detection neural network run on the image above), so this contour is a given.
I need to warp both images so that I end up with a straight enough text line that can be fed to a text recognition neural network:
using Piecewise Affine Transformation, or some other method, with an implementation if possible, or the key points of an implementation in Python.
I know how to find the medial axis, order its points, simplify it (e.g. using the Douglas-Peucker algorithm), and find the corresponding points on a straight line.
EDIT: the question can be rephrased, naively, as follows:
Have you tried the "puppet warp" feature in Adobe Photoshop? You specify "joint" points on an image and move these points to the desired positions to warp the image. We can calculate the source points from a simplified medial axis (e.g. 20 points instead of 200) and the corresponding target points on a straight line. How do we perform a Piecewise Affine Transformation using these two sets of points (source and target)?
EDIT: modified the images, my bad
Papers
Here's a paper that achieves the desired result:
A Novel Technique for Unwarping Curved Handwritten Texts Using Mathematical Morphology and Piecewise Affine Transformation
another paper: A novel method for straightening curved text-lines in stylistic documents
Similar questions:
Straighten B-Spline
Challenge : Curved text extraction using python
How to convert curves in images to lines in Python?
Deforming an image so that curved lines become straight lines
Straightening a curved contour
Full code is also available in this notebook; Runtime -> Run all to reproduce the result.
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from scipy import interpolate
from scipy.spatial import distance
from shapely.geometry import LineString, GeometryCollection, MultiPoint
from skimage.morphology import skeletonize
from sklearn.decomposition import PCA
from warp import PiecewiseAffineTransform # https://raw.githubusercontent.com/TimSC/image-piecewise-affine/master/warp.py
# Helper functions
def extendline(line, length):
a = line[0]
b = line[1]
lenab = distance.euclidean(a, b)
cx = b[0] + ((b[0] - a[0]) / lenab * length)
cy = b[1] + ((b[1] - a[1]) / lenab * length)
return [cx, cy]
def XYclean(x, y):
xy = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=1)
# make PCA object
pca = PCA(2)
# fit on data
pca.fit(xy)
# transform into pca space
xypca = pca.transform(xy)
newx = xypca[:, 0]
newy = xypca[:, 1]
# sort
indexSort = np.argsort(x)
newx = newx[indexSort]
newy = newy[indexSort]
# add some more points (optional)
f = interpolate.interp1d(newx, newy, kind='linear')
newX = np.linspace(np.min(newx), np.max(newx), 100)
newY = f(newX)
# #smooth with a filter (optional)
# window = 43
# newY = savgol_filter(newY, window, 2)
# return back to old coordinates
xyclean = pca.inverse_transform(np.concatenate((newX.reshape(-1, 1), newY.reshape(-1, 1)), axis=1))
xc = xyclean[:, 0]
yc = xyclean[:, 1]
return np.hstack((xc.reshape(-1, 1), yc.reshape(-1, 1))).astype(int)
def contour2skeleton(cnt):
x, y, w, h = cv2.boundingRect(cnt)
cnt_trans = cnt - [x, y]
bim = np.zeros((h, w))
bim = cv2.drawContours(bim, [cnt_trans], -1, color=255, thickness=cv2.FILLED) // 255
sk = skeletonize(bim > 0)
#####
skeleton_yx = np.argwhere(sk > 0)
skeleton_xy = np.flip(skeleton_yx, axis=None)
xx, yy = skeleton_xy[:, 0], skeleton_xy[:, 1]
skeleton_xy = XYclean(xx, yy)
skeleton_xy = skeleton_xy + [x, y]
return skeleton_xy
mm = cv2.imread('cont.png', cv2.IMREAD_GRAYSCALE)
plt.imshow(mm)
cnts, _ = cv2.findContours(mm.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cont = cnts[0].reshape(-1, 2)
# find skeleton
sk = contour2skeleton(cont)
mm = np.zeros_like(mm)
cv2.polylines(mm, [sk], False, 255, 2)
plt.imshow(mm)
# simplify the skeleton
ln = LineString(sk).simplify(2)
sk_simp = np.int0(ln.coords)
mm = np.zeros_like(mm)
for pt in sk_simp:
cv2.circle(mm, pt, 5, 255, -1)
plt.imshow(mm)
# extend both ends of the skeleton
print(len(sk_simp))
a, b = sk_simp[1], sk_simp[0]
c1 = np.int0(extendline([a, b], 50))
sk_simp = np.vstack([c1, sk_simp])
a, b = sk_simp[-2], sk_simp[-1]
c2 = np.int0(extendline([a, b], 50))
sk_simp = np.vstack([sk_simp, c2])
print(len(sk_simp))
cv2.circle(mm, c1, 10, 255, -1)
cv2.circle(mm, c2, 10, 255, -1)
plt.imshow(mm)
########
# find the target points
########
pts1 = sk_simp.copy()
dists = [distance.euclidean(p1, p2) for p1, p2 in zip(pts1[:-1], pts1[1:])]
zip1 = list(zip(pts1[:-1], dists))
# find the first 2 target points
a = pts1[0]
b = a - (dists[0], 0)
pts2 = [a, b, ]
for z in zip1[1:]:
lastpt = pts2[-1]
pt, dst = z
ln = [a, lastpt]
c = extendline(ln, dst)
pts2.append(c)
pts2 = np.int0(pts2)
ln1 = LineString(pts1)
ln2 = LineString(pts2)
GeometryCollection([ln1.buffer(5), ln2.buffer(5),
MultiPoint(pts2), MultiPoint(pts1)])
########
# create translated copies of source and target points
# 50 is arbitrary
pts1 = np.vstack([pts1 + [0, 50], pts1 + [0, -50]])
pts2 = np.vstack([pts2 + [0, 50], pts2 + [0, -50]])
MultiPoint(pts1)
########
# performing the warping
im = Image.open('orig.png')
dstIm = Image.new(im.mode, im.size, color=(255, 255, 255))
# Perform transform
PiecewiseAffineTransform(im, pts1, dstIm, pts2)
plt.figure(figsize=(10, 10))
plt.imshow(dstIm)
1- Find the medial axis, e.g. using skimage.morphology.skeletonize, and simplify it, e.g. using Shapely's object.simplify (I used a tolerance of 2). The medial axis points are in white:
2- Find the corresponding points on a straight line, using the distance between each point and the next:
3- Also add extra points at the ends, colored blue, so that the points cover the entire contour length.
4- Create 2 copies of the source and target points, one translated up and the other translated down (I chose an offset of 50 here), so the source points now look like this. Note that a simple upward/downward displacement may not be the best approach for all contours, e.g. if the contour curves by more than 45 degrees:
5- Using the code here, perform the PiecewiseAffineTransform using the source and target points. Here's the result; it's straight enough:
If the goal is to just unshift each column, then:
import numpy as np
from PIL import Image
source_img = Image.open("73614379-input-v2.png")
contour_img = Image.open("73614379-map-v3.png").convert("L")
assert source_img.size == contour_img.size
contour_arr = np.array(contour_img) != 0 # convert to boolean array
col_offsets = np.argmax(
contour_arr, axis=0
) # find the first non-zero row for each column
assert len(col_offsets) == source_img.size[0] # sanity check
min_nonzero_col_offset = np.min(
col_offsets[col_offsets > 0]
) # find the minimum non-zero row
target_img = Image.new("RGB", source_img.size, (255, 255, 255))
for x, col_offset in enumerate(col_offsets):
offset = col_offset - min_nonzero_col_offset if col_offset > 0 else 0
target_img.paste(
source_img.crop((x, offset, x + 1, source_img.size[1])), (x, 0)
)
target_img.save("unshifted3.png")
With the new input and the new contour from the OP, this outputs the following image:
I am trying to project a point cloud into a 2d image as if it were a satellite image.
I have six files I want to project and the point clouds are quite big.
For the biggest one, I have len(las.X) = 37_763_608, max(las.X) - min(las.X) = 122_124, and max(las.Y) - min(las.Y) = 273_683, so sometimes when I calculate the size I get an overflow error.
My first try was this, but it was quite slow and took about 28 minutes to run.
Here I added the loops with k_x and k_y because the image I got was mostly black and I wanted to have colour everywhere. I tried looping around each point/pixel to make it 5 times bigger, but this is the slow part.
see pictures
Colour version with the k padding
Black and white version without the padding
Ideally, I would like the colour from one point/pixel to shift to the colour of its neighbours, so that there is a gradient between them, and not to have any black left over from initializing the image with np.zeros.
import laspy
import numpy as np
from PIL import Image
import cv2
from tqdm import tqdm
las = laspy.read("area1.las")
def las_to_rgb(las):
x, y = las.X, las.Y
delta_x = max(x) - min(x)
delta_y = max(y) - min(y)
re_x = x - min(x)
re_y = y - min(y)
# las.red, green and blue are stored as 16bit
r, g, b = (las.red/256).astype(np.uint8), (las.green/256).astype(np.uint8), (las.blue/256).astype(np.uint8)
image = np.zeros((delta_y+1, delta_x+1, 3))
for i, val in enumerate(zip(tqdm(re_x), re_y)):
for k_x in range(-5, 6):
for k_y in range(-5, 6):
if val[0] + k_x < 0 or val[0] + k_x >= delta_x + 1:
k_x = 0
if val[1] + k_y < 0 or val[1] + k_y >= delta_y + 1:
k_y = 0
image[val[1]+k_y, val[0]+k_x] = [b[i], g[i], r[i]]
cv2.imwrite("test.png", image)
cv2.waitKey(0)
I found out how to do it faster in numpy, but it can only do one colour at a time, so I decided to loop over multiple colours, but I think I am doing something wrong when I change the type to np.uint8 because Python takes up to 50 GB of RAM.
With numpy:
One colour:
def nu_pro(las):
x, y = las.X, las.Y
delta_x = max(x) - min(x)
delta_y = max(y) - min(y)
xs = x - min(x)
ys = y - min(y)
img_size = (delta_y+1, delta_x+1) # +1 for ravel_multi_index
bgr = np.array([(las.blue/256).astype(np.uint8), (las.green/256).astype(np.uint8), (las.red/256).astype(np.uint8)])
coords = np.stack((ys, xs))
abs_coords = np.ravel_multi_index(coords, img_size)
image = np.bincount(abs_coords, weights=bgr[0], minlength=img_size[1]*img_size[0])  # one channel at a time, e.g. blue
image = image.reshape(img_size)
cv2.imwrite("test.png", image)
cv2.waitKey(0)
For rgb
def nu_pro_rgb(las):
x, y = las.X, las.Y
delta_x = max(x) - min(x)
delta_y = max(y) - min(y)
xs = x - min(x)
ys = y - min(y)
img_size = (delta_y+1, delta_x+1) # +1 for ravel_multi_index
rgb = np.array([(las.red/256).astype(np.uint8), (las.green/256).astype(np.uint8), (las.blue/256).astype(np.uint8)])
image = []
coords = np.stack((ys, xs))
abs_coords = np.ravel_multi_index(coords, img_size)
for i, color in enumerate(tqdm(rgb)):
img = np.bincount(abs_coords, weights=color, minlength=img_size[1]*img_size[0])
image.append(img.reshape(img_size))
image = np.uint8(np.array(image))
# I am probably messing up this transpose but I'll figure it out eventually
im = Image.fromarray(image.T, "RGB")
im.save("pil.png")
Any indication would be welcome :)
EDIT for clarification about the colours.
When there is overlap, it should be the point with the highest z coordinate that is displayed.
For the colouring, in the picture below, the points between A and B should be a colour gradient from A to B.
If it is like the yellow point, then an average of the neighbouring colours (without the black, if present).
I hope I am making some sense.
To interpolate, there are lots of libraries.
This uses cubic interpolation, but it only works inside the convex hull, so the points outside the convex hull are taken from the nearest neighbor.
If you are interpolating GIS data, you may look at Kriging interpolation, which can interpolate outside the convex hull.
This code does not check whether a point with lower Z is under one with higher Z. You have to delete those points to avoid having them interpolated.
from scipy.interpolate import griddata
import numpy as np
import matplotlib.pyplot as plt
import cv2
# create data
height, width = 256, 256
# generate a random sample of 1000 (x,y) coordinates and colors
x, y, z = np.random.randint(0, 256, size=(3, 1000))
color = np.random.randint(0, 256, size=(1000, 3))
# sort x,y,z by z in ascending order so the highest z is plotted over the lowest z
zSort = z.argsort()
x, y, z, color = x[zSort], y[zSort], z[zSort], color[zSort]
# interpolation
# generate a grid where the interpolation will be calculated
X, Y = np.meshgrid(np.arange(width), np.arange(height))
R = griddata(np.vstack((x, y)).T, color[:, 0], (X, Y), method='cubic')
Rlinear= griddata(np.vstack((x, y)).T, color[:, 0], (X, Y), method='nearest')
G = griddata(np.vstack((x, y)).T, color[:, 1], (X, Y), method='cubic')
Glinear= griddata(np.vstack((x, y)).T, color[:, 1], (X, Y), method='nearest')
B = griddata(np.vstack((x, y)).T, color[:, 2], (X, Y), method='cubic')
Blinear= griddata(np.vstack((x, y)).T, color[:, 2], (X, Y), method='nearest')
#Fill empty values with nearest neighbor
R[np.isnan(R)] = Rlinear[np.isnan(R)]
G[np.isnan(G)] = Glinear[np.isnan(G)]
B[np.isnan(B)] = Blinear[np.isnan(B)]
R = R/np.max(R)
G = G/np.max(G)
B = B/np.max(B)
interpolated = cv2.merge((R, G, B))
plt.imshow(interpolated)
plt.scatter(x, y, c=color/255, marker="s",s=1)
plt.show()
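Regarding the note above about points with lower Z: a hedged way to drop them before interpolating is to keep, for each integer (x, y) cell, only the sample with the largest z. A sketch continuing the same random data as above:
# sort by z ascending so the last occurrence of each (x, y) cell is the highest one
order = z.argsort()
xs, ys, zs, cs = x[order], y[order], z[order], color[order]
cell = ys.astype(np.int64) * width + xs.astype(np.int64)    # flatten (x, y) into one index
_, first_in_reversed = np.unique(cell[::-1], return_index=True)
keep = len(cell) - 1 - first_in_reversed                     # last occurrence = highest z
x, y, z, color = xs[keep], ys[keep], zs[keep], cs[keep]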
I do not have access to the format you use, so I will show how to quickly plot points at (x, y) coordinates with a colour for each point, and enlarge them with a kernel mask.
import numpy as np
import cv2
height, width = 256, 256
# generate a random sample of 1000 (x,y) coordinates and colors
x, y, = np.random.randint(0, 256, size=(2, 1000))
color = np.random.randint(0, 256, size=(1000, 3))
# generate a blank image
# int16 to manage overflow colors when convolving
pointsPlotted = np.zeros((height, width, 3), np.uint16)
# plot x,y,color into blankImage
pointsPlotted[y, x] = color
cv2.imshow("points", pointsPlotted.astype(np.uint8))
# convolve the image with a kernel of ones, size k
k = 5
kernel = np.ones((k, k), np.int16)
largerSquares = cv2.filter2D(src=pointsPlotted, ddepth=-1, kernel=kernel)
# limit max color to 255
largerSquares[largerSquares > 255] = 255
# Convert to uint8
largerSquares = largerSquares.astype(np.uint8)
cv2.imshow("Larger Squares", largerSquares)
Is this what you want?
On the overlaps, it adds the colors (capped at 255).
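If an average of the neighbouring colours is preferred over a capped sum (as the question's edit suggests), a hedged variant is to also convolve a per-pixel hit count and divide the summed colours by it:
# count of plotted points per pixel, blurred with the same k x k kernel
hits = cv2.filter2D(src=(pointsPlotted.sum(axis=2) > 0).astype(np.float32),
                    ddepth=-1, kernel=kernel.astype(np.float32))
sums = cv2.filter2D(src=pointsPlotted.astype(np.float32),
                    ddepth=-1, kernel=kernel.astype(np.float32))
averaged = np.zeros_like(sums)
np.divide(sums, hits[..., None], out=averaged, where=hits[..., None] > 0)  # mean where at least one point lands
cv2.imshow("Averaged Squares", averaged.astype(np.uint8))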
I have made a function that detects an object on screen using OpenCV matchTemplate and returns its center location as (x, y).
I want to compare the results of running the same function on two different objects, to detect the location of one object relative to the other. In my case the two objects are a player avatar and some bushes, and I want to know whether the player is currently standing near a bush or not. There is only one player avatar on screen, so the function returns a single (x, y) value for it, but there are multiple bushes and therefore multiple (x, y) values. I want a way to compare those two sets of points. Sorry about the code format.
def center(base_img, needle_img, th, color):
result = cv.matchTemplate(base_img, needle_img, cv.TM_CCOEFF_NORMED)
threshold = th
yloc, xloc = np.where(result >= threshold)
w = needle_img.shape[1]
h = needle_img.shape[0]
rectangles = []
for (x, y) in zip(xloc, yloc):
# append each rectangle twice so cv.groupRectangles does not discard single detections
rectangles.append([int(x), int(y), int(w), int(h)])
rectangles.append([int(x), int(y), int(w), int(h)])
rectangles, weights = cv.groupRectangles(rectangles, 1, 0.4)
points = []
for (x, y, w, h) in rectangles:
centre_x = x + int(w / 2)
centre_y = y + int(h / 2)
cv.drawMarker(base_img, (centre_x, centre_y), color, cv.MARKER_DIAMOND)
points.append((centre_x, centre_y))
# cv.imshow("result", base_img)
# print(points)
# return points
You can loop through the centers of the bushes and get the distance to each bush, sqrt((x2 - x1)**2 + (y2 - y1)**2), or you could use nearest neighbour with numpy.
Post a code snippet if it worked.
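A minimal sketch of that distance check, assuming player is the single (x, y) tuple and bushes is the list of (x, y) tuples returned by the function above; the 50-pixel threshold is an arbitrary placeholder:
import numpy as np

def near_a_bush(player, bushes, max_dist=50):
    # Euclidean distance from the player's centre to every bush centre
    player = np.asarray(player, dtype=float)
    bushes = np.asarray(bushes, dtype=float)
    dists = np.sqrt(((bushes - player) ** 2).sum(axis=1))
    nearest = int(dists.argmin())
    return dists[nearest] <= max_dist, tuple(bushes[nearest].astype(int)), dists[nearest]

# usage: is_near, closest_bush, dist = near_a_bush((420, 310), [(400, 300), (600, 50)])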
I have a stationary camera which rapidly takes photos of a continuously moving product, from a fixed position and always at the same angle (translation perspective). I need to stitch all the images into a panoramic picture. I've tried using the Stitcher class. It worked, but it took a long time to compute.
I also tried another method using the SIFT detector, the FLANN-based matcher, finding the homography and then warping the images. This method works fine if I only use two images. For multiple images it still doesn't stitch them properly. Does anyone know the best and fastest image stitching algorithm for this case?
This is my code which uses the Stitcher class.
import time
import cv2
import os
import numpy as np
import sys
def main():
# read input images
imgs = []
path = 'pics_rotated/'
i = 0
for (root, dirs, files) in os.walk(path):
images = [f for f in files]
print(images)
for i in range(0,len(images)):
curImg = cv2.imread(path + images[i])
imgs.append(curImg)
stitcher = cv2.Stitcher.create(mode= 0)
status ,result = stitcher.stitch(imgs)
if status != cv2.Stitcher_OK:
print("Can't stitch images, error code = %d" % status)
sys.exit(-1)
cv2.imwrite("imagesout/output.jpg", result)
cv2.waitKey(0)
if __name__ == '__main__':
start = time.time()
main()
end = time.time()
print("Time --->>>>>", end - start)
cv2.destroyAllWindows()
Briefing
Although the OpenCV Stitcher class provides lots of methods and options to perform stitching, I find it hard to use because of its complexity.
Therefore, I will try to provide the minimum and fastest way to perform stitching.
In case you are wondering about more sophisticated approaches such as exposure compensation, I highly recommend looking at the detailed sample code.
As a side note, I will be grateful if someone can convert the following functions to use Stitcher class.
Introduction
In order to combine multiple images into the same perspective, the following operations are needed:
Detect and match features.
Compute homography (perspective transform between frames).
Warp one image onto the other perspective.
Combine the base and warped images while keeping track of the shift in origin.
Given the combination pattern, stitch multiple images.
Feature detection and matching
What are features?
They are distinguishable parts, like corners of a square, that are preserved across images.
There are different algorithms proposed for obtaining these characteristic points, like Harris, ORB, SIFT, SURF, etc.
See cv::Feature2d for the full list.
I will use SIFT because it is accurate and sufficiently fast.
A feature consists of a KeyPoint, which is the location in the image, and a descriptor, which is a set of numbers (e.g. a 128-D vector) that represents the properties of the feature.
After finding distinct points in images, we need to match the corresponding point pairs.
See cv::DescriptorMatcher.
I will use Flann-based descriptor matcher.
First, we initialize the descriptor and matcher classes.
descriptor = cv.SIFT.create()
matcher = cv.DescriptorMatcher.create(cv.DescriptorMatcher.FLANNBASED)
Then, we find the features in each image.
(kps, desc) = descriptor.detectAndCompute(image, mask=None)
Now we find the corresponding point pairs.
if (desc1 is not None and desc2 is not None and len(desc1) >=2 and len(desc2) >= 2):
rawMatch = matcher.knnMatch(desc2, desc1, k=2)
matches = []
# ensure the distance is within a certain ratio of each other (i.e. Lowe's ratio test)
ratio = 0.75
for m in rawMatch:
if len(m) == 2 and m[0].distance < m[1].distance * ratio:
matches.append((m[0].trainIdx, m[0].queryIdx))
Homography computation
Homography is the perspective transformation from one view to another.
The parallel lines in one view may not be parallel in another, like a road to sunset.
We need at least 4 corresponding point pairs.
More than that means redundant data, which has to be decomposed or eliminated.
The homography matrix transforms a point in the initial view to its warped position.
It is a 3x3 matrix that is computed by Direct Linear Transform algorithm.
There are 8 DoF and the last element in the matrix is 1.
[pt2] = H * [pt1]
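To make the relation above concrete, here is a small sketch of applying H to a single point in homogeneous coordinates; the division by the third component is what makes the mapping projective rather than affine:
import numpy as np

def apply_homography(H, pt):
    # lift (x, y) to (x, y, 1), multiply by the 3x3 matrix, then divide by w
    x, y, w = H @ np.array([pt[0], pt[1], 1.0])
    return (x / w, y / w)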
Now that we have corresponding point matches, we compute the homography.
The method we use to handle redundant data is RANSAC, which randomly selects 4 point pairs and uses the best fitting result.
See cv::findHomography for more options.
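Note that pts1 and pts2 are not constructed anywhere in the snippets above; a hedged sketch of gathering them from the matched keypoints, assuming kps1/desc1 belong to the first image and kps2/desc2 to the second (as in the ratio-test loop earlier):
import numpy as np

# matches holds (trainIdx, queryIdx) pairs from knnMatch(desc2, desc1),
# so trainIdx indexes kps1 and queryIdx indexes kps2
pts1 = np.float32([kps1[i].pt for (i, _) in matches])
pts2 = np.float32([kps2[j].pt for (_, j) in matches])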
if len(matches) > 4:
(H, status) = cv.findHomography(pts1, pts2, cv.RANSAC)
Warping to perspective
By computing homography, we know which point in the source image corresponds to which point in the destination image.
In order not to lose information from the source image, we need to pad the destination image by the amount that the transformed point falls to negative regions.
At the same time, we need to keep track of the shift amount of the origin for stitching multiple images.
Auxiliary functions
# find the ROI of a transformation result
def warpRect(rect, H):
x, y, w, h = rect
corners = np.float32([[x, y], [x, y + h - 1], [x + w - 1, y], [x + w - 1, y + h - 1]])
extremum = cv.perspectiveTransform(corners.reshape(1, -1, 2), H)[0]  # applies the projective division by w
minx, miny = np.min(extremum[:,0]), np.min(extremum[:,1])
maxx, maxy = np.max(extremum[:,0]), np.max(extremum[:,1])
xo = int(np.floor(minx))
yo = int(np.floor(miny))
wo = int(np.ceil(maxx - minx))
ho = int(np.ceil(maxy - miny))
outrect = (xo, yo, wo, ho)
return outrect
# homography matrix is translated to fit in the screen
def coverH(rect, H):
# obtain bounding box of the result
x, y, _, _ = warpRect(rect, H)
# shift amount to the first quadrant
xpos = int(-x if x < 0 else 0)
ypos = int(-y if y < 0 else 0)
# correct the homography matrix so that no point is thrown out
T = np.array([[1, 0, xpos], [0, 1, ypos], [0, 0, 1]])
H_corr = T.dot(H)
return (H_corr, (xpos, ypos))
# pad image to cover ROI, return the shift amount of origin
def addBorder(img, rect):
x, y, w, h = rect
tl = (x, y)
br = (x + w, y + h)
top = int(-tl[1] if tl[1] < 0 else 0)
bottom = int(br[1] - img.shape[0] if br[1] > img.shape[0] else 0)
left = int(-tl[0] if tl[0] < 0 else 0)
right = int(br[0] - img.shape[1] if br[0] > img.shape[1] else 0)
img = cv.copyMakeBorder(img, top, bottom, left, right, cv.BORDER_CONSTANT, value=[0, 0, 0])
orig = (left, top)
return img, orig
def size2rect(size):
return (0, 0, size[1], size[0])
Warping function
def warpImage(img, H):
# tweak the homography matrix to move the result to the first quadrant
H_cover, pos = coverH(size2rect(img.shape), H)
# find the bounding box of the output
x, y, w, h = warpRect(size2rect(img.shape), H_cover)
width, height = x + w, y + h
# warp the image using the corrected homography matrix
warped = cv.warpPerspective(img, H_cover, (width, height))
# make the external boundary solid black, useful for masking
warped = np.ascontiguousarray(warped, dtype=np.uint8)
gray = cv.cvtColor(warped, cv.COLOR_RGB2GRAY)
_, bw = cv.threshold(gray, 1, 255, cv.THRESH_BINARY)
# https://stackoverflow.com/a/55806272/12447766
major = cv.__version__.split('.')[0]
if major == '3':
_, cnts, _ = cv.findContours(bw, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
else:
cnts, _ = cv.findContours(bw, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
warped = cv.drawContours(warped, cnts, 0, [0, 0, 0], lineType=cv.LINE_4)
return (warped, pos)
Combining warped and destination images
This is the step where image enhancement such as exposure compensation becomes involved.
In order to keep things simple, we will use mean value blending.
The easiest solution would be to overwrite the existing data in the destination image, but the averaging operation is not a burden for us.
# only the non-zero pixels are weighted to the average
def mean_blend(img1, img2):
assert(img1.shape == img2.shape)
locs1 = np.where(cv.cvtColor(img1, cv.COLOR_RGB2GRAY) != 0)
blended1 = np.copy(img2)
blended1[locs1[0], locs1[1]] = img1[locs1[0], locs1[1]]
locs2 = np.where(cv.cvtColor(img2, cv.COLOR_RGB2GRAY) != 0)
blended2 = np.copy(img1)
blended2[locs2[0], locs2[1]] = img2[locs2[0], locs2[1]]
blended = cv.addWeighted(blended1, 0.5, blended2, 0.5, 0)
return blended
def warpPano(prevPano, img, H, orig):
# correct homography matrix
T = np.array([[1, 0, -orig[0]], [0, 1, -orig[1]], [0, 0, 1]])
H_corr = H.dot(T)
# warp the image and obtain shift amount of origin
result, pos = warpImage(prevPano, H_corr)
xpos, ypos = pos
# zero pad the result
rect = (xpos, ypos, img.shape[1], img.shape[0])
result, _ = addBorder(result, rect)
# mean value blending
idx = np.s_[ypos : ypos + img.shape[0], xpos : xpos + img.shape[1]]
result[idx] = mean_blend(result[idx], img)
# crop extra paddings
x, y, w, h = cv.boundingRect(cv.cvtColor(result, cv.COLOR_RGB2GRAY))
result = result[y : y + h, x : x + w]
# return the resulting image with shift amount
return (result, (xpos - x, ypos - y))
Stitching multiple images given combination pattern
# base image is the last image in each iteration
def blend_multiple_images(images, homographies):
N = len(images)
assert(N >= 2)
assert(len(homographies) == N - 1)
pano = np.copy(images[0])
pos = (0, 0)
for i in range(N - 1):
img = images[i + 1]
# get homography matrix
H = homographies[i]
# warp pano onto image
pano, pos = warpPano(pano, img, H, pos)
return (pano, pos)
The method above successively warps the previously combined image, called pano, onto the next image.
A pattern, however, may have junction points that give the best stitching view.
For example
1 2 3
4 5 6
The best pattern to combine these images is
1 -> 2 <- 3
|
V
4 -> 5 <- 6
Therefore, we need one last function to combine 1 & 2 with 2 & 3, or 1235 with 456 at node 5.
from operator import sub
# no warping here, useful for combining two different stitched images
# the image at given origin coordinates must be the same
def patchPano(img1, img2, orig1=(0,0), orig2=(0,0)):
# bottom right points
br1 = (img1.shape[1] - 1, img1.shape[0] - 1)
br2 = (img2.shape[1] - 1, img2.shape[0] - 1)
# distance from orig to br
diag2 = tuple(map(sub, br2, orig2))
# possible pano corner coordinates based on img1
extremum = np.array([(0, 0), br1,
tuple(map(sum, zip(orig1, diag2))),
tuple(map(sub, orig1, orig2))])
bb = cv.boundingRect(extremum)
# patch img1 to img2
pano, shift = addBorder(img1, bb)
orig = tuple(map(sum, zip(orig1, shift)))
idx = np.s_[orig[1] : orig[1] + img2.shape[0] - orig2[1],
orig[0] : orig[0] + img2.shape[1] - orig2[0]]
subImg = img2[orig2[1] : img2.shape[0], orig2[0] : img2.shape[1]]
pano[idx] = mean_blend(pano[idx], subImg)
return (pano, orig)
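To make the last step concrete, here is a hedged usage sketch of the 1 2 3 / 4 5 6 pattern, driven entirely by the functions defined in this answer; the imgs list and the Hab homography names below are assumptions, not part of the original code:
# imgs[0..5] are the six images; H12, H32, H25, H45, H65 are precomputed
# homographies (Hab maps image a onto image b), obtained with the
# feature-matching code earlier in this answer.

# top row: 1 -> 2 <- 3, both intermediate panos end up anchored at image 2
top_left, orig_tl = blend_multiple_images([imgs[0], imgs[1]], [H12])
top_right, orig_tr = blend_multiple_images([imgs[2], imgs[1]], [H32])
top, orig_top = patchPano(top_left, top_right, orig_tl, orig_tr)

# bottom row: 4 -> 5 <- 6, anchored at image 5
bot_left, orig_bl = blend_multiple_images([imgs[3], imgs[4]], [H45])
bot_right, orig_br = blend_multiple_images([imgs[5], imgs[4]], [H65])
bottom, orig_bot = patchPano(bot_left, bot_right, orig_bl, orig_br)

# warp the whole top row down onto image 5, then patch it with the bottom row at node 5
top_on_5, orig_on_5 = warpPano(top, imgs[4], H25, orig_top)
pano, _ = patchPano(top_on_5, bottom, orig_on_5, orig_bot)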
For a quick demo, you can run the Python code in GitHub.
If you want to use the above methods in C++, you can have a look at Stitch library.
Any PR or edit to this post is welcome.
As an alternative to the last step that @Burak gave, this is the way I used, as I had the number of images for each of the rows (chunks); multiStitching is nothing but a function that stitches images horizontally:
def stitchingImagesHV(img_list, size):
"""
As our multi stitching algorithm works on the horizontal line, we will hack
it to use also the vertical stitching by rotating each row "stitch_img" and
apply the same technique, and after that, the final result is rotated back to the
original direction.
"""
# Generate row chunks of "size" length from image list
chunks = [img_list[i:i + size] for i in range(0, len(img_list), size)]
list_rotated_images = []
for i in range(len(chunks)):
stitch_img = multiStitching(chunks[i])
stitch_img_rotated = cv2.rotate(stitch_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
list_rotated_images.append(stitch_img_rotated.astype('uint8'))
stitch_img2 = multiStitching(list_rotated_images)
return cv2.rotate(stitch_img2, cv2.ROTATE_90_CLOCKWISE)
Uninformed search: I need to find, within a black and white image, a path that connects the upper-left corner of the image with the lower-right corner, passing only through black pixels, using depth-first search (DFS). My program already opens the image with OpenCV, and if I give it a coordinate it tells me whether the pixel is black or not, but I don't know how to do the rest.
I also have to create an image with the found path, painting a pixel for each item in the successor list.
Let's read the image:
import cv2
im = cv2.imread("image.png", 0)
Now, we can build a graph using this image, and using path-finding algorithms we can get a path between two nodes.
from itertools import product, chain
import math
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
h, w = im.shape  # Get height and width of image
# Add only those nodes which are black to graph
nodes = [(i, j) for (i, j) in product(range(h), range(w)) if im[i, j] == 0]
g = nx.Graph()
g.add_nodes_from(nodes)  # passing the tuples to nx.Graph() directly would treat them as an edge list
# For each node there can be 8 neighbours, if you consider diagonal as well.
def get_neighbors(node):
box_coords = product([-1, 0, 1], [-1, 0, 1])
nns = []
for coord in box_coords:
if coord != (0, 0):  # skip the node itself but keep all 8 neighbours, including both diagonals
nn = (node[0] - coord[0], node[1] - coord[1])
nns.append(nn)
return nns
# A point will be a neighbour if it is black as well and is in image bounds
neighbors = list(chain.from_iterable([[(node, ng_node, 1) for ng_node in get_neighbors(node) if (0 <= ng_node[0] < h) and (0 <= ng_node[1] < w) and im[ng_node] == 0] for node in nodes]))
g.add_weighted_edges_from(neighbors)
# In the image loaded above, (0, 0) is the top-left point. To keep things a little more generic, I select the point closest to (0, 0) as start and the furthest as end.
min_pt = min(nodes, key=lambda x: math.hypot(x[0], x[1]))
max_pt = max(nodes, key=lambda x: math.hypot(x[0], x[1]))
# Now we can just use networkx to find path between two points
path = nx.shortest_path(g, source=min_pt, target=max_pt)
# Get new image with only shortest path
im2 = 255*np.ones_like(im)
for pt in path:
im2[pt] = 0
cv2.imwrite('image_path.png', im2)
plt.figure(figsize=(10, 10))
plt.imshow(im2, cmap='gray')
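One caveat: nx.shortest_path uses Dijkstra/BFS rather than the depth-first search the question asks for. If a DFS path is specifically required, a hedged sketch reusing the same g, min_pt and max_pt could look like this:
def dfs_path(graph, start, goal):
    # iterative depth-first search; parent links let us reconstruct the path,
    # and None is returned if the two corners are not connected
    stack = [start]
    parent = {start: None}
    while stack:
        node = stack.pop()
        if node == goal:
            path = []
            while node is not None:
                path.append(node)
                node = parent[node]
            return path[::-1]
        for nb in graph[node]:
            if nb not in parent:
                parent[nb] = node
                stack.append(nb)
    return None

dfs_result = dfs_path(g, min_pt, max_pt)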