I am stitching multiple images. While stitching two images it is showing dashed black line in between stitching like below.
Has anyone knows about this how I can remove or get rid of this black dashed line ?
main part of stitching code which stitches two images and calls next image with result of previous stitched images untill all images gets over:
detector = cv2.xfeatures2d.SURF_create(400)
gray1 = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
ret1, mask1 = cv2.threshold(gray1,1,255,cv2.THRESH_BINARY)
kp1, descriptors1 = detector.detectAndCompute(gray1,mask1)
gray2 = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
ret2, mask2 = cv2.threshold(gray2,1,255,cv2.THRESH_BINARY)
kp2, descriptors2 = detector.detectAndCompute(gray2,mask2)
keypoints1Im = cv2.drawKeypoints(image1, kp1, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints1Im)
keypoints2Im = cv2.drawKeypoints(image2, kp2, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints2Im)
matcher = cv2.BFMatcher()
matches = matcher.knnMatch(descriptors2,descriptors1, k=2)
good = []
for m, n in matches:
if m.distance < 0.55 * n.distance:
good.append(m)
print (str(len(good)) + " Matches were Found")
if len(good) <= 10:
return image1
matches = copy.copy(good)
matchDrawing = util.drawMatches(gray2,kp2,gray1,kp1,matches)
util.display("matches",matchDrawing)
src_pts = np.float32([ kp2[m.queryIdx].pt for m in matches ]).reshape(-1,1,2)
dst_pts = np.float32([ kp1[m.trainIdx].pt for m in matches ]).reshape(-1,1,2)
A = cv2.estimateRigidTransform(src_pts,dst_pts,fullAffine=False)
if A is None:
HomogResult = cv2.findHomography(src_pts,dst_pts,method=cv2.RANSAC)
H = HomogResult[0]
height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners2 = np.zeros((4,2))
for i in range(0,4):
cornerX = corners2[i,0]
cornerY = corners2[i,1]
if A is not None: #check if we're working with affine transform or perspective transform
warpedCorners2[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
warpedCorners2[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
else:
warpedCorners2[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
warpedCorners2[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
allCorners = np.concatenate((corners1, warpedCorners2), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
Please help me out. Thanks.
Edit:
Input image1(resized)
Input image2(resized)
Result(resized)
Update:
Result after #fmw42 anwser:
The problem arises because when you do the warping, the border pixels of the image get resampled/interpolated with black background pixels. This leaves a non-zero border around your warped image of varying values that show as your dashed dark line when merged with the other image. This happens because your merge test is binary, tested with != 0.
So one simple thing you can do is mask the warped image in Python/OpenCV to get its bounds from the black background outside the image and then erode the mask. Then use the mask to erode the image boundary. This can be achieve by the following changes to your last lines of code presented as follows:
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
I simply added the following code lines to your code:
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
You can increase the kernel size if desired to erode more.
Here is the before and after. Note that I did not have SURF and tried to use ORB, which did not align well. So your roads do not align. But the mismatch due to misalignment emphasizes the issue as it shows the dashed jagged black border line. The fact that ORB did not work or I do not have proper code from above to make it align is not important. The masking does what I think you want and is extendable to the processing of all your images.
The other thing that can be done in combination with the above is to feather the mask and then ramp blend the two images using the mask. This is done by blurring the mask (a bit more) and then stretching the values over the inside half of the blurred border and making the ramp only on the outside half of the blurred border. Then blend the two images with the ramped mask and its inverse as follows for the same code as above.
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
mask2 = cv2.blur(mask2, (5,5))
mask2 = skimage.exposure.rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged3.png", result)
The result when compared to the original composite is as follows:
ADDITION
I have corrected my ORB code to reverse the use of images and now it aligns. So here are all 3 techniques: the original, the one that only uses a binary mask and the one that uses a ramped mask for blending (all as described above).
ADDITION2
Here are the 3 requested images: original, binary masked, ramped mask blending.
Here is my ORB code for the last version above
I tried to change as little as possible from your code, except I had to use ORB and I had to swap the names image1 and image2 near the end.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
from skimage.exposure import rescale_intensity
image1 = cv2.imread("image1.jpg")
image2 = cv2.imread("image2.jpg")
gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
# Detect ORB features and compute descriptors.
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(gray1, None)
keypoints2, descriptors2 = orb.detectAndCompute(gray2, None)
# Match features.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
matches = matcher.match(descriptors1, descriptors2, None)
# Sort matches by score
matches.sort(key=lambda x: x.distance, reverse=False)
# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]
# Draw top matches
imMatches = cv2.drawMatches(image1, keypoints1, image2, keypoints2, matches, None)
cv2.imwrite("/Users/fred/desktop/image1_image2_matches.png", imMatches)
# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
points1[i, :] = keypoints1[match.queryIdx].pt
points2[i, :] = keypoints2[match.trainIdx].pt
print(points1)
print("")
print(points2)
A = cv2.estimateRigidTransform(points1,points2,fullAffine=False)
#print(A)
if A is None:
HomogResult = cv2.findHomography(points1,points2,method=cv2.RANSAC)
H = HomogResult[0]
height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners2 = np.zeros((4,2))
# project corners2 into domain of image1 from A affine or H homography
for i in range(0,4):
cornerX = corners2[i,0]
cornerY = corners2[i,1]
if A is not None: #check if we're working with affine transform or perspective transform
warpedCorners2[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
warpedCorners2[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
else:
warpedCorners2[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
warpedCorners2[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
allCorners = np.concatenate((corners1, warpedCorners2), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
mask2 = cv2.blur(mask2, (5,5))
mask2 = rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged2.png", result)
You had the following. Note where the names, image1 and image2 are being used compared to my code above.
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
Horizontal Gluing
I will focus on one of the cuts as a prove of concept. I agree with the comments that your code is a bit lengthy and hard to work with. So step one is to glue the pictures myself.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
upper_image = cv2.cvtColor(cv2.imread('yQv6W.jpg'), cv2.COLOR_BGR2RGB)/255
lower_image = cv2.cvtColor(cv2.imread('zoWJv.jpg'), cv2.COLOR_BGR2RGB)/255
result_image = np.zeros((466+139,700+22,3))
result_image[139:139+lower_image.shape[0],:lower_image.shape[1]] = lower_image
result_image[0:upper_image.shape[0], 22:22+upper_image.shape[1]] = upper_image
plt.imshow(result_image)
Ok no dashed black line but I admit not perfect either. So the next step is to align at least the street and the little way at the very right of the picture. For that I will need to shrink the picture to non integer size and turn that back into a grid. I will use a knn like method for that.
Edit: As requested in the comments I'll explain the shrinking a bit more detailed since it would have to be done by hand again for an other stitching. The magic happens in the line (I replaced n by it's value)
f = UnivariateSpline([0,290,510,685],[0,310,530,700])
I tried first to scale the lower picture in the x-direction to make the little way on the very right fit the upper image. Unfortunately then the street wouldn't fit the street. So what I do is scale down according to the above function. At pixel 0 I still want to have pixel zero, at 290 I want to have what used to be at 310 and so on.
Notice that 290,510 and 310,530 are the new respectively old x-coordinates of street and way at the hight of the gluing.
class Image_knn():
def fit(self, image):
self.image = image.astype('float')
def predict(self, x, y):
image = self.image
weights_x = [(1-(x % 1)).reshape(*x.shape,1), (x % 1).reshape(*x.shape,1)]
weights_y = [(1-(y % 1)).reshape(*x.shape,1), (y % 1).reshape(*x.shape,1)]
start_x = np.floor(x).astype('int')
start_y = np.floor(y).astype('int')
return sum([image[np.clip(np.floor(start_x + x), 0, image.shape[0]-1).astype('int'),
np.clip(np.floor(start_y + y), 0, image.shape[1]-1).astype('int')] * weights_x[x]*weights_y[y]
for x,y in itertools.product(range(2),range(2))])
image_model = Image_knn()
image_model.fit(lower_image)
n = 685
f = UnivariateSpline([0,290,510,n],[0,310,530,700])
np.linspace(0,lower_image.shape[1],n)
yspace = f(np.arange(n))
result_image = np.zeros((466+139,700+22, 3))
a,b = np.meshgrid(np.arange(0,lower_image.shape[0]), yspace)
result_image[139:139+lower_image.shape[0],:n] = np.transpose(image_model.predict(a,b), [1,0,2])
result_image[0:upper_image.shape[0], 22:22+upper_image.shape[1]] = upper_image
plt.imshow(result_image, 'gray')
Much better, no black line but maybe we can still smoothen the cut a bit. I figured if I take convex combinations of upper and lower image at the cut it would look much better.
result_image = np.zeros((466+139,700+22,3))
a,b = np.meshgrid(np.arange(0,lower_image.shape[0]), yspace)
result_image[139:139+lower_image.shape[0],:n] = np.transpose(image_model.predict(a,b), [1,0,2])
transition_range = 10
result_image[0:upper_image.shape[0]-transition_range, 22:22+upper_image.shape[1]] = upper_image[:-transition_range,:]
transition_pixcels = upper_image[-transition_range:,:]*np.linspace(1,0,transition_range).reshape(-1,1,1)
result_image[upper_image.shape[0]-transition_range:upper_image.shape[0], 22:22+upper_image.shape[1]] *= np.linspace(0,1,transition_range).reshape(-1,1,1)
result_image[upper_image.shape[0]-transition_range:upper_image.shape[0], 22:22+upper_image.shape[1]] += transition_pixcels
plt.imshow(result_image)
plt.savefig('text.jpg')
Tilted Gluing
For completeness here also a version gluing at the top with a tilted bottom. I attach the pictures at a point and turn around that fixed point by a few degrees. And again I am correcting some very slight non alignements in the end. To get the coordinates of that I am using jupyter lab and %matplotlib widget.
fixed_point_upper = np.array([139,379])
fixed_point_lower = np.array([0,400])
angle = np.deg2rad(2)
down_dir = np.array([np.sin(angle+np.pi/2),np.cos(angle+np.pi/2)])
right_dir = np.array([np.sin(angle),np.cos(angle)])
result_image_height = np.ceil((fixed_point_upper+lower_image.shape[0]*down_dir+(lower_image.shape[1]-fixed_point_lower[1])*right_dir)[0]).astype('int')
right_shift = np.ceil(-(fixed_point_upper+lower_image.shape[0]*down_dir-fixed_point_lower[1]*right_dir)[1]).astype('int')
result_image_width = right_shift+upper_image.shape[1]
result_image = np.zeros([result_image_height, result_image_width,3])
fixed_point_result = np.array([fixed_point_upper[0],fixed_point_upper[1]+right_shift])
lower_top_left = fixed_point_result-fixed_point_lower[1]*right_dir
result_image[:upper_image.shape[0],-upper_image.shape[1]:] = upper_image
# calculate points in lower_image
result_coordinates = np.stack(np.where(np.ones(result_image.shape[:2],dtype='bool')),axis=1)
lower_coordinates = np.stack([(result_coordinates-lower_top_left)#down_dir,(result_coordinates-lower_top_left)#right_dir],axis=1)
mask = (0 <= lower_coordinates[:,0]) & (0 <= lower_coordinates[:,1]) \
& (lower_coordinates[:,0] <= lower_image.shape[0]) & (lower_coordinates[:,1] <= lower_image.shape[1])
result_coordinates = result_coordinates[mask]
lower_coordinates = lower_coordinates[mask]
# COORDINATES ON RESULT IMAGE
# left street 254
# left sides of houses 295, 420, 505
# right small street, both sides big street 590,635,664
# COORDINATES ON LOWER IMAGE
# left street 234
# left sides of houses 280, 399, 486
# right small street, both sides big street 571, 617, 642
def coord_transform(y):
return (y-lower_top_left[1])/right_dir[1]
y = tuple(map(coord_transform, [lower_top_left[1], 254, 295, 420, 505, 589, 635, 664]))
f = UnivariateSpline(y,[0, 234, 280, 399, 486, 571, 617, 642])
result_image[result_coordinates[:,0],result_coordinates[:,1]] = image_model.predict(lower_coordinates[:,0],np.vectorize(f)(lower_coordinates[:,1]))
Related
I need to determine the location of yogurts in the supermarket. Source photo looks like
With template:
I using SIFT to extract key points of template:
img1 = cv.imread('train.jpg')
sift = cv.SIFT_create()# queryImage
kp1, des1 = sift.detectAndCompute(img1, None)
path = glob.glob("template.jpg")
cv_img = []
l=0
for img in path:
img2 = cv.imread(img) # trainImage
# Initiate SIFT detector
# find the keypoints and descriptors with SIFT
kp2, des2 = sift.detectAndCompute(img2,None)
# FLANN parameters
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50) # or pass empty dictionary
flann = cv.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
# Need to draw only good matches, so create a mask
# ratio test as per Lowe's paper
if (l < len(matches)):
l = len(matches)
image = img2
match = matches
h_query, w_query, _= img2.shape
matchesMask = [[0,0] for i in range(len(match))]
good_matches = []
good_matches_indices = {}
for i,(m,n) in enumerate(match):
if m.distance < 0.7*n.distance:
matchesMask[i]=[1,0]
good_matches.append(m)
good_matches_indices[len(good_matches) - 1] = i
bboxes = []
src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,2)
dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,2)
model, inliers = initialize_ransac(src_pts, dst_pts)
n_inliers = np.sum(inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(len(matched_indices))
model, inliers = ransac(
(src_pts, dst_pts),
AffineTransform, min_samples=4,
residual_threshold=4, max_trials=20000
)
n_inliers = np.sum(inliers)
print(n_inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(matched_indices)
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
h_query, w_query,_ = img2.shape
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
# bboxes_list.append((i, coords))
M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 2)
draw_params = dict(matchColor = (0,255,0),
singlePointColor = (255,0,0),
matchesMask = matchesMask,
flags = cv.DrawMatchesFlags_DEFAULT)
img3 = cv.drawMatchesKnn(img1,kp1,image,kp2,match,None,**draw_params)
plt.imshow(img3),plt.show()
Result of SIFT looks like
The question is what is the best way to clasterise points to obtain rectangles, representing each yogurt? I tried RANSAC, but this method doesn't work in this case.
I am proposing an approach based on what is discussed in this paper. I have modified the approach a bit because the use-case is not entirely same but they do use SIFT features matching to locate multiple objects in video frames. They have used PCA for reducing time but that may not be required for still images.
Sorry I could not write a code for this as it will take a lot of time but I believe this should work to locate all the occurrences of the template object.
The modified approach is like this:
Divide the template image into regions: left, middle, right along the horizontal
and top, bottom along the vertical
Now when you match features between the template and source image, you will get features matched from some of the keypoints from these regions on multiple locations on the source image. You can use these keypoints to identify which region of the template is present at what location(s) in the source image. If there are overlapping regions i.e. keypoints from different regions matched with close keypoints in source image then that would mean a wrong match.
Mark each set of matching keypoints within a neighborhood on source image as left, center, right, top, bottom depending upon if they have majority matches from keypoints of a particular region in the template image.
Starting from each left region on source image move towards right and if we find a central region followed by a right region then this area of source image between regions marked as left and right, can be marked as location of one template object.
There could be overlapping objects which could result in a left region followed by another left region when moving in right direction from the left region. The area between the two left regions can be marked as one template object.
For further refined locations, each area of source image marked as one template object can be cropped and re-matched with the template image.
Try working spatially: for each key-point in img2 get some bounding box around and consider only the points in there for your ransac homography to check for best fit.
You can also work with overlapping windows and later discard similar resulting homographys
Here is you can do
Base Image = Whole picture of shelf
Template Image = Single product image
Get SIFT matches from both images. (base and template image)
Do feature matching.
Get all the points in base image which are matching. (refer to figure)
Create Cluster based on size of template image. (here threshold in 50px)
Get Bounding box of clusters.
Crop each bounding box cluter and check matches with template image.
Accept all cluters which has atleast minimum percentage of matched. (here taken minimum 10% of keypoints)
def plot_pts(img, pts):
img_plot = img.copy()
for i in range(len(pts)):
img_plot = cv2.circle(img_plot, (int(pts[i][0]), int(pts[i][1])), radius=7, color=(255, 0, 0), thickness=-1)
plt.figure(figsize=(20, 10))
plt.imshow(img_plot)
def plot_bbox(img, bbox_list):
img_plot = img.copy()
for i in range(len(bbox_list)):
start_pt = bbox_list[i][0]
end_pt = bbox_list[i][2]
img_plot = cv2.rectangle(img_plot, pt1=start_pt, pt2=end_pt, color=(255, 0, 0), thickness=2)
plt.figure(figsize=(20, 10))
plt.imshow(img_plot)
def get_distance(pt1, pt2):
x1, y1 = pt1
x2, y2 = pt2
return np.sqrt(np.square(x1 - x2) + np.square(y1 - y2))
def check_centroid(pt, centroid):
x, y = pt
cx, cy = centroid
distance = get_distance(pt1=(x, y), pt2=(cx, cy))
if distance < max_distance:
return True
else:
return False
def update_centroid(pt, centroids_list):
new_centroids_list = centroids_list.copy()
flag_new_centroid = True
for j, c in enumerate(centroids_list):
temp_centroid = np.mean(c, axis=0)
if_close = check_centroid(pt, temp_centroid)
if if_close:
new_centroids_list[j].append(pt)
flag_new_centroid = False
break
if flag_new_centroid:
new_centroids_list.append([pt])
new_centroids_list = recheck_centroid(new_centroids_list)
return new_centroids_list
def recheck_centroid(centroids_list):
new_centroids_list = [list(set(c)) for c in centroids_list]
return new_centroids_list
def get_bbox(pts):
minn_x, minn_y = np.min(pts, axis=0)
maxx_x, maxx_y = np.max(pts, axis=0)
return [[minn_x, minn_y], [maxx_x, minn_y], [maxx_x, maxx_y], [minn_x, maxx_y]]
class RotateAndTransform:
def __init__(self, path_img_ref):
self.path_img_ref = path_img_ref
self.ref_img = self._read_ref_image()
#sift
self.sift = cv2.SIFT_create()
#feature matching
self.bf = cv2.BFMatcher()
# FLANN parameters
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50) # or pass empty dictionary
self.flann = cv2.FlannBasedMatcher(index_params,search_params)
def _read_ref_image(self):
ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
return ref_img
def read_src_image(self, path_img_src):
self.path_img_src = path_img_src
# read images
# ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
src_img = cv2.imread(path_img_src, cv2.IMREAD_COLOR)
src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
return src_img
def convert_bw(self, img):
img_bw = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
return img_bw
def get_keypoints_descriptors(self, img_bw):
keypoints, descriptors = self.sift.detectAndCompute(img_bw,None)
return keypoints, descriptors
def get_matches(self, src_descriptors, ref_descriptors, threshold=0.6):
matches = self.bf.knnMatch(ref_descriptors, src_descriptors, k=2)
flann_matches = self.flann.knnMatch(ref_descriptors, src_descriptors,k=2)
good_matches = []
good_flann_matches = []
# Apply ratio test for Brute Force
for m,n in matches:
if m.distance <threshold*n.distance:
good_matches.append([m])
print(f'Numner of BF Match: {len(matches)}, Number of good BF Match: {len(good_matches)}')
# Apply ratio test for FLANN
for m,n in flann_matches:
if m.distance < threshold*n.distance:
good_flann_matches.append([m])
# matches = sorted(matches, key = lambda x:x.distance)
print(f'Numner of FLANN Match: {len(flann_matches)}, Number of good Flann Match: {len(good_flann_matches)}')
return good_matches, good_flann_matches
def get_src_dst_pts(self, good_flann_matches, ref_keypoints, src_keypoints):
pts_src = []
pts_ref = []
n = len(good_flann_matches)
for i in range(n):
ref_index = good_flann_matches[i][0].queryIdx
src_index = good_flann_matches[i][0].trainIdx
pts_src.append(src_keypoints[src_index].pt)
pts_ref.append(ref_keypoints[ref_index].pt)
return np.array(pts_src), np.array(pts_ref)
def extend_bbox(bbox, increment=0.1):
bbox_new = bbox.copy()
bbox_new[0] = [bbox_new[0][0] - int(bbox_new[0][0] * increment), bbox_new[0][1] - int(bbox_new[0][1] * increment)]
bbox_new[1] = [bbox_new[1][0] + int(bbox_new[1][0] * increment), bbox_new[1][1] - int(bbox_new[1][1] * increment)]
bbox_new[2] = [bbox_new[2][0] + int(bbox_new[2][0] * increment), bbox_new[2][1] + int(bbox_new[2][1] * increment)]
bbox_new[3] = [bbox_new[3][0] - int(bbox_new[3][0] * increment), bbox_new[3][1] + int(bbox_new[3][1] * increment)]
return bbox_new
def crop_bbox(img, bbox):
y, x = bbox[0]
h, w = bbox[1][0] - bbox[0][0], bbox[2][1] - bbox[0][1]
return img[x: x + w, y: y + h, :]
base_img = cv2.imread(path_img_base)
ref_img = cv2.imread(path_img_ref)
rnt = RotateAndTransform(path_img_ref)
ref_img_bw = rnt.convert_bw(img=rnt.ref_img)
ref_keypoints, ref_descriptors = rnt.get_keypoints_descriptors(ref_img_bw)
base_img = rnt.read_src_image(path_img_src = path_img_base)
base_img_bw = rnt.convert_bw(img=base_img)
base_keypoints, base_descriptors = rnt.get_keypoints_descriptors(base_img_bw)
good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=ref_descriptors, threshold=0.6)
ref_points = []
for gm in good_flann_matches:
x, y = ref_keypoints[gm[0].queryIdx].pt
x, y = int(x), int(y)
ref_points.append((x, y))
max_distance = 50
centroids = [[ref_points[0]]]
for i in tqdm(range(len(ref_points))):
pt = ref_points[i]
centroids = update_centroid(pt, centroids)
bbox = [get_bbox(c) for c in centroi[![enter image description here][1]][1]ds]
centroids = [np.mean(c, axis=0) for c in centroids]
print(f'Number of Points: {len(good_flann_matches)}, centroids: {len(centroids)}')
data = []
for i in range(len(bbox)):
temp_crop_img = crop_bbox(ref_img, extend_bbox(bbox[i], 0.01))
temp_crop_img_bw = rnt.convert_bw(img=temp_crop_img)
temp_crop_keypoints, temp_crop_descriptors = rnt.get_keypoints_descriptors(temp_crop_img_bw)
good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=temp_crop_descriptors, threshold=0.6)
temp_data = {'image': temp_crop_img,
'num_matched': len(good_flann_matches),
'total_keypoints' : len(base_keypoints),
}
data.append(temp_data)
filter_data = [{'num_matched' : i['num_matched'], 'image': i['image']} for i in data if i['num_matched'] > 25]
for i in range(len(filter_data)):
temp_num_match = filter_data[i]['num_matched']
plt.figure()
plt.title(f'num matched: {temp_num_match}')
plt.imshow(filter_data[i]['image'])
First you could detect any item that is on the shelf with a network like this, it's pre-trained in this exact context and works pretty well. You should also rectify the image before feeding it to the network. You will obtain bounding boxes for every product (maybe some false positive/negative, but that's another issue). Then you can match each box with the template using SIFT and calculating a score (it's up to you define which score works), but I suggest to use another approach like a siamese network if you a consistent dataset.
I want to stitch some images(identical resolution) into a panorama using openCV(without using the Stitcher class). I tried the algorithm described here, but instead of the desired panorama i get an image that is made up of the last image to be stitched and a large black area. I outputted am image for each iteration and the result is the same: current image + a larger black area each time.
import numpy
import cv2
# images is an array of images that i need to stitch
matcher = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
ORB = cv2.ORB_create()
homography = 0
panorama = 0
for image in range(0, len(images) -1):
key1, desc1 = ORB.detectAndCompute(images[image], None)
key2, desc2 = ORB.detectAndCompute(images[image + 1], None)
matches = matcher.match(desc1, desc2, None)
matches = sorted(matches, key=lambda x: x.distance, reverse=True)
numGoodMatches = int(len(matches) * 0.15)
matches2 = matches[-numGoodMatches:]
points1 = numpy.zeros((len(matches2), 2), dtype=numpy.float32)
points2 = numpy.zeros((len(matches2), 2), dtype=numpy.float32)
for i, match in enumerate(matches2):
points1[i, :] = key1[match.queryIdx].pt
points2[i, :] = key2[match.trainIdx].pt
h, mask = cv2.findHomography(points2, points1, cv2.RANSAC)
if isinstance(homography, int):
homography = h
img1H, img1W = images[image].shape
img2H, img2W = images[image + 1].shape
aligned = cv2.warpPerspective(images[image + 1], h, (img1W + img2W, img2H))
stitchedImage = numpy.copy(aligned)
stitchedImage[0:img1H, 0:img2W] = images[image]
panorama = stitchedImage
else:
h *= homography
homography = h
img1H, img1W = panorama.shape
img2H, img2W = images[image + 1].shape
aligned = cv2.warpPerspective(images[image + 1], h, (img1W + img2W, img2H))
stitchedImage = numpy.copy(aligned)
stitchedImage[0:img1H, 0:img2W] = images[image + 1]
panorama = stitchedImage
Example of images i get:
The first image is correct.
The last image has the correct width (n * original width) but only one image and the rest is black area.
This
stitchedImage[0:img1H, 0:img2W] = images[image + 1]
places an image to the left top corner of "stitchedImage".
Somehow the image x offset must be calculated for each part of panorama:
stitchedImage[0:img1H, x_offset:x_offset+img2W] = images[image + 1]
I am trying to extract blood network from this face image: Face image
For such task, i am using the P&M anisotropic diffusion found in this question: Anisotropic diffusion 2d images. Then i am using tophat transform followed by blackhat transform, afterwards i use a simple threshold to set to 255 all pixel that has an intensity value of 100.
The problem is that, after i use the threshold and try to open the image, whatever way i try, the image is displayed as fully black:
In short, my goal is to extract the blood vessels using P&M anisotropic diffusion with structuring element of flat disk of 5x5, then apply tophat and blackhat, respectively and a simple threshold and actually be able to view the image afterwards.
Here's my code on how i am trying it:
import cv2
import import cv2 numpy as np
import warnings
face_img=mpimg.imread('path')
def anisodiff(img, niter=1, kappa=50, gamma=0.1, step=(1., 1.), option=1):
if img.ndim == 3:
m = "Only grayscale images allowed, converting to 2D matrix"
warnings.warn(m)
img = img.mean(2)
img = img.astype('float32')
imgout = img.copy()
deltaS = np.zeros_like(imgout)
deltaE = deltaS.copy()
NS = deltaS.copy()
EW = deltaS.copy()
gS = np.ones_like(imgout)
gE = gS.copy()
for ii in range(niter):
deltaS[:-1, :] = np.diff(imgout, axis=0)
deltaE[:, :-1] = np.diff(imgout, axis=1)
if option == 1:
gS = np.exp(-(deltaS/kappa)**2.)/step[0]
gE = np.exp(-(deltaE/kappa)**2.)/step[1]
elif option == 2:
gS = 1./(1.+(deltaS/kappa)**2.)/step[0]
gE = 1./(1.+(deltaE/kappa)**2.)/step[1]
E = gE*deltaE
S = gS*deltaS
NS[:] = S
EW[:] = E
NS[1:, :] -= S[:-1, :]
EW[:, 1:] -= E[:, :-1]
imgout += gamma*(NS+EW)
return imgout
new_img = anisodiff(face_img, niter=1, kappa=20, gamma=0.1, step=(1., 1.), option=1)
filterSize =(3, 3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
filterSize)
input_image = new_img
first_tophat_img = cv2.morphologyEx(input_image,
cv2.MORPH_TOPHAT,
kernel)
filterSize =(3, 3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
filterSize)
second_tophat_img = cv2.morphologyEx(input_image,
cv2.MORPH_BLACKHAT,
kernel)
ret, thresh1 = cv2.threshold(second_tophat_img, 200, 255, cv2.THRESH_BINARY)
Even when i set the threshold to 254 for instance, the image goes black.
I executed a simple MATLAB implementation, and got a nice result.
MATLAB code:
I = imread('02_giorgos_1_f_M_30_830.tif');
I = im2double(uint8(I));
J = imdiffusefilt(I);
K = imtophat(J, ones(3));
figure;imshow(imadjust(K, stretchlim(K)));
Result:
I don't know if you know MATLAB, but I used the default arguments of imdiffusefilt (equivalent to anisodiff in your code).
Default MATLAB arguments are equivalent to:
Input image is in range [0, 1] and not [0, 255].
niter=5 (note: you used only 1 iteration and it's not enough).
kappa=0.1
gamma=0.125
MATLAB default is 8 neighbors connectivity (not 4 neighbors as used in anisodiff).
8 neighbors connectivity:
For getting same result as in MATLAB, I implemented an 8 neighbors connectivity Anisotropic diffusion (based on MATLAB source code).
Note: with 4 neighbors connectivity it's working, but result is not so nice as using 8 neighbors.
Displaying the output image:
In order to display the output image correctly, I used imadjust(K, stretchlim(K)).
The command stretches the range of the input image such that percentile 1 goes to 0, and percentile 99 goes to 1 (linear stretch).
One more thing:
Instead of using fixed threshold of 200, I used percentile 95 threshold:
t = np.percentile(first_tophat_img, 95)
ret, thresh1 = cv2.threshold(first_tophat_img, t, 255,
cv2.THRESH_BINARY)
Here is the code (uses cv2.imshow for testing):
import cv2
import numpy as np
import matplotlib.image as mpimg
import warnings
face_img = mpimg.imread('02_giorgos_1_f_M_30_830.tif')
def anisodiff8neighbors(img, niter=5, kappa=0.1, gamma=0.125):
""" See https://www.mathworks.com/help/images/ref/imdiffusefilt.html
Anisotropic diffusion filtering with 8 neighbors
Range of img is assumed to be [0, 1] (not [0, 255]).
"""
if img.ndim == 3:
m = "Only grayscale images allowed, converting to 2D matrix"
warnings.warn(m)
img = img.mean(2)
img = img.astype('float32')
imgout = img.copy()
for ii in range(niter):
# MATLAB source code is commented
#paddedImg = padarray(I, [1 1], 'replicate');
padded_img = np.pad(imgout, (1, 1), 'edge')
#diffImgNorth = paddedImg(1:end-1,2:end-1) - paddedImg(2:end,2:end-1);
#diffImgEast = paddedImg(2:end-1,2:end) - paddedImg(2:end-1,1:end-1);
#diffImgNorthWest = paddedImg(1:end-2,1:end-2) - I;
#diffImgNorthEast = paddedImg(1:end-2,3:end) - I;
#diffImgSouthWest = paddedImg(3:end,1:end-2) - I;
#diffImgSouthEast = paddedImg(3:end,3:end) - I;
diff_img_north = padded_img[0:-1, 1:-1] - padded_img[1:, 1:-1]
diff_img_east = padded_img[1:-1, 1:] - padded_img[1:-1, 0:-1]
diff_img_north_west = padded_img[0:-2, 0:-2] - imgout
diff_img_north_east = padded_img[0:-2, 2:] - imgout
diff_img_south_west = padded_img[2:, 0:-2] - imgout
diff_img_south_east = padded_img[2:, 2:] - imgout
#case 'exponential'
#conductCoeffNorth = exp(-(abs(diffImgNorth)/gradientThreshold).^2);
#conductCoeffEast = exp(-(abs(diffImgEast)/gradientThreshold).^2);
#conductCoeffNorthWest = exp(-(abs(diffImgNorthWest)/gradientThreshold).^2);
#conductCoeffNorthEast = exp(-(abs(diffImgNorthEast)/gradientThreshold).^2);
#conductCoeffSouthWest = exp(-(abs(diffImgSouthWest)/gradientThreshold).^2);
#conductCoeffSouthEast = exp(-(abs(diffImgSouthEast)/gradientThreshold).^2);
conduct_coeff_north = np.exp(-(np.abs(diff_img_north)/kappa)**2.0)
conduct_coeff_east = np.exp(-(np.abs(diff_img_east)/kappa)**2.0)
conduct_coeff_north_west = np.exp(-(np.abs(diff_img_north_west)/kappa)**2.0)
conduct_coeff_north_east = np.exp(-(np.abs(diff_img_north_east)/kappa)**2.0)
conduct_coeff_south_west = np.exp(-(np.abs(diff_img_south_west)/kappa)**2.0)
conduct_coeff_south_east = np.exp(-(np.abs(diff_img_south_east)/kappa)**2.0)
#fluxNorth = conductCoeffNorth .* diffImgNorth;
#fluxEast = conductCoeffEast .* diffImgEast;
#fluxNorthWest = conductCoeffNorthWest .* diffImgNorthWest;
#fluxNorthEast = conductCoeffNorthEast .* diffImgNorthEast;
#fluxSouthWest = conductCoeffSouthWest .* diffImgSouthWest;
#fluxSouthEast = conductCoeffSouthEast .* diffImgSouthEast;
flux_north = conduct_coeff_north * diff_img_north
flux_east = conduct_coeff_east * diff_img_east
flux_north_west = conduct_coeff_north_west * diff_img_north_west
flux_north_east = conduct_coeff_north_east * diff_img_north_east
flux_south_west = conduct_coeff_south_west * diff_img_south_west
flux_south_east = conduct_coeff_south_east * diff_img_south_east
#% Discrete PDE solution
#I = I + diffusionRate * (fluxNorth(1:end-1,:) - fluxNorth(2:end,:) + ...
# fluxEast(:,2:end) - fluxEast(:,1:end-1) + (1/(dd^2)).* fluxNorthWest + ...
# (1/(dd^2)).* fluxNorthEast + (1/(dd^2)).* fluxSouthWest + (1/(dd^2)).* fluxSouthEast);
imgout = imgout + gamma * (flux_north[0:-1,:] - flux_north[1:,:] +
flux_east[:,1:] - flux_east[:,0:-1] + 0.5*flux_north_west +
0.5*flux_north_east + 0.5*flux_south_west + 0.5*flux_south_east)
return imgout
#new_img = anisodiff(face_img, niter=1, kappa=20, gamma=0.1, step=(1., 1.), option=1)
face_img = face_img.astype(float) / 255;
#new_img = anisodiff(face_img, niter=5, kappa=0.1, gamma=0.125, step=(1., 1.), option=1)
new_img = anisodiff8neighbors(face_img, niter=5, kappa=0.1, gamma=0.125)
filterSize =(3, 3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
filterSize)
input_image = new_img
first_tophat_img = cv2.morphologyEx(input_image,
cv2.MORPH_TOPHAT,
kernel)
# Use percentile 95 (of image) as threshold instead of fixed threshold 200
t = np.percentile(first_tophat_img, 95)
ret, thresh1 = cv2.threshold(first_tophat_img, t, 255, cv2.THRESH_BINARY)
cv2.imshow('thresh1', thresh1)
filterSize =(3, 3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
filterSize)
second_tophat_img = cv2.morphologyEx(input_image,
cv2.MORPH_BLACKHAT,
kernel)
#ret, thresh1 = cv2.threshold(second_tophat_img, 200, 255, cv2.THRESH_BINARY)
# Use percentile 95 (of image) as threshold instead of fixed threshold 200
t = np.percentile(second_tophat_img, 95)
ret, thresh2 = cv2.threshold(second_tophat_img, t, 255, cv2.THRESH_BINARY)
cv2.imshow('thresh2', thresh2)
lo, hi = np.percentile(first_tophat_img, (1, 99))
first_tophat_img_stretched = (first_tophat_img.astype(float) - lo) / (hi-lo) # Apply linear "stretch" - lo goes to 0, and hi goes to 1
cv2.imshow('first_tophat_img_stretched', first_tophat_img_stretched)
cv2.waitKey()
cv2.destroyAllWindows()
Result:
thresh1:
thresh2:
first_tophat_img_stretched:
I am trying to learn OpenCV in order to improve a script I wrote for comparing engineering drawings. I am using the code (see below) found on this tutorial but I am having zero success with it. In the tutorial the author uses the example of a blank form for the reference image and a photo of the completed form as the image to align. My situation is very similar because I am attempting to use a blank drawing title block as my reference image and a scanned image of a drawing as my image to align.
My goal is to use OpenCV to clean up the scanned engineering drawings so that they are aligned properly but no matter what I try in the MAX_FEATURES and GOOD_MATCH_PERCENT parameters, I get an image that looks like a black and white star burst. Also, when I review the "matches.jpg" file generated by the script, it appears that there are no correct matches. I have tried multiple drawings and I get the same results.
Can anyone see a reason why this script would not work in the way I am trying to use it?
from __future__ import print_function
import cv2
import numpy as np
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
def alignImages(im1, im2):
# Convert images to grayscale
im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
# Detect ORB features and compute descriptors.
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)
# Match features.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
matches = matcher.match(descriptors1, descriptors2, None)
# Sort matches by score
matches.sort(key=lambda x: x.distance, reverse=False)
# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]
# Draw top matches
imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
cv2.imwrite("matches.jpg", imMatches)
# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
points1[i, :] = keypoints1[match.queryIdx].pt
points2[i, :] = keypoints2[match.trainIdx].pt
# Find homography
h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
# Use homography
height, width, channels = im2.shape
im1Reg = cv2.warpPerspective(im1, h, (width, height))
return im1Reg, h
if __name__ == '__main__':
# Read reference image
refFilename = "form.jpg"
print("Reading reference image : ", refFilename)
imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
# Read image to be aligned
imFilename = "scanned-form.jpg"
print("Reading image to align : ", imFilename);
im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
print("Aligning images ...")
# Registered image will be resotred in imReg.
# The estimated homography will be stored in h.
imReg, h = alignImages(im, imReference)
# Write aligned image to disk.
outFilename = "aligned.jpg"
print("Saving aligned image : ", outFilename);
cv2.imwrite(outFilename, imReg)
# Print estimated homography
print("Estimated homography : \n", h)
Template Image:
Image to Align:
Expected output Image:
Here is one way in Python/OpenCV using a Rigid Affine Transformation (scale, rotation and translation only - no skew or perspective) to warp one image to match the other. It uses findTransformECC() -- Enhanced Correlation Coefficient Maximization) -- to get the rotation matrix and then uses warpAffine to do the rigid warping.
Template:
Image to be warped:
import cv2
import numpy as np
import math
import sys
# Get the image files from the command line arguments
# These are full paths to the images
# image2 will be warped to match image1
# argv[0] is name of script
image1 = sys.argv[1]
image2 = sys.argv[2]
outfile = sys.argv[3]
# Read the images to be aligned
# im2 is to be warped to match im1
im1 = cv2.imread(image1);
im2 = cv2.imread(image2);
# Convert images to grayscale for computing the rotation via ECC method
im1_gray = cv2.cvtColor(im1,cv2.COLOR_BGR2GRAY)
im2_gray = cv2.cvtColor(im2,cv2.COLOR_BGR2GRAY)
# Find size of image1
sz = im1.shape
# Define the motion model - euclidean is rigid (SRT)
warp_mode = cv2.MOTION_EUCLIDEAN
# Define 2x3 matrix and initialize the matrix to identity matrix I (eye)
warp_matrix = np.eye(2, 3, dtype=np.float32)
# Specify the number of iterations.
number_of_iterations = 5000;
# Specify the threshold of the increment
# in the correlation coefficient between two iterations
termination_eps = 1e-3;
# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
# Run the ECC algorithm. The results are stored in warp_matrix.
(cc, warp_matrix) = cv2.findTransformECC (im1_gray, im2_gray, warp_matrix, warp_mode, criteria, None, 1)
# Warp im2 using affine
im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1],sz[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP);
# write output
cv2.imwrite(outfile, im2_aligned)
# Print rotation angle
row1_col0 = warp_matrix[0,1]
angle = math.degrees(math.asin(row1_col0))
print(angle)
Result:
Resulting Angle of Rotation (in deg):
-0.3102187026194794
Note, you can change the background color in the affineWarp to white if desired.
Also make the termination epsilon smaller by an order of magnitude or two for more accuracy, but longer processing times.
The other Rigid Affine approach that I mentioned in my comments earlier is to use ORB feature matching, filter the key points, then use estimateAffinePartial2D() to get the rigid affine matrix. Then use that to warp the image. For large angles this seems to me to be more reliable than the ECC method. But the ECC method seems more accurate for small rotations.
import cv2
import numpy as np
import math
import sys
MAX_FEATURES = 10000
GOOD_MATCH_PERCENT = 0.15
DIFFY_THRESH = 2
# Get the image files from the command line arguments
# These are full paths to the images
# image[2] will be warped to match image[1]
# argv[0] is name of script
file1 = sys.argv[1]
file2 = sys.argv[2]
outFile = sys.argv[3]
# Read image1
image1 = cv2.imread(file1, cv2.IMREAD_COLOR)
# Read image2 to be warped to match image1
image2 = cv2.imread(file2, cv2.IMREAD_COLOR)
# Convert images to grayscale
image1Gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
image2Gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
# Detect ORB features and compute descriptors.
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(image1Gray, None)
keypoints2, descriptors2 = orb.detectAndCompute(image2Gray, None)
# Match features.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = matcher.match(descriptors1, descriptors2, None)
# Sort matches by score
matches.sort(key=lambda x: x.distance, reverse=False)
# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]
#print('numgood',numGoodMatches)
# Extract location of good matches and filter by diffy if rotation is small
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
points1[i, :] = keypoints1[match.queryIdx].pt
points2[i, :] = keypoints2[match.trainIdx].pt
# initialize empty arrays for newpoints1 and newpoints2 and mask
newpoints1 = np.empty(shape=[0, 2], dtype=np.float32)
newpoints2 = np.empty(shape=[0, 2], dtype=np.float32)
matches_Mask = [0] * len(matches)
count=0
for i in range(len(matches)):
pt1 = points1[i]
pt2 = points2[i]
pt1x, pt1y = zip(*[pt1])
pt2x, pt2y = zip(*[pt2])
diffy = np.float32( np.float32(pt2y) - np.float32(pt1y) )
if abs(diffy) < DIFFY_THRESH:
newpoints1 = np.append(newpoints1, [pt1], axis=0).astype(np.uint8)
newpoints2 = np.append(newpoints2, [pt2], axis=0).astype(np.uint8)
matches_Mask[i]=1
count += 1
# Find Affine Transformation
# note swap of order of newpoints here so that image2 is warped to match image1
m, inliers = cv2.estimateAffinePartial2D(newpoints2,newpoints1)
# Use affine transform to warp im2 to match im1
height, width, channels = image1.shape
image2Reg = cv2.warpAffine(image2, m, (width, height))
# Write aligned image to disk.
cv2.imwrite(outFile, image2Reg)
# Print angle
row1_col0 = m[1,0]
print('row1_col0:',row1_col0)
angle = math.degrees(math.asin(row1_col0))
print('angle', angle)
Result Image:
Result Rotation Angle:
-0.6123936361765413
After some trial and error I determined that I don't need to find a homography in order to align my images properly. Since my images only need to be scaled and rotated slightly, my best option is to find the outer most points of the drawing title block and align one image to the other with a transform.
My approach is to use the Harris corner finding function to find all of the corners on the drawing, then do a simple calculation to find the points that are the shortest distance to the corners of the drawing canvas (these are the outside corners of the drawing title block). I then take 3 of the points (top left, top right, and bottom left) and use a transform to scale/rotate one drawing to the other.
Below is the code that I used:
import cv2
import numpy as np
import math
img1 = cv2.imread('reference.jpg')
img2 = cv2.imread('to-be-aligned.jpg')
#Find the corner points of img1
h1,w1,c=img1.shape
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray1 = np.float32(gray1)
dst1 = cv2.cornerHarris(gray1,5,3,0.04)
ret1, dst1 = cv2.threshold(dst1,0.1*dst1.max(),255,0)
dst1 = np.uint8(dst1)
ret1, labels1, stats1, centroids1 = cv2.connectedComponentsWithStats(dst1)
criteria1 = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
corners1 = cv2.cornerSubPix(gray1,np.float32(centroids1),(5,5),(-1,-1),criteria1)
#Find the corner points of img2
h2,w2,c=img2.shape
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
gray2 = np.float32(gray2)
dst2 = cv2.cornerHarris(gray2,5,3,0.04)
ret2, dst2 = cv2.threshold(dst2,0.1*dst2.max(),255,0)
dst2 = np.uint8(dst2)
ret2, labels2, stats2, centroids2 = cv2.connectedComponentsWithStats(dst2)
criteria2 = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
corners2 = cv2.cornerSubPix(gray2,np.float32(centroids2),(5,5),(-1,-1),criteria2)
#Find the top left, top right, and bottom left outer corners of the drawing frame for img1
a1=[0,0]
b1=[w1,0]
c1=[0,h1]
a1_dist=[]
b1_dist=[]
c1_dist=[]
for i in corners1:
temp_a1=math.sqrt((i[0]-a1[0])**2+(i[1]-a1[1])**2)
temp_b1=math.sqrt((i[0]-b1[0])**2+(i[1]-b1[1])**2)
temp_c1=math.sqrt((i[0]-c1[0])**2+(i[1]-c1[1])**2)
a1_dist.append(temp_a1)
b1_dist.append(temp_b1)
c1_dist.append(temp_c1)
print("Image #1 (reference):")
print("Top Left:")
print(corners1[a1_dist.index(min(a1_dist))])
print("Top Right:")
print(corners1[b1_dist.index(min(b1_dist))])
print("Bottom Left:")
print(corners1[c1_dist.index(min(c1_dist))])
#Find the top left, top right, and bottom left outer corners of the drawing frame for img2
a2=[0,0]
b2=[w2,0]
c2=[0,h2]
a2_dist=[]
b2_dist=[]
c2_dist=[]
for i in corners2:
temp_a2=math.sqrt((i[0]-a2[0])**2+(i[1]-a2[1])**2)
temp_b2=math.sqrt((i[0]-b2[0])**2+(i[1]-b2[1])**2)
temp_c2=math.sqrt((i[0]-c2[0])**2+(i[1]-c2[1])**2)
a2_dist.append(temp_a2)
b2_dist.append(temp_b2)
c2_dist.append(temp_c2)
print("Image #2 (image to align):")
print("Top Left:")
print(corners2[a2_dist.index(min(a2_dist))])
print("Top Right:")
print(corners2[b2_dist.index(min(b2_dist))])
print("Bottom Left:")
print(corners2[c2_dist.index(min(c2_dist))])
#Create the points for img1
point1 = np.zeros((3,2), dtype=np.float32)
point1[0][0]=corners1[a1_dist.index(min(a1_dist))][0]
point1[0][1]=corners1[a1_dist.index(min(a1_dist))][1]
point1[1][0]=corners1[b1_dist.index(min(b1_dist))][0]
point1[1][1]=corners1[b1_dist.index(min(b1_dist))][1]
point1[2][0]=corners1[c1_dist.index(min(c1_dist))][0]
point1[2][1]=corners1[c1_dist.index(min(c1_dist))][1]
#Create the points for img2
point2 = np.zeros((3,2), dtype=np.float32)
point2[0][0]=corners2[a2_dist.index(min(a2_dist))][0]
point2[0][1]=corners2[a2_dist.index(min(a2_dist))][1]
point2[1][0]=corners2[b2_dist.index(min(b2_dist))][0]
point2[1][1]=corners2[b2_dist.index(min(b2_dist))][1]
point2[2][0]=corners2[c2_dist.index(min(c2_dist))][0]
point2[2][1]=corners2[c2_dist.index(min(c2_dist))][1]
#Make sure points look ok:
print(point1)
print(point2)
#Transform the image
m = cv2.getAffineTransform(point2,point1)
image2Reg = cv2.warpAffine(img2, m, (w1, h1), borderValue=(255,255,255))
#Highlight found points in red:
img1[dst1>0.1*dst1.max()]=[0,0,255]
img2[dst2>0.1*dst2.max()]=[0,0,255]
#Output the images:
cv2.imwrite("output-img1-harris.jpg", img1)
cv2.imwrite("output-img2-harris.jpg", img2)
cv2.imwrite("output-harris-transform.jpg",image2Reg)
My goal is to
deskew a scanned image such that its text is perfectly placed on top of the text of the original image. (subtracting the images would remove the text)
prevent any loss of information on the deskewed image
I use SURF features to feed the findHomography function. Then I use the warpPerspective function to transform the scanned image. The resulting image almost perfectly fits onto the original image.
However, the scanned image has content on its corners which is lost after the transformation because the text in the scanned image is smaller and has to be scaled up.
Deskewing an image that has slightly smaller text
Information at the borders of the image is cropped
To avoid any loss of information, I convert the image to RGBA and set the borderValue parameter in warpPerspective such that any added background has transparent color. I remove the transparent pixels after the transformation again. This procedure works but seems highly inefficient.
Question: I'm looking for a working code example (C++ or Python) that shows how to do this more efficiently.
Image has been deskewed and content is preserved. However, the text of the two pictures isn't on top of each other anymore
Text position is off because the warped image has a different size than what warpPerspective expected
After transforming the image the problem is that the two images aren't aligned anymore because the dimensions of the transformed image are different than what the warpPerspective method expected.
Question: How can I realign the two images? It would be great if there was a way to do incorporate this into the previous step already. Again, a working code example would be very helpful.
Here's the code that I have so far. It deskews the image while preserving its content, however, the text is not on top of the original text anymore.
import math
import cv2
import numpy as np
class Deskewer:
def __init__(self, hessianTreshold = 5000):
self.__hessianThresh = hessianTreshold
self.imgOrigGray, self.imgSkewed, self.imgSkewedGray = None, None, None
def start(self, imgOrig, imgSkewed):
self.imgOrigGray = cv2.cvtColor(imgOrig, cv2.COLOR_BGR2GRAY)
self.imgSkewed = imgSkewed # final transformation will be performed on color image
self.imgSkewedGray = cv2.cvtColor(imgSkewed, cv2.COLOR_BGR2GRAY) # prior calculation is faster on gray
kp1, des1, kp2, des2 = self.__detectFeatures()
goodMatches = self.__flannMatch(des1, des2)
MIN_MATCH_COUNT = 10
M = None
if len(goodMatches) > MIN_MATCH_COUNT:
M, _ = self.__findHomography(kp1, kp2, goodMatches)
else:
print("Not enough matches are found - %d/%d" % (len(goodMatches), MIN_MATCH_COUNT))
return
return self.__deskew(M)
def __detectFeatures(self):
surf = cv2.xfeatures2d.SURF_create(self.__hessianThresh)
kp1, des1 = surf.detectAndCompute(self.imgOrigGray, None)
kp2, des2 = surf.detectAndCompute(self.imgSkewedGray, None)
return kp1, des1, kp2, des2
def __flannMatch(self, des1, des2):
global matches
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m, n in matches:
if m.distance < 0.7 * n.distance:
good.append(m)
return good
def __findHomography(self, kp1, kp2, goodMatches):
src_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches
]).reshape(-1, 1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches
]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
matchesMask = mask.ravel().tolist()
i = matchesMask.index(1)
# TODO: This is a matching point before the warpPerspective call. How can I calculate this point AFTER the call?
print("POINTS: object(", src_pts[i][0][1], ",", src_pts[i][0][0], ") - scene(", dst_pts[i][0][1], ",", dst_pts[i][0][0], ")")
return M, mask
def getComponents(self, M):
# ((translationx, translationy), rotation, (scalex, scaley), shear)
a = M[0, 0]
b = M[0, 1]
c = M[0, 2]
d = M[1, 0]
e = M[1, 1]
f = M[1, 2]
p = math.sqrt(a * a + b * b)
r = (a * e - b * d) / (p)
q = (a * d + b * e) / (a * e - b * d)
translation = (c, f)
scale = (p, r) # p = x-Axis, r = y-Axis
shear = q
theta = math.atan2(b, a)
degrees = math.atan2(b, a) * 180 / math.pi
return (translation, theta, degrees, scale, shear)
def __deskew(self, M):
# this info might come in handy here for calculating the dsize of warpPerspective?
translation, theta, degrees, scale, shear = self.getComponents(M)
# Alpha channel allows me to set unique feature to pixels that are created during warpPerspective
imSkewedAlpha = cv2.cvtColor(self.imgSkewed, cv2.COLOR_BGR2BGRA)
# These sizes have been randomly choosen to make sure that all the contents fit in the new canvas
height = 5000
width = 5000
shift = -500
M2 = np.array([[1, 0, shift],
[0, 1, shift],
[0, 0, 1]])
M3 = np.dot(M, M2)
# TODO: How can I calculate the dsize argument?
# Newly created pixels are set to transparent
im_out = cv2.warpPerspective(imSkewedAlpha, M3,
(height, width), flags=cv2.WARP_INVERSE_MAP, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 0, 0, 0))
# http://codereview.stackexchange.com/a/132933
# Mask of non-black pixels (assuming image has a single channel).
mask = im_out[:, :, 3] == 255
# Coordinates of non-black pixels.
coords = np.argwhere(mask)
# Bounding box of non-black pixels.
x0, y0 = coords.min(axis=0)
x1, y1 = coords.max(axis=0) + 1 # slices are exclusive at the top
# Get the contents of the bounding box.
cropped = im_out[x0:x1, y0:y1]
# TODO: The warped image needs to align nicely on the original image
return cropped
origImg = cv2.imread("Letter.png")
skewedImg = cv2.imread("A4.png")
deskewed = Deskewer().start(origImg, skewedImg)
cv2.imshow("Original", origImg)
cv2.imshow("Deskewed", deskewed)
cv2.waitKey(0)
Original and skewed image (with additional content) for testing