How to get more accurate rotations from decomposing a homography matrix - python

I am trying to create a program that can calculate the rotation of a plane from two images in Python using OpenCV. I am doing this by finding the homography matrix that represents the transformation between the two views, and decomposing it with the intrinsic camera matrix using the decomposeHomographyMat function in OpenCV.
I tested the accuracy using Blender by creating a plane with a QR code on it and rotating it by known values, as seen here, where the plane has been rotated by 15, 30, 15 in XYZ Euler coordinates, although I want the final program to take pictures of a plane being moved in real life.
The intrinsic camera matrix was found in Blender using this technique, and also by running camera calibration in Blender: placing a checkerboard in the scene and taking renders from multiple angles and translations.
However, when I run the code, the ZYX Euler outputs I get are [27.9, -25.4, -26.31] instead of [15, -30, -15], which is not accurate. Some other examples of calculated output versus expected values are below, to give an idea of the accuracy of the code:
Expected - [0 -30 0]
Calculated - [0.82 -34.51 -1.91]
Expected - [0 0 15]
Calculated - [ 0 0 -15.02]
Expected - [15 0 15]
Calculated - [ 16.23 3.76 -13.76]
I was wondering whether there is any way to increase the accuracy of the calculated rotation matrices, or whether this is the best accuracy I can get. If it is, what alternatives are there for calculating the rotation of a plane about three axes from images (adding extra cameras is also an option)?
Any help would be much appreciated!
The code I am using is shown below:
#Import modules
import cv2
import numpy as np
from matplotlib import pyplot as plt
import glob
import math
########################################################################
#Import pictures
img1 = cv2.imread("top.png", cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread("150015.png", cv2.IMREAD_GRAYSCALE)
#Feature Extraction
MIN_MATCH_COUNT = 10
sift = cv2.xfeatures2d.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1,des2,k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m,n in matches:
    if m.distance < 0.80*n.distance:
        good.append(m)
if len(good)>MIN_MATCH_COUNT:
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
    #Finds homography matrix
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,1)
    matchesMask = mask.ravel().tolist()
    h,w = img1.shape
    pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
    dst = cv2.perspectiveTransform(pts,M)
    img2 = cv2.polylines(img2,[np.int32(dst)],True,255,3, cv2.LINE_AA)
else:
    print("Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT))
    matchesMask = None
draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                   singlePointColor = None,
                   matchesMask = matchesMask, # draw only inliers
                   flags = 2)
img3 = cv2.drawMatches(img1,kp1,img2,kp2,good,None,**draw_params)
plt.imshow(img3, 'gray'),plt.show()
#Camera calibration matrix
K = ((3,3))
K = np.zeros(K)
#Camera calibration matrix from blender python script
#K = np.matrix('1181.2500 0 540; 0 2100 540; 0 0 1')
#Camera calibration matrix from importing checkerboard into blender
K = np.matrix('1307.68697 0 600.618354; 0 1309.66779 605.481488; 0 0 1')
#Homography matrix is decomposed
num, Rs, Ts, Ns = cv2.decomposeHomographyMat(M, K)
# Checks if a matrix is a valid rotation matrix.
def isRotationMatrix(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype = R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6
# Calculates rotation matrix to euler angles
# The result is the same as MATLAB except the order
# of the euler angles ( x and z are swapped ).
def rotationMatrixToEulerAngles(R):
    assert(isRotationMatrix(R))
    sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2,1], R[2,2])
        y = math.atan2(-R[2,0], sy)
        z = math.atan2(R[1,0], R[0,0])
    else:
        x = math.atan2(-R[1,2], R[1,1])
        y = math.atan2(-R[2,0], sy)
        z = 0
    return np.array([x, y, z])
#Convert the rotation matrix solutions into XYZ Euler angles
for i in range(num):
    R = Rs[i]
    angles = rotationMatrixToEulerAngles(R)
    x = np.degrees(angles[0])
    y = np.degrees(angles[1])
    z = np.degrees(angles[2])
    anglesDeg = np.array([x,y,z])
    print(anglesDeg)
The images I have generated from blender are as follows:
top.png (0x, 0y, 0z)
003000.png (0x, 30y, 0z)
150015.png (15x, 0y, 15z)
153000.png (15x, 30y, 0z)
153015.png (15x, 30y, 15z)
And here is an image with keypoints matching for the 153015.png comparison

Euler angles are not unique. Many different sets of Euler angles can map to the same rotation. Try using the distance between rotation vectors as the criterion instead.
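For example, here is a minimal sketch of that idea. It assumes R_true is a ground-truth rotation matrix you build yourself from the known Blender angles, and that num and Rs come from the decomposeHomographyMat call in the question's code; each candidate is scored by the angle of the relative rotation (the norm of its Rodrigues vector), and the closest one is kept.
# Sketch: score each candidate rotation from decomposeHomographyMat by the
# geodesic angle to a known ground-truth rotation R_true (an assumed variable
# here, e.g. built from the Blender Euler angles), instead of comparing Euler angles.
import cv2
import numpy as np

def rotation_angle_between(R_a, R_b):
    # Relative rotation taking R_a to R_b; the norm of its Rodrigues vector
    # is the rotation angle between the two orientations, in radians.
    rvec, _ = cv2.Rodrigues(R_b @ R_a.T)
    return np.linalg.norm(rvec)

# num and Rs come from cv2.decomposeHomographyMat as in the question's code.
best = min(range(num), key=lambda i: rotation_angle_between(Rs[i], R_true))
print("Closest solution:", best,
      "error (deg):", np.degrees(rotation_angle_between(Rs[best], R_true)))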

Related

Getting a 3D map on Meshlab with my disparity map

I have a program that allows me to find the disparity map from 2 images taken by two non-stereo-calibrated cameras. The disparity map looks good, but when I add a piece of code to get a 3D map via Meshlab, I get scattered points (see photo result_clou.png).
In other topics, I saw that I had to change the type and divide the disparity map by 16. I tried with a new map called disparity_SGBM2 as follows: disparity_SGBM2 = disparity_SGBM.astype(np.float32) / 16.0
I took a screenshot of the .ply with its error message (see result_disparity_SGBM2.png).
Does anyone have an idea how to get me unstuck, please?
I have also attached my Python program below (because I can't send a Python file) and the images used with the program (clou-l.png and clou-r.png).
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
# Read both images and convert to grayscale
img1 = cv.imread('clou-l.png', cv.IMREAD_GRAYSCALE)
img2 = cv.imread('clou-r.png', cv.IMREAD_GRAYSCALE)
# ------------------------------------------------------------
# PREPROCESSING
# Compare unprocessed images
#fig, axes = plt.subplots(1, 2, figsize=(15, 10))
#axes[0].imshow(img1, cmap="gray")
#axes[1].imshow(img2, cmap="gray")
#axes[0].axhline(250)
#axes[1].axhline(250)
#axes[0].axhline(450)
#axes[1].axhline(450)
#plt.suptitle("Original images")
#plt.savefig("original_images.png")
#plt.show()
# 1. Detect keypoints and their descriptors
# Based on: https://docs.opencv.org/master/dc/dc3/tutorial_py_matcher.html
# Initiate SIFT detector
sift = cv.SIFT_create()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
# Visualize keypoints
imgSift = cv.drawKeypoints(
    img1, kp1, None, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
#cv.imshow("SIFT Keypoints", imgSift)
#cv.imwrite("sift_keypoints.png", imgSift)
# Match keypoints in both images
# Based on: https://docs.opencv.org/master/dc/dc3/tutorial_py_matcher.html
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50) # or pass empty dictionary
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)
# Keep good matches: calculate distinctive image features
# Lowe, D.G. Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision 60, 91–110 (2004). https://doi.org/10.1023/B:VISI.0000029664.99615.94
# https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf
matchesMask = [[0, 0] for i in range(len(matches))]
good = []
pts1 = []
pts2 = []
for i, (m, n) in enumerate(matches):
    if m.distance < 0.7*n.distance:
        # Keep this keypoint pair
        matchesMask[i] = [1, 0]
        good.append(m)
        pts2.append(kp2[m.trainIdx].pt)
        pts1.append(kp1[m.queryIdx].pt)
# Draw the keypoint matches between both pictures
# Still based on: https://docs.opencv.org/master/dc/dc3/tutorial_py_matcher.html
draw_params = dict(matchColor=(0, 255, 0),
                   singlePointColor=(255, 0, 0),
                   matchesMask=matchesMask[300:500],
                   flags=cv.DrawMatchesFlags_DEFAULT)
keypoint_matches = cv.drawMatchesKnn(
    img1, kp1, img2, kp2, matches[300:500], None, **draw_params)
#cv.imshow("Keypoint matches", keypoint_matches)
#cv.imwrite("keypoint_matches.png", keypoint_matches)
# ------------------------------------------------------------
# STEREO RECTIFICATION
# Calculate the fundamental matrix for the cameras
# https://docs.opencv.org/master/da/de9/tutorial_py_epipolar_geometry.html
pts1 = np.int32(pts1)
pts2 = np.int32(pts2)
fundamental_matrix, inliers = cv.findFundamentalMat(pts1, pts2, cv.FM_RANSAC)
# We select only inlier points
pts1 = pts1[inliers.ravel() == 1]
pts2 = pts2[inliers.ravel() == 1]
# Visualize epilines
# Adapted from: https://docs.opencv.org/master/da/de9/tutorial_py_epipolar_geometry.html
def drawlines(img1src, img2src, lines, pts1src, pts2src):
    ''' img1 - image on which we draw the epilines for the points in img2
        lines - corresponding epilines '''
    r, c = img1src.shape
    img1color = cv.cvtColor(img1src, cv.COLOR_GRAY2BGR)
    img2color = cv.cvtColor(img2src, cv.COLOR_GRAY2BGR)
    # Edit: use the same random seed so that two images are comparable!
    np.random.seed(0)
    for r, pt1, pt2 in zip(lines, pts1src, pts2src):
        color = tuple(np.random.randint(0, 255, 3).tolist())
        x0, y0 = map(int, [0, -r[2]/r[1]])
        x1, y1 = map(int, [c, -(r[2]+r[0]*c)/r[1]])
        img1color = cv.line(img1color, (x0, y0), (x1, y1), color, 1)
        img1color = cv.circle(img1color, tuple(pt1), 5, color, -1)
        img2color = cv.circle(img2color, tuple(pt2), 5, color, -1)
    return img1color, img2color
# Find epilines corresponding to points in right image (second image) and
# drawing its lines on left image
lines1 = cv.computeCorrespondEpilines(
    pts2.reshape(-1, 1, 2), 2, fundamental_matrix)
lines1 = lines1.reshape(-1, 3)
img5, img6 = drawlines(img1, img2, lines1, pts1, pts2)
# Find epilines corresponding to points in left image (first image) and
# drawing its lines on right image
lines2 = cv.computeCorrespondEpilines(
    pts1.reshape(-1, 1, 2), 1, fundamental_matrix)
lines2 = lines2.reshape(-1, 3)
img3, img4 = drawlines(img2, img1, lines2, pts2, pts1)
#plt.subplot(121), plt.imshow(img5)
#plt.subplot(122), plt.imshow(img3)
#plt.suptitle("Epilines in both images")
#plt.savefig("epilines.png")
#plt.show()
# Stereo rectification (uncalibrated variant)
# Adapted from: https://stackoverflow.com/a/62607343
h1, w1 = img1.shape
h2, w2 = img2.shape
_, H1, H2 = cv.stereoRectifyUncalibrated(
    np.float32(pts1), np.float32(pts2), fundamental_matrix, imgSize=(w1, h1)
)
# Rectify (undistort) the images and save them
# Adapted from: https://stackoverflow.com/a/62607343
img1_rectified = cv.warpPerspective(img1, H1, (w1, h1))
img2_rectified = cv.warpPerspective(img2, H2, (w2, h2))
cv.imwrite("rectified_1.png", img1_rectified)
cv.imwrite("rectified_2.png", img2_rectified)
# Draw the rectified images
#fig, axes = plt.subplots(1, 2, figsize=(15, 10))
#axes[0].imshow(img1_rectified, cmap="gray")
#axes[1].imshow(img2_rectified, cmap="gray")
#axes[0].axhline(250)
#axes[1].axhline(250)
#axes[0].axhline(450)
#axes[1].axhline(450)
#plt.suptitle("Rectified images")
#plt.savefig("rectified_images.png")
#plt.show()
# ------------------------------------------------------------
# CALCULATE DISPARITY (DEPTH MAP)
# Adapted from: https://github.com/opencv/opencv/blob/master/samples/python/stereo_match.py
# and: https://docs.opencv.org/master/dd/d53/tutorial_py_depthmap.html
# StereoSGBM Parameter explanations:
# https://docs.opencv.org/4.5.0/d2/d85/classcv_1_1StereoSGBM.html
# Matched block size. It must be an odd number >=1 . Normally, it should be somewhere in the 3..11 range.
block_size = 11
min_disp = -128
max_disp = 128
# Maximum disparity minus minimum disparity. The value is always greater than zero.
# In the current implementation, this parameter must be divisible by 16.
num_disp = max_disp - min_disp
# Margin in percentage by which the best (minimum) computed cost function value should "win" the second best value to consider the found match correct.
# Normally, a value within the 5-15 range is good enough
uniquenessRatio = 5
# Maximum size of smooth disparity regions to consider their noise speckles and invalidate.
# Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the 50-200 range.
speckleWindowSize = 200
# Maximum disparity variation within each connected component.
# If you do speckle filtering, set the parameter to a positive value, it will be implicitly multiplied by 16.
# Normally, 1 or 2 is good enough.
speckleRange = 2
disp12MaxDiff = 0
stereo = cv.StereoSGBM_create(
    minDisparity=min_disp,
    numDisparities=num_disp,
    blockSize=block_size,
    uniquenessRatio=uniquenessRatio,
    speckleWindowSize=speckleWindowSize,
    speckleRange=speckleRange,
    disp12MaxDiff=disp12MaxDiff,
    P1=8 * 1 * block_size * block_size,
    P2=32 * 1 * block_size * block_size,
)
disparity_SGBM = stereo.compute(img1_rectified, img2_rectified)
#disparity_SGBM2 = disparity_SGBM.astype(np.float32) / 16.0
#plt.imshow(disparity_SGBM, cmap='plasma')
#plt.colorbar()
#plt.show()
#Normalize the values to a range from 0..255 for a grayscale image
disparity_SGBM = cv.normalize(disparity_SGBM, disparity_SGBM, alpha=255,
                              beta=0, norm_type=cv.NORM_MINMAX)
disparity_SGBM = np.uint8(disparity_SGBM)
#cv.imshow("Disparity", disparity_SGBM)
#cv.imwrite("disparity_SGBM_norm.png", disparity_SGBM)
#cv.waitKey()
#cv.destroyAllWindows()
# ---------------------------------------------------------------
"""That's the new part of the program for reconstructing the 3D map from the disparity map.
For seeing the 3D result, you need to open the clou.ply folder with Meshlab"""
def create_output(vertices, colors, filename):
    colors = colors.reshape(-1, 3)
    vertices = np.hstack([vertices.reshape(-1, 3), colors])
    ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
    with open(filename, 'w') as f:
        f.write(ply_header % dict(vert_num=len(vertices)))
        np.savetxt(f, vertices, '%f %f %f %d %d %d')
print("\nGenerating the 3D map ...")
h, w = img1.shape[:2]
focal_length = 0.8*w
#Perspective transformation matrix
Q = np.float32([[1, 0, 0, -w/2.0],
                [0,-1, 0,  h/2.0],
                [0, 0, 0, -focal_length],
                [0, 0, 1,  0]])
output_file = 'clou' + '.ply'
points_3D = cv.reprojectImageTo3D(disparity_SGBM, Q, handleMissingValues=0)
colors = cv.cvtColor(img1, cv.COLOR_GRAY2RGB)  # img1 was loaded as grayscale
mask_map = disparity_SGBM > disparity_SGBM.min()
output_points = points_3D[mask_map]
output_colors = colors[mask_map]
print("\nCreating the output file ...\n")
create_output(output_points, output_colors, output_file)
clou-l.png
clou-r.png
result_clou.png
result_disparity_SGBM2.png
I think the problem is that you're using very shiny objects, which are typically hard to match in stereo images and photogrammetry. You could try moving the illuminating lights, possibly to a more oblique angle, or fit polarizers over each lens and then illuminate with polarized light. Another technique you can employ is to cover the subject in white powder to create a matte/diffuse surface, which can work better.
I've used DMAG (http://3dstereophoto.blogspot.com/2013/04/depth-map-automatic-generator-dmag.html) to produce depth maps (with varying degrees of success), but it can produce intermediate files that first show the features it can find, and then, in another step, which features match between images. I've run your script to produce the rectified images for an epipolar projection, then ran those through DMAG. It shows very few matches (Features L, Features R, Matches). With so few matches you're not going to produce much of a mesh.

Image stitching problem using Python and OpenCV

I got output like below after stitching the result of 24 stitched images to the next (25th) image. Before that, the stitching was good.
Is anyone aware of why/when stitching output comes out like this? What could cause output like that? What may be the reason for it?
The stitching code follows the standard stitching steps: finding keypoints and descriptors, matching points, calculating the homography, and then warping the images. But I do not understand why that output is produced.
The core part of the stitching is below:
detector = cv2.SIFT_create(400)
# find the keypoints and descriptors with SIFT
gray1 = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
ret1, mask1 = cv2.threshold(gray1,1,255,cv2.THRESH_BINARY)
kp1, descriptors1 = detector.detectAndCompute(gray1,mask1)
gray2 = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
ret2, mask2 = cv2.threshold(gray2,1,255,cv2.THRESH_BINARY)
kp2, descriptors2 = detector.detectAndCompute(gray2,mask2)
keypoints1Im = cv2.drawKeypoints(image1, kp1, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
keypoints2Im = cv2.drawKeypoints(image2, kp2, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
# BFMatcher with default params
matcher = cv2.BFMatcher()
matches = matcher.knnMatch(descriptors2,descriptors1, k=2)
# Apply ratio test
good = []
for m, n in matches:
    if m.distance < 0.75 * n.distance:
        good.append(m)
print (str(len(good)) + " Matches were Found")
if len(good) <= 10:
    return image1
matches = copy.copy(good)
matchDrawing = util.drawMatches(gray2,kp2,gray1,kp1,matches)
#Aligning the images
src_pts = np.float32([ kp2[m.queryIdx].pt for m in matches ]).reshape(-1,1,2)
dst_pts = np.float32([ kp1[m.trainIdx].pt for m in matches ]).reshape(-1,1,2)
H = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)[0]
h1,w1 = image1.shape[:2]
h2,w2 = image2.shape[:2]
pts1 = np.float32([[0,0],[0,h1],[w1,h1],[w1,0]]).reshape(-1,1,2)
pts2 = np.float32([[0,0],[0,h2],[w2,h2],[w2,0]]).reshape(-1,1,2)
pts2_ = cv2.perspectiveTransform(pts2, H)
pts = np.concatenate((pts1, pts2_), axis=0)
# print("pts:", pts)
[xmin, ymin] = np.int32(pts.min(axis=0).ravel() - 0.5)
[xmax, ymax] = np.int32(pts.max(axis=0).ravel() + 0.5)
t = [-xmin,-ymin]
Ht = np.array([[1,0,t[0]],[0,1,t[1]],[0,0,1]]) # translate
result = cv2.warpPerspective(image2, Ht.dot(H), (xmax-xmin, ymax-ymin))
resizedB = np.zeros((result.shape[0], result.shape[1], 3), np.uint8)
resizedB[t[1]:t[1]+h1,t[0]:w1+t[0]] = image1
# Now create a mask of logo and create its inverse mask also
img2gray = cv2.cvtColor(result,cv2.COLOR_BGR2GRAY)
ret, mask = cv2.threshold(img2gray, 0, 255, cv2.THRESH_BINARY)
kernel = np.ones((5,5),np.uint8)
k1 = (kernel == 1).astype('uint8')
mask = cv2.erode(mask, k1, borderType=cv2.BORDER_CONSTANT)
mask_inv = cv2.bitwise_not(mask)
difference = cv2.bitwise_or(resizedB, resizedB, mask=mask_inv)
result2 = cv2.bitwise_and(result, result, mask=mask)
result = cv2.add(result2, difference)
Edit:
This image shows the match drawing while stitching image 25 onto the result of the first 24 images:
And the match drawing before that:
I have 97 images in total to stitch. If I stitch images 24 and 25 separately, they stitch properly. If I start stitching from the 23rd image onwards, the stitching is also good, but it gives me problems when I start stitching from the 1st image. I am not able to understand the problem.
Result after stitching 23rd image:
Result after stitching 24th image:
Result after stitching 25th image is as above which went wrong.
Strange observation: If I stitch images 23, 24, and 25 separately with the same code, they get stitched. If I stitch the images from 23 up to 97, they get stitched. But somehow, if I start stitching from the 1st image, it breaks while stitching the 25th. I do not understand why this happens.
I have tried different combinations, like different keypoint detection and extraction methods, matching methods, homography calculations, and warping code, but those combinations didn't work. Something is missing or wrong in the combination of steps, and I am not able to figure it out.
Sorry for this long question. As I am completely new to this, I am not able to explain things properly. Thanks for your help and guidance.
Stitched result of 23,24,25 images separately with SAME code:
With different code (which gives black lines in between the stitches), if I stitch all 97 images, then the 25th goes up in the stitching as shown below (right corner point):
Firstly, I was not able to recreate your problem and solve it as the images were too big for my system to process. However, I had faced the same problem in my Panorama Stitching project, so I am sharing the reason behind it and my approach to solving my problem. Hope this helps you too.
Here's what my problem looked like when I stitched 4 images together just like you did.
As you can see, the 4th image was getting distorted a lot, which should not happen. The same thing happened with you, but to a greater degree.
Now, here's the output when I stitched 8 images after some image pre-processing.
After some pre-processing on the input images, I was able to stitch 8 images together perfectly without any distortion.
To understand the exact reason behind this kind of distortion, watch this video by Joseph Redmon between 50:26 - 1:07:23.
As suggested in the video, we'll first have to project the images onto a cylinder and then unroll them and then stitch these unrolled images together.
Below is the initial input image (left) and the image after projection and unrolling onto a cylinder (right).
For your problem, as you are using satellite images, I guess projection onto a sphere would work better than the cylinder however you'll have to give it a try.
Sharing below my code for projecting the image onto a cylinder and unrolling it for reference. The mathematics used behind it is the same as given in the video.
def Convert_xy(x, y):
    global center, f
    xt = ( f * np.tan( (x - center[0]) / f ) ) + center[0]
    yt = ( (y - center[1]) / np.cos( (x - center[0]) / f ) ) + center[1]
    return xt, yt
def ProjectOntoCylinder(InitialImage):
    global w, h, center, f
    h, w = InitialImage.shape[:2]
    center = [w // 2, h // 2]
    f = 1100  # 1100 field; 1000 Sun; 1500 Rainier; 1050 Helens
    # Creating a blank transformed image
    TransformedImage = np.zeros(InitialImage.shape, dtype=np.uint8)
    # Storing all coordinates of the transformed image in 2 arrays (x and y coordinates)
    AllCoordinates_of_ti = np.array([np.array([i, j]) for i in range(w) for j in range(h)])
    ti_x = AllCoordinates_of_ti[:, 0]
    ti_y = AllCoordinates_of_ti[:, 1]
    # Finding corresponding coordinates of the transformed image in the initial image
    ii_x, ii_y = Convert_xy(ti_x, ti_y)
    # Rounding off the coordinate values to get exact pixel values (top-left corner)
    ii_tl_x = ii_x.astype(int)
    ii_tl_y = ii_y.astype(int)
    # Finding transformed image points whose corresponding
    # initial image points lie inside the initial image
    GoodIndices = (ii_tl_x >= 0) * (ii_tl_x <= (w-2)) * \
                  (ii_tl_y >= 0) * (ii_tl_y <= (h-2))
    # Removing all the outside points from everywhere
    ti_x = ti_x[GoodIndices]
    ti_y = ti_y[GoodIndices]
    ii_x = ii_x[GoodIndices]
    ii_y = ii_y[GoodIndices]
    ii_tl_x = ii_tl_x[GoodIndices]
    ii_tl_y = ii_tl_y[GoodIndices]
    # Bilinear interpolation
    dx = ii_x - ii_tl_x
    dy = ii_y - ii_tl_y
    weight_tl = (1.0 - dx) * (1.0 - dy)
    weight_tr = (dx) * (1.0 - dy)
    weight_bl = (1.0 - dx) * (dy)
    weight_br = (dx) * (dy)
    TransformedImage[ti_y, ti_x, :] = ( weight_tl[:, None] * InitialImage[ii_tl_y, ii_tl_x, :] ) + \
                                      ( weight_tr[:, None] * InitialImage[ii_tl_y, ii_tl_x + 1, :] ) + \
                                      ( weight_bl[:, None] * InitialImage[ii_tl_y + 1, ii_tl_x, :] ) + \
                                      ( weight_br[:, None] * InitialImage[ii_tl_y + 1, ii_tl_x + 1, :] )
    # Getting x coordinate to remove black region from right and left in the transformed image
    min_x = min(ti_x)
    # Cropping out the black region from both sides (using symmetricity)
    TransformedImage = TransformedImage[:, min_x : -min_x, :]
    return TransformedImage, ti_x-min_x, ti_y
You just have to call the function ProjectOntoCylinder and pass it an image to get the resultant image and the coordinates of white pixels in the mask image. Use the code below to call this function and get the mask image.
# Applying Cylindrical projection on Image
Image_Cyl, mask_x, mask_y = ProjectOntoCylinder(Image)
# Getting Image Mask
Image_Mask = np.zeros(Image_Cyl.shape, dtype=np.uint8)
Image_Mask[mask_y, mask_x, :] = 255
Here are links to my project and its detailed documentation for reference:
Part 1:
Source Code,
Documentation
Part 2:
Source Code,
Documentation

OpenCV: undistort (for images) and undistortPoints are inconsistent

For testing, I generate a grid image as a matrix and, separately, the grid points as a point array:
This represents a "distorted" camera image along with some feature points.
When I now undistort both the image and the grid points, I get the following result:
(Note that the fact that the "distorted" image is straight and the "undistorted" image is morphed is not the point, I'm just testing the undistortion functions with a straight test image.)
The grid image and the red grid points are totally misaligned now. I googled and found that some people forget to specify the "new camera matrix" parameter in undistortPoints but I didn't. The documentation also mentions a normalization but I still have the problem when I use the identity matrix as camera matrix. Also, in the central region it fits perfectly.
Why is this not identical, do I use something in a wrong way?
I use cv2 (4.1.0) in Python. Here is the code for testing:
import numpy as np
import matplotlib.pyplot as plt
import cv2
w = 401
h = 301
# helpers
#--------
def plotImageAndPoints(im, pu, pv):
    plt.imshow(im, cmap="gray")
    plt.scatter(pu, pv, c="red", s=16)
    plt.xlim(0, w)
    plt.ylim(0, h)
    plt.show()
def cv2_undistortPoints(uSrc, vSrc, cameraMatrix, distCoeffs):
    uvSrc = np.array([np.matrix([uSrc, vSrc]).transpose()], dtype="float32")
    uvDst = cv2.undistortPoints(uvSrc, cameraMatrix, distCoeffs, None, cameraMatrix)
    uDst = [uv[0] for uv in uvDst[0]]
    vDst = [uv[1] for uv in uvDst[0]]
    return uDst, vDst
# test data
#----------
# generate grid image
img = np.ones((h, w), dtype = "float32")
img[0::20, :] = 0
img[:, 0::20] = 0
# generate grid points
uPoints, vPoints = np.meshgrid(range(0, w, 20), range(0, h, 20), indexing='xy')
uPoints = uPoints.flatten()
vPoints = vPoints.flatten()
# see if points align with the image
plotImageAndPoints(img, uPoints, vPoints) # perfect!
# undistort both image and points individually
#---------------------------------------------
# camera matrix parameters
fx = 1
fy = 1
cx = w/2
cy = h/2
# distortion parameters
k1 = 0.00003
k2 = 0
p1 = 0
p2 = 0
# convert for opencv
mtx = np.matrix([
    [fx,  0, cx],
    [ 0, fy, cy],
    [ 0,  0,  1]
], dtype = "float32")
dist = np.array([k1, k2, p1, p2], dtype = "float32")
# undistort image
imgUndist = cv2.undistort(img, mtx, dist)
# undistort points
uPointsUndist, vPointsUndist = cv2_undistortPoints(uPoints, vPoints, mtx, dist)
# test if they still match
plotImageAndPoints(imgUndist, uPointsUndist, vPointsUndist) # awful!
Any help appreciated!
A bit late to the party, but to help others running into this issue:
The problem is that undistortPoints is an iterative calculation which in some cases exits before a stable solution has been reached. This can be fixed by modifying the termination criteria for the calculation, which can be done by using undistortPointsIter. You should replace:
uvDst = cv2.undistortPoints(uvSrc, cameraMatrix, distCoeffs, None, cameraMatrix)
with:
uvDst = cv2.undistortPointsIter(uvSrc, cameraMatrix, distCoeffs, None, cameraMatrix,(cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 40, 0.03))
Now, it tries 40 iterations to find a solution, rather than the default 5 iterations.
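For reference, here is a sketch of the question's cv2_undistortPoints helper with that one substitution applied. The 40-iteration / 0.03 termination criteria simply mirror the values above and can be tuned; the input packing assumes flattened uSrc/vSrc arrays as in the question.
import cv2
import numpy as np

def cv2_undistortPointsIter(uSrc, vSrc, cameraMatrix, distCoeffs):
    # Same (1, N, 2) float32 packing as the original helper in the question.
    uvSrc = np.array([np.column_stack((uSrc, vSrc))], dtype="float32")
    # Tighter termination criteria: up to 40 iterations or an update below 0.03.
    criteria = (cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 40, 0.03)
    uvDst = cv2.undistortPointsIter(uvSrc, cameraMatrix, distCoeffs, None,
                                    cameraMatrix, criteria)
    uvDst = uvDst.reshape(-1, 2)
    return uvDst[:, 0], uvDst[:, 1]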

How to stitch two images using homography matrix in OpenCv?

I want to stitch two panoramic images using a homography matrix in OpenCV. I found the 3x3 homography matrix,
but I can't stitch the two images. I must stitch them by hand (no built-in function).
Here is my code:
import cv2
import numpy as np
MIN_MATCH_COUNT = 10
img1 = cv2.imread("pano1/cyl_image00.png")
img2 = cv2.imread("pano1/cyl_image01.png")
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
des1 = np.float32(des1)
des2 = np.float32(des2)
matches = flann.knnMatch(des1, des2, k=2)
goodMatches = []
for m, n in matches:
    if m.distance < 0.7 * n.distance:
        goodMatches.append(m)
src_pts = 0
dst_pts = 0
if len(goodMatches) > MIN_MATCH_COUNT:
    dst_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 2)
    src_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 2)
def generateRandom(src_Pts, dest_Pts, N):
    r = np.random.choice(len(src_Pts), N)
    src = [src_Pts[i] for i in r]
    dest = [dest_Pts[i] for i in r]
    return np.asarray(src, dtype=np.float32), np.asarray(dest, dtype=np.float32)
def findH(src, dest, N):
    A = []
    for i in range(N):
        x, y = src[i][0], src[i][1]
        xp, yp = dest[i][0], dest[i][1]
        A.append([x, y, 1, 0, 0, 0, -x * xp, -xp * y, -xp])
        A.append([0, 0, 0, x, y, 1, -yp * x, -yp * y, -yp])
    A = np.asarray(A)
    U, S, Vh = np.linalg.svd(A)
    L = Vh[-1, :] / Vh[-1, -1]
    H = L.reshape(3, 3)
    return H
def ransacHomography(src_Pts, dst_Pts):
    maxI = 0
    maxLSrc = []
    maxLDest = []
    for i in range(70):
        srcP, destP = generateRandom(src_Pts, dst_Pts, 4)
        H = findH(srcP, destP, 4)
        inlines = 0
        linesSrc = []
        lineDest = []
        for p1, p2 in zip(src_Pts, dst_Pts):
            p1U = (np.append(p1, 1)).reshape(3, 1)
            p2e = H.dot(p1U)
            p2e = (p2e / p2e[2])[:2].reshape(1, 2)[0]
            if cv2.norm(p2 - p2e) < 10:
                inlines += 1
                linesSrc.append(p1)
                lineDest.append(p2)
        if inlines > maxI:
            maxI = inlines
            maxLSrc = linesSrc.copy()
            maxLSrc = np.asarray(maxLSrc, dtype=np.float32)
            maxLDest = lineDest.copy()
            maxLDest = np.asarray(maxLDest, dtype=np.float32)
    Hf = findH(maxLSrc, maxLDest, maxI)
    return Hf
H = ransacHomography(src_pts, dst_pts)
So far, so good. I found the homography matrix (H).
Next, I tried to stitch the two panoramic images.
First, I create a big array to stitch the images into (img3).
I copied img1 to the first half of img3.
I tried to find the new coordinates for img2 through the homography matrix and copied the new img2 coordinates to img3.
Here is my code:
height1, width1, rgb1 = img1.shape
height2, width2, rgb2 = img2.shape
img3 = np.empty((height1, width1+width2, 3))
img3[:, 0:width1] = img1/255.0
for i in range(len(img2)):
    for j in range(len(img2[0])):
        pp = H.dot(np.array([[i], [j], [1]]))
        pp = (pp / pp[2]).reshape(1, 3)[0]
        img3[int(round(pp[0])), int(round(pp[1]))] = img2[i, j]/255.0
But this part is not working.
How can I solve this problem?
Once you have the Homography matrix you need to transform one of the images to have the same perspective as the other. This is done using the warpPerspective function in OpenCV. Once you've done the transformation, it's time to concatenate the images.
Let's say you want to transform img_1 into the perspective of img_2 and that you already have the Homography matrix H
dst = cv2.warpPerspective(img_1, H, ((img_1.shape[1] + img_2.shape[1]), img_2.shape[0])) # warped image
# now paste them together
dst[0:img_2.shape[0], 0:img_2.shape[1]] = img_2
dst[0:img_1.shape[0], 0:img_1.shape[1]] = img_1
Also note that OpenCV already has a built-in RANSAC homography finder:
H, masked = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
So it can save you a lot of code.
Check out these tutorials for more details:
https://medium.com/@navekshasood/image-stitching-to-create-a-panorama-5e030ecc8f7
https://medium.com/analytics-vidhya/image-stitching-with-opencv-and-python-1ebd9e0a6d78

OpenCV: Use findHomography() and warpPerspective to align a bigger image onto a smaller one

My goal is to
deskew a scanned image such that its text is perfectly placed on top of the text of the original image. (subtracting the images would remove the text)
prevent any loss of information on the deskewed image
I use SURF features to feed the findHomography function. Then I use the warpPerspective function to transform the scanned image. The resulting image almost perfectly fits onto the original image.
However, the scanned image has content on its corners which is lost after the transformation because the text in the scanned image is smaller and has to be scaled up.
Deskewing an image that has slightly smaller text
Information at the borders of the image is cropped
To avoid any loss of information, I convert the image to RGBA and set the borderValue parameter in warpPerspective such that any added background has transparent color. I remove the transparent pixels after the transformation again. This procedure works but seems highly inefficient.
Question: I'm looking for a working code example (C++ or Python) that shows how to do this more efficiently.
Image has been deskewed and content is preserved. However, the text of the two pictures isn't on top of each other anymore
Text position is off because the warped image has a different size than what warpPerspective expected
After transforming the image the problem is that the two images aren't aligned anymore because the dimensions of the transformed image are different than what the warpPerspective method expected.
Question: How can I realign the two images? It would be great if there was a way to do incorporate this into the previous step already. Again, a working code example would be very helpful.
Here's the code that I have so far. It deskews the image while preserving its content, however, the text is not on top of the original text anymore.
import math
import cv2
import numpy as np
class Deskewer:
    def __init__(self, hessianThreshold = 5000):
        self.__hessianThresh = hessianThreshold
        self.imgOrigGray, self.imgSkewed, self.imgSkewedGray = None, None, None

    def start(self, imgOrig, imgSkewed):
        self.imgOrigGray = cv2.cvtColor(imgOrig, cv2.COLOR_BGR2GRAY)
        self.imgSkewed = imgSkewed  # final transformation will be performed on color image
        self.imgSkewedGray = cv2.cvtColor(imgSkewed, cv2.COLOR_BGR2GRAY)  # prior calculation is faster on gray
        kp1, des1, kp2, des2 = self.__detectFeatures()
        goodMatches = self.__flannMatch(des1, des2)
        MIN_MATCH_COUNT = 10
        M = None
        if len(goodMatches) > MIN_MATCH_COUNT:
            M, _ = self.__findHomography(kp1, kp2, goodMatches)
        else:
            print("Not enough matches are found - %d/%d" % (len(goodMatches), MIN_MATCH_COUNT))
            return
        return self.__deskew(M)

    def __detectFeatures(self):
        surf = cv2.xfeatures2d.SURF_create(self.__hessianThresh)
        kp1, des1 = surf.detectAndCompute(self.imgOrigGray, None)
        kp2, des2 = surf.detectAndCompute(self.imgSkewedGray, None)
        return kp1, des1, kp2, des2

    def __flannMatch(self, des1, des2):
        global matches
        FLANN_INDEX_KDTREE = 0
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(des1, des2, k=2)
        # store all the good matches as per Lowe's ratio test.
        good = []
        for m, n in matches:
            if m.distance < 0.7 * n.distance:
                good.append(m)
        return good

    def __findHomography(self, kp1, kp2, goodMatches):
        src_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 1, 2)
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        matchesMask = mask.ravel().tolist()
        i = matchesMask.index(1)
        # TODO: This is a matching point before the warpPerspective call. How can I calculate this point AFTER the call?
        print("POINTS: object(", src_pts[i][0][1], ",", src_pts[i][0][0], ") - scene(", dst_pts[i][0][1], ",", dst_pts[i][0][0], ")")
        return M, mask

    def getComponents(self, M):
        # ((translationx, translationy), rotation, (scalex, scaley), shear)
        a = M[0, 0]
        b = M[0, 1]
        c = M[0, 2]
        d = M[1, 0]
        e = M[1, 1]
        f = M[1, 2]
        p = math.sqrt(a * a + b * b)
        r = (a * e - b * d) / (p)
        q = (a * d + b * e) / (a * e - b * d)
        translation = (c, f)
        scale = (p, r)  # p = x-Axis, r = y-Axis
        shear = q
        theta = math.atan2(b, a)
        degrees = math.atan2(b, a) * 180 / math.pi
        return (translation, theta, degrees, scale, shear)

    def __deskew(self, M):
        # this info might come in handy here for calculating the dsize of warpPerspective?
        translation, theta, degrees, scale, shear = self.getComponents(M)
        # Alpha channel allows me to set a unique feature on pixels that are created during warpPerspective
        imSkewedAlpha = cv2.cvtColor(self.imgSkewed, cv2.COLOR_BGR2BGRA)
        # These sizes have been randomly chosen to make sure that all the contents fit in the new canvas
        height = 5000
        width = 5000
        shift = -500
        M2 = np.array([[1, 0, shift],
                       [0, 1, shift],
                       [0, 0, 1]])
        M3 = np.dot(M, M2)
        # TODO: How can I calculate the dsize argument?
        # Newly created pixels are set to transparent
        im_out = cv2.warpPerspective(imSkewedAlpha, M3,
                                     (height, width), flags=cv2.WARP_INVERSE_MAP, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 0, 0, 0))
        # http://codereview.stackexchange.com/a/132933
        # Mask of non-black pixels (assuming image has a single channel).
        mask = im_out[:, :, 3] == 255
        # Coordinates of non-black pixels.
        coords = np.argwhere(mask)
        # Bounding box of non-black pixels.
        x0, y0 = coords.min(axis=0)
        x1, y1 = coords.max(axis=0) + 1  # slices are exclusive at the top
        # Get the contents of the bounding box.
        cropped = im_out[x0:x1, y0:y1]
        # TODO: The warped image needs to align nicely on the original image
        return cropped
origImg = cv2.imread("Letter.png")
skewedImg = cv2.imread("A4.png")
deskewed = Deskewer().start(origImg, skewedImg)
cv2.imshow("Original", origImg)
cv2.imshow("Deskewed", deskewed)
cv2.waitKey(0)
Original and skewed image (with additional content) for testing
