Epipolar geometry pose estimation: Epipolar lines look good but wrong pose - python

I am trying to use OpenCV to estimate one pose of a camera relative to another, using SIFT feature tracking, FLANN matching and subsequent calculations of the fundamental and essential matrix. After decomposing the essential matrix, I check for degenerate configurations and obtain the "right" R and t.
Problem is, they never seem to be right. I am including a couple of image pairs:
Image 2 taken with 45 degree rotation along the Y axis and same position w.r.t. Image 1.
Image pair
Image 2 taken from approx. couple of meters away along the negative X direction, slight displacement in the negative Y direction. Approx. 45-60 degree rotation in camera pose along Y axis.
Image pair
The translation vector in the second case, seems to be overestimating the movement in Y and underestimating the movement in X. The rotation matrices when converted to Euler angles give wrong results in both the cases. This happens with a lot of other datasets as well. I have tried switching the fundamental matrix computation technique between RANSAC, LMEDS etc., and am now doing it with RANSAC and a second computation using only the inliers with the 8 point method. Changing the feature detection method does not help either. The epipolar lines seem to be proper, and the fundamental matrix satisfies x'.F.x = 0
Am I missing something fundamentally wrong here? Given the program understands the epipolar geometry properly, what could possibly be happening that results in a completely wrong pose? I am doing the check to make sure points lie in front of both cameras. Any thoughts/suggestions would be very helpful. Thanks!
EDIT: Tried the same technique with two different calibrated cameras spaced apart, and computed the essential matrix as K2'.F.K1, but the translations and rotations are still way off.
Code for reference
import cv2
import numpy as np
from matplotlib import pyplot as plt
# K2 = np.float32([[1357.3, 0, 441.413], [0, 1355.9, 259.393], [0, 0, 1]]).reshape(3,3)
# K1 = np.float32([[1345.8, 0, 394.9141], [0, 1342.9, 291.6181], [0, 0, 1]]).reshape(3,3)
# K1_inv = np.linalg.inv(K1)
# K2_inv = np.linalg.inv(K2)
K = np.float32([3541.5, 0, 2088.8, 0, 3546.9, 1161.4, 0, 0, 1]).reshape(3,3)
K_inv = np.linalg.inv(K)
def in_front_of_both_cameras(first_points, second_points, rot, trans):
    """Check that every point correspondence lies in front of both cameras.

    Args:
        first_points: iterable of homogeneous, normalized image points
            ``[x, y, 1]`` seen by the first camera.
        second_points: matching points seen by the second camera, in the
            same order and format.
        rot: 3x3 candidate rotation matrix (NumPy array).
        trans: length-3 candidate translation vector.

    Returns:
        ``False`` as soon as one correspondence has a negative depth in either
        camera, ``True`` otherwise (including for empty input).
    """
    for first, second in zip(first_points, second_points):
        # Depth of the point along the first camera's optical axis.
        # NOTE(review): the denominator dots with `second`; a derivation from
        # X2 = R (X1 - T) would dot with `first` instead - confirm which
        # convention this routine is meant to follow.
        ray = rot[0, :] - second[0] * rot[2, :]
        first_z = np.dot(ray, trans) / np.dot(ray, second)
        # Back-project the first image point to that depth: z * [x, y, 1].
        # The y component must come from first[1]; the previous code used
        # second[0] (the x coordinate of the *other* image) here.
        first_3d_point = np.array([first[0] * first_z,
                                   first[1] * first_z,
                                   first_z])
        second_3d_point = np.dot(rot.T, first_3d_point) - np.dot(rot.T, trans)
        if first_3d_point[2] < 0 or second_3d_point[2] < 0:
            return False
    return True
def drawlines(img1, img2, lines, pts1, pts2):
    """Draw epipolar lines on img1 and mark the matched points on both images.

    img1  - grayscale image that receives one line per entry of `lines`
    img2  - grayscale image that only receives the point markers
    lines - epilines as (a, b, c) coefficients of a*x + b*y + c = 0
    pts1, pts2 - matched points in img1 and img2 respectively

    Returns the two images converted to BGR with the drawings applied.
    """
    markers_a = np.int32(pts1)
    markers_b = np.int32(pts2)
    _, width = img1.shape
    canvas_a = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
    canvas_b = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR)
    for line, marker_a, marker_b in zip(lines, markers_a, markers_b):
        # One random colour per correspondence so line and markers match.
        colour = tuple(np.random.randint(0, 255, 3).tolist())
        # Intersections of the line with the left (x = 0) and right (x = width) borders.
        left = (int(0), int(-line[2] / line[1]))
        right = (int(width), int(-(line[2] + line[0] * width) / line[1]))
        cv2.line(canvas_a, left, right, colour, 1)
        cv2.circle(canvas_a, tuple(marker_a), 10, colour, -1)
        cv2.circle(canvas_b, tuple(marker_b), 10, colour, -1)
    return canvas_a, canvas_b
img1 = cv2.imread('C:\\Users\\Sai\\Desktop\\room1.jpg', 0)
img2 = cv2.imread('C:\\Users\\Sai\\Desktop\\room0.jpg', 0)
img1 = cv2.resize(img1, (0,0), fx=0.5, fy=0.5)
img2 = cv2.resize(img2, (0,0), fx=0.5, fy=0.5)
sift = cv2.SIFT()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
# FLANN parameters
FLANN_INDEX_KDTREE = 1  # FLANN's algorithm id for the randomized kd-tree index
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)  # or pass empty dictionary
flann = cv2.FlannBasedMatcher(index_params, search_params)
# For every descriptor in image 1, fetch its two nearest neighbours in image 2.
matches = flann.knnMatch(des1, des2, k=2)
good = []
pts1 = []
pts2 = []
# Ratio test as per Lowe's paper: keep a match only when the best neighbour
# (m) is clearly closer than the second best (n).
for m, n in matches:
    if m.distance < 0.7 * n.distance:
        good.append(m)
        # queryIdx indexes the keypoints of image 1, trainIdx those of image 2.
        pts1.append(kp1[m.queryIdx].pt)
        pts2.append(kp2[m.trainIdx].pt)
pts2 = np.float32(pts2)
pts1 = np.float32(pts1)
F, mask = cv2.findFundamentalMat(pts1,pts2,cv2.FM_RANSAC)
# Selecting only the inliers
pts1 = pts1[mask.ravel()==1]
pts2 = pts2[mask.ravel()==1]
F, mask = cv2.findFundamentalMat(pts1,pts2,cv2.FM_8POINT)
print "Fundamental matrix is"
print F
pt1 = np.array([[pts1[0][0]], [pts1[0][1]], [1]])
pt2 = np.array([[pts2[0][0], pts2[0][1], 1]])
print "Fundamental matrix error check: %f"%np.dot(np.dot(pt2,F),pt1)
print " "
# drawing lines on left image
lines1 = cv2.computeCorrespondEpilines(pts2.reshape(-1,1,2), 2,F)
lines1 = lines1.reshape(-1,3)
img5,img6 = drawlines(img1,img2,lines1,pts1,pts2)
# drawing lines on right image
lines2 = cv2.computeCorrespondEpilines(pts1.reshape(-1,1,2), 1,F)
lines2 = lines2.reshape(-1,3)
img3,img4 = drawlines(img2,img1,lines2,pts2,pts1)
# Essential matrix from the fundamental matrix and the intrinsics: E = K^T * F * K.
# NOTE(review): img1/img2 are downscaled by 0.5 right after loading, while K is
# used unchanged; if K was calibrated at the original resolution it has to be
# scaled by the same factor before this step - confirm.
E = K.T.dot(F).dot(K)
print "The essential matrix is"
print E
# Decompose E; the four (R, T) candidates below are built from U, W and Vt.
# NOTE(review): nothing here checks det(R) == +1. det(W) is 1, so
# det(R) = det(U) * det(Vt), which can be -1 (a reflection) - confirm.
U, S, Vt = np.linalg.svd(E)
W = np.array([0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]).reshape(3, 3)
first_inliers = []
second_inliers = []
for i in range(len(pts1)):
# normalize and homogenize the image coordinates
first_inliers.append(K_inv.dot([pts1[i][0], pts1[i][1], 1.0]))
second_inliers.append(K_inv.dot([pts2[i][0], pts2[i][1], 1.0]))
# Determine the correct choice of second camera matrix
# only in one of the four configurations will all the points be in front of both cameras
# First choice: R = U * W * Vt, T = +u_3 (See Hartley Zisserman 9.19)
R = U.dot(W).dot(Vt)
T = U[:, 2]
if not in_front_of_both_cameras(first_inliers, second_inliers, R, T):
# Second choice: R = U * W * Vt, T = -u_3
T = - U[:, 2]
if not in_front_of_both_cameras(first_inliers, second_inliers, R, T):
# Third choice: R = U * Wt * Vt, T = u_3
R = U.dot(W.T).dot(Vt)
T = U[:, 2]
if not in_front_of_both_cameras(first_inliers, second_inliers, R, T):
# Fourth choice: R = U * Wt * Vt, T = -u_3
T = - U[:, 2]
# Computing Euler angles
# The angles are extracted from the entries of R with arctan2, in radians.
thetaX = np.arctan2(R[1][2], R[2][2])
c2 = np.sqrt((R[0][0]*R[0][0] + R[0][1]*R[0][1]))
thetaY = np.arctan2(-R[0][2], c2)
s1 = np.sin(thetaX)
c1 = np.cos(thetaX)
thetaZ = np.arctan2((s1*R[2][0] - c1*R[1][0]), (c1*R[1][1] - s1*R[2][1]))
# Radians are converted to degrees with 3.1415 as a truncated value of pi.
print "Pitch: %f, Yaw: %f, Roll: %f"%(thetaX*180/3.1415, thetaY*180/3.1415, thetaZ*180/3.1415)
print "Rotation matrix:"
print R
print "Translation vector:"
print T

There are many things which can lead to inaccurate estimation of camera pose from point correspondences. Some factors you have to consider:-
(*) 8 point method minimizes algebraic error ( x'.F.x = 0). It is usually better to find a solution which minimizes a meaningful geometric error. For example, you can use re-projection error in your RANSAC implementation.
(*) The linear algorithm which solves for fundamental matrix from 8 points is sensitive to noise. Sub-pixel accurate point matching, proper data normalization and accurate camera calibration are all important for better results.
(*) Feature point localization and matching lead to noisy point matches, hence the solution you get by solving the algebraic equation x'Fx should really be used as an initial estimate and further steps such as parameter optimization need to be applied to refine the solution.
(*) Some two view camera configurations can lead to an ambiguous solution hence further methods (such as third view disambiguation) are needed for reliable results.

How do you get K, the internal parameters of the camera? It seems to me that the computation of the fundamental matrix is correct, because the matched points lie on the epipolar lines. But if the matrix K is inaccurate, you may get a wrong essential matrix and thus the wrong R and t.

In the book "Programming Computer Vision with Python", the rank of E is enforced with code like this:
Computes the second camera matrix (assuming P1 = [I 0])
from an essential matrix. Output is a list of four
possible camera matrices.
make sure E is rank 2
U,S,V = np.linalg.svd(E)
if np.linalg.det(np.dot(U,V))<0:
V = -V
E = np.dot(U,np.dot(np.diag([1.0,1.0,0.0]),V))
I am not sure if this also can improve the performance.
Please let me know.

Expanding on the second point by #Sammy,
(*) The linear algorithm which solves for fundamental matrix from 8
points is sensitive to noise. Sub-pixel accurate point matching,
proper data normalization and accurate camera calibration are all
important for better results.
I would suggest to compute essential matrix directly using the findEssentialMat by openCV instead of findFundamentalMat because the former has less degrees of freedom and can hence be numerically more stable.
The new openCV versions support giving two different camera matrices and distortion coefficients for findEssentialMat as well.


What are the inaccuracies of this 'inverse map' function in OpenCV?

I am trying to horizontally stretch an image in a very specific way. Each x prime coordinate should follow a tangent path with respect to the original x coordinate. I believe there are two ways to do this:
Inverse the tangent function and map it normally
Map the tangent function and then inverse the mapping
Using this answer for map inversion, I'm trying to figure out why the two images are not the same. I know that the first method gives me the correct image that I'm looking for, so why doesn't the second method work? Is it because of the "limited precision" that #ChristophRackwitz commented on the answer?
import cv2
import glob
import numpy as np
import math
# Coefficients of the tangent stretch x' = A * tan(((x - N) / M + B) / C) + D.
A = -1010
B = -3.931
C = 5.258
D = 978.3
M = -193.8
N = 1740


def get_tan_func_value(x):
    """Map an x coordinate through the tangent stretch defined by A..N."""
    angle = (((x-N)/M)+B)/C
    return A * math.tan(angle) + D


def get_inverse_tan_func_value(x):
    """Map an x coordinate through the inverse of the tangent stretch."""
    angle = math.atan((x-D)/A)
    return M * (C*angle - B) + N
# answer from linked post
def invert_map(F, shape):
    """Approximate the inverse of the remap map F by fixed-point iteration.

    F is a two-channel (x, y) map and `shape` is the (rows, cols) grid size.
    Starting from the identity map, the estimate is corrected ten times by
    the residual between the identity and F sampled at the current estimate.
    """
    row_idx, col_idx = np.indices(shape)
    identity = np.zeros_like(F)
    identity[:, :, 0] = col_idx  # channel 0 holds x
    identity[:, :, 1] = row_idx  # channel 1 holds y
    estimate = identity.copy()
    for _ in range(10):
        residual = identity - cv2.remap(F, estimate, None, interpolation=cv2.INTER_LINEAR)
        estimate += residual
    return estimate
# import image
images = glob.glob('*.jpg')
img = cv2.imread(images[0])
h, w = img.shape[:2]
map_x_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
map_x_inverse_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
map_y = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
# x tan function map
for i in range(map_x_tan.shape[0]):
map_x_tan[i,:] = [get_tan_func_value(x) for x in range(map_x_tan.shape[1])]
# x inverse tan function map
for i in range(map_x_inverse_tan.shape[0]):
map_x_inverse_tan[i,:] = [get_inverse_tan_func_value(x) for x in range(map_x_inverse_tan.shape[1])]
# default y map
for j in range(map_y.shape[1]):
map_y[:,j] = [y for y in range(map_y.shape[0])]
# convert x tan map to 2 channel (x,y) map
(xymap_tan, _) = cv2.convertMaps(map1=map_x_tan, map2=map_y, dstmap1type=cv2.CV_32FC2)
# invert the 2 channel x tan map
xymap_inverted = invert_map(xymap_tan, (h,w))
# remap and write the target image (inverse tan function with normal map)
target = cv2.remap(img, map_x_inverse_tan, map_y, cv2.INTER_LINEAR)
cv2.imwrite("target.jpg", target)
# remap and write the attempted image (normal tan function with inverted map)
attempt = cv2.remap(img, xymap_inverted, None, cv2.INTER_LINEAR)
cv2.imwrite("attempt.jpg", attempt)
Method 1: Target Image
Method 2: Attempt Image
The results show that the attempt (normal tan function with inverted map) has less stretching near the edges of the image than expected. Almost everywhere else on the images are identical except the edges. I did not post the original picture to save space.
I've played around with that invert_map procedure. It seems slightly susceptible to oscillation.
use this instead:
def invert_map(F, iterations=10, damping=0.5):
    """Approximate the inverse of a remap map by damped fixed-point iteration.

    Args:
        F: (h, w, 2) "xymap" as accepted by cv2.remap.
        iterations: number of correction steps (default 10).
        damping: factor applied to each correction (default 0.5); values
            below 1 tame the oscillation of the undamped iteration.

    Returns:
        An array with the same shape and dtype as F holding the estimated
        inverse map.
    """
    (h, w) = F.shape[:2]
    I = np.zeros_like(F)
    I[:, :, 1], I[:, :, 0] = np.indices((h, w))  # identity map
    P = np.copy(I)
    for _ in range(iterations):
        # Residual between where we want to land (identity) and where F
        # sends the current estimate.
        correction = I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
        P += correction * damping
    return P
I simply damped the correction by 0.5, which makes the fixed point iteration tamer, converging a lot faster too.
In my experiments with your tan map, I've found that 5-10 iterations are good enough already, and there's no further progress in further iterations.
Entire notebook of my explorations: https://gist.github.com/crackwitz/67f76f8a9eff21476b080c06d20660d0
Feature request: https://github.com/opencv/opencv/issues/22120

Fundamental Matrix for Point Correspondence

I am estimating the fundamental matrix, by using
method of the OpenCV. I give keypoints which I get from flann matcher.
# Initiate ORB detector
orb = cv2.ORB_create()
# find the keypoints with ORB
keyPointsLeft = orb.detect(imgLeft,None)
keyPointsRight= orb.detect(imgRight, None)
# compute the descriptors with ORB
keyPointsLeft, descriptorsLeft = orb.compute(imgLeft, keyPointsLeft)
keyPointsRight, descriptorsRight = orb.compute(imgRight, keyPointsRight)
desLeft = np.float32(descriptorsLeft)
desRight = np.float32(descriptorsRight)
matches = flann.knnMatch(desLeft,desRight,k=2)
# Need to draw only good matches, so create a mask
matchesMask = [[0,0] for i in range(len(matches))]
# Apply ratio test
goodMatches = []
ptsLeft = []
ptsRight = []
for i, (m, n) in enumerate(matches):
    # m is the best neighbour, n the second best; n is only used as the
    # reference distance for the ratio test.
    if m.distance < 0.7 * n.distance:
        x1, y1 = keyPointsLeft[m.queryIdx].pt
        # The accepted correspondence is m, so the right-image point must be
        # looked up with m.trainIdx (n.trainIdx is the rejected runner-up).
        x2, y2 = keyPointsRight[m.trainIdx].pt
        goodMatches.append(m)
        ptsLeft.append((x1, y1))
        ptsRight.append((x2, y2))
        matchesMask[i] = [1, 0]
ptsLeft = np.int32(ptsLeft)
ptsRight = np.int32(ptsRight)
F, mask = cv2.findFundamentalMat(ptsLeft,ptsRight,cv2.FM_7POINT)
After finding the Fundamental matrix, I am trying to verify the point correspondences via epipolar constraint equation, which is :
(1) p1T * F * p2 = 0
So, let's say that we have point p1 (x1,y1) on the left image and point p2 (x2,y2) on the right image. If I apply the epipolar equation (1) to these points, I should get 0, or a number close to 0.
So this equation, can be written like this :
(2) 0 = x1*x2*F[0][0] + x1*y2*F[0][1] + x1*F[0][2] + y1*x2*F[1][0] + y1*y2*F[1][1] + y1*F[1][2] + x2*F[2][0] + y2*F[2][1] + F[2][2]
By using equation (2), I am trying to verify the points matched by flann with this equation. I printed the equation results and the points which are in the same row, since my input images have just translation between them.
Output :
p1(82,340) --> p2(74,340)
p1T * F * p2 = -0.7662387780488729
p1(355,240) --> p2(354,240)
p1T * F * p2 = -0.0047911844235635215
p1(354,229) --> p2(349,229)
p1T * F * p2 = 0.11662882831689814
p1(377,175) --> p2(372,175)
p1T * F * p2 = 0.3450325352994703
p1(319,227) --> p2(311,227)
p1T * F * p2 = 0.19119563752361657
p1(276,171) --> p2(273,171)
p1T * F * p2 = 0.251353775637849
p1(371,259) --> p2(366,259)
p1T * F * p2 = -0.019570666111391688
With this output, I can't verify the points and my fundamental matrix. Equation (1) is not working in my case, sometimes it gives closer values to zero, sometimes not. Is there anything that I did wrong ?
I am not sure if equation 1 can be written as equation 2. I would definitely recommend using equation 1 and homogeneous coordinates (z value is set to 1). You should consider normalizing the coordinates because X and Y would be much larger than Z. This can be done by computing a similarity transform established by Hartley, Zisserman and e.g. shown here: page 2.
After that you could use the OpenCV Mat to directly compute the results. First, I would recommend that you try to use different methods for calculating the Fundamental Matrix. Since you are having a lot of feature points generated by ORB, I would use CV_FM_RANSAC e.g. with a reprojThreshold of 3 and confidence of 0.99. This could already do the trick.
A short code example in C++:
// kp_left / kp_right must be your matched feature points as cv::Point2f.
cv::Mat F = cv::findFundamentalMat(kp_left, kp_right, cv::RANSAC, 3.0, 0.999);
// Homogeneous coordinates (z = 1) of one correspondence.
cv::Mat point_left = cv::Mat(cv::Point3d(82, 340, 1));
cv::Mat point_right = cv::Mat(cv::Point3d(74, 340, 1));
// OpenCV's convention is [p2; 1]^T * F * [p1; 1] = 0, where p1 belongs to the
// first point set passed to findFundamentalMat (kp_left) and p2 to the second
// (kp_right), so the right point goes on the left-hand side of F.
cv::Mat distance = point_right.t() * F * point_left;
The distance is usually never exactly 0 but should now be close to it.

Why does the output contain only 2 values but not the displacement for the entire image?

I have been stuck here for sometime now. I cannot understand what am I doing wrong in calculating the displacement vectors along x-axis and y-axis using the Lucas Kanade method.
I implemented it as given in the above Wikipedia link. Here is what I have done:
import cv2
import numpy as np
img_a = cv2.imread("./images/1.png",0)
img_b = cv2.imread("./images/2.png",0)
# Calculate gradient along x and y axis
ix = cv2.Sobel(img_a, cv2.CV_64F, 1, 0, ksize = 3, scale = 1.0/3.0)
iy = cv2.Sobel(img_a, cv2.CV_64F, 0, 1, ksize = 3, scale = 1.0/3.0)
# Calculate temporal difference between the 2 images
it = img_b - img_a
ix = ix.flatten()
iy = iy.flatten()
it = -it.flatten()
A = np.vstack((ix, iy)).T
atai = np.linalg.inv(np.dot(A.T,A))
atb = np.dot(A.T, it)
v = np.dot(np.dot(np.linalg.inv(np.dot(A.T,A)),A.T),it)
This code runs without an error but it prints an array of 2 values! I had expected the v matrix to be of the same size as that of the image. Why does this happen? What am I doing incorrectly?
PS: I know there are methods directly available with OpenCV but I want to write this simple algorithm (as also given in the Wikipedia link shared above) myself.
To properly compute the Lucas–Kanade optical flow estimate you need to solve the system of two equations for every pixel, using information from its neighborhood, not for the image as a whole.
This is the recipe (notation refers to that used on the Wikipedia page):
Compute the image gradient (A) for the first image (ix, iy in the OP) using any method (Sobel is OK, I prefer Gaussian derivatives; note that it is important to apply the right scaling in Sobel: 1/8).
ix = cv2.Sobel(img_a, cv2.CV_64F, 1, 0, ksize = 3, scale = 1.0/8.0)
iy = cv2.Sobel(img_a, cv2.CV_64F, 0, 1, ksize = 3, scale = 1.0/8.0)
Compute the structure tensor (ATWA): Axx = ix * ix, Axy = ix * iy, Ayy = iy * iy. Each of these three images must be smoothed with a Gaussian filter (this is the windowing). For example,
Axx = cv2.GaussianBlur(ix * ix, (0,0), 5)
Axy = cv2.GaussianBlur(ix * iy, (0,0), 5)
Ayy = cv2.GaussianBlur(iy * iy, (0,0), 5)
These three images together form the structure tensor, which is a 2x2 symmetric matrix at each pixel. For a pixel at (i,j), the matrix is:
| Axx(i,j) Axy(i,j) |
| Axy(i,j) Ayy(i,j) |
Compute the temporal gradient (b) by subtracting the two images (it in the OP).
# cv2.imread returns uint8 images; subtract in floating point so that
# negative differences do not wrap around modulo 256.
it = img_b.astype(np.float64) - img_a.astype(np.float64)
Compute ATWb: Abx = ix * it, Aby = iy * it, and smooth these two images with the same Gaussian filter as above.
Abx = cv2.GaussianBlur(ix * it, (0,0), 5)
Aby = cv2.GaussianBlur(iy * it, (0,0), 5)
Compute the inverse of ATWA (a symmetric positive-definite matrix) and multiply by ATWb. Note that this inverse is of the 2x2 matrix at each pixel, not of the images as a whole. You can write this out as a set of simple arithmetic operations on the images Axx, Axy, Ayy, Abx and Aby.
The inverse of the matrix ATWA is given by:
| Ayy -Axy |
| -Axy Axx | / ( Axx*Ayy - Axy*Axy )
so you can write the solution as
norm = Axx*Ayy - Axy*Axy
vx = ( Ayy * Abx - Axy * Aby ) / norm
vy = ( Axx * Aby - Axy * Abx ) / norm
If the image is natural, it will have at least a tiny bit of noise, and norm will not have zeros. But for artificial images norm could have zeros, meaning you can't divide by it. Simply adding a small value to it will avoid division by zero errors: norm += 1e-6.
The size of the Gaussian filter is chosen as a compromise between precision and allowed motion speed: a larger filter will yield less precise results, but will work with larger shifts between images.
Typically, the vx and vy is only evaluated where the two eigenvalues of the matrix ATWA are sufficiently large (if at least one is small, the result is inaccurate or possibly wrong).
Using DIPlib (disclosure: I'm an author) this is all very easy because it supports images with a matrix at each pixel. You would do this as follows:
import diplib as dip
img_a = dip.ImageRead("./images/1.png")
img_b = dip.ImageRead("./images/2.png")
A = dip.Gradient(img_a, [1.0])
b = img_b - img_a
ATA = dip.Gauss(A * dip.Transpose(A), [5.0])
ATb = dip.Gauss(A * b, [5.0])
v = dip.Inverse(ATA) * ATb

find intersection point of two lines drawn using houghlines opencv

How can I get the intersection points of lines drawn using the OpenCV Hough lines algorithm?
Here is my code:
import cv2
import numpy as np
import imutils
im = cv2.imread('../data/test1.jpg')
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 60, 150, apertureSize=3)
img = im.copy()
lines = cv2.HoughLines(edges,1,np.pi/180,200)
for line in lines:
for rho,theta in line:
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 3000*(-b))
y1 = int(y0 + 3000*(a))
x2 = int(x0 - 3000*(-b))
y2 = int(y0 - 3000*(a))
cv2.imshow('houghlines',imutils.resize(img, height=650))
I want to get all the points of intersection.
You don't want to get the intersections of the parallel lines; only the intersections of the vertical lines with those of the horizontal lines. Also, since you have vertical lines, calculating the slope will likely result in exploding or inf slopes, so you shouldn't use the y = mx+b equations. You need to do two things:
Segment your lines into two classes based on their angle.
Calculate the intersections of each line in one class to the lines in the other classes.
With HoughLines, you already have the result as rho, theta so you can easily segment into two classes of angle with theta. You can use for e.g. cv2.kmeans() with theta as your data you want to split.
Then, to calculate the intersections, you can use the formula for calculating intersections given two points from each line. You are already calculating two points from each line: (x1, y1), (x2, y2) so you can simply just store those and use them. Edit: Actually, as seen below in my code, there's a formula you can use for calculating the intersections of lines with the rho, theta form that HoughLines gives.
I have answered a similar question before with some python code that you can check out; note this was using HoughLinesP which gives you only line segments.
Code example
You didn't provide your original image so I can't use that. Instead I'll use the standard sudoku image used by OpenCV on their Hough transform and thresholding tutorials:
First, we'll just read this image and binarize it using adaptive thresholding like what's used in this OpenCV tutorial:
import cv2
import numpy as np
img = cv2.imread('sudoku.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 5)
# Gaussian-weighted adaptive threshold, inverted so the lines become white.
adapt_type = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
thresh_type = cv2.THRESH_BINARY_INV
bin_img = cv2.adaptiveThreshold(blur, 255, adapt_type, thresh_type, 11, 2)
Then we'll find the Hough lines with cv2.HoughLines():
rho, theta, thresh = 2, np.pi/180, 400
lines = cv2.HoughLines(bin_img, rho, theta, thresh)
Now, if we want to find the intersections, really we want to find the intersections only of the perpendicular lines. We don't want the intersections of mostly parallel lines. So we need to segment our lines. In this particular example you could easily just check whether the line is horizontal or vertical based on a simple test; the vertical lines will have a theta of around 0 or around 180; the horizontal lines will have a theta of around 90. However, if you want to segment them based on an arbitrary number of angles, automatically, without you defining those angles, I think the best idea is to use cv2.kmeans().
There is one tricky thing to get right. HoughLines returns lines in rho, theta form (Hesse normal form), and the theta returned is between 0 and 180 degrees, and lines around 180 and 0 degrees are similar (they are both close to vertical lines), so we need some way to get this periodicity in kmeans.
If we plot the angle on the unit circle, but multiply the angle by two, then the angles originally around 180 degrees will become close to 360 degrees and thus will have x, y values on the unit circle near the same for angles at 0. So we can get some nice "closeness" here by plotting 2*angle with the coordinates on the unit circle. Then we can run cv2.kmeans() on those points, and segment automatically with however many pieces we want.
So let's build a function to do the segmentation:
from collections import defaultdict
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Groups lines based on angle with k-means.

    Uses k-means on the coordinates of the angle on the unit circle
    to segment `k` angles inside `lines`.

    Args:
        lines: lines as returned by cv2.HoughLines, each indexed as
            line[0] == (rho, theta).
        k: number of angle clusters.
        **kwargs: optional 'criteria', 'flags' and 'attempts' forwarded to
            cv2.kmeans.

    Returns:
        A list of groups, each group being the list of lines that share a
        k-means label.
    """
    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # returns angles in [0, pi] in radians
    angles = np.array([line[0][1] for line in lines])
    # multiply the angles by two and find coordinates of that angle
    pts = np.array([[np.cos(2*angle), np.sin(2*angle)]
                    for angle in angles], dtype=np.float32)

    # run kmeans on the coords
    labels, centers = cv2.kmeans(pts, k, None, criteria, attempts, flags)[1:]
    labels = labels.reshape(-1)  # transpose to row vec

    # segment lines based on their kmeans label
    segmented = defaultdict(list)
    for i, line in enumerate(lines):
        segmented[labels[i]].append(line)
    segmented = list(segmented.values())
    return segmented
Now to use it, we can simply call:
segmented = segment_by_angle_kmeans(lines)
What's nice is here we can specify an arbitrary number of groups by specifying the optional argument k (by default, k = 2 so I didn't specify it here).
If we plot the lines from each group with a different color:
And now all that's left is to find the intersections of each line in the first group with each line in the second group. Since the lines are in Hesse normal form, there's a nice linear algebra formula for calculating the intersection of lines from this form. See here. Let's create two functions here; one that finds the intersection of just two lines, and one function that loops through all the lines in the groups and uses that simpler function for two lines:
def intersection(line1, line2):
    """Finds the intersection of two lines given in Hesse normal form.

    Returns closest integer pixel locations.
    See https://stackoverflow.com/a/383527/5087436

    Each line is indexed as line[0] == (rho, theta). The result is
    [[x, y]]. Parallel lines make the system singular, in which case
    np.linalg.solve raises numpy.linalg.LinAlgError.
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    # Each row is one line equation: x*cos(theta) + y*sin(theta) = rho.
    A = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)]
    ])
    # A 1-D right-hand side makes solve() return plain scalars, so int()
    # is never applied to a one-element array.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    x0, y0 = int(np.round(x0)), int(np.round(y0))
    return [[x0, y0]]
def segmented_intersections(lines):
    """Collect the intersections of every line with the lines of all later groups.

    `lines` is a list of groups; lines inside the same group are never
    intersected with each other.
    """
    found = []
    for idx in range(len(lines) - 1):
        for later_group in lines[idx + 1:]:
            found.extend(
                intersection(line_a, line_b)
                for line_a in lines[idx]
                for line_b in later_group
            )
    return found
Then to use it, it's simply:
intersections = segmented_intersections(segmented)
And plotting all the intersections, we get:
As mentioned above, this code can segment lines into more than two groups of angles as well. Here's it running on a hand drawn triangle, and calculating the intersection points of the detected lines with k=3:
If you already have the line segment, just substitute them in a line equation ...
x = x1 + u * (x2-x1)
y = y1 + u * (y2-y1)
u can be found by using any of the following ...
u = ((x4-x3)*(y1-y3) - (y4-y3)*(x1-x3)) / ((y4-y3)*(x2-x1) - (x4-x3)*(y2-y1))
u = ((x2-x1)*(y1-y3) - (y2-y1)*(x1-x3)) / ((y4-y3)*(x2-x1) - (x4-x3)*(y2-y1))
First of all, you need to refine the output of Hough transform (I usually do this by k-means clustering based on some criteria, e.g. slope and/or centroids of segments). In your problem, for instance, it seems like the slope for all the lines is usually in the vicinity of 0, 180, 90 degrees so you can do clustering on this basis.
Next, there are two different ways you can get the intersecting points(which are technically the same):
The equations in Bhupen's answer.
Using a geometry library like Shapely or SymPy. The benefit of doing this with a geometry library is that you have access to a variety of tools you might need later on in development(intersection, interpolation, convex hull, etc. etc.)
P.S. Shapely is a wrapper around a powerful C++ geometry library but SymPy is pure Python. You may want to consider this in case your application is time critical.
Here is a more direct solution, adapting this answer. It should be more numerically stable than Bhupen’s answer
First you should cluster the lines so that you do not try to find the intersection of parallel lines, as mentioned in other answers (otherwise, you will get inconsistent results and/or computation errors).
Then you can find the intersection of a pair of lines with this:
def hough_inter(theta1, rho1, theta2, rho2):
    """Return the (x, y) intersection of two lines given in (theta, rho) form.

    Solves the system x*cos(theta_i) + y*sin(theta_i) = rho_i for the two
    lines in the least-squares sense.
    """
    A = np.array([[np.cos(theta1), np.sin(theta1)],
                  [np.cos(theta2), np.sin(theta2)]])
    b = np.array([rho1, rho2])
    # use lstsq to solve Ax = b, not inv() which is unstable
    # rcond=None selects NumPy's current default cutoff explicitly.
    return np.linalg.lstsq(A, b, rcond=None)[0]
result on my data:
Lines in the hough (rho/theta) space are expressed like this in the x-y space:
rho = x cosθ + y sinθ
therefore the intersection (x, y) necessarily solves
x cos θ1 + y sin θ1 = r1
x cos θ2 + y sin θ2 = r2
that is AX = b, where
A = [cos θ1 sin θ1] b = |r1| X = |x|
[cos θ2 sin θ2] |r2| |y|
Therefore if you have two lines in python, you can find their intersection like this.
Here is a complete solution written in python 2.7.x using OpenCV 2.4.
It uses the solution from alkasm in this thread, which was incomplete. Also the returned value from HoughLines() and the syntax for kmeans() has changed from OpenCV 2.x to 3.x
Result 1: A piece of paper on a desk
This answers the original question, however using k-means clustering with k = 2,3,4 does not segment the piece of paper. You would need a different approach to find the corners of the paper
e.g. filtering for parallel lines.
Result 2: Sudoku grid
Code: https://pastiebin.com/5f36425b7ae3d
Find the intersection points of lines.
import numpy as np
import cv2
from collections import defaultdict
import sys
img = cv2.imread("paper_on_desk.jpg")
#img = cv2.imread("sudoku.jpg")
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group lines by their angle using k-means clustering.

    Adapted from the earlier answer in this thread.

    Args:
        lines: sequence of lines, each indexed as line[0] == (rho, theta).
        k: number of angle clusters.
        **kwargs: optional 'criteria', 'flags' and 'attempts' forwarded to
            cv2.kmeans.

    Returns:
        A list of groups, each group being the list of lines that share a
        k-means label.
    """
    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Get angles in [0, pi] radians
    angles = np.array([line[0][1] for line in lines])

    # Multiply the angles by two and find coordinates of that angle on the Unit Circle
    pts = np.array([[np.cos(2*angle), np.sin(2*angle)] for angle in angles], dtype=np.float32)

    # Run k-means
    if sys.version_info[0] == 2:
        # python 2.x
        ret, labels, centers = cv2.kmeans(pts, k, criteria, attempts, flags)
    else:
        # python 3.x, syntax has changed.
        labels, centers = cv2.kmeans(pts, k, None, criteria, attempts, flags)[1:]

    labels = labels.reshape(-1)  # Transpose to row vector

    # Segment lines based on their label of 0 or 1
    segmented = defaultdict(list)
    for i, line in zip(range(len(lines)), lines):
        segmented[labels[i]].append(line)

    segmented = list(segmented.values())
    # The summary below reports exactly two groups, matching the k=2 use here.
    print("Segmented lines into two groups: %d, %d" % (len(segmented[0]), len(segmented[1])))

    return segmented
def intersection(line1, line2):
    """Find the intersection of two lines specified in Hesse normal form.

    Returns closest integer pixel locations as [[x, y]]. Each line is
    indexed as line[0] == (rho, theta). Parallel lines make the system
    singular, in which case np.linalg.solve raises numpy.linalg.LinAlgError.
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    # Each row is one line equation: x*cos(theta) + y*sin(theta) = rho.
    A = np.array([[np.cos(theta1), np.sin(theta1)],
                  [np.cos(theta2), np.sin(theta2)]])
    # A 1-D right-hand side makes solve() return plain scalars, so int()
    # is never applied to a one-element array.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    x0, y0 = int(np.round(x0)), int(np.round(y0))

    return [[x0, y0]]
def segmented_intersections(lines):
    """Find the intersection between groups of lines.

    `lines` is a list of groups; every line of a group is intersected with
    every line of each later group, never with lines of its own group.
    Returns the list of values produced by intersection().
    """
    intersections = []
    for i, group in enumerate(lines[:-1]):
        for next_group in lines[i+1:]:
            for line1 in group:
                for line2 in next_group:
                    intersections.append(intersection(line1, line2))

    return intersections
def drawLines(img, lines, color=(0,0,255)):
    """Draw lines given in (rho, theta) form onto an image, in place.

    Args:
        img: Image passed to ``cv2.line``; it is modified directly.
        lines: Iterable of lines, each an iterable of ``(rho, theta)`` pairs.
        color: Colour tuple passed to ``cv2.line`` (default ``(0,0,255)``).
    """
    for line in lines:
        for rho, theta in line:
            a = np.cos(theta)
            b = np.sin(theta)
            # (x0, y0) is the point on the line closest to the origin.
            x0 = a*rho
            y0 = b*rho
            # Step 1000 px each way along the line direction (-b, a) to get
            # two endpoints for the drawn segment.
            x1 = int(x0 + 1000*(-b))
            y1 = int(y0 + 1000*(a))
            x2 = int(x0 - 1000*(-b))
            y2 = int(y0 - 1000*(a))
            cv2.line(img, (x1,y1), (x2,y2), color, 1)
# Pre-process: grayscale, then median blur to suppress noise before thresholding.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 5)

# Make binary image
# NOTE(review): `adapt_type` was used without ever being assigned in this
# listing; the Gaussian-weighted adaptive method is assumed here -- confirm.
adapt_type = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
thresh_type = cv2.THRESH_BINARY_INV
bin_img = cv2.adaptiveThreshold(blur, 255, adapt_type, thresh_type, 11, 2)
cv2.imshow("binary", bin_img)

# Detect lines
rho = 2
theta = np.pi/180
thresh = 350
lines = cv2.HoughLines(bin_img, rho, theta, thresh)
# HoughLines returns None rather than an empty array when nothing passes the
# accumulator threshold; stop with a clear message instead of a TypeError.
if lines is None:
    raise RuntimeError("cv2.HoughLines found no lines; try lowering `thresh`")

if sys.version_info[0] == 2:
    # python 2.x
    # Re-shape from 1xNx2 to Nx1x2
    lines = [np.array([[line_rho, line_theta]])
             for line_rho, line_theta in lines[0]]

print("Found lines: %d" % (len(lines)))

# Draw all Hough lines in red
img_with_all_lines = np.copy(img)
drawLines(img_with_all_lines, lines)
cv2.imshow("Hough lines", img_with_all_lines)
cv2.imwrite("all_lines.jpg", img_with_all_lines)

# Cluster line angles into 2 groups (vertical and horizontal)
segmented = segment_by_angle_kmeans(lines, 2)

# Find the intersections of each vertical line with each horizontal line
intersections = segmented_intersections(segmented)

img_with_segmented_lines = np.copy(img)

# Draw vertical lines in green
# NOTE(review): which cluster index is "vertical" depends on the order in
# which segment_by_angle_kmeans returns its groups -- confirm.
vertical_lines = segmented[1]
drawLines(img_with_segmented_lines, vertical_lines, (0,255,0))

# Draw horizontal lines in yellow
horizontal_lines = segmented[0]
drawLines(img_with_segmented_lines, horizontal_lines, (0,255,255))

# Draw intersection points in magenta
for point in intersections:
    pt = (point[0][0], point[0][1])
    length = 5
    cv2.line(img_with_segmented_lines, (pt[0], pt[1]-length), (pt[0], pt[1]+length), (255, 0, 255), 1) # vertical line
    cv2.line(img_with_segmented_lines, (pt[0]-length, pt[1]), (pt[0]+length, pt[1]), (255, 0, 255), 1) # horizontal line

# NOTE(review): no cv2.waitKey() call is visible in this listing, so the
# imshow windows may never get a chance to render -- confirm.
cv2.imshow("Segmented lines", img_with_segmented_lines)
cv2.imwrite("intersection_points.jpg", img_with_segmented_lines)
Here I have pre-processed my image in a few steps:
2. Either bitwise conversion or edge detection, depending on the image; here I have gone with bitwise conversion.
First, collect all the detected lines into a list.
listOflines = cv2.HoughLines(mask_inv,1,np.pi/180,200)
For each line we get the values of 'rho' and 'theta'.
What I am doing here is creating two empty lists, one for the vertical lines and one for the horizontal lines, and appending the values of the lines to the respective list.
rowsValue = []
columnValue = []
Here is the logic for vertical and horizontal lines.
for line in listOflines:
if line[0][1] == 0:
Now here is the important part.
Wherever one line crosses another, they intersect at a particular pixel.
And we have that pixel's coordinate in terms of 'rho'.
Now let's create tuples to pass as coordinates into the 'cv2' function, i.e. in the form (x, y).
# Pair every value in rowsValue with every value in columnValue; each pair is
# drawn as a small red dot marking a candidate intersection.
# NOTE(review): cv2.circle takes its centre as (x, y); confirm that rowsValue
# really holds the x-coordinates and columnValue the y-coordinates.
tupsList = [(r,c) for r in rowsValue for c in columnValue]
for tups in tupsList:
    # cv2.circle needs integer pixel coordinates; the values are presumably
    # float 'rho' values from HoughLines, so convert them explicitly.
    cv2.circle(image, (int(round(tups[0])), int(round(tups[1]))), 1,(0,0,255), 2)
That's it!
Here are the images before and after.
Original Image
Intersection Image

distance measurement using disparity map

I was working on 3D reconstruction and distance measurement using OpenCV and Python. I generate the disparity map for the left camera and then I used this formula to get the distance:
D = (f * b) / disp
Where f is the focal length, b is the distance between the two cameras (the baseline) and disp is the matrix of the disparity map.
My questions are:
The numbers that I get, are they supposed to be the distance of each point in the picture?
What is the maximum distance that I can get with this method (for example, in my project the maximum number I get is 110)?
# NOTE(review): this listing appears to be an incomplete excerpt of the
# asker's script and cannot run as shown:
#   - `Li`, `Ri`, `Mat`, `disparity` and `D` are used but never defined in
#     the visible lines;
#   - the ''' opened on the third line is never closed, and the
#     cv2.StereoSGBM(...) call has no closing parenthesis;
#   - `return D` appears outside any visible function definition;
#   - the print statements use Python 2 syntax.
# Visible steps: load the left/right images and downsample them with
# pyrDown, configure a StereoSGBM matcher, compute the disparity and divide
# it by 16.0 (presumably converting a fixed-point result to pixel units --
# confirm), reproject to 3D using `Mat`, and display the images.
img_L = cv2.pyrDown( cv2.imread(Li) )
img_R = cv2.pyrDown( cv2.imread(Ri) )
'''h, w = img_L.shape[:2]
window_size = 3
min_disp = 16
num_disp = 112-min_disp
stereo = cv2.StereoSGBM(minDisparity = min_disp,
numDisparities = num_disp,
SADWindowSize = window_size,
uniquenessRatio = 10,
speckleWindowSize = 100,
speckleRange = 32,
disp12MaxDiff = 1,
P1 = 8*3*window_size**2,
P2 = 32*3*window_size**2,
fullDP = False
print "computing disparity..."
disp = stereo.compute(img_L, img_R).astype(np.float32) / 16.0
print "generating 3d point cloud..."
h, w = img_L.shape[:2]
f = 0.8*w # guess for focal length
points = cv2.reprojectImageTo3D(disp, Mat)
colors = cv2.cvtColor(img_L, cv2.COLOR_BGR2RGB)
mask = disp > disp.min()
cv2.imshow('left', img_L)
cv2.imshow('disparity',disparity )
return D
The values D that you get using this formula are the depths of each point for which you provided a disparity.
The depth and the distance are two slightly different things. If you use the standard coordinate system for a camera (i.e. Z axis along the optical axis, X and Y axis in the directions of the image X and Y axis), then a 3D point M = (X, Y, Z) has a distance of sqrt(X²+Y²+Z²) from the optical center and a depth of Z. The D in the formula is the depth, not the distance.
If you want to retrieve the 3D point M = (X, Y, Z) from the depth value, you need to know the camera matrix K: M = D * inv(K) * [u; v; 1], where (u, v) are the image coordinates of the point.
Edit: Concerning your second question, the maximum depth that you can get with this method is linked to the minimum disparity (not the maximum, since disp is in the denominator): the largest measurable depth is f * b divided by the smallest non-zero disparity. And since disparity estimation is quantized (done pixel by pixel), you can't estimate depth up to infinity.
