I want to stitch two panoramic images using a homography matrix in OpenCV. I found the 3x3 homography matrix, but I can't stitch the two images. I must stitch them by hand (no built-in functions).
Here is my code:
import cv2
import numpy as np
MIN_MATCH_COUNT = 10
img1 = cv2.imread("pano1/cyl_image00.png")
img2 = cv2.imread("pano1/cyl_image01.png")
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
des1 = np.float32(des1)
des2 = np.float32(des2)
matches = flann.knnMatch(des1, des2, k=2)
goodMatches = []
for m, n in matches:
    if m.distance < 0.7 * n.distance:
        goodMatches.append(m)

src_pts = 0
dst_pts = 0
if len(goodMatches) > MIN_MATCH_COUNT:
    dst_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 2)
    src_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 2)

def generateRandom(src_Pts, dest_Pts, N):
    r = np.random.choice(len(src_Pts), N)
    src = [src_Pts[i] for i in r]
    dest = [dest_Pts[i] for i in r]
    return np.asarray(src, dtype=np.float32), np.asarray(dest, dtype=np.float32)

def findH(src, dest, N):
    A = []
    for i in range(N):
        x, y = src[i][0], src[i][1]
        xp, yp = dest[i][0], dest[i][1]
        A.append([x, y, 1, 0, 0, 0, -x * xp, -xp * y, -xp])
        A.append([0, 0, 0, x, y, 1, -yp * x, -yp * y, -yp])
    A = np.asarray(A)
    U, S, Vh = np.linalg.svd(A)
    L = Vh[-1, :] / Vh[-1, -1]
    H = L.reshape(3, 3)
    return H

def ransacHomography(src_Pts, dst_Pts):
    maxI = 0
    maxLSrc = []
    maxLDest = []
    for i in range(70):
        srcP, destP = generateRandom(src_Pts, dst_Pts, 4)
        H = findH(srcP, destP, 4)
        inlines = 0
        linesSrc = []
        lineDest = []
        for p1, p2 in zip(src_Pts, dst_Pts):
            p1U = (np.append(p1, 1)).reshape(3, 1)
            p2e = H.dot(p1U)
            p2e = (p2e / p2e[2])[:2].reshape(1, 2)[0]
            if cv2.norm(p2 - p2e) < 10:
                inlines += 1
                linesSrc.append(p1)
                lineDest.append(p2)
        if inlines > maxI:
            maxI = inlines
            maxLSrc = linesSrc.copy()
            maxLSrc = np.asarray(maxLSrc, dtype=np.float32)
            maxLDest = lineDest.copy()
            maxLDest = np.asarray(maxLDest, dtype=np.float32)
    Hf = findH(maxLSrc, maxLDest, maxI)
    return Hf

H = ransacHomography(src_pts, dst_pts)
So far, so good: I found the homography matrix (H).
Next, I tried to stitch the two panoramic images.
First, I created a big array (img3) to hold the stitched images.
I copied img1 into the first half of img3.
Then I tried to find the new coordinates of img2 through the homography matrix and copied img2 to those new coordinates in img3.
Here is my code:
height1, width1, rgb1 = img1.shape
height2, width2, rgb2 = img2.shape
img3 = np.empty((height1, width1+width2, 3))
img3[:, 0:width1] = img1/255.0
for i in range(len(img2)):
    for j in range(len(img2[0])):
        pp = H.dot(np.array([[i], [j], [1]]))
        pp = (pp / pp[2]).reshape(1, 3)[0]
        img3[int(round(pp[0])), int(round(pp[1]))] = img2[i, j]/255.0
But this part is not working.
How can I solve this problem?
Once you have the Homography matrix you need to transform one of the images to have the same perspective as the other. This is done using the warpPerspective function in OpenCV. Once you've done the transformation, it's time to concatenate the images.
Let's say you want to transform img_1 into the perspective of img_2 and that you already have the Homography matrix H
dst = cv2.warpPerspective(img_1, H, ((img_1.shape[1] + img_2.shape[1]), img_2.shape[0]))  # warped image
# now paste them together
dst[0:img_2.shape[0], 0:img_2.shape[1]] = img_2
dst[0:img_1.shape[0], 0:img_1.shape[1]] = img_1
Also note that OpenCV already has a built-in RANSAC homography finder:
H, masked = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
So it can save you a lot of code.
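For reference, here is a minimal sketch of that built-in pipeline (untested; it reuses kp1, kp2 and goodMatches from the matching step above and assumes img2 is the image to be warped into img1's frame):
# compute H mapping img2's keypoints onto img1's keypoints
src = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 1, 2)
dst = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 1, 2)
H, mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)

# warp img2 onto a canvas wide enough for both images, then paste img1 on the left
pano = cv2.warpPerspective(img2, H, (img1.shape[1] + img2.shape[1], img1.shape[0]))
pano[0:img1.shape[0], 0:img1.shape[1]] = img1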
Check out these tutorials for more details:
https://medium.com/@navekshasood/image-stitching-to-create-a-panorama-5e030ecc8f7
https://medium.com/analytics-vidhya/image-stitching-with-opencv-and-python-1ebd9e0a6d78
I need to determine the location of yogurts in the supermarket. The source photo looks like this:
With this template:
I am using SIFT to extract the key points of the template:
img1 = cv.imread('train.jpg')
sift = cv.SIFT_create()# queryImage
kp1, des1 = sift.detectAndCompute(img1, None)
path = glob.glob("template.jpg")
cv_img = []
l=0
for img in path:
    img2 = cv.imread(img)  # trainImage
    # Initiate SIFT detector
    # find the keypoints and descriptors with SIFT
    kp2, des2 = sift.detectAndCompute(img2, None)

    # FLANN parameters
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)  # or pass empty dictionary
    flann = cv.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    # Need to draw only good matches, so create a mask
    # ratio test as per Lowe's paper
    if (l < len(matches)):
        l = len(matches)
        image = img2
        match = matches
        h_query, w_query, _ = img2.shape

    matchesMask = [[0, 0] for i in range(len(match))]
    good_matches = []
    good_matches_indices = {}
    for i, (m, n) in enumerate(match):
        if m.distance < 0.7 * n.distance:
            matchesMask[i] = [1, 0]
            good_matches.append(m)
            good_matches_indices[len(good_matches) - 1] = i

    bboxes = []

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 2)

    model, inliers = initialize_ransac(src_pts, dst_pts)
    n_inliers = np.sum(inliers)
    matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
    print(len(matched_indices))

    model, inliers = ransac(
        (src_pts, dst_pts),
        AffineTransform, min_samples=4,
        residual_threshold=4, max_trials=20000
    )
    n_inliers = np.sum(inliers)
    print(n_inliers)
    matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
    print(matched_indices)

    q_coordinates = np.array([(0, 0), (h_query, w_query)])
    coords = model.inverse(q_coordinates)
    print(coords)

    h_query, w_query, _ = img2.shape
    q_coordinates = np.array([(0, 0), (h_query, w_query)])
    coords = model.inverse(q_coordinates)
    print(coords)
    # bboxes_list.append((i, coords))

    M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 2)

    draw_params = dict(matchColor=(0, 255, 0),
                       singlePointColor=(255, 0, 0),
                       matchesMask=matchesMask,
                       flags=cv.DrawMatchesFlags_DEFAULT)
    img3 = cv.drawMatchesKnn(img1, kp1, image, kp2, match, None, **draw_params)
    plt.imshow(img3), plt.show()
The result of SIFT matching looks like this:
The question is: what is the best way to cluster the points to obtain rectangles representing each yogurt? I tried RANSAC, but this method doesn't work in this case.
I am proposing an approach based on what is discussed in this paper. I have modified the approach a bit because the use case is not entirely the same, but they do use SIFT feature matching to locate multiple objects in video frames. They used PCA to reduce computation time, but that may not be required for still images.
Sorry, I could not write full code for this as it would take a lot of time, but I believe this approach should locate all occurrences of the template object.
The modified approach is as follows (a rough code sketch for the labelling step is given after the steps):
Divide the template image into regions: left, middle, right along the horizontal axis and top, bottom along the vertical axis.
Now when you match features between the template and the source image, you will get keypoints from some of these regions matched at multiple locations in the source image. You can use these keypoints to identify which region of the template is present at which location(s) in the source image. If there are overlapping regions, i.e. keypoints from different template regions matched to nearby keypoints in the source image, that would indicate a wrong match.
Mark each set of matching keypoints within a neighborhood on the source image as left, center, right, top or bottom, depending on whether the majority of its matches come from keypoints of that particular region in the template image.
Starting from each left region in the source image, move towards the right; if we find a central region followed by a right region, then the area of the source image between the regions marked as left and right can be marked as the location of one template object.
There could be overlapping objects, which would result in a left region followed by another left region when moving to the right. The area between the two left regions can be marked as one template object.
For further refinement, each area of the source image marked as one template object can be cropped and re-matched with the template image.
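Here is the kind of code I have in mind for the labelling step. It is only a sketch; kp_source, kp_template, good_matches and template_width are placeholders for whatever your SIFT matching step produces:
def template_region(kp_t, template_width):
    # label a template keypoint as 'left', 'middle' or 'right' by its x coordinate
    x = kp_t.pt[0]
    if x < template_width / 3:
        return 'left'
    elif x < 2 * template_width / 3:
        return 'middle'
    return 'right'

# for every good match, remember which template region landed where in the source image
labelled_pts = []
for m in good_matches:
    region = template_region(kp_template[m.trainIdx], template_width)
    x, y = kp_source[m.queryIdx].pt
    labelled_pts.append((x, y, region))

# walking these points from left to right, a 'left' group followed by a 'middle'
# and then a 'right' (or by another 'left') group delimits one template instance
labelled_pts.sort(key=lambda p: p[0])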
Try working spatially: for each key-point in img2, take some bounding box around it and consider only the points inside it for your RANSAC homography when checking for the best fit.
You can also work with overlapping windows and later discard similar resulting homographies.
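A minimal sketch of that idea, assuming src_pts and dst_pts are Nx2 NumPy arrays of matched coordinates (the window size and step below are arbitrary choices):
import cv2

def windowed_homographies(src_pts, dst_pts, img_shape, win=200, step=100):
    # run RANSAC separately on the matches that fall inside each overlapping window
    h, w = img_shape[:2]
    results = []
    for y0 in range(0, h, step):
        for x0 in range(0, w, step):
            inside = ((src_pts[:, 0] >= x0) & (src_pts[:, 0] < x0 + win) &
                      (src_pts[:, 1] >= y0) & (src_pts[:, 1] < y0 + win))
            if inside.sum() >= 4:  # findHomography needs at least 4 points
                H, mask = cv2.findHomography(src_pts[inside], dst_pts[inside], cv2.RANSAC, 5.0)
                if H is not None:
                    results.append((x0, y0, H, int(mask.sum())))
    return results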
Here is what you can do:
Base image = whole picture of the shelf
Template image = single product image
Get SIFT keypoints and descriptors from both images (base and template).
Do feature matching.
Get all the points in the base image which are matched (refer to the figure).
Create clusters based on the size of the template image (here the threshold is 50 px).
Get the bounding box of each cluster.
Crop each bounding box cluster and check its matches with the template image.
Accept all clusters which have at least a minimum percentage of matches (here a minimum of 10% of keypoints).
def plot_pts(img, pts):
    img_plot = img.copy()
    for i in range(len(pts)):
        img_plot = cv2.circle(img_plot, (int(pts[i][0]), int(pts[i][1])), radius=7, color=(255, 0, 0), thickness=-1)
    plt.figure(figsize=(20, 10))
    plt.imshow(img_plot)

def plot_bbox(img, bbox_list):
    img_plot = img.copy()
    for i in range(len(bbox_list)):
        start_pt = bbox_list[i][0]
        end_pt = bbox_list[i][2]
        img_plot = cv2.rectangle(img_plot, pt1=start_pt, pt2=end_pt, color=(255, 0, 0), thickness=2)
    plt.figure(figsize=(20, 10))
    plt.imshow(img_plot)

def get_distance(pt1, pt2):
    x1, y1 = pt1
    x2, y2 = pt2
    return np.sqrt(np.square(x1 - x2) + np.square(y1 - y2))

def check_centroid(pt, centroid):
    x, y = pt
    cx, cy = centroid
    distance = get_distance(pt1=(x, y), pt2=(cx, cy))
    if distance < max_distance:
        return True
    else:
        return False

def update_centroid(pt, centroids_list):
    new_centroids_list = centroids_list.copy()
    flag_new_centroid = True
    for j, c in enumerate(centroids_list):
        temp_centroid = np.mean(c, axis=0)
        if_close = check_centroid(pt, temp_centroid)
        if if_close:
            new_centroids_list[j].append(pt)
            flag_new_centroid = False
            break
    if flag_new_centroid:
        new_centroids_list.append([pt])
    new_centroids_list = recheck_centroid(new_centroids_list)
    return new_centroids_list

def recheck_centroid(centroids_list):
    new_centroids_list = [list(set(c)) for c in centroids_list]
    return new_centroids_list

def get_bbox(pts):
    minn_x, minn_y = np.min(pts, axis=0)
    maxx_x, maxx_y = np.max(pts, axis=0)
    return [[minn_x, minn_y], [maxx_x, minn_y], [maxx_x, maxx_y], [minn_x, maxx_y]]
class RotateAndTransform:
    def __init__(self, path_img_ref):
        self.path_img_ref = path_img_ref
        self.ref_img = self._read_ref_image()
        # sift
        self.sift = cv2.SIFT_create()
        # feature matching
        self.bf = cv2.BFMatcher()
        # FLANN parameters
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)  # or pass empty dictionary
        self.flann = cv2.FlannBasedMatcher(index_params, search_params)

    def _read_ref_image(self):
        ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
        ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
        return ref_img

    def read_src_image(self, path_img_src):
        self.path_img_src = path_img_src
        # read images
        # ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
        src_img = cv2.imread(path_img_src, cv2.IMREAD_COLOR)
        src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
        return src_img

    def convert_bw(self, img):
        img_bw = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        return img_bw

    def get_keypoints_descriptors(self, img_bw):
        keypoints, descriptors = self.sift.detectAndCompute(img_bw, None)
        return keypoints, descriptors

    def get_matches(self, src_descriptors, ref_descriptors, threshold=0.6):
        matches = self.bf.knnMatch(ref_descriptors, src_descriptors, k=2)
        flann_matches = self.flann.knnMatch(ref_descriptors, src_descriptors, k=2)
        good_matches = []
        good_flann_matches = []
        # Apply ratio test for Brute Force
        for m, n in matches:
            if m.distance < threshold * n.distance:
                good_matches.append([m])
        print(f'Number of BF Matches: {len(matches)}, Number of good BF Matches: {len(good_matches)}')
        # Apply ratio test for FLANN
        for m, n in flann_matches:
            if m.distance < threshold * n.distance:
                good_flann_matches.append([m])
        # matches = sorted(matches, key = lambda x:x.distance)
        print(f'Number of FLANN Matches: {len(flann_matches)}, Number of good FLANN Matches: {len(good_flann_matches)}')
        return good_matches, good_flann_matches

    def get_src_dst_pts(self, good_flann_matches, ref_keypoints, src_keypoints):
        pts_src = []
        pts_ref = []
        n = len(good_flann_matches)
        for i in range(n):
            ref_index = good_flann_matches[i][0].queryIdx
            src_index = good_flann_matches[i][0].trainIdx
            pts_src.append(src_keypoints[src_index].pt)
            pts_ref.append(ref_keypoints[ref_index].pt)
        return np.array(pts_src), np.array(pts_ref)
def extend_bbox(bbox, increment=0.1):
    bbox_new = bbox.copy()
    bbox_new[0] = [bbox_new[0][0] - int(bbox_new[0][0] * increment), bbox_new[0][1] - int(bbox_new[0][1] * increment)]
    bbox_new[1] = [bbox_new[1][0] + int(bbox_new[1][0] * increment), bbox_new[1][1] - int(bbox_new[1][1] * increment)]
    bbox_new[2] = [bbox_new[2][0] + int(bbox_new[2][0] * increment), bbox_new[2][1] + int(bbox_new[2][1] * increment)]
    bbox_new[3] = [bbox_new[3][0] - int(bbox_new[3][0] * increment), bbox_new[3][1] + int(bbox_new[3][1] * increment)]
    return bbox_new

def crop_bbox(img, bbox):
    y, x = bbox[0]
    h, w = bbox[1][0] - bbox[0][0], bbox[2][1] - bbox[0][1]
    return img[x: x + w, y: y + h, :]
base_img = cv2.imread(path_img_base)
ref_img = cv2.imread(path_img_ref)
rnt = RotateAndTransform(path_img_ref)
ref_img_bw = rnt.convert_bw(img=rnt.ref_img)
ref_keypoints, ref_descriptors = rnt.get_keypoints_descriptors(ref_img_bw)
base_img = rnt.read_src_image(path_img_src = path_img_base)
base_img_bw = rnt.convert_bw(img=base_img)
base_keypoints, base_descriptors = rnt.get_keypoints_descriptors(base_img_bw)
good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=ref_descriptors, threshold=0.6)
ref_points = []
for gm in good_flann_matches:
    x, y = ref_keypoints[gm[0].queryIdx].pt
    x, y = int(x), int(y)
    ref_points.append((x, y))

max_distance = 50
centroids = [[ref_points[0]]]
for i in tqdm(range(len(ref_points))):
    pt = ref_points[i]
    centroids = update_centroid(pt, centroids)
bbox = [get_bbox(c) for c in centroids]
centroids = [np.mean(c, axis=0) for c in centroids]
print(f'Number of Points: {len(good_flann_matches)}, centroids: {len(centroids)}')
data = []
for i in range(len(bbox)):
    temp_crop_img = crop_bbox(ref_img, extend_bbox(bbox[i], 0.01))
    temp_crop_img_bw = rnt.convert_bw(img=temp_crop_img)
    temp_crop_keypoints, temp_crop_descriptors = rnt.get_keypoints_descriptors(temp_crop_img_bw)
    good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=temp_crop_descriptors, threshold=0.6)
    temp_data = {'image': temp_crop_img,
                 'num_matched': len(good_flann_matches),
                 'total_keypoints': len(base_keypoints),
                 }
    data.append(temp_data)

filter_data = [{'num_matched': i['num_matched'], 'image': i['image']} for i in data if i['num_matched'] > 25]

for i in range(len(filter_data)):
    temp_num_match = filter_data[i]['num_matched']
    plt.figure()
    plt.title(f'num matched: {temp_num_match}')
    plt.imshow(filter_data[i]['image'])
First you could detect any item that is on the shelf with a network like this; it is pre-trained in this exact context and works pretty well. You should also rectify the image before feeding it to the network. You will obtain bounding boxes for every product (maybe some false positives/negatives, but that's another issue). Then you can match each box with the template using SIFT and calculate a score (it's up to you to define which score works), but I suggest using another approach like a siamese network if you have a consistent dataset.
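For the scoring step, one simple option is the fraction of template keypoints that survive Lowe's ratio test inside each detected box. This is only a sketch: shelf_img, template_img and boxes (a list of x, y, w, h detections from the network) are placeholders.
import cv2

sift = cv2.SIFT_create()
bf = cv2.BFMatcher()
kp_t, des_t = sift.detectAndCompute(template_img, None)

def match_score(crop):
    # fraction of template keypoints matched into one detected box
    kp_c, des_c = sift.detectAndCompute(crop, None)
    if des_c is None or len(kp_c) < 2:
        return 0.0
    good = [m for m, n in bf.knnMatch(des_t, des_c, k=2) if m.distance < 0.7 * n.distance]
    return len(good) / max(len(kp_t), 1)

scores = [match_score(shelf_img[y:y + h, x:x + w]) for (x, y, w, h) in boxes]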
I'm trying to set up a fish-eye camera for object localisation with respect to a particular frame of reference.
I tried both the OpenCV fisheye module and the rational model from calibrateCamera() to calibrate. I obtained this result.
I collected 2 different datasets, one with calibration images taken mostly close to the camera (ds1) and a second one with images taken from afar (ds2). ds12 is a dataset obtained by merging the two.
nok4 indicates the fisheye model with k4 fixed to 0.
rm is the rational model from cv2.calibrateCamera()
The camera has a 178° horizontal FOV and a 101° vertical FOV; the distortion is corrected mostly in the center of the image, with disappointing results in the outermost parts of the image.
Am I doing something wrong? What could I do to improve the results?
Edit
Here's the code I'm using for both the calibration processes:
import cv2 as cv
import os
import numpy as np
cwd = os.path.dirname(os.path.realpath(__file__))
os.chdir(cwd)
number = None
folder_name = "merged_images"
if number is not None:
    folder_name += "_" + str(number)
points_path = os.path.join(folder_name, "dataset", "good_detections")
npz = np.load(os.path.join(points_path, "points.npz"))
square_size = 0.02435
imgpoints = npz["imgpoints"]
objpoints = npz["objpoints"] * square_size
file_names = npz["file_names"]
shuffle = True
if shuffle:
    if "indices.npz" in os.listdir(points_path):
        p = np.load(os.path.join(points_path, "indices.npz"))["indices"]
    else:
        print("Random indices assigned")
        p = np.random.permutation(len(imgpoints))
    imgpoints = imgpoints[p]
    objpoints = objpoints[p]
    file_names = file_names[p]
img = cv.imread(os.path.join(points_path, file_names[2].replace("detected_", "")))
flag_list = [
    cv.CALIB_RATIONAL_MODEL,
    # cv.CALIB_ZERO_TANGENT_DIST,
]
calibration_flags = 0
for flag in flag_list:
    calibration_flags += flag
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
shape = gray.shape[::-1]
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints,
imgpoints,
shape,
None,
None,
flags = calibration_flags
)
h, w = img.shape[:2]
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# fisheye model
flag_list = [
    cv.fisheye.CALIB_RECOMPUTE_EXTRINSIC,
    cv.fisheye.CALIB_CHECK_COND,
    cv.fisheye.CALIB_FIX_SKEW,
    # cv.fisheye.CALIB_FIX_K4,
    # cv.fisheye.CALIB_FIX_K3,
    # cv.fisheye.CALIB_FIX_K2,
    # cv.fisheye.CALIB_FIX_K1,
]
calibration_flags = 0
for flag in flag_list:
    calibration_flags += flag
N_OK = len(objpoints)
K = np.zeros((3, 3))
D = np.zeros((4, 1))
rvecs = [np.zeros((1, 1, 3), dtype=np.float64) for i in range(N_OK)]
tvecs = [np.zeros((1, 1, 3), dtype=np.float64) for i in range(N_OK)]
n_objpoints = [np.expand_dims(objp, 0) for objp in objpoints]
all_true_points = list(n_objpoints)
all_image_points = list(imgpoints)
all_frames = list(file_names)
rejected = []
counter = 0
while True:
    try:
        rms, mtx, dist, rvecs, tvecs = \
            cv.fisheye.calibrate(
                all_true_points,
                all_image_points,
                gray.shape[::-1],
                K,
                D,
                rvecs,
                tvecs,
                calibration_flags,
                (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 1e-3)
            )
        print('Found a calibration based on {} well-conditioned images.'.format(len(all_true_points)))
        break
    except cv.error as err:
        try:
            idx = int(str(err).split('array ')[1][0])  # Parse index of invalid image from error message
            all_true_points.pop(idx)
            all_image_points.pop(idx)
            rejected.append(all_frames.pop(idx))
            print(f"{counter}. Removed ill-conditioned image {idx} from the data. Trying again...")
            counter += 1
        except IndexError:
            raise err
h,w = img.shape[:2]
DIM = (w, h)
dim1 = img.shape[:2][::-1] # dim1 is the dimension of input image to un-distort
dim2 = None
dim3 = None
balance = 1
assert dim1[0]/dim1[1] == DIM[0]/DIM[1], "Image to undistort needs to have same aspect ratio as the ones used in calibration"
if not dim2:
    dim2 = dim1
if not dim3:
    dim3 = dim1
scaled_K = K * dim1[0] / DIM[0] # The values of K is to scale with image dimension.
scaled_K[2][2] = 1.0 # Except that K[2][2] is always 1.0
# This is how scaled_K, dim2 and balance are used to determine the final K used to un-distort image. OpenCV document failed to make this clear!
new_K = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(scaled_K, D, dim2, np.eye(3), balance=balance)
map1, map2 = cv.fisheye.initUndistortRectifyMap(scaled_K, D, np.eye(3), new_K, dim3, cv.CV_16SC2)
undistorted_img = cv.remap(img, map1, map2, interpolation=cv.INTER_LINEAR, borderMode=cv.BORDER_CONSTANT)
new_img = cv.hconcat([undistorted_img, img])
Corner extraction is performed with the following code:
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
file_names = [] # files analyzed
for file in os.listdir(detections_path):
    if file.startswith("hd_frame"):
        frame = cv.imread(os.path.join(detections_path, file))
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        found, corners = cv.findChessboardCorners(gray, (9,6), None)
        if found:
            file_name = "detected_" + file
            objpoints.append(objp)
            imgpoints.append(corners)
            # save file_name
            file_names.append(file_name)
            corners2 = cv.cornerSubPix(gray, corners, (11,11), (-1,-1), criteria)
            # Draw and display the corners
            cv.drawChessboardCorners(frame, (9,6), corners2, found)
            # save image and points
            cv.imwrite(os.path.join(detections_path, file_name), frame)
You can use a single image for calibration as described in detail here: https://discorpy.readthedocs.io/en/latest/usage/demo_06.html . The correction model used in this package may give you better results than the model used by OpenCV: https://discorpy.readthedocs.io/en/latest/tutorials/methods.html
I want to stitch some images (identical resolution) into a panorama using OpenCV (without using the Stitcher class). I tried the algorithm described here, but instead of the desired panorama I get an image that is made up of the last image to be stitched and a large black area. I outputted an image for each iteration and the result is the same: the current image plus a larger black area each time.
import numpy
import cv2
# images is an array of images that i need to stitch
matcher = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
ORB = cv2.ORB_create()
homography = 0
panorama = 0
for image in range(0, len(images) - 1):
    key1, desc1 = ORB.detectAndCompute(images[image], None)
    key2, desc2 = ORB.detectAndCompute(images[image + 1], None)
    matches = matcher.match(desc1, desc2, None)
    matches = sorted(matches, key=lambda x: x.distance, reverse=True)
    numGoodMatches = int(len(matches) * 0.15)
    matches2 = matches[-numGoodMatches:]
    points1 = numpy.zeros((len(matches2), 2), dtype=numpy.float32)
    points2 = numpy.zeros((len(matches2), 2), dtype=numpy.float32)
    for i, match in enumerate(matches2):
        points1[i, :] = key1[match.queryIdx].pt
        points2[i, :] = key2[match.trainIdx].pt
    h, mask = cv2.findHomography(points2, points1, cv2.RANSAC)
    if isinstance(homography, int):
        homography = h
        img1H, img1W = images[image].shape
        img2H, img2W = images[image + 1].shape
        aligned = cv2.warpPerspective(images[image + 1], h, (img1W + img2W, img2H))
        stitchedImage = numpy.copy(aligned)
        stitchedImage[0:img1H, 0:img2W] = images[image]
        panorama = stitchedImage
    else:
        h *= homography
        homography = h
        img1H, img1W = panorama.shape
        img2H, img2W = images[image + 1].shape
        aligned = cv2.warpPerspective(images[image + 1], h, (img1W + img2W, img2H))
        stitchedImage = numpy.copy(aligned)
        stitchedImage[0:img1H, 0:img2W] = images[image + 1]
        panorama = stitchedImage
Examples of the images I get:
The first image is correct.
The last image has the correct width (n * original width) but contains only one image, and the rest is a black area.
This
stitchedImage[0:img1H, 0:img2W] = images[image + 1]
places the image in the top-left corner of "stitchedImage".
Somehow the image x offset must be calculated for each part of the panorama:
stitchedImage[0:img1H, x_offset:x_offset+img2W] = images[image + 1]
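One way to get that offset (a sketch, not tested against the loop above) is to push the corners of the next image through the homography used for the warp and read off where its left edge lands:
def left_edge_offset(img, H):
    # transform the image corners with H and return the x coordinate where the warped image starts
    h, w = img.shape[:2]
    corners = numpy.float32([[0, 0], [w, 0], [w, h], [0, h]]).reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(corners, H)
    return int(round(warped[:, 0, 0].min()))

x_offset = left_edge_offset(images[image + 1], h)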
I am trying to create a program that can calculate the rotation of a plane from two images in Python using OpenCV. I am doing this by finding the homography matrix that represents the transformation between the two views and decomposing it with the intrinsic camera matrix, using the decomposeHomographyMat function in OpenCV.
I tested the accuracy using Blender by creating a plane with a QR code on it and then rotating it by known values, as seen here where the plane has been rotated by 15, 30, 15 in XYZ Euler coordinates, although I want the final program to take pictures of a plane being translated in real life.
The intrinsic camera matrix was found in Blender using this technique, and also via camera calibration in Blender by putting a checkerboard into the scene and taking renders from multiple angles and translations.
However, when I run the code, the ZYX Euler outputs I get are [27.9, -25.4, -26.31] instead of [15, -30, -15], which is not accurate. Some other examples of the code's output compared to the expected values are below, to give an idea of the accuracy of the code:
Expected - [0 -30 0]
Calculated - [0.82 -34.51 -1.91]
Expected - [0 0 15]
Calculated - [ 0 0 -15.02]
Expected - [15 0 15]
Calculated - [ 16.23 3.76 -13.76]
I was wondering if there is any way to increase the accuracy of the calculated rotation matrices, or whether this is the best accuracy I can get; and if it is, what alternatives I could use to calculate the rotation of a plane about 3 axes from images (adding extra cameras is also an option).
Any help would be much appreciated!
The code I am using is shown below:
#Import modules
import cv2
import numpy as np
from matplotlib import pyplot as plt
import glob
import math
########################################################################
#Import pictures
img1 = cv2.imread("top.png", cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread("150015.png", cv2.IMREAD_GRAYSCALE)
#Feature Extraction
MIN_MATCH_COUNT = 10
sift = cv2.xfeatures2d.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1,des2,k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m, n in matches:
    if m.distance < 0.80 * n.distance:
        good.append(m)

if len(good) > MIN_MATCH_COUNT:
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)

    #Finds homography matrix
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1)
    matchesMask = mask.ravel().tolist()

    h, w = img1.shape
    pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
    dst = cv2.perspectiveTransform(pts, M)

    img2 = cv2.polylines(img2, [np.int32(dst)], True, 255, 3, cv2.LINE_AA)
else:
    print("Not enough matches are found - %d/%d" % (len(good), MIN_MATCH_COUNT))
    matchesMask = None

draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                   singlePointColor = None,
                   matchesMask = matchesMask, # draw only inliers
                   flags = 2)
img3 = cv2.drawMatches(img1, kp1, img2, kp2, good, None, **draw_params)
plt.imshow(img3, 'gray'), plt.show()

#Camera calibration matrix
K = ((3,3))
K = np.zeros(K)

#Camera calibration matrix from blender python script
#K = np.matrix('1181.2500 0 540; 0 2100 540; 0 0 1')

#Camera calibration matrix from importing checkboard into blender
K = np.matrix('1307.68697 0 600.618354; 0 1309.66779 605.481488; 0 0 1')

#Homography matrix is decomposed
num, Rs, Ts, Ns = cv2.decomposeHomographyMat(M, K)

# Checks if a matrix is a valid rotation matrix.
def isRotationMatrix(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype = R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

# Calculates rotation matrix to euler angles
# The result is the same as MATLAB except the order
# of the euler angles ( x and z are swapped ).
def rotationMatrixToEulerAngles(R):
    assert(isRotationMatrix(R))
    sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2,1], R[2,2])
        y = math.atan2(-R[2,0], sy)
        z = math.atan2(R[1,0], R[0,0])
    else:
        x = math.atan2(-R[1,2], R[1,1])
        y = math.atan2(-R[2,0], sy)
        z = 0
    return np.array([x, y, z])

#Convert the 4 rotation matrix solutions into XYZ Euler angles
i = 0
for i in range(0, 4):
    R = Rs[i]
    angles = rotationMatrixToEulerAngles(R)
    x = np.degrees(angles[0])
    y = np.degrees(angles[1])
    z = np.degrees(angles[2])
    anglesDeg = np.array([x, y, z])
    print(anglesDeg)
The images I have generated from Blender are as follows:
top.png (0x, 0y, 0z)
003000.png (0x, 30y, 0z)
150015.png (15x, 0y, 15z)
153000.png (15x, 30y, 0z)
153015.png (15x, 30y, 15z)
And here is an image with keypoints matching for the 153015.png comparison
Euler angles are not unique; many sets of Euler angles can map to the same rotation. Try using the distance between rotation vectors as the criterion.
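A short sketch of that comparison, reusing Rs from cv2.decomposeHomographyMat in the code above; R_expected stands for an assumed reference rotation (e.g. built from the Blender ground truth):
def rotation_distance(R_a, R_b):
    # angle (in radians) of the relative rotation between two rotation matrices
    rvec, _ = cv2.Rodrigues(np.dot(R_a, R_b.T))
    return np.linalg.norm(rvec)

best_R = min(Rs, key=lambda R: rotation_distance(np.asarray(R), R_expected))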
My goal is to:
deskew a scanned image such that its text is perfectly placed on top of the text of the original image (subtracting the images would then remove the text);
prevent any loss of information in the deskewed image.
I use SURF features to feed the findHomography function. Then I use the warpPerspective function to transform the scanned image. The resulting image almost perfectly fits onto the original image.
However, the scanned image has content on its corners which is lost after the transformation because the text in the scanned image is smaller and has to be scaled up.
Deskewing an image that has slightly smaller text
Information at the borders of the image is cropped
To avoid any loss of information, I convert the image to RGBA and set the borderValue parameter in warpPerspective such that any added background has transparent color. I remove the transparent pixels after the transformation again. This procedure works but seems highly inefficient.
Question: I'm looking for a working code example (C++ or Python) that shows how to do this more efficiently.
Image has been deskewed and content is preserved. However, the text of the two pictures isn't on top of each other anymore
Text position is off because the warped image has a different size than what warpPerspective expected
After transforming the image, the problem is that the two images aren't aligned anymore because the dimensions of the transformed image are different from what the warpPerspective method expected.
Question: How can I realign the two images? It would be great if there was a way to do incorporate this into the previous step already. Again, a working code example would be very helpful.
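For what it's worth, here is one common way to handle both points at once, given only as a sketch (it is not wired into the class below, and it assumes M maps the skewed image into the original's frame, i.e. the inverse of the M computed in __findHomography below): transform the corners, grow the output canvas to their bounding box, and keep the offset so the warped image can be placed back onto the original.
import cv2
import numpy as np

def warp_without_cropping(img, M):
    # transform the corners, grow the canvas to their bounding box and keep the offset
    h, w = img.shape[:2]
    corners = np.float32([[0, 0], [w, 0], [w, h], [0, h]]).reshape(-1, 1, 2)
    warped_corners = cv2.perspectiveTransform(corners, M)
    x_min, y_min = np.floor(warped_corners.min(axis=0).ravel()).astype(int)
    x_max, y_max = np.ceil(warped_corners.max(axis=0).ravel()).astype(int)
    T = np.array([[1, 0, -x_min], [0, 1, -y_min], [0, 0, 1]], dtype=np.float64)
    warped = cv2.warpPerspective(img, T.dot(M), (x_max - x_min, y_max - y_min))
    # pasting warped at (x_min, y_min) in the original's frame realigns the two images
    return warped, (x_min, y_min)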
Here's the code that I have so far. It deskews the image while preserving its content, however, the text is not on top of the original text anymore.
import math
import cv2
import numpy as np
class Deskewer:
    def __init__(self, hessianTreshold = 5000):
        self.__hessianThresh = hessianTreshold
        self.imgOrigGray, self.imgSkewed, self.imgSkewedGray = None, None, None

    def start(self, imgOrig, imgSkewed):
        self.imgOrigGray = cv2.cvtColor(imgOrig, cv2.COLOR_BGR2GRAY)
        self.imgSkewed = imgSkewed  # final transformation will be performed on color image
        self.imgSkewedGray = cv2.cvtColor(imgSkewed, cv2.COLOR_BGR2GRAY)  # prior calculation is faster on gray

        kp1, des1, kp2, des2 = self.__detectFeatures()
        goodMatches = self.__flannMatch(des1, des2)

        MIN_MATCH_COUNT = 10
        M = None
        if len(goodMatches) > MIN_MATCH_COUNT:
            M, _ = self.__findHomography(kp1, kp2, goodMatches)
        else:
            print("Not enough matches are found - %d/%d" % (len(goodMatches), MIN_MATCH_COUNT))
            return

        return self.__deskew(M)

    def __detectFeatures(self):
        surf = cv2.xfeatures2d.SURF_create(self.__hessianThresh)
        kp1, des1 = surf.detectAndCompute(self.imgOrigGray, None)
        kp2, des2 = surf.detectAndCompute(self.imgSkewedGray, None)
        return kp1, des1, kp2, des2

    def __flannMatch(self, des1, des2):
        global matches
        FLANN_INDEX_KDTREE = 0
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(des1, des2, k=2)

        # store all the good matches as per Lowe's ratio test.
        good = []
        for m, n in matches:
            if m.distance < 0.7 * n.distance:
                good.append(m)
        return good

    def __findHomography(self, kp1, kp2, goodMatches):
        src_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches]).reshape(-1, 1, 2)

        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        matchesMask = mask.ravel().tolist()

        i = matchesMask.index(1)
        # TODO: This is a matching point before the warpPerspective call. How can I calculate this point AFTER the call?
        print("POINTS: object(", src_pts[i][0][1], ",", src_pts[i][0][0], ") - scene(", dst_pts[i][0][1], ",", dst_pts[i][0][0], ")")

        return M, mask

    def getComponents(self, M):
        # ((translationx, translationy), rotation, (scalex, scaley), shear)
        a = M[0, 0]
        b = M[0, 1]
        c = M[0, 2]
        d = M[1, 0]
        e = M[1, 1]
        f = M[1, 2]

        p = math.sqrt(a * a + b * b)
        r = (a * e - b * d) / (p)
        q = (a * d + b * e) / (a * e - b * d)

        translation = (c, f)
        scale = (p, r)  # p = x-Axis, r = y-Axis
        shear = q
        theta = math.atan2(b, a)
        degrees = math.atan2(b, a) * 180 / math.pi

        return (translation, theta, degrees, scale, shear)

    def __deskew(self, M):
        # this info might come in handy here for calculating the dsize of warpPerspective?
        translation, theta, degrees, scale, shear = self.getComponents(M)

        # Alpha channel allows me to set unique feature to pixels that are created during warpPerspective
        imSkewedAlpha = cv2.cvtColor(self.imgSkewed, cv2.COLOR_BGR2BGRA)

        # These sizes have been randomly chosen to make sure that all the contents fit in the new canvas
        height = 5000
        width = 5000
        shift = -500
        M2 = np.array([[1, 0, shift],
                       [0, 1, shift],
                       [0, 0, 1]])
        M3 = np.dot(M, M2)

        # TODO: How can I calculate the dsize argument?
        # Newly created pixels are set to transparent
        im_out = cv2.warpPerspective(imSkewedAlpha, M3, (height, width),
                                     flags=cv2.WARP_INVERSE_MAP, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 0, 0, 0))

        # http://codereview.stackexchange.com/a/132933
        # Mask of non-black pixels (assuming image has a single channel).
        mask = im_out[:, :, 3] == 255

        # Coordinates of non-black pixels.
        coords = np.argwhere(mask)

        # Bounding box of non-black pixels.
        x0, y0 = coords.min(axis=0)
        x1, y1 = coords.max(axis=0) + 1  # slices are exclusive at the top

        # Get the contents of the bounding box.
        cropped = im_out[x0:x1, y0:y1]

        # TODO: The warped image needs to align nicely on the original image
        return cropped
origImg = cv2.imread("Letter.png")
skewedImg = cv2.imread("A4.png")
deskewed = Deskewer().start(origImg, skewedImg)
cv2.imshow("Original", origImg)
cv2.imshow("Deskewed", deskewed)
cv2.waitKey(0)
Original and skewed image (with additional content) for testing