so I was wondering if there is a way to stitch two parts of an image at the right position using key-points and homography matrix.
as an example here are the two images.
so for the keypoints detection I am using "Superpoint" and I am getting the following result.
so I am searching for a way to maybe use the cv2.warpAffine function to align the images together, all my attempts so far didn't work. The goal is to let the program decide where to place the second image in the first one.
here is the code I am using:
enter code here
import argparse
from pathlib import Path
import cv2
import numpy as np
import tensorflow as tf # noqa: E402
from superpoint.settings import EXPER_PATH # noqa: E402
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def extract_superpoint_keypoints_and_descriptors(keypoint_map, descriptor_map,
                                                 keep_k_points=1000):
    """Convert SuperPoint output maps into OpenCV keypoints and descriptors.

    Args:
        keypoint_map: score map (assumed 2-D); entries greater than zero are
            treated as candidate keypoints.
        descriptor_map: array indexed as ``descriptor_map[row, col]`` to fetch
            the descriptor of a pixel.
        keep_k_points: maximum number of candidates to keep; the
            highest-scoring ones are retained.

    Returns:
        ``(keypoints, desc)``: a list of ``cv2.KeyPoint`` built with
        x = column, y = row and size 1, and the descriptors of those
        keypoints in the same order.
    """
    def select_k_best(points, k):
        """Keep the k rows with the largest score and drop the score column.

        ``points`` has shape (num_points, 3); the last column is the score.
        """
        order = points[:, 2].argsort()
        ranked = points[order, :2]
        keep = min(k, points.shape[0])
        return ranked[-keep:, :]

    # Locate every pixel with a positive detection score.
    hits = np.where(keypoint_map > 0)
    rows, cols = hits[0], hits[1]
    scores = keypoint_map[rows, cols]
    candidates = np.stack([rows, cols, scores], axis=-1)

    # Keep the strongest candidates as integer (row, col) pairs.
    best = select_k_best(candidates, keep_k_points).astype(int)

    # Look up the descriptor stored at each kept pixel.
    desc = descriptor_map[best[:, 0], best[:, 1]]

    # cv2.KeyPoint takes (x, y), i.e. (col, row).
    keypoints = [cv2.KeyPoint(col, row, 1) for row, col in best]
    return keypoints, desc
def match_descriptors(kp1, desc1, kp2, desc2):
    """Match two descriptor sets with cross-checked brute-force L2 search.

    Returns:
        ``(m_kp1, m_kp2, matches)``: the keypoints of ``kp1`` and ``kp2``
        picked by each match (aligned index by index) and the raw matches
        returned by the matcher.
    """
    matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
    matches = matcher.match(desc1, desc2)
    # queryIdx indexes into the first set, trainIdx into the second.
    m_kp1 = [kp1[m.queryIdx] for m in matches]
    m_kp2 = [kp2[m.trainIdx] for m in matches]
    return m_kp1, m_kp2, matches
def compute_homography(matched_kp1, matched_kp2):
    """Estimate a RANSAC homography between two lists of matched keypoints.

    Returns:
        ``(H, inliers)``: the matrix returned by ``cv2.findHomography`` and
        the flattened inlier mask (one entry per input match).
    """
    pts1 = cv2.KeyPoint_convert(matched_kp1)
    pts2 = cv2.KeyPoint_convert(matched_kp2)
    # NOTE(review): the two coordinate columns are swapped before estimation,
    # so H is expressed in the swapped order - presumably (row, col). Confirm
    # before handing H to cv2.warpPerspective, which works in (x, y).
    swapped = [1, 0]
    H, inliers = cv2.findHomography(pts1[:, swapped], pts2[:, swapped],
                                    cv2.RANSAC, 5.0)
    inliers = inliers.flatten()
    print(H)
    return H, inliers
def preprocess_image(img_file, img_size):
    """Load an image and prepare it as network input.

    Args:
        img_file: path of the image to read.
        img_size: size passed to ``cv2.resize``.

    Returns:
        ``(img_preprocessed, img_orig)``: the grayscale image as float32 with
        a trailing channel axis and values divided by 255, and the resized
        colour image.
    """
    img_orig = cv2.resize(cv2.imread(img_file, cv2.IMREAD_COLOR), img_size)
    gray = cv2.cvtColor(img_orig, cv2.COLOR_BGR2GRAY)
    # Add the channel axis, then scale to float32.
    img_preprocessed = np.expand_dims(gray, 2).astype(np.float32) / 255.
    return img_preprocessed, img_orig
if __name__ == '__main__':
    # Command-line interface: model name, the two image paths, and optional
    # resize / keypoint-count settings.
    parser = argparse.ArgumentParser(
        description='Compute the homography '
                    'between two images with the SuperPoint feature matches.')
    parser.add_argument('weights_name', type=str)
    parser.add_argument('img1_path', type=str)
    parser.add_argument('img2_path', type=str)
    parser.add_argument('--H', type=int, default=480,
                        help='The height in pixels to resize the images to. '
                             '(default: 480)')
    parser.add_argument('--W', type=int, default=640,
                        help='The width in pixels to resize the images to. '
                             '(default: 640)')
    parser.add_argument('--k_best', type=int, default=1000,
                        help='Maximum number of keypoints to keep '
                             '(default: 1000)')
    args = parser.parse_args()

    weights_name = args.weights_name
    img1_file = args.img1_path
    img2_file = args.img2_path
    img_size = (args.W, args.H)  # cv2.resize takes (width, height)
    keep_k_best = args.k_best

    # Saved models live under EXPER_PATH/saved_models/<weights_name>.
    weights_root_dir = Path(EXPER_PATH, 'saved_models')
    weights_root_dir.mkdir(parents=True, exist_ok=True)
    weights_dir = Path(weights_root_dir, weights_name)

    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        tf.saved_model.loader.load(sess,
                                   [tf.saved_model.tag_constants.SERVING],
                                   str(weights_dir))

        input_img_tensor = graph.get_tensor_by_name('superpoint/image:0')
        output_prob_nms_tensor = graph.get_tensor_by_name('superpoint/prob_nms:0')
        output_desc_tensors = graph.get_tensor_by_name('superpoint/descriptors:0')

        # First image: run the network, then extract keypoints/descriptors.
        img1, img1_orig = preprocess_image(img1_file, img_size)
        out1 = sess.run([output_prob_nms_tensor, output_desc_tensors],
                        feed_dict={input_img_tensor: np.expand_dims(img1, 0)})
        keypoint_map1 = np.squeeze(out1[0])
        descriptor_map1 = np.squeeze(out1[1])
        kp1, desc1 = extract_superpoint_keypoints_and_descriptors(
            keypoint_map1, descriptor_map1, keep_k_best)

        # Second image: same pipeline.
        img2, img2_orig = preprocess_image(img2_file, img_size)
        out2 = sess.run([output_prob_nms_tensor, output_desc_tensors],
                        feed_dict={input_img_tensor: np.expand_dims(img2, 0)})
        keypoint_map2 = np.squeeze(out2[0])
        descriptor_map2 = np.squeeze(out2[1])
        kp2, desc2 = extract_superpoint_keypoints_and_descriptors(
            keypoint_map2, descriptor_map2, keep_k_best)

        # Match and get rid of outliers
        m_kp1, m_kp2, matches = match_descriptors(kp1, desc1, kp2, desc2)
        H, inliers = compute_homography(m_kp1, m_kp2)

        # Keep only the matches RANSAC marked as inliers and draw them.
        matches = np.array(matches)[inliers.astype(bool)].tolist()
        # NOTE(review): matched_img is built but never shown or saved in the
        # visible code.
        matched_img = cv2.drawMatches(img1_orig, kp1, img2_orig, kp2, matches,
                                      None, matchColor=(0, 255, 0),
                                      singlePointColor=(0, 0, 255))
I appreciate your help!
So for closure, using homographies or affine transformations to stitch these two images together is not the right way to do this. There are too many interest points that are candidates between each portion that are highly ambiguous which would not allow this stitching to be successful.
The only way for this to have worked would be if your feature matches were specified at the boundaries of where the two images were disconnected from each other. However, these points would never be interest points as the local neighbourhoods do not offer any unique information. Also, edges by definition are not interest points so any detector would never yield those as proper points.
I apologize if this wasn't the answer you were looking for!
This should hopefully be what you are looking for. The addWeighted function is the function in cv2 for merging images
import cv2
# Blend two images of identical size with cv2.addWeighted.
img1 = cv2.imread('7FgBZ.jpg', 1)
img2 = cv2.imread('qqiH8.jpg', 1)
# cv2.imread signals failure by returning None instead of raising, so stop
# here with a clear message and keep addWeighted from failing obscurely.
if img1 is None or img2 is None:
    raise IOError("Could not read '7FgBZ.jpg' and/or 'qqiH8.jpg'")
print(img2)
# dst = 0.2 * img1 + 0.2 * img2 + 0
dst = cv2.addWeighted(img1, 0.2, img2, 0.2, 0)
print(dst)
cv2.imshow('dst', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
Related
I'm a novice at OpenCV. I'm currently following this tutorial on image alignment, and I have the following image and template for testing:
scanned image(test_image.jpg):
template image(template.jpg):
and the following python code:
from __future__ import print_function
import cv2
import numpy as np
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
def alignImages(im1, im2):
    """Warp ``im1`` onto ``im2`` using ORB matches and a RANSAC homography.

    Side effect: writes a visualisation of the retained matches to
    "matches.jpg".

    Returns:
        ``(im1Reg, h)``: ``im1`` warped to the size of ``im2`` and the
        estimated homography.
    """
    # ORB works on single-channel images.
    gray1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

    detector = cv2.ORB_create(MAX_FEATURES)
    kps1, descs1 = detector.detectAndCompute(gray1, None)
    kps2, descs2 = detector.detectAndCompute(gray2, None)

    # Brute-force Hamming matching, best (smallest distance) first.
    bf = cv2.DescriptorMatcher_create(
        cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    ranked = sorted(bf.match(descs1, descs2, None), key=lambda m: m.distance)

    # Keep only the top GOOD_MATCH_PERCENT fraction.
    kept = ranked[:int(len(ranked) * GOOD_MATCH_PERCENT)]

    cv2.imwrite("matches.jpg",
                cv2.drawMatches(im1, kps1, im2, kps2, kept, None))

    # Matched pixel coordinates as (N, 2) float32 arrays.
    points1 = np.array([kps1[m.queryIdx].pt for m in kept],
                       dtype=np.float32).reshape(-1, 2)
    points2 = np.array([kps2[m.trainIdx].pt for m in kept],
                       dtype=np.float32).reshape(-1, 2)

    h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)

    # The output canvas has the size of the reference image.
    height, width, _ = im2.shape
    im1Reg = cv2.warpPerspective(im1, h, (width, height))
    return im1Reg, h
if __name__ == '__main__':
    # Read reference image
    refFilename = "template.jpg"
    print("Reading reference image : ", refFilename)
    imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
    # cv2.imread returns None on failure; stop with a clear message rather
    # than failing later inside alignImages.
    if imReference is None:
        raise IOError("Could not read reference image: " + refFilename)

    # Read image to be aligned
    imFilename = "test_image.jpg"
    print("Reading image to align : ", imFilename)
    im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
    if im is None:
        raise IOError("Could not read image to align: " + imFilename)

    print("Aligning images ...")
    # Registered image will be stored in imReg.
    # The estimated homography will be stored in h.
    imReg, h = alignImages(im, imReference)

    # Write aligned image to disk.
    outFilename = "aligned.jpg"
    print("Saving aligned image : ", outFilename)
    cv2.imwrite(outFilename, imReg)

    # Print estimated homography
    print("Estimated homography : \n", h)
I get the following results after i ran the script:
matches.jpg:
UPDATE:
I was able to get the image when i increase the amount of orb features to 2000
aligned.jpg
But the homography is still not rotating the image, how can i rotate the image to the same position as the template?
There are two formulations for finding a homography (forward and backward), but if you have already found the homography, you can apply it without OpenCV as follows:
import numpy as np
from scipy.interpolate import griddata
# Build homogeneous coordinates (x, y, 1) for every source pixel.
# `src_image` (H x W x 3) and `homography` (3 x 3) must already be defined.
src_h, src_w, _ = src_image.shape
# Column-major flattening so that row k of `values` is the pixel whose
# coordinates sit in column k of `input_flat` below.
values = src_image.reshape((-1, 3), order='F')
yy, xx = np.meshgrid(np.arange(src_h), np.arange(src_w))
input_flat = np.concatenate((xx.reshape((1, -1)), yy.reshape((1, -1)), np.ones_like(xx.reshape((1, -1)))), axis=0)
# Apply the homography and divide by the third coordinate to get back to
# plain (x, y) positions.
points = np.matmul(homography, input_flat)
points_homogeneous = points[0:2, :] / points[2, :]
# Interpolate the scattered, warped samples onto the regular output grid.
dst_image_shape = [400, 400]  # could be any number here
yy, xx = np.meshgrid(np.arange(dst_image_shape[1]), np.arange(dst_image_shape[0]))
# The original passed an undefined name `values_relevant` here; the pixel
# values built above are what has to be interpolated.
src_image_warp = griddata(np.transpose(points_homogeneous), values, (yy, xx), method='linear')
# Numerical clean-up: NaN marks grid points outside the warped image.
src_image_warp[np.isnan(src_image_warp)] = 0
src_image_warp[src_image_warp > 255] = 255
src_image_warp = np.uint8(src_image_warp)
Note that this is done for a 1-channel image; for an RGB image this has to be done for each channel separately. In addition, this could be made to run faster by interpolating only the relevant coordinates, since the interpolation is the most time-consuming operation.
With opencv this can be done by:
import cv2
image_dst = cv2.warpPerspective(image_src, homography, size) # size is a tuple (width, height) of the destination image
Read more on homographies and the opencv implementation here.
Finding the homography
The homography can be found without using OpenCV, but that requires knowledge of linear algebra and the explanation is a bit lengthy; if needed, I will post it as an edit. For any practical case, however, the homography can be found using OpenCV as follows:
homography, status = cv2.findHomography(pts_src, pts_dst)
where pts_src are coordinates in the original image and pts_dst are their matching locations in the destination image. Since you have already found the point pairs, this will yield the homography (OpenCV optimizes the homography for minimal distortion in the backward operation, which is the correct way to perform homography computations).
You have a homography h calculated from findHomography and you can use warpPerspective to transform the template to have the same perspective as the photo.
Now you just need to invert the homography, and apply it to the photo instead of the template.
Either use np.linalg.inv for that, or pass the WARP_INVERSE_MAP flag to warpPerspective instead.
I need to determine the location of yogurts in the supermarket. Source photo looks like
With template:
I am using SIFT to extract the key points of the template:
# Match SIFT features between the photo 'train.jpg' and every file returned
# by glob("template.jpg"), apply a ratio test, then try to fit geometric
# models to the surviving matches and draw them.
# NOTE(review): `cv`, `glob`, `np`, `plt`, `ransac`, `AffineTransform` and
# `initialize_ransac` are not imported or defined in this snippet -
# presumably they come from elsewhere; confirm.
img1 = cv.imread('train.jpg')
sift = cv.SIFT_create()# queryImage
kp1, des1 = sift.detectAndCompute(img1, None)
path = glob.glob("template.jpg")
cv_img = []
# Largest number of knn matches seen so far across the template files.
l=0
for img in path:
img2 = cv.imread(img) # trainImage
# Initiate SIFT detector
# find the keypoints and descriptors with SIFT
kp2, des2 = sift.detectAndCompute(img2,None)
# FLANN parameters
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50) # or pass empty dictionary
flann = cv.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
# Need to draw only good matches, so create a mask
# ratio test as per Lowe's paper
# Remember the template that produced the most matches.
if (l < len(matches)):
l = len(matches)
image = img2
match = matches
h_query, w_query, _= img2.shape
matchesMask = [[0,0] for i in range(len(match))]
good_matches = []
# Maps a position in good_matches back to its index in `match`.
good_matches_indices = {}
for i,(m,n) in enumerate(match):
if m.distance < 0.7*n.distance:
matchesMask[i]=[1,0]
good_matches.append(m)
good_matches_indices[len(good_matches) - 1] = i
bboxes = []
# NOTE(review): `good` is not defined anywhere in the visible code; the ratio
# test above fills `good_matches`, which is presumably what is meant here.
src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,2)
dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,2)
model, inliers = initialize_ransac(src_pts, dst_pts)
n_inliers = np.sum(inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(len(matched_indices))
# Second fit; `model` and `inliers` from initialize_ransac are overwritten.
model, inliers = ransac(
(src_pts, dst_pts),
AffineTransform, min_samples=4,
residual_threshold=4, max_trials=20000
)
n_inliers = np.sum(inliers)
print(n_inliers)
matched_indices = [good_matches_indices[idx] for idx in inliers.nonzero()[0]]
print(matched_indices)
# Map two corners of the template back through the fitted model.
# NOTE(review): the corner is given as (h_query, w_query); confirm whether
# the model expects (x, y) order.
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
# The next four lines repeat the computation above with the same inputs.
h_query, w_query,_ = img2.shape
q_coordinates = np.array([(0, 0), (h_query, w_query)])
coords = model.inverse(q_coordinates)
print(coords)
# bboxes_list.append((i, coords))
M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 2)
draw_params = dict(matchColor = (0,255,0),
singlePointColor = (255,0,0),
matchesMask = matchesMask,
flags = cv.DrawMatchesFlags_DEFAULT)
img3 = cv.drawMatchesKnn(img1,kp1,image,kp2,match,None,**draw_params)
plt.imshow(img3),plt.show()
Result of SIFT looks like
The question is: what is the best way to cluster the points to obtain rectangles representing each yogurt? I tried RANSAC, but this method doesn't work in this case.
I am proposing an approach based on what is discussed in this paper. I have modified the approach a bit because the use-case is not entirely same but they do use SIFT features matching to locate multiple objects in video frames. They have used PCA for reducing time but that may not be required for still images.
Sorry I could not write a code for this as it will take a lot of time but I believe this should work to locate all the occurrences of the template object.
The modified approach is like this:
Divide the template image into regions: left, middle, right along the horizontal
and top, bottom along the vertical
Now when you match features between the template and source image, you will get features matched from some of the keypoints from these regions on multiple locations on the source image. You can use these keypoints to identify which region of the template is present at what location(s) in the source image. If there are overlapping regions i.e. keypoints from different regions matched with close keypoints in source image then that would mean a wrong match.
Mark each set of matching keypoints within a neighborhood on source image as left, center, right, top, bottom depending upon if they have majority matches from keypoints of a particular region in the template image.
Starting from each left region on source image move towards right and if we find a central region followed by a right region then this area of source image between regions marked as left and right, can be marked as location of one template object.
There could be overlapping objects which could result in a left region followed by another left region when moving in right direction from the left region. The area between the two left regions can be marked as one template object.
For further refined locations, each area of source image marked as one template object can be cropped and re-matched with the template image.
Try working spatially: for each key-point in img2 get some bounding box around and consider only the points in there for your ransac homography to check for best fit.
You can also work with overlapping windows and later discard similar resulting homographys
Here is what you can do:
Base Image = Whole picture of shelf
Template Image = Single product image
Get SIFT matches from both images. (base and template image)
Do feature matching.
Get all the points in base image which are matching. (refer to figure)
Create clusters based on the size of the template image. (here the threshold is 50px)
Get Bounding box of clusters.
Crop each cluster's bounding box and check its matches against the template image.
Accept all clusters which have at least a minimum percentage of matches. (here a minimum of 10% of keypoints is used)
def plot_pts(img, pts):
    """Show ``img`` with a filled circle drawn at every point of ``pts``.

    The input image is not modified; drawing happens on a copy.
    """
    canvas = img.copy()
    for pt in pts:
        center = (int(pt[0]), int(pt[1]))
        canvas = cv2.circle(canvas, center, radius=7, color=(255, 0, 0), thickness=-1)
    plt.figure(figsize=(20, 10))
    plt.imshow(canvas)
def plot_bbox(img, bbox_list):
    """Show ``img`` with one rectangle per bounding box in ``bbox_list``.

    Each box is a sequence of corners; the first and third entries are used
    as the two opposite corners of the rectangle. Drawing happens on a copy.
    """
    canvas = img.copy()
    for box in bbox_list:
        canvas = cv2.rectangle(canvas, pt1=box[0], pt2=box[2], color=(255, 0, 0), thickness=2)
    plt.figure(figsize=(20, 10))
    plt.imshow(canvas)
def get_distance(pt1, pt2):
    """Return the Euclidean distance between two 2-D points."""
    dx = pt1[0] - pt2[0] if False else None  # placeholder removed below
    ax, ay = pt1
    bx, by = pt2
    dx = ax - bx
    dy = ay - by
    return np.sqrt(np.square(dx) + np.square(dy))
def check_centroid(pt, centroid):
    """Return True when ``pt`` is closer to ``centroid`` than ``max_distance``.

    ``max_distance`` is read from module scope.
    """
    px, py = pt
    cx, cy = centroid
    return bool(get_distance(pt1=(px, py), pt2=(cx, cy)) < max_distance)
def update_centroid(pt, centroids_list):
    """Assign ``pt`` to the first cluster whose mean is close enough.

    Args:
        pt: the point to place.
        centroids_list: list of clusters, each a list of points.

    Returns:
        A new list of clusters: ``pt`` is appended to the first cluster for
        which ``check_centroid(pt, mean_of_cluster)`` is true, or a new
        single-point cluster is added when none qualifies. The result is
        passed through ``recheck_centroid`` before being returned.

    The input is left untouched: the inner lists are copied first, because a
    shallow ``.copy()`` of the outer list would let ``append`` modify the
    caller's clusters.
    """
    new_centroids_list = [list(cluster) for cluster in centroids_list]
    for cluster in new_centroids_list:
        if check_centroid(pt, np.mean(cluster, axis=0)):
            cluster.append(pt)
            break
    else:
        # No existing cluster is close enough: start a new one.
        new_centroids_list.append([pt])
    return recheck_centroid(new_centroids_list)
def recheck_centroid(centroids_list):
    """Return the clusters with duplicate points removed from each one.

    Each cluster goes through a ``set``, so its points must be hashable and
    their order is not preserved.
    """
    deduplicated = []
    for cluster in centroids_list:
        deduplicated.append(list(set(cluster)))
    return deduplicated
def get_bbox(pts):
    """Return the axis-aligned bounding box of a set of 2-D points.

    The result lists four ``[x, y]`` corners in the order
    (min, min), (max, min), (max, max), (min, max).
    """
    low_x, low_y = np.min(pts, axis=0)
    high_x, high_y = np.max(pts, axis=0)
    first = [low_x, low_y]
    second = [high_x, low_y]
    third = [high_x, high_y]
    fourth = [low_x, high_y]
    return [first, second, third, fourth]
class RotateAndTransform:
    """SIFT-based matching helper built around one reference image.

    The constructor loads the reference image and creates a SIFT detector, a
    brute-force matcher and a FLANN (KD-tree) matcher that the other methods
    reuse.
    """

    def __init__(self, path_img_ref):
        self.path_img_ref = path_img_ref
        self.ref_img = self._read_ref_image()
        # sift
        self.sift = cv2.SIFT_create()
        # feature matching
        self.bf = cv2.BFMatcher()
        # FLANN parameters
        FLANN_INDEX_KDTREE = 1
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)  # or pass empty dictionary
        self.flann = cv2.FlannBasedMatcher(index_params, search_params)

    def _read_ref_image(self):
        """Read the reference image from disk and convert it BGR -> RGB."""
        ref_img = cv2.imread(self.path_img_ref, cv2.IMREAD_COLOR)
        ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
        return ref_img

    def read_src_image(self, path_img_src):
        """Remember ``path_img_src`` and return that image converted to RGB."""
        self.path_img_src = path_img_src
        src_img = cv2.imread(path_img_src, cv2.IMREAD_COLOR)
        src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
        return src_img

    def convert_bw(self, img):
        """Return the grayscale version of an RGB image."""
        img_bw = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        return img_bw

    def get_keypoints_descriptors(self, img_bw):
        """Run SIFT on ``img_bw`` and return ``(keypoints, descriptors)``."""
        keypoints, descriptors = self.sift.detectAndCompute(img_bw, None)
        return keypoints, descriptors

    @staticmethod
    def _ratio_test(knn_matches, threshold):
        """Keep the best match of each pair that passes the ratio test.

        Entries with fewer than two neighbours are skipped: no ratio can be
        computed for them, and unpacking them as a pair would raise.
        """
        kept = []
        for pair in knn_matches:
            if len(pair) < 2:
                continue
            best, second = pair[0], pair[1]
            if best.distance < threshold * second.distance:
                kept.append([best])
        return kept

    def get_matches(self, src_descriptors, ref_descriptors, threshold=0.6):
        """Match reference descriptors against source descriptors.

        Both matchers are queried with ``k=2`` using the reference
        descriptors as the query set, and each result is filtered with the
        ratio test ``best.distance < threshold * second.distance``.

        Returns:
            ``(good_matches, good_flann_matches)``: lists of one-element
            lists holding the retained best match, for the brute-force and
            FLANN matcher respectively.
        """
        matches = self.bf.knnMatch(ref_descriptors, src_descriptors, k=2)
        flann_matches = self.flann.knnMatch(ref_descriptors, src_descriptors, k=2)

        # Apply ratio test for Brute Force
        good_matches = self._ratio_test(matches, threshold)
        print(f'Numner of BF Match: {len(matches)}, Number of good BF Match: {len(good_matches)}')

        # Apply ratio test for FLANN
        good_flann_matches = self._ratio_test(flann_matches, threshold)
        print(f'Numner of FLANN Match: {len(flann_matches)}, Number of good Flann Match: {len(good_flann_matches)}')
        return good_matches, good_flann_matches

    def get_src_dst_pts(self, good_flann_matches, ref_keypoints, src_keypoints):
        """Collect the matched point coordinates of both images.

        ``queryIdx`` indexes ``ref_keypoints`` and ``trainIdx`` indexes
        ``src_keypoints``.

        Returns:
            ``(pts_src, pts_ref)`` as NumPy arrays, aligned row by row.
        """
        pts_src = []
        pts_ref = []
        for entry in good_flann_matches:
            match = entry[0]
            pts_src.append(src_keypoints[match.trainIdx].pt)
            pts_ref.append(ref_keypoints[match.queryIdx].pt)
        return np.array(pts_src), np.array(pts_ref)
def extend_bbox(bbox, increment=0.1):
    """Return a copy of a four-corner box pushed outwards.

    Every coordinate is moved by ``int(coordinate * increment)``: the first
    corner towards smaller x and y, the second towards larger x and smaller
    y, the third towards larger x and y, the fourth towards smaller x and
    larger y. The input box is not modified.
    """
    grown = bbox.copy()
    # Direction (sign for x, sign for y) in which each corner is moved.
    directions = ((-1, -1), (1, -1), (1, 1), (-1, 1))
    for idx, (sign_x, sign_y) in enumerate(directions):
        x, y = grown[idx][0], grown[idx][1]
        grown[idx] = [x + sign_x * int(x * increment),
                      y + sign_y * int(y * increment)]
    return grown
def crop_bbox(img, bbox):
    """Cut the region described by a four-corner box out of ``img``.

    ``bbox[0]`` supplies the starting column and row; the width comes from
    the first coordinate of ``bbox[1]`` and the height from the second
    coordinate of ``bbox[2]``, both relative to ``bbox[0]``.
    """
    left, top = bbox[0]
    width = bbox[1][0] - bbox[0][0]
    height = bbox[2][1] - bbox[0][1]
    return img[top: top + height, left: left + width, :]
# Driver: match the template against the base image, cluster the matched
# points, then re-match a crop around every cluster.
base_img = cv2.imread(path_img_base)
ref_img = cv2.imread(path_img_ref)

rnt = RotateAndTransform(path_img_ref)
ref_img_bw = rnt.convert_bw(img=rnt.ref_img)
ref_keypoints, ref_descriptors = rnt.get_keypoints_descriptors(ref_img_bw)

base_img = rnt.read_src_image(path_img_src=path_img_base)
base_img_bw = rnt.convert_bw(img=base_img)
base_keypoints, base_descriptors = rnt.get_keypoints_descriptors(base_img_bw)

good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=ref_descriptors, threshold=0.6)

# Integer pixel position of every matched keypoint (taken from the query
# side of each FLANN match).
ref_points = []
for gm in good_flann_matches:
    x, y = ref_keypoints[gm[0].queryIdx].pt
    x, y = int(x), int(y)
    ref_points.append((x, y))

# Greedy clustering: a point joins the first cluster whose mean lies within
# max_distance, otherwise it starts a new cluster.
max_distance = 50
centroids = [[ref_points[0]]]
for i in tqdm(range(len(ref_points))):
    pt = ref_points[i]
    centroids = update_centroid(pt, centroids)

# The original line had markdown image markup spliced into the identifier
# `centroids`, which made the file unparsable.
bbox = [get_bbox(c) for c in centroids]
centroids = [np.mean(c, axis=0) for c in centroids]
print(f'Number of Points: {len(good_flann_matches)}, centroids: {len(centroids)}')

# Crop a slightly enlarged box around each cluster and match it again.
data = []
for i in range(len(bbox)):
    temp_crop_img = crop_bbox(ref_img, extend_bbox(bbox[i], 0.01))
    temp_crop_img_bw = rnt.convert_bw(img=temp_crop_img)
    temp_crop_keypoints, temp_crop_descriptors = rnt.get_keypoints_descriptors(temp_crop_img_bw)
    good_matches, good_flann_matches = rnt.get_matches(src_descriptors=base_descriptors, ref_descriptors=temp_crop_descriptors, threshold=0.6)
    temp_data = {'image': temp_crop_img,
                 'num_matched': len(good_flann_matches),
                 'total_keypoints': len(base_keypoints),
                 }
    data.append(temp_data)

# Keep only crops with more than 25 good matches and display them.
filter_data = [{'num_matched': i['num_matched'], 'image': i['image']} for i in data if i['num_matched'] > 25]
for i in range(len(filter_data)):
    temp_num_match = filter_data[i]['num_matched']
    plt.figure()
    plt.title(f'num matched: {temp_num_match}')
    plt.imshow(filter_data[i]['image'])
First, you could detect any item that is on the shelf with a network like this; it's pre-trained in this exact context and works pretty well. You should also rectify the image before feeding it to the network. You will obtain bounding boxes for every product (maybe with some false positives/negatives, but that's another issue). Then you can match each box with the template using SIFT and calculate a score (it's up to you to define which score works), but I suggest using another approach, like a Siamese network, if you have a consistent dataset.
I have a project where people can add data about utility bills, and there's also an OCR service inside. So people from my city can recognize data from bills just by loading their photos of bills. The trouble is that I can't reach this goal fully.
So I have 4 templates of bills (like for heating, water, gas and so on) in high quality. Example is below:
My user can load a picture like that:
And after alignment I get this result:
And it's obvious that I can't get good recognition with such image.
My code which I use for image alignment:
import os
import cv2
import numpy as np
from config import folder_path_aligned_images
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
class OpenCV:
    """Align a photographed document to a template with ORB features."""

    # The decorators had been turned into the comment "#classmethod", which
    # left these as plain functions and broke OpenCV.match_img(im, ref).
    @classmethod
    def match_img(cls, im1, im2):
        """Warp ``im1`` onto ``im2`` using ORB matches and a RANSAC homography.

        Side effect: writes "matches.jpg" into ``folder_path_aligned_images``.

        Returns:
            ``(im1_reg, h)``: ``im1`` warped to the size of ``im2`` and the
            estimated homography.
        """
        # Convert images to grayscale
        im1_gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2_gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

        # Detect ORB features and compute descriptors.
        orb = cv2.ORB_create(MAX_FEATURES)
        keypoints_1, descriptors_1 = orb.detectAndCompute(im1_gray, None)
        keypoints_2, descriptors_2 = orb.detectAndCompute(im2_gray, None)

        # Match features.
        matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
        matches = matcher.match(descriptors_1, descriptors_2, None)

        # Sort matches by score. sorted() is used because match() may return
        # a tuple, which has no .sort() method.
        matches = sorted(matches, key=lambda x: x.distance)

        # Remove not so good matches
        num_good_matches = int(len(matches) * GOOD_MATCH_PERCENT)
        matches = matches[:num_good_matches]

        # Draw top matches
        im_matches = cv2.drawMatches(im1, keypoints_1, im2, keypoints_2, matches, None)
        cv2.imwrite(os.path.join(folder_path_aligned_images, "matches.jpg"), im_matches)

        # Extract location of good matches
        points_1 = np.zeros((len(matches), 2), dtype=np.float32)
        points_2 = np.zeros((len(matches), 2), dtype=np.float32)
        for i, match in enumerate(matches):
            points_1[i, :] = keypoints_1[match.queryIdx].pt
            points_2[i, :] = keypoints_2[match.trainIdx].pt

        # Find homography
        h, mask = cv2.findHomography(points_1, points_2, cv2.RANSAC)

        # Use homography
        height, width, channels = im2.shape
        im1_reg = cv2.warpPerspective(im1, h, (width, height))
        return im1_reg, h

    @classmethod
    def align_img(cls, template_path, raw_img_path, result_img_path):
        """Align the image at ``raw_img_path`` to the template and save it.

        Returns:
            ``result_img_path``, the path the aligned image was written to.
        """
        # Read reference image
        ref_filename = template_path
        print("Reading reference image: ", ref_filename)
        im_reference = cv2.imread(ref_filename, cv2.IMREAD_COLOR)

        # Read image to be aligned
        im_filename = raw_img_path
        print("Reading image to align: ", im_filename)
        im = cv2.imread(raw_img_path, cv2.IMREAD_COLOR)

        print("Aligning images ...")
        # Registered image will be stored in im_reg.
        im_reg, h = cls.match_img(im, im_reference)

        # Write aligned image to disk.
        print("Saving aligned image : ", result_img_path)
        cv2.imwrite(result_img_path, im_reg)
        return result_img_path
How can I improve this?
EDIT: image with matches:
Don't know if this helps almost a year on, but I used a similar code that you have, and what worked for me was to increase the number of MAX_FEATURES (I use 80000, but you might not even need that much) and to decrease the GOOD_MATCH_PERCENT to like 0.05. Try playing with the numbers!
I am trying to learn OpenCV in order to improve a script I wrote for comparing engineering drawings. I am using the code (see below) found on this tutorial but I am having zero success with it. In the tutorial the author uses the example of a blank form for the reference image and a photo of the completed form as the image to align. My situation is very similar because I am attempting to use a blank drawing title block as my reference image and a scanned image of a drawing as my image to align.
My goal is to use OpenCV to clean up the scanned engineering drawings so that they are aligned properly but no matter what I try in the MAX_FEATURES and GOOD_MATCH_PERCENT parameters, I get an image that looks like a black and white star burst. Also, when I review the "matches.jpg" file generated by the script, it appears that there are no correct matches. I have tried multiple drawings and I get the same results.
Can anyone see a reason why this script would not work in the way I am trying to use it?
from __future__ import print_function
import cv2
import numpy as np
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
def alignImages(im1, im2):
    """Warp ``im1`` onto ``im2`` using ORB matches and a RANSAC homography.

    Side effect: writes a visualisation of the retained matches to
    "matches.jpg".

    Returns:
        ``(im1Reg, h)``: ``im1`` warped to the size of ``im2`` and the
        estimated homography.
    """
    # Convert images to grayscale
    im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
    im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

    # Detect ORB features and compute descriptors.
    orb = cv2.ORB_create(MAX_FEATURES)
    keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)

    # Match features.
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = matcher.match(descriptors1, descriptors2, None)

    # Sort matches by score. sorted() is used because match() may return a
    # tuple, which has no .sort() method.
    matches = sorted(matches, key=lambda x: x.distance)

    # Remove not so good matches
    numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
    matches = matches[:numGoodMatches]

    # Draw top matches
    imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
    cv2.imwrite("matches.jpg", imMatches)

    # Extract location of good matches
    points1 = np.zeros((len(matches), 2), dtype=np.float32)
    points2 = np.zeros((len(matches), 2), dtype=np.float32)
    for i, match in enumerate(matches):
        points1[i, :] = keypoints1[match.queryIdx].pt
        points2[i, :] = keypoints2[match.trainIdx].pt

    # Find homography
    h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)

    # Use homography
    height, width, channels = im2.shape
    im1Reg = cv2.warpPerspective(im1, h, (width, height))
    return im1Reg, h
if __name__ == '__main__':
    # Read reference image
    refFilename = "form.jpg"
    print("Reading reference image : ", refFilename)
    imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
    # cv2.imread returns None on failure; stop with a clear message rather
    # than failing later inside alignImages.
    if imReference is None:
        raise IOError("Could not read reference image: " + refFilename)

    # Read image to be aligned
    imFilename = "scanned-form.jpg"
    print("Reading image to align : ", imFilename)
    im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
    if im is None:
        raise IOError("Could not read image to align: " + imFilename)

    print("Aligning images ...")
    # Registered image will be stored in imReg.
    # The estimated homography will be stored in h.
    imReg, h = alignImages(im, imReference)

    # Write aligned image to disk.
    outFilename = "aligned.jpg"
    print("Saving aligned image : ", outFilename)
    cv2.imwrite(outFilename, imReg)

    # Print estimated homography
    print("Estimated homography : \n", h)
Template Image:
Image to Align:
Expected output Image:
Here is one way in Python/OpenCV using a Rigid Affine Transformation (scale, rotation and translation only - no skew or perspective) to warp one image to match the other. It uses findTransformECC() (Enhanced Correlation Coefficient Maximization) to get the rotation matrix and then uses warpAffine to do the rigid warping.
Template:
Image to be warped:
import cv2
import numpy as np
import math
import sys
# Command-line arguments (argv[0] is the script name): full paths of the
# template image, the image to warp, and the output file.
# image2 will be warped to match image1.
image1, image2, outfile = sys.argv[1], sys.argv[2], sys.argv[3]

# Load both images; im2 is the one that gets warped onto im1.
im1 = cv2.imread(image1)
im2 = cv2.imread(image2)

# ECC operates on single-channel images.
im1_gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im2_gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

# Shape of the template; it fixes the size of the output canvas.
sz = im1.shape

# Motion model: euclidean, i.e. a rigid transform.
warp_mode = cv2.MOTION_EUCLIDEAN

# Start the search from the 2x3 identity transform.
warp_matrix = np.eye(2, 3, dtype=np.float32)

# Stop after this many iterations, or once the correlation coefficient
# changes by less than termination_eps between two iterations.
number_of_iterations = 5000
termination_eps = 1e-3
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
            number_of_iterations, termination_eps)

# Run ECC; the estimated transform is returned in warp_matrix.
cc, warp_matrix = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix,
                                       warp_mode, criteria, None, 1)

# Warp im2 with the estimated matrix, applied as an inverse map.
im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1], sz[0]),
                             flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)

# Save the aligned image.
cv2.imwrite(outfile, im2_aligned)

# Derive and print the rotation angle in degrees.
# NOTE(review): despite its name, row1_col0 reads element [0, 1] of the
# matrix - confirm the intended entry and sign.
row1_col0 = warp_matrix[0, 1]
angle = math.degrees(math.asin(row1_col0))
print(angle)
Result:
Resulting Angle of Rotation (in deg):
-0.3102187026194794
Note that you can change the background color in the warpAffine call to white if desired.
Also make the termination epsilon smaller by an order of magnitude or two for more accuracy, but longer processing times.
The other Rigid Affine approach that I mentioned in my comments earlier is to use ORB feature matching, filter the key points, then use estimateAffinePartial2D() to get the rigid affine matrix. Then use that to warp the image. For large angles this seems to me to be more reliable than the ECC method. But the ECC method seems more accurate for small rotations.
import cv2
import numpy as np
import math
import sys
MAX_FEATURES = 10000
GOOD_MATCH_PERCENT = 0.15
DIFFY_THRESH = 2


def filter_by_vertical_offset(points1, points2, max_dy=DIFFY_THRESH):
    """Keep only point pairs whose y coordinates differ by less than max_dy.

    Args:
        points1: (N, 2) array-like of (x, y) coordinates in image 1.
        points2: (N, 2) array-like of the matching coordinates in image 2.
        max_dy: exclusive upper bound on ``abs(y2 - y1)`` for a pair to be kept.

    Returns:
        ``(kept1, kept2, mask)`` where kept1/kept2 are float32 arrays of shape
        (K, 2) and mask is a list of N ints (1 = kept, 0 = dropped).
    """
    pts1 = np.asarray(points1, dtype=np.float32).reshape(-1, 2)
    pts2 = np.asarray(points2, dtype=np.float32).reshape(-1, 2)
    keep = np.abs(pts2[:, 1] - pts1[:, 1]) < max_dy
    # Coordinates stay float32: casting them to uint8 would wrap every value
    # modulo 256 and corrupt the estimate for images larger than 255 px.
    return pts1[keep], pts2[keep], keep.astype(int).tolist()


def rotation_angle_degrees(m):
    """Return the rotation angle, in degrees, of a 2x3 similarity matrix.

    The matrix has the form ``[[s*cos, -s*sin, tx], [s*sin, s*cos, ty]]``.
    atan2 of the first column cancels the scale ``s``, whereas asin of a single
    element is only correct when ``s == 1`` and fails when ``|s*sin| > 1``.
    """
    return math.degrees(math.atan2(m[1][0], m[0][0]))


def main():
    """Warp image 2 onto image 1 using ORB matches and a partial affine fit.

    Command line: ``script image1 image2 outfile`` (full paths). image2 is
    warped to match image1, written to outfile, and the angle is printed.
    """
    # argv[0] is name of script
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    outFile = sys.argv[3]

    # Read image1, and image2 which is to be warped to match image1
    image1 = cv2.imread(file1, cv2.IMREAD_COLOR)
    image2 = cv2.imread(file2, cv2.IMREAD_COLOR)
    if image1 is None or image2 is None:
        # cv2.imread signals failure by returning None rather than raising
        sys.exit("could not read one of the input images")

    # Convert images to grayscale
    image1Gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    image2Gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

    # Detect ORB features and compute descriptors.
    orb = cv2.ORB_create(MAX_FEATURES)
    keypoints1, descriptors1 = orb.detectAndCompute(image1Gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(image2Gray, None)

    # Match features and order them by score. sorted() is used because some
    # OpenCV releases return a tuple from match(), which has no .sort().
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(matcher.match(descriptors1, descriptors2, None),
                     key=lambda match: match.distance)

    # Remove not so good matches
    numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
    matches = matches[:numGoodMatches]

    # Extract location of good matches and filter by diffy if rotation is small
    points1 = np.float32(
        [keypoints1[match.queryIdx].pt for match in matches]).reshape(-1, 2)
    points2 = np.float32(
        [keypoints2[match.trainIdx].pt for match in matches]).reshape(-1, 2)
    newpoints1, newpoints2, _ = filter_by_vertical_offset(points1, points2)
    if len(newpoints1) < 2:
        sys.exit("not enough matches left to estimate a transform")

    # Find Affine Transformation
    # note swap of order of newpoints here so that image2 is warped to match image1
    m, inliers = cv2.estimateAffinePartial2D(newpoints2, newpoints1)
    if m is None:
        sys.exit("could not estimate an affine transform from the matches")

    # Use affine transform to warp im2 to match im1
    height, width = image1.shape[:2]
    image2Reg = cv2.warpAffine(image2, m, (width, height))

    # Write aligned image to disk.
    cv2.imwrite(outFile, image2Reg)

    # Print angle
    row1_col0 = m[1, 0]
    print('row1_col0:', row1_col0)
    angle = rotation_angle_degrees(m)
    print('angle', angle)


if __name__ == "__main__":
    main()
Result Image:
Result Rotation Angle:
-0.6123936361765413
After some trial and error I determined that I don't need to find a homography in order to align my images properly. Since my images only need to be scaled and rotated slightly, my best option is to find the outermost points of the drawing title block and align one image to the other with an affine transform.
My approach is to use the Harris corner finding function to find all of the corners on the drawing, then do a simple calculation to find the points that are the shortest distance to the corners of the drawing canvas (these are the outside corners of the drawing title block). I then take 3 of the points (top left, top right, and bottom left) and use a transform to scale/rotate one drawing to the other.
Below is the code that I used:
import cv2
import numpy as np
import math
def detect_corners(img):
    """Find Harris corner candidates in a BGR image.

    Returns:
        ``(mask, corners)``: mask is the thresholded Harris response as a uint8
        image (255 where the response exceeds 10% of its maximum, else 0), and
        corners holds the sub-pixel refined centroids of the connected
        components of that mask, one (x, y) row per component.
    """
    gray = np.float32(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    response = cv2.cornerHarris(gray, 5, 3, 0.04)
    _, mask = cv2.threshold(response, 0.1 * response.max(), 255, 0)
    mask = np.uint8(mask)
    _, _, _, centroids = cv2.connectedComponentsWithStats(mask)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
    corners = cv2.cornerSubPix(gray, np.float32(centroids), (5, 5), (-1, -1),
                               criteria)
    return mask, corners


def frame_corner_points(corners, width, height):
    """Pick the candidates nearest the canvas top-left, top-right, bottom-left.

    Args:
        corners: (N, 2) array-like of (x, y) corner candidates.
        width: canvas width in pixels.
        height: canvas height in pixels.

    Returns:
        A float32 array of shape (3, 2) with rows ordered top-left, top-right,
        bottom-left. Ties go to the earliest candidate.

    Raises:
        ValueError: if corners is empty.
    """
    candidates = np.asarray(corners, dtype=np.float64).reshape(-1, 2)
    if candidates.shape[0] == 0:
        raise ValueError("no corner candidates were found")
    targets = np.array([[0, 0], [width, 0], [0, height]], dtype=np.float64)
    # distances[t, c] = Euclidean distance from canvas corner t to candidate c
    distances = np.linalg.norm(
        candidates[np.newaxis, :, :] - targets[:, np.newaxis, :], axis=2)
    return candidates[distances.argmin(axis=1)].astype(np.float32)


def report_points(title, points):
    """Print the three selected frame corners under the given heading."""
    print(title)
    print("Top Left:")
    print(points[0])
    print("Top Right:")
    print(points[1])
    print("Bottom Left:")
    print(points[2])


def main():
    """Align 'to-be-aligned.jpg' to 'reference.jpg' via three frame corners."""
    img1 = cv2.imread('reference.jpg')
    img2 = cv2.imread('to-be-aligned.jpg')
    if img1 is None or img2 is None:
        # cv2.imread signals failure by returning None rather than raising
        raise SystemExit("could not read 'reference.jpg' or 'to-be-aligned.jpg'")

    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]

    #Find the corner points of img1 and img2
    dst1, corners1 = detect_corners(img1)
    dst2, corners2 = detect_corners(img2)

    #Find the top left, top right, and bottom left outer corners of the
    #drawing frame for each image
    point1 = frame_corner_points(corners1, w1, h1)
    report_points("Image #1 (reference):", point1)
    point2 = frame_corner_points(corners2, w2, h2)
    report_points("Image #2 (image to align):", point2)

    #Make sure points look ok:
    print(point1)
    print(point2)

    #Transform the image (three point pairs define the affine map img2 -> img1)
    m = cv2.getAffineTransform(point2, point1)
    image2Reg = cv2.warpAffine(img2, m, (w1, h1), borderValue=(255, 255, 255))

    #Highlight found points in red (the masks hold only 0 or 255):
    img1[dst1 > 0] = [0, 0, 255]
    img2[dst2 > 0] = [0, 0, 255]

    #Output the images:
    cv2.imwrite("output-img1-harris.jpg", img1)
    cv2.imwrite("output-img2-harris.jpg", img2)
    cv2.imwrite("output-harris-transform.jpg", image2Reg)


if __name__ == "__main__":
    main()
I am using opencv and want to stick with it.
I have 5 images with some common areas in a pairwise manner. I want to merge them together into a single image. I have been successful in joining two images together, as they were of the same resolution (a little tweak brought them to the same resolution without distorting the contents significantly). But now this first stage of merging gives me a highly inflated image: the resolution has gone up significantly (roughly the sum of the two images).
To merge the two images I had brought their resolutions to the same value and it didn't cause much distortion. But now there's this image with double the length. If I change its resolution to the level of the image next in line for stitching, it is going to highly distort the content of the first stage and hence the result from here on.
How do I fix this issue given that I need to go through 5-6 iterations of stitching where the resolution is going to keep increasing?
Also, is there any text that goes into the details of image processing with examples like the one above?
Stitcher.py
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
# import the necessary packages
import numpy as np
import imutils
import cv2
class Stitcher:
    """Stitch two overlapping images into one panorama.

    SIFT keypoints are matched between the two images, a homography is
    estimated with RANSAC, and one image is perspective-warped onto a canvas
    into which the other image is then pasted at the top-left corner.
    """

    def __init__(self):
        # determine if we are using OpenCV v3.X or newer; without
        # or_better=True an OpenCV 4 install would be routed to the 2.4 API
        self.isv3 = imutils.is_cv3(or_better=True)

    @staticmethod
    def _canvas_size(imageA, imageB):
        """Return the (width, height) of the output canvas for two images."""
        (hA, wA) = imageA.shape[:2]
        (hB, wB) = imageB.shape[:2]
        # the taller image sets the height so that pasting imageB never
        # overruns the canvas
        return (wA + wB, max(hA, hB))

    def stitch(self, images, ratio=0.75, reprojThresh=4.0,
               showMatches=False):
        """Stitch a pair of images.

        Args:
            images: sequence of two images; the first is pasted unchanged at
                the top-left of the canvas and the second is warped onto it.
            ratio: Lowe's ratio-test threshold.
            reprojThresh: RANSAC reprojection threshold.
            showMatches: when True, also return a match visualization.

        Returns:
            None if no homography could be computed; otherwise the stitched
            image, or ``(stitched, visualization)`` when showMatches is True.
        """
        # unpack the images, then detect keypoints and extract
        # local invariant descriptors from them
        (imageB, imageA) = images
        (kpsA, featuresA) = self.detectAndDescribe(imageA)
        (kpsB, featuresB) = self.detectAndDescribe(imageB)

        # match features between the two images
        M = self.matchKeypoints(kpsA, kpsB,
                                featuresA, featuresB, ratio, reprojThresh)

        # if the match is None, then there aren't enough matched
        # keypoints to create a panorama
        if M is None:
            return None

        # otherwise, apply a perspective warp to stitch the images
        # together
        (matches, H, status) = M
        if H is None:
            # findHomography can fail even when enough matches were supplied
            return None
        # ndarray.size is a plain integer, so the canvas and paste extents
        # must come from .shape
        result = cv2.warpPerspective(imageA, H,
                                     self._canvas_size(imageA, imageB))
        (hB, wB) = imageB.shape[:2]
        result[0:hB, 0:wB] = imageB

        # check to see if the keypoint matches should be visualized
        if showMatches:
            vis = self.drawMatches(imageA, imageB, kpsA, kpsB, matches,
                                   status)
            # return a tuple of the stitched image and the
            # visualization
            return (result, vis)

        # return the stitched image
        return result

    def detectAndDescribe(self, image):
        """Return ``(keypoints, features)`` for a BGR image using SIFT.

        keypoints is a float32 array of (x, y) positions; features holds the
        descriptors returned by the SIFT implementation.
        """
        # convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # check to see if we are using OpenCV 3.X or newer
        if self.isv3:
            # SIFT is exposed as cv2.SIFT_create in newer releases; older
            # ones only provide it through the contrib xfeatures2d module
            if hasattr(cv2, "SIFT_create"):
                descriptor = cv2.SIFT_create()
            else:
                descriptor = cv2.xfeatures2d.SIFT_create()
            # detect and extract features from the image
            (kps, features) = descriptor.detectAndCompute(gray, None)

        # otherwise, we are using OpenCV 2.4.X
        else:
            # detect keypoints in the image
            detector = cv2.FeatureDetector_create("SIFT")
            kps = detector.detect(gray)

            # extract features from the image
            extractor = cv2.DescriptorExtractor_create("SIFT")
            (kps, features) = extractor.compute(gray, kps)

        # convert the keypoints from KeyPoint objects to NumPy
        # arrays
        kps = np.float32([kp.pt for kp in kps])

        # return a tuple of keypoints and features
        return (kps, features)

    def matchKeypoints(self, kpsA, kpsB, featuresA, featuresB,
                       ratio, reprojThresh):
        """Match descriptors and estimate the homography mapping A onto B.

        Returns:
            ``(matches, H, status)`` where matches is a list of
            ``(trainIdx, queryIdx)`` pairs, or None when there are too few
            matches to attempt a homography.
        """
        # compute the raw matches and initialize the list of actual
        # matches
        matcher = cv2.DescriptorMatcher_create("BruteForce")
        rawMatches = matcher.knnMatch(featuresA, featuresB, 2)
        matches = []

        # loop over the raw matches
        for m in rawMatches:
            # ensure the distance is within a certain ratio of each
            # other (i.e. Lowe's ratio test)
            if len(m) == 2 and m[0].distance < m[1].distance * ratio:
                matches.append((m[0].trainIdx, m[0].queryIdx))

        # a homography needs at least 4 matches; this check demands more
        # than 4
        if len(matches) > 4:
            # construct the two sets of points
            ptsA = np.float32([kpsA[i] for (_, i) in matches])
            ptsB = np.float32([kpsB[i] for (i, _) in matches])

            # compute the homography between the two sets of points
            (H, status) = cv2.findHomography(ptsA, ptsB, cv2.RANSAC,
                                             reprojThresh)

            # return the matches along with the homography matrix
            # and status of each matched point
            return (matches, H, status)

        # otherwise, no homography could be computed
        return None

    def drawMatches(self, imageA, imageB, kpsA, kpsB, matches, status):
        """Return a side-by-side image with the inlier matches drawn on it.

        imageA is placed on the left and imageB on the right; a green line
        joins each matched pair whose status entry equals 1.
        """
        # initialize the output visualization image
        (hA, wA) = imageA.shape[:2]
        (hB, wB) = imageB.shape[:2]
        vis = np.zeros((max(hA, hB), wA + wB, 3), dtype="uint8")
        vis[0:hA, 0:wA] = imageA
        vis[0:hB, wA:] = imageB

        # loop over the matches
        for ((trainIdx, queryIdx), s) in zip(matches, status):
            # only process the match if the keypoint was successfully
            # matched
            if s == 1:
                # draw the match
                ptA = (int(kpsA[queryIdx][0]), int(kpsA[queryIdx][1]))
                ptB = (int(kpsB[trainIdx][0]) + wA, int(kpsB[trainIdx][1]))
                cv2.line(vis, ptA, ptB, (0, 255, 0), 1)

        # return the visualization
        return vis
run.py
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 18 11:13:23 2017
#author: user
"""
# import the necessary packages
import os
os.chdir('/home/user/Desktop/stitcher')
from str import Stitcher
import argparse
import imutils
import cv2
# Command-line driver: read two images, resize both to a fixed size, stitch
# them with Stitcher, save the panorama to 'stage1.png' and display the results.
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-f", "--first", required=True,
                help="path to the first image")
ap.add_argument("-s", "--second", required=True,
                help="path to the second image")
args = vars(ap.parse_args())

# load the two images
#from PIL import Image
#imageA = Image.open(args['first']).convert('RGB')
#imageB = Image.open(args['second']).convert('RGB')
imageA = cv2.imread(args["first"])
imageB = cv2.imread(args["second"])
if imageA is None or imageB is None:
    # cv2.imread returns None instead of raising when a file cannot be read
    raise SystemExit("could not read one of the input images")

# bring both images to the same (width, height) before stitching
#imageA = imutils.resize(imageA, width=400)
#imageB = imutils.resize(imageB, width=400)
imageA = cv2.resize(imageA, (2464, 832))  #hardcoded values
imageB = cv2.resize(imageB, (2464, 832))  #hardcoded values

# stitch the images together to create a panorama
stitcher = Stitcher()
stitched = stitcher.stitch([imageA, imageB], showMatches=True)
if stitched is None:
    # stitch() returns None when no homography could be computed, and None
    # cannot be unpacked into (result, vis)
    raise SystemExit("not enough keypoint matches to stitch the images")
(result, vis) = stitched
cv2.imwrite('stage1.png', result)

# show the images
cv2.imshow("Image A", imageA)
cv2.imshow("Image B", imageB)
cv2.imshow("Keypoint Matches", vis)
cv2.imshow("Result", result)
cv2.waitKey(0)
As you can see, I have resized the images so that they have the same height and width with hardcoded values. I could have just got the minimum of two and put that as their length and breadth.
When I bring in the third image, I can neither inflate it to match the resolution of stage1 nor decrease stage1's resolution to match the third image.
P.S.: imutils didn't give me a way to choose both length and breadth.