I'm trying to examine two images for similarity using SIFT, and the result should be a percentage.
I understand how to extract the keypoints and descriptors from the images using OpenCV and its xfeatures2d module. But if I just calculate the distance between descriptors I don't get a percentage, and I haven't yet worked out how to turn that distance into one.
Can someone help me put the missing piece together properly?
import cv2
import numpy as np

alg = cv2.xfeatures2d.SIFT_create()

trainFiles = getPaths(dirTrain)
images = []
for file in trainFiles:
    img = cv2.imread(file)
    images.append(img)
np_images = np.array(images)

descriptors = np.zeros((1, 128))  # matrix to hold the descriptors (SIFT = 128 values each)
for i, img in enumerate(np_images):
    kp, des = alg.detectAndCompute(img, None)
    descriptors = np.concatenate((descriptors, des), axis=0)
    print('Processed image {} of {}'.format(i, len(np_images)))
descriptors = descriptors[1:, :]  # drop the initial zero row

a = descriptors[0]
b = descriptors[1]
# euclidean distance between two individual descriptors
dist = np.linalg.norm(a - b)
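What I'm currently considering, and this may be where my thinking goes wrong, is to match the two descriptor sets against each other and report the fraction of "good" matches as the percentage. A rough, untested sketch of that idea, where des1 and des2 would be the full descriptor arrays of the two images being compared:

bf = cv2.BFMatcher(cv2.NORM_L2)
raw_matches = bf.knnMatch(des1, des2, k=2)

# Lowe's ratio test: keep a match only if it is clearly better than the runner-up
good = []
for pair in raw_matches:
    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance:
        good.append(pair[0])

# similarity as the share of keypoints in image 1 that found a good match in image 2
similarity_pct = 100.0 * len(good) / max(len(raw_matches), 1)

Is something like this the usual way to get a percentage out of SIFT, or is there a better-founded measure?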
I have a bunch of images, each representing a shape.
I have one image as a reference and six others that I need to sort through to find the one that most resembles my reference image.
Note that a kind of dotted filter was applied to the six test images, and they aren't perfectly centered like the reference image, as you can see below:
reference image
corresponding image with filter and mispositioning
I adapted this code to my case, without success: https://github.com/bnsreenu/python_for_microscopists/blob/master/191_measure_img_similarity.py
My code:
from skimage.metrics import structural_similarity
import cv2
from PIL import Image

# Works well with images of different dimensions
def orb_sim(img1, img2):
    orb = cv2.ORB_create()
    # detect keypoints and compute descriptors
    kp_a, desc_a = orb.detectAndCompute(img1, None)
    kp_b, desc_b = orb.detectAndCompute(img2, None)
    # define the brute-force matcher object
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # perform matching
    matches = bf.match(desc_a, desc_b)
    # count matches with Hamming distance < 50 as similar regions
    # (the smaller the distance, the better the match)
    similar_regions = [i for i in matches if i.distance < 50]
    if len(matches) == 0:
        return 0
    return len(similar_regions) / len(matches)

# Needs images to be the same dimensions
def structural_sim(img1, img2):
    sim, diff = structural_similarity(img1, img2, full=True)
    return sim
im_ref = cv2.imread('captcha_clean/umbrella.png', 0)
for i in range(1, 7):
    path = "Tmp_Image/split_" + str(i) + ".png"
    im = Image.open(path)
    path = "Tmp_Image/newsplit_" + str(i) + ".png"
    new_im = im.resize((32, 32))
    new_im.save(path)
    im_test = cv2.imread(path, 0)

    ssim = structural_sim(im_ref, im_test)  # 1.0 means identical; lower = less similar
    print("For image " + str(i) + " similarity using SSIM is: ", ssim)

    orb_similarity = orb_sim(im_ref, im_test)  # 1.0 means identical; lower = less similar
    print("For image " + str(i) + " similarity using ORB is: ", orb_similarity)
In this case the corresponding image is number 4.
Here is the output :
For image 1 similarity using SSIM is: -0.04562843656475159
For image 1 similarity using ORB is: 0
For image 2 similarity using SSIM is: 0.04770572948037391
For image 2 similarity using ORB is: 0
For image 3 similarity using SSIM is: 0.10395830102945436
For image 3 similarity using ORB is: 0
For image 4 similarity using SSIM is: 0.08297170823406234
For image 4 similarity using ORB is: 0
For image 5 similarity using SSIM is: 0.07766704880294842
For image 5 similarity using ORB is: 0
For image 6 similarity using SSIM is: 0.12072132618711812
For image 6 similarity using ORB is: 0
The most resembling image should be the one closest to 1.0. According to the algorithm that would be image 6 here, which is wrong.
Is there any way to find the corresponding image despite these differences?
I am open to any new method to solve my problem.
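One direction I am considering, though I haven't verified it: since the test images are only 32x32 and covered in dots, ORB may simply find too few keypoints, so blurring both images to suppress the dot pattern before comparing them with SSIM might behave better. A rough sketch of that idea, reusing im_ref and im_test from the loop above (the 64x64 size is an arbitrary choice of mine):

# resize both to a common size, then blur away the dot pattern before comparing
size = (64, 64)
ref_blur = cv2.GaussianBlur(cv2.resize(im_ref, size), (5, 5), 0)
test_blur = cv2.GaussianBlur(cv2.resize(im_test, size), (5, 5), 0)
blurred_ssim = structural_similarity(ref_blur, test_blur)

Would that be a sensible preprocessing step, or is there a more robust metric for this kind of noisy, shifted shape matching?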
I am calculating the FID (Frechet Inception Distance) score of my CycleGAN model using this reference. I have the input photos in one directory and the generated 'artistic' photos in another directory.
input_photo_path = 'data/'
art_photo_path = 'images/'

import PIL
import numpy as np
import glob

def convert_image_to_numpy(image_path_dir):
    arr = []
    for img in glob.iglob(image_path_dir + '*.jpg', recursive=True):
        image = PIL.Image.open(img)
        image_array = np.array(image)
        arr.append(image_array)
    return np.array(arr)

img1 = convert_image_to_numpy(input_photo_path)
img2 = convert_image_to_numpy(art_photo_path)

img1 = preprocess_and_scale(img1)
img2 = preprocess_and_scale(img2)

fid = calculate_fid(inception_model, img1, img2)
This all looks good logically. But the problem is that the input images and the generated images are not necessarily in the same order. Say my input_photo_path has images in the order [001_r.jpg, 002_r.jpg, 003_r.jpg, ..., 012_r.jpg] but my art_photo_path has images in the order [1.jpg, 10.jpg, 2.jpg ...], and 10.jpg in art_photo_path is NOT the generated version of 002_r.jpg in input_photo_path.
I think that definitely messes up the FID calculation. Any suggestions on how I should solve it?
The FID computation doesn't depend on the ordering of the images. It uses the statistics of each dataset as a whole (i.e. it calculates the mean and covariance of the Inception feature vectors over all examples in the dataset), so the ordering doesn't matter.
Here's the pytorch implementation: https://github.com/mseitzer/pytorch-fid/blob/master/src/pytorch_fid/fid_score.py
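To make the "statistics only" point concrete, here is a minimal sketch of the usual FID computation, assuming act1 and act2 are the Inception activation matrices of the two datasets (one row per image); shuffling the rows of either matrix leaves the result unchanged:

import numpy as np
from scipy.linalg import sqrtm

def fid_from_activations(act1, act2):
    # per-dataset statistics: mean vector and covariance matrix of the features
    mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
    # Frechet distance between the two Gaussians defined by those statistics
    covmean = sqrtm(sigma1.dot(sigma2))
    if np.iscomplexobj(covmean):
        covmean = covmean.real  # discard tiny imaginary parts from numerical error
    return np.sum((mu1 - mu2) ** 2) + np.trace(sigma1 + sigma2 - 2.0 * covmean)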
I need to align two images that are slightly shifted and rotated 180 degrees relative to each other. I tried several approaches using OpenCV (in Python), but had no luck.
Method 1 was using MOTION_AFFINE:
im1 = cv2.imread(file1)  # Reference image.
im2 = cv2.imread(file2)  # Image to be aligned.

# Convert images to grayscale for computing the transform via the ECC method
im1_gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im2_gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

# Find size of image1
sz = im1.shape

# Define the motion model (MOTION_EUCLIDEAN would be rigid rotation + translation)
warp_mode = cv2.MOTION_AFFINE

# Define 2x3 warp matrix and initialize it to the identity
warp_matrix = np.eye(2, 3, dtype=np.float32)

# Specify the number of iterations
number_of_iterations = 5000

# Specify the threshold of the increment
# in the correlation coefficient between two iterations
termination_eps = 1e-3

# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

# Run the ECC algorithm. The results are stored in warp_matrix.
(cc, warp_matrix) = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix, warp_mode, criteria, None, 1)

# Warp im2 using the estimated affine transform
im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1], sz[0]))

# Save the output.
cv2.imwrite(outfile, im2_aligned)
This didn't even converge.
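My guess is that ECC can't recover a 180-degree rotation when it starts from the identity matrix. Something I'm wondering about, but haven't confirmed, is seeding warp_matrix with a 180-degree rotation about the image centre and restricting the model to MOTION_EUCLIDEAN, roughly like this:

# rough initial guess: 180-degree rotation about the image centre
center = (im2.shape[1] / 2.0, im2.shape[0] / 2.0)
warp_matrix = cv2.getRotationMatrix2D(center, 180, 1.0).astype(np.float32)

# MOTION_EUCLIDEAN restricts the search to rotation + translation only
(cc, warp_matrix) = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix,
                                         cv2.MOTION_EUCLIDEAN, criteria, None, 1)
im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1], sz[0]))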
Method 2 was using feature matching, like so:
img1_color = cv2.imread(file1)  # Reference image.
img2_color = cv2.imread(file2)  # Image to be aligned.

img1 = cv2.cvtColor(img1_color, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2_color, cv2.COLOR_BGR2GRAY)
height, width = img2.shape

# Create ORB detector with 4000 features.
orb_detector = cv2.ORB_create(4000)

# The first arg is the image, second arg is the mask
# (which is not required in this case).
kp1, d1 = orb_detector.detectAndCompute(img1, None)
kp2, d2 = orb_detector.detectAndCompute(img2, None)

# Match features between the two images.
# We create a brute-force matcher with
# Hamming distance as the measurement mode.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# Match the two sets of descriptors.
matches = matcher.match(d1, d2)

# Sort matches on the basis of their Hamming distance.
matches = sorted(matches, key=lambda x: x.distance)

# Take the top 90% of matches forward.
matches = matches[:int(len(matches) * 0.9)]
no_of_matches = len(matches)

# Define empty matrices of shape no_of_matches x 2.
p1 = np.zeros((no_of_matches, 2))
p2 = np.zeros((no_of_matches, 2))

for i in range(len(matches)):
    p1[i, :] = kp1[matches[i].queryIdx].pt
    p2[i, :] = kp2[matches[i].trainIdx].pt

# Find the homography matrix.
homography, mask = cv2.findHomography(p1, p2, cv2.RANSAC)

# Use this matrix to transform the
# colored image with respect to the reference image.
transformed_img = cv2.warpPerspective(img1_color, homography, (width, height))

# Save the output.
cv2.imwrite(outfile, transformed_img)
This ended up rotating the second image to the first image's orientation, but warping it too much, so it looks like it's not even in the same plane.
Is there any way to combine feature-based matching of two images with a transform that only rotates and translates, but does not warp perspective?
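To illustrate what I'm after: ideally I would keep the feature matching from method 2 but fit only a rotation + translation (plus possibly a uniform scale) instead of a full homography. A sketch of what I mean, reusing the p1/p2 point arrays from above (untested, so I may be misusing the API):

# fit a partial affine transform (rotation + translation + uniform scale, no shear or perspective)
M, inliers = cv2.estimateAffinePartial2D(p1, p2, method=cv2.RANSAC,
                                         ransacReprojThreshold=3.0)

# warp with the 2x3 matrix instead of a 3x3 homography
transformed_img = cv2.warpAffine(img1_color, M, (width, height))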
Thank you!
I'm building an image similarity program and, as I am a beginner in CV, I talked with an expert who recommended the following steps to get the really basic functionality:
1. Extract keypoints (DoG, Harris, etc.) and local invariant descriptors (SIFT, SURF, etc.) from all images.
2. Cluster them to form a codebook (bag of visual words dictionary; BOVW).
3. Quantize the features from each image into a BOVW histogram.
4. Compare the BOVW histograms for each image (typically using chi-squared, cosine, or euclidean distance).
Step 1 is easy, but I start getting confused at step 2. This is the code I've written so far:
import cv2
import numpy as np

dictionarySize = 20
BOW = cv2.BOWKMeansTrainer(dictionarySize)

for imgpath in ['testimg/testcropped1.jpg', 'testimg/testcropped2.jpg', 'testimg/testcropped3.jpg']:
    img = cv2.imread(imgpath)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    dst = cv2.cornerHarris(gray, 2, 3, 0.04)

    sift = cv2.xfeatures2d.SIFT_create()
    kp = sift.detect(gray, None)
    kp, des = sift.compute(gray, kp)

    img = cv2.drawKeypoints(gray, kp, img)
    cv2.imwrite('%s_keypoints.jpg' % imgpath, img)

    BOW.add(des)
I extract features using SIFT and then try to add each image's descriptors to the BOVW trainer. The problem is that I have no idea whether this is correct, or how to get the histograms.
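From what I've read, I suspect the next steps look roughly like the sketch below, where I cluster the trainer into a vocabulary and then use cv2.BOWImgDescriptorExtractor to turn each image into a histogram, but I'm not sure this is the intended usage:

# step 2: k-means cluster all collected descriptors into a visual vocabulary
vocabulary = BOW.cluster()  # shape: (dictionarySize, 128)

# step 3: quantize each image's descriptors into a BOVW histogram
sift = cv2.xfeatures2d.SIFT_create()
matcher = cv2.BFMatcher(cv2.NORM_L2)
bow_extractor = cv2.BOWImgDescriptorExtractor(sift, matcher)
bow_extractor.setVocabulary(vocabulary)

histograms = {}
for imgpath in ['testimg/testcropped1.jpg', 'testimg/testcropped2.jpg', 'testimg/testcropped3.jpg']:
    gray = cv2.cvtColor(cv2.imread(imgpath), cv2.COLOR_BGR2GRAY)
    kp = sift.detect(gray, None)
    histograms[imgpath] = bow_extractor.compute(gray, kp)  # shape: (1, dictionarySize)

# step 4: compare two histograms, e.g. with euclidean distance
# dist = np.linalg.norm(histograms[path_a] - histograms[path_b])

Is this the right way to get from the BOWKMeansTrainer to comparable histograms?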
Intro: From what I understand, given 2 images of the same scene with moving objects, I can average each pixel in order to "remove" the moving objects (there will be ghosting effects, but repeating this with several images should work fine).
However, this wouldn't work at all if the images show different views of the scene, so I first want to warp all my images to the same perspective and then do the averaging suggested above.
I want to remove moving objects from movie frames I have. To do so, I find the matching keypoints between 2 images, use RANSAC to remove outliers, and warp the perspective of image A to image B's perspective to get a warped image.
Now I want that warped image to be the same size as the src and dst images, but that's not what I get.
Eventually I want to have a full warped image from several frames, and to use it to remove objects that move from frame to frame.
1. How can I create the warped images in full size?
2. How can I find the corresponding pixels whose values I should average in order to remove the moving objects?
import os
import cv2
import numpy as np
from numpy.linalg import inv

# find key points + features of a given image
def get_keyPoints_and_features(img):
    descriptor = cv2.xfeatures2d.SIFT_create()
    kps, features = descriptor.detectAndCompute(img, None)
    kps = np.float32([kp.pt for kp in kps])
    return kps, features

# match key points of 2 images
def match_key_points(a, b, ratio=0.75):
    # unpack
    kpsA, featuresA = a
    kpsB, featuresB = b
    matcher = cv2.DescriptorMatcher_create("BruteForce")
    rawMatches = matcher.knnMatch(featuresA, featuresB, 2)
    matches = []
    # Lowe's ratio test to drop weak matches
    # (RANSAC removes the remaining outliers in findHomography below)
    for m in rawMatches:
        if len(m) == 2 and m[0].distance < m[1].distance * ratio:
            matches.append((m[0].trainIdx, m[0].queryIdx))
    # must have more than 4 matches;
    # the more matches we have, the more noise-robust the estimate will be
    assert(len(matches) > 4)
    ptsA = np.float32([kpsA[i] for (_, i) in matches])
    ptsB = np.float32([kpsB[i] for (i, _) in matches])
    (H, status) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC,
                                     ransacReprojThreshold=0.4)
    return matches, H

# warp src image to have a perspective like the dst image using the
# homography between both images
def warp_perspective(src, dst):
    # read images
    a, b = cv2.imread(src), cv2.imread(dst)
    # generate key points and features
    kps_and_features_a = get_keyPoints_and_features(a)
    kps_and_features_b = get_keyPoints_and_features(b)
    # get homography
    _, H = match_key_points(kps_and_features_a, kps_and_features_b)
    warped = cv2.warpPerspective(a, H, (a.shape[1], a.shape[0]))
    inv_warped = cv2.warpPerspective(warped, inv(H), (a.shape[1], a.shape[0]))
    cv2.imshow("a", a)
    cv2.imshow("b", b)
    cv2.imshow("warped", warped)
    cv2.imshow("inv_warped", inv_warped)
    cv2.moveWindow("a", 0, 0)
    cv2.moveWindow("b", 0, 370)
    cv2.moveWindow("warped", 600, 0)
    cv2.moveWindow("inv_warped", 600, 370)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def main():
    # get images
    path = r'...'
    images = [os.path.join(path, file) for file in os.listdir(path)]
    warp_perspective(images[0], images[1])

main()
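For question 1, my current idea, sketched below and based on what I've read about panorama stitching (so it may well be off), is to transform the corners of image a with H, compute a bounding box that also contains image b, and then warp with a translated homography onto a canvas of that size:

def warp_full_size(a, b, H):
    ha, wa = a.shape[:2]
    hb, wb = b.shape[:2]
    # corners of both images in the shape perspectiveTransform expects
    corners_a = np.float32([[0, 0], [wa, 0], [wa, ha], [0, ha]]).reshape(-1, 1, 2)
    corners_b = np.float32([[0, 0], [wb, 0], [wb, hb], [0, hb]]).reshape(-1, 1, 2)
    # where image a's corners land after applying the homography
    warped_corners_a = cv2.perspectiveTransform(corners_a, H)
    all_corners = np.concatenate((warped_corners_a, corners_b), axis=0)
    x_min, y_min = np.floor(all_corners.min(axis=0).ravel()).astype(int)
    x_max, y_max = np.ceil(all_corners.max(axis=0).ravel()).astype(int)
    # translation that shifts everything into positive coordinates
    T = np.array([[1, 0, -x_min], [0, 1, -y_min], [0, 0, 1]], dtype=np.float64)
    canvas_size = (int(x_max - x_min), int(y_max - y_min))
    warped_a = cv2.warpPerspective(a, T.dot(H), canvas_size)
    return warped_a, (-x_min, -y_min)  # offset at which image b sits inside the canvas

If that is roughly right, then for question 2 I would place b into the same canvas at that offset and average only the pixels where both images have data. Is that a reasonable approach?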