python opencv cv2 matchTemplate with transparency

OpenCV 3.0.0 added the ability to specify a mask while performing template matching. When I specify a mask I get this error: error: (-215) (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 in function matchTemplateMask
Template image (PNG with transparency):
Source image:
Code
# read the template emoji with the alpha channel
template = cv2.imread(imagePath, cv2.IMREAD_UNCHANGED)
channels = cv2.split(template)
zero_channel = np.zeros_like(channels[0])
mask = np.array(channels[3])
# all elements in alpha_channel that have value 0 are set to 1 in the mask matrix
mask[channels[3] == 0] = 1
# all elements in alpha_channel that have value 100 are set to 0 in the mask matrix
mask[channels[3] == 100] = 0
transparent_mask = cv2.merge([zero_channel, zero_channel, zero_channel, mask])
print image.shape, image.dtype # (72, 232, 3) uint8
print template.shape, template.dtype # (40, 40, 4) uint8
print transparent_mask.shape, transparent_mask.dtype # (40, 40, 4) uint8
# find the matches
res = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED, mask=transparent_mask)
Is something wrong with the image type? I am unable to find any examples (in Python) using the new mask parameter of the matchTemplate method. Does anyone know how to create the mask?

I was able to get this to work using Python 2.7.13 and opencv-python==3.1.0.4
Here is the code for it.
import cv2
import numpy as np
import sys
if len(sys.argv) < 3:
    print 'Usage: python match.py <template.png> <image.png>'
    sys.exit()
template_path = sys.argv[1]
template = cv2.imread(template_path, cv2.IMREAD_UNCHANGED)
channels = cv2.split(template)
zero_channel = np.zeros_like(channels[0])
mask = np.array(channels[3])
image_path = sys.argv[2]
image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
mask[channels[3] == 0] = 1
mask[channels[3] == 100] = 0
# transparent_mask = None
# According to http://www.devsplanet.com/question/35658323, we can only use
# cv2.TM_SQDIFF or cv2.TM_CCORR_NORMED
# All methods can be seen here:
# http://docs.opencv.org/2.4/doc/tutorials/imgproc/histograms/template_matching/template_matching.html#which-are-the-matching-methods-available-in-opencv
method = cv2.TM_SQDIFF # R(x,y) = \sum _{x',y'} (T(x',y')-I(x+x',y+y'))^2 (essentially, sum of squared differences)
transparent_mask = cv2.merge([zero_channel, zero_channel, zero_channel, mask])
result = cv2.matchTemplate(image, template, method, mask=transparent_mask)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
print 'Lowest squared difference WITH mask', min_val
# Now we'll try it without the mask (should give a much larger error)
transparent_mask = None
result = cv2.matchTemplate(image, template, method, mask=transparent_mask)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
print 'Lowest squared difference WITHOUT mask', min_val
Here it is as a gist.
Essentially, you need to make sure you're using the right matching method.
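As a small follow-up sketch (not part of the original answer): after the masked TM_SQDIFF call above, the best match sits at the minimum of the result, and you can mark it on the source image. Variable names follow the code above; match_debug.png is an arbitrary output name.
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
top_left = min_loc  # for TM_SQDIFF the best match is the minimum
h, w = template.shape[:2]
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)
cv2.imwrite('match_debug.png', image)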

My environment uses OpenCV 3.1.0 and Python 2.7.11.
Here is code that looks for a template image inside another image, where the template uses transparency (an alpha channel). I hope this can help you.
# Imports needed by the functions below (this answer targets Python 2.7)
import cv2
import numpy as np
import logging
import time
from ConfigParser import SafeConfigParser

def getMultiFullInfo(all_matches,w,h):
    #This function will rearrange the data and calculate the tuple
    # for the square and the center and the tolerance for each point
    result = []
    for match in all_matches:
        tlx = match[0]
        tly = match[1]
        top_left = (tlx,tly)
        brx = match[0] + w
        bry = match[1] + h
        bottom_right = (brx,bry)
        centerx = match[0] + w/2
        centery = match[1] + h/2
        center = [centerx,centery]
        result.append({'top_left':top_left,'bottom_right':bottom_right,'center':center,'tolerance':match[2]})
    return result
def getMulti(res, tolerance,w,h):
    #We get an opencv image in the form of a numpy array and we need to
    # find all the occurrences in there knowing that 2 squares cannot intersect
    #This will give us exactly the matches that are unique
    #First we need to get all the points where value is >= tolerance
    #This will sometimes get some squares that vary only by a few pixels and that are overlapping
    all_matches_full = np.where (res >= tolerance)
    logging.debug('*************Start of getMulti function')
    logging.debug('All >= tolerance')
    logging.debug(all_matches_full)
    #Now we need to arrange it in x,y coordinates
    all_matches_coords = []
    for pt in zip(*all_matches_full[::-1]):
        all_matches_coords.append([pt[0],pt[1],res[pt[1]][pt[0]]])
    logging.debug('In coords form')
    logging.debug(all_matches_coords)
    #Let's sort the new array
    all_matches_coords = sorted(all_matches_coords)
    logging.debug('Sorted')
    logging.debug(all_matches_coords)
    #This function will be called only when there is at least one match, so if matchTemplate returns something
    #this means we have found at least one record and we can prepare the analysis and loop through each record
    all_matches = [[all_matches_coords[0][0],all_matches_coords[0][1],all_matches_coords[0][2]]]
    i=1
    for pt in all_matches_coords:
        found_in_existing = False
        logging.debug('%s)',i)
        for match in all_matches:
            logging.debug(match)
            #This is the test to make sure that the square we analyse doesn't overlap with one of the squares already found
            if pt[0] >= (match[0]-w) and pt[0] <= (match[0]+w) and pt[1] >= (match[1]-h) and pt[1] <= (match[1]+h):
                found_in_existing = True
                if pt[2] > match[2]:
                    match[0] = pt[0]
                    match[1] = pt[1]
                    match[2] = res[pt[1]][pt[0]]
        if not found_in_existing:
            all_matches.append([pt[0],pt[1],res[pt[1]][pt[0]]])
        i += 1
    logging.debug('Final')
    logging.debug(all_matches)
    logging.debug('Final with all info')
    #Before returning the result, we will arrange it with data easily accessible
    all_matches = getMultiFullInfo(all_matches,w,h)
    logging.debug(all_matches)
    logging.debug('*************End of getMulti function')
    return all_matches
def checkPicture(screenshot,templateFile, tolerance, multiple = False):
    #This is an intermediary function so that the actual function doesn't include too many specific arguments
    #We open the config file
    configFile = 'test.cfg'
    config = SafeConfigParser()
    config.read(configFile)
    basepics_dir = config.get('general', 'basepics_dir')
    debug_dir = config.get('general', 'debug_dir')
    font = cv2.FONT_HERSHEY_PLAIN
    #The value -1 means we keep the file as is, meaning with color and alpha channel if any
    # btw, 0 means grayscale and 1 is color
    template = cv2.imread(basepics_dir+templateFile,-1)
    #Now we search in the picture
    result = findPicture(screenshot,template, tolerance, multiple)
    #If it didn't get any result, we log the best value
    if not result['res']:
        logging.debug('Best value found for %s is: %f',templateFile,result['best_val'])
    elif logging.getLogger().getEffectiveLevel() == 10:
        screenshot_with_rectangle = screenshot.copy()
        for pt in result['points']:
            cv2.rectangle(screenshot_with_rectangle, pt['top_left'], pt['bottom_right'], 255, 2)
            fileName_top_left = (pt['top_left'][0],pt['top_left'][1]-10)
            cv2.putText(screenshot_with_rectangle,str(pt['tolerance'])[:4],fileName_top_left, font, 1,(255,255,255),2)
        #Now we save to the file if needed
        filename = time.strftime("%Y%m%d-%H%M%S") + '_' + templateFile[:-4] + '.jpg'
        cv2.imwrite(debug_dir + filename, screenshot_with_rectangle)
    result['name']=templateFile
    return result
def extractAlpha(img, hardedge = True):
    if img.shape[2]>3:
        logging.debug('Mask detected')
        channels = cv2.split(img)
        mask = np.array(channels[3])
        if hardedge:
            for idx in xrange(len(mask[0])):
                if mask[0][idx] <=128:
                    mask[0][idx] = 0
                else:
                    mask[0][idx] = 255
        mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
        return {'res':True,'image':img,'mask':mask}
    else:
        return {'res':False,'image':img}
def findPicture(screenshot,template, tolerance, multiple = False):
    #This function will work with color images, 3 channels minimum
    #The template can have an alpha channel and we will extract it to have the mask
    logging.debug('Looking for %s' , template)
    logging.debug('Tolerance to check is %f' , tolerance)
    logging.debug('*************Start of checkPicture')
    h = template.shape[0]
    w = template.shape[1]
    #We will now extract the alpha channel
    tmpl = extractAlpha(template)
    logging.debug('Image width: %d - Image height: %d',w,h)
    # the method used for comparison, can be ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR','cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
    meth = 'cv2.TM_CCORR_NORMED'
    method = eval(meth)
    # Apply template Matching
    if tmpl['res']:
        res = cv2.matchTemplate(screenshot,tmpl['image'],method, mask = tmpl['mask'])
    else:
        res = cv2.matchTemplate(screenshot,tmpl['image'],method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
        best_val = 1 - min_val
    else:
        top_left = max_loc
        best_val = max_val
    #We need to ensure we found at least one match, otherwise we return False
    if best_val >= tolerance:
        if multiple:
            #We need to find all the times the image is found
            all_matches = getMulti(res, float(tolerance),int(w),int(h))
        else:
            bottom_right = (top_left[0] + w, top_left[1] + h)
            center = (top_left[0] + (w/2), top_left[1] + (h/2))
            all_matches = [{'top_left':top_left,'bottom_right':bottom_right,'center':center,'tolerance':best_val}]
        #point will be in the form: [{'tolerance': 0.9889718890190125, 'center': (470, 193), 'bottom_right': (597, 215), 'top_left': (343, 172)}]
        logging.debug('The points found will be:')
        logging.debug(all_matches)
        logging.debug('*************End of checkPicture')
        return {'res': True,'points':all_matches}
    else:
        logging.debug('Could not find a value above tolerance')
        logging.debug('*************End of checkPicture')
        return {'res': False,'best_val':best_val}
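A minimal way to exercise findPicture directly, as a sketch (the file names here are assumptions; checkPicture additionally expects a test.cfg with a [general] section):
logging.basicConfig(level=logging.DEBUG)
screenshot = cv2.imread('screenshot.png')   # 3-channel BGR screenshot
template = cv2.imread('button.png', -1)     # -1 keeps the alpha channel if present
found = findPicture(screenshot, template, 0.95)
if found['res']:
    print(found['points'][0]['center'])
else:
    print('best value was only %f' % found['best_val'])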

In OpenCV 4.2.0, the first two suggested code samples result in the following error for me:
cv2.error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\templmatch.cpp:766: error: (-215:Assertion failed) (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 in function 'cv::matchTemplateMask'
It looks like things have become much easier in the meantime. Here is my Python code, which I tried to reduce as much as possible. The file "crowncap_85x85_mask.png" is a black-and-white image; all black pixels in the mask will be ignored during matching.
Still, only the matching methods TM_SQDIFF and TM_CCORR_NORMED are supported when using a mask.
import cv2 as cv
img = cv.imread("fridge_zoomed.png", cv.IMREAD_COLOR)
templ = cv.imread("crowncap_85x85.png", cv.IMREAD_COLOR)
mask = cv.imread( "crowncap_85x85_mask.png", cv.IMREAD_COLOR )
result = cv.matchTemplate(img, templ, cv.TM_CCORR_NORMED, None, mask)
cv.imshow("Matching with mask", result)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('Highest correlation WITH mask', max_val)
result = cv.matchTemplate(img, templ, cv.TM_CCORR_NORMED)
cv.imshow("Matching without mask", result)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('Highest correlation without mask', max_val)
while True:
    if cv.waitKey(10) == 27:
        break
cv.destroyAllWindows()
If you want to generate the mask from the alpha channel of your template, you can proceed as follows:
import cv2 as cv
import numpy as np
img = cv.imread("fridge_zoomed.png", cv.IMREAD_COLOR)
templ = cv.imread("crowncap_85x85_transp.png", cv.IMREAD_COLOR)
templ_incl_alpha_ch = cv.imread("crowncap_85x85_transp.png", cv.IMREAD_UNCHANGED)
channels = cv.split(templ_incl_alpha_ch)
#extract "transparency" channel from image
alpha_channel = np.array(channels[3])
#generate mask image, all black dots will be ignored during matching
mask = cv.merge([alpha_channel,alpha_channel,alpha_channel])
cv.imshow("Mask", mask)
result = cv.matchTemplate(img, templ, cv.TM_CCORR_NORMED, None, mask)
cv.imshow("Matching with mask", result)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('Highest correlation WITH mask', max_val)
result = cv.matchTemplate(img, templ, cv.TM_CCORR_NORMED)
cv.imshow("Matching without mask", result)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('Highest correlation without mask', max_val)
while True:
    if cv.waitKey(10) == 27:
        break
cv.destroyAllWindows()
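TM_SQDIFF, the other mask-capable method, works the same way except that the best match is the minimum of the result; here is a short sketch reusing the images loaded above:
result = cv.matchTemplate(img, templ, cv.TM_SQDIFF, None, mask)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('Lowest squared difference WITH mask', min_val, 'at', min_loc)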

I now have the whole thing running in Python 3.9; I have created a new question here that explicitly refers to Python 3.x using opencv-python:
Template Matching with Python 3.9.1, opencv.python 4.5.1.48 and mask (transparency).
I took the graphics from this question, because I have also used them privately for testing.

Related

How to properly filter an image using OpenCV to read and extract text with the highest possible accuracy?

This is what I do: I take some images, and some of them contain information that I need. These are the images:
How do I find that information? I use a template that contains two symbols (Euro and Dollar); when these symbols are found in any of the images, I can process the image and try to extract the data that I need.
How do I extract the data? I take the dimensions of the found match, and since I know that the information to extract will always be to the right of the match, I draw a box out to the right edge of my image, which ensures I have a box containing the data to extract.
Here is the code, I will divide it into several sections to explain the process a little better:
1) Initial Settings for the Code (Imports, a list of images which will be processed, a couple of functions to filter the image and finally the configuration set for reading data from Tesseract):
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
from pytesseract import Output
imagenes = ["monitor1.jpg", "monitor2.jpg", "monitor3.jpg"]
# get grayscale image
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Noise Removal (This is the filter I am using)
def remove_noise(image):
    return cv2.medianBlur(image,5)
# The configuration we will use to read the images:
my_config = r"--psm 11 --oem 3"
2) Next, the template with which we will try to match is read and we take its dimensions (w=width and h=height).
We present the methods to find the matches and enter a loop reviewing image by image, trying to find a matching image:
# Reading the Template (Euro and Dollar):
# template_simbolo = cv2.imread('template_euro_dolar.jpg', 0)
template = cv2.imread('template_simbolos.jpg', 0)
w, h = template.shape[::-1]
# The methods we will use to find the matches (this should be a list of 6 methods,
# but working with a big list of images it takes an eternity, so we will only
# use one for now for the tests):
methods = ['cv2.TM_CCOEFF']
# A loop where we are going to filter every image, find the matchs if there are
# and extract the data:
for img in imagenes:
    print("**************************")
    # Image to read:
    img_rgb = cv2.imread(img)
    # The image filtered in gray:
    gray = get_grayscale(img_rgb)
    img_gray = remove_noise(gray)
    # With res we will find the matches but we only take the accurate ones (80% or more)
    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    threshold = 0.8
    loc = np.where(res >= threshold)
    print(loc)
    print(len(loc[0]))
3) In this part of the code, we first enclose the match in a box, filter the original image, and look for the coordinates of the match. Once this is done, we proceed to enclose in a box the section where the desired information is found.
    # If loc contains values it is because there is a match:
    if len(loc[0]) > 0:
        print("Match Found")
        # We enclose the found matches in a box and save the result:
        for pt in zip(*loc[::-1]):
            cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 2)
        cv2.imwrite('res_monitor.png', img_rgb)
        # A loop of matching methods:
        for meth in methods:
            # We filter the image and change it to a gray color
            gray = get_grayscale(img_rgb)
            img_gray = remove_noise(gray)
            # We evaluate the method to use and according to it we have some
            # default coordinates
            method = eval(meth)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            print("min_val:", min_val)
            print("max_val:", max_val)
            print("min_loc:", min_loc)
            print("max_loc:", max_loc)
            # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
            if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
                top_left = min_loc
            else:
                top_left = max_loc
            # To know the bottom right coordinate, we respectively take the value
            # of top_left and add the width w and height h to know this coordinate.
            w, h = template.shape[::-1]
            bottom_right = (top_left[0] + w, top_left[1] + h)
            print("top_left:", top_left)
            print("bottom_right:", bottom_right)
            print("x:", top_left[0])
            print("y:", top_left[1])
            # Now, in our original image, which we previously filtered, we will
            # place a box or rectangle according to the dimensions established
            # before. (top_left and bottom_right)
            w, h = img_gray.shape[::-1]
            print("w:", w)
            print("h:", bottom_right[1])
            cv2.rectangle(img_gray, top_left, bottom_right, 255, 2)
            imagen = cv2.rectangle(img_gray, top_left, (w, bottom_right[1]), 255, 2)
            x = top_left[0]
            y = top_left[1]
            w = w
            h = bottom_right[1]
            # Finally we crop this section of the code where we established the area
            # to review and with pytesseract we look for the data that we can obtain
            # from said cropped image.
            crop_image = img_gray[y:h, x:w]
            cv2.imwrite("croped.jpg", crop_image)
            data = pytesseract.image_to_data(crop_image, config=my_config,
                                             output_type=Output.DICT)
            print(data, "\n")
4) Finally, we create a dictionary to save the euro and dollar rates; if everything goes well, they will be saved correctly.
At the end of this process, a plot is shown to verify that the information was extracted correctly.
            # We create a dictionary to store the values of Euro and Dollar
            i = 0
            currencies = {}
            for value in data["text"]:
                print(value)
                try:
                    currency = value.replace(".", "").replace(",", ".")
                    currency = float(currency)
                    i = i + 1
                    if i == 1:
                        currencies["Euro"] = currency
                    elif i == 2 and currency < currencies["Euro"]:
                        currencies["Dolar"] = currency
                except ValueError:
                    pass
            # We pass the image to string to obtain the rates of the currencies
            text = pytesseract.image_to_string(crop_image, config = my_config)
            print(text)
            print(currencies)
            # We graph the results and confirm that the data extraction and the
            # demarcated area are correct.
            plt.subplot(121),plt.imshow(res, cmap = 'gray')
            plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
            plt.subplot(122),plt.imshow(img_gray, cmap = 'gray')
            plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
            plt.suptitle(meth)
            plt.show()
    else:
        print("DOES NOT MATCH")
The results:
With the code and all the logic presented above, it usually works very well on these images, but for some reason it sometimes doesn't read the image properly and doesn't save the information in the desired way.
As can be seen, the area that the code takes is the desired one, but the currency dictionary does not record any information:
This is strange, because if I run the code on a longer list of images, that same image is recognized perfectly.
So the problem here is that sometimes it worked and sometimes it didn't, and I'm not quite sure why. Does anyone know what I can polish? What am I doing wrong? Any advice?

How do I get the location of a matched template in OpenCV?

I have this code so far.
import cv2 as cv
import numpy as np
import pyautogui as pg
def grabscreen():
    i = pg.screenshot()
    return cv.cvtColor(np.array(i), cv.COLOR_RGB2BGR)
img_rgb = grabscreen()
img_gray = cv.cvtColor(img_rgb, cv.COLOR_BGR2GRAY)
template = cv.imread('capture.png',0)
w, h = template.shape[::-1]
res = cv.matchTemplate(img_gray,template,cv.TM_CCOEFF_NORMED)
threshold = 0.8
loc = np.where( res >= threshold)
It locates a template on your screen.
What the heck method, function, whatever would I use to get the xy where the template was found?
(Center of where it was found, like pyautogui does it)
Use the minMaxLoc function to find the maximum and minimum values (as well as their positions) in a given array:
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)
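A short sketch building on that (variable names follow the question's code; with TM_CCOEFF_NORMED the best match is at max_loc, and the center is offset by half the template size, which is what pyautogui reports):
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)
if max_val >= threshold:
    top_left = max_loc
    center = (top_left[0] + w // 2, top_left[1] + h // 2)
    print('match center:', center)
else:
    print('no match above threshold')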

remove black dashed lines from image stitching

I am stitching multiple images. While stitching two images, a dashed black line shows up along the seam, like below.
Does anyone know how I can remove or get rid of this black dashed line?
Main part of the stitching code, which stitches two images and then calls the next image with the result of the previously stitched images until all images are done:
detector = cv2.xfeatures2d.SURF_create(400)
gray1 = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
ret1, mask1 = cv2.threshold(gray1,1,255,cv2.THRESH_BINARY)
kp1, descriptors1 = detector.detectAndCompute(gray1,mask1)
gray2 = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
ret2, mask2 = cv2.threshold(gray2,1,255,cv2.THRESH_BINARY)
kp2, descriptors2 = detector.detectAndCompute(gray2,mask2)
keypoints1Im = cv2.drawKeypoints(image1, kp1, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints1Im)
keypoints2Im = cv2.drawKeypoints(image2, kp2, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints2Im)
matcher = cv2.BFMatcher()
matches = matcher.knnMatch(descriptors2,descriptors1, k=2)
good = []
for m, n in matches:
    if m.distance < 0.55 * n.distance:
        good.append(m)
print (str(len(good)) + " Matches were Found")
if len(good) <= 10:
    return image1
matches = copy.copy(good)
matchDrawing = util.drawMatches(gray2,kp2,gray1,kp1,matches)
util.display("matches",matchDrawing)
src_pts = np.float32([ kp2[m.queryIdx].pt for m in matches ]).reshape(-1,1,2)
dst_pts = np.float32([ kp1[m.trainIdx].pt for m in matches ]).reshape(-1,1,2)
A = cv2.estimateRigidTransform(src_pts,dst_pts,fullAffine=False)
if A is None:
    HomogResult = cv2.findHomography(src_pts,dst_pts,method=cv2.RANSAC)
    H = HomogResult[0]
height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners2 = np.zeros((4,2))
for i in range(0,4):
    cornerX = corners2[i,0]
    cornerY = corners2[i,1]
    if A is not None: #check if we're working with affine transform or perspective transform
        warpedCorners2[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
        warpedCorners2[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
    else:
        warpedCorners2[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
        warpedCorners2[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
allCorners = np.concatenate((corners1, warpedCorners2), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
Please help me out. Thanks.
Edit:
Input image1(resized)
Input image2(resized)
Result(resized)
Update:
Result after #fmw42's answer:
The problem arises because when you do the warping, the border pixels of the image get resampled/interpolated with black background pixels. This leaves a non-zero border around your warped image of varying values that show as your dashed dark line when merged with the other image. This happens because your merge test is binary, tested with != 0.
So one simple thing you can do is mask the warped image in Python/OpenCV to get its bounds from the black background outside the image and then erode the mask. Then use the mask to erode the image boundary. This can be achieved by the following changes to your last lines of code:
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
I simply added the following code lines to your code:
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
You can increase the kernel size if desired to erode more.
Here is the before and after. Note that I did not have SURF and tried to use ORB, which did not align well. So your roads do not align. But the mismatch due to misalignment emphasizes the issue as it shows the dashed jagged black border line. The fact that ORB did not work or I do not have proper code from above to make it align is not important. The masking does what I think you want and is extendable to the processing of all your images.
The other thing that can be done in combination with the above is to feather the mask and then ramp blend the two images using the mask. This is done by blurring the mask (a bit more) and then stretching the values over the inside half of the blurred border and making the ramp only on the outside half of the blurred border. Then blend the two images with the ramped mask and its inverse as follows for the same code as above.
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
mask2 = cv2.blur(mask2, (5,5))
mask2 = skimage.exposure.rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged3.png", result)
The result when compared to the original composite is as follows:
ADDITION
I have corrected my ORB code to reverse the use of images and now it aligns. So here are all 3 techniques: the original, the one that only uses a binary mask and the one that uses a ramped mask for blending (all as described above).
ADDITION2
Here are the 3 requested images: original, binary masked, ramped mask blending.
Here is my ORB code for the last version above
I tried to change as little as possible from your code, except I had to use ORB and I had to swap the names image1 and image2 near the end.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
from skimage.exposure import rescale_intensity
image1 = cv2.imread("image1.jpg")
image2 = cv2.imread("image2.jpg")
gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
# Detect ORB features and compute descriptors.
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(gray1, None)
keypoints2, descriptors2 = orb.detectAndCompute(gray2, None)
# Match features.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
matches = matcher.match(descriptors1, descriptors2, None)
# Sort matches by score
matches.sort(key=lambda x: x.distance, reverse=False)
# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]
# Draw top matches
imMatches = cv2.drawMatches(image1, keypoints1, image2, keypoints2, matches, None)
cv2.imwrite("/Users/fred/desktop/image1_image2_matches.png", imMatches)
# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
    points1[i, :] = keypoints1[match.queryIdx].pt
    points2[i, :] = keypoints2[match.trainIdx].pt
print(points1)
print("")
print(points2)
A = cv2.estimateRigidTransform(points1,points2,fullAffine=False)
#print(A)
if A is None:
    HomogResult = cv2.findHomography(points1,points2,method=cv2.RANSAC)
    H = HomogResult[0]
height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners2 = np.zeros((4,2))
# project corners2 into domain of image1 from A affine or H homography
for i in range(0,4):
    cornerX = corners2[i,0]
    cornerY = corners2[i,1]
    if A is not None: #check if we're working with affine transform or perspective transform
        warpedCorners2[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
        warpedCorners2[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
    else:
        warpedCorners2[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
        warpedCorners2[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
allCorners = np.concatenate((corners1, warpedCorners2), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
mask2 = cv2.blur(mask2, (5,5))
mask2 = rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged2.png", result)
You had the following. Note where the names, image1 and image2 are being used compared to my code above.
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
Horizontal Gluing
I will focus on one of the cuts as a proof of concept. I agree with the comments that your code is a bit lengthy and hard to work with. So step one is to glue the pictures myself.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
upper_image = cv2.cvtColor(cv2.imread('yQv6W.jpg'), cv2.COLOR_BGR2RGB)/255
lower_image = cv2.cvtColor(cv2.imread('zoWJv.jpg'), cv2.COLOR_BGR2RGB)/255
result_image = np.zeros((466+139,700+22,3))
result_image[139:139+lower_image.shape[0],:lower_image.shape[1]] = lower_image
result_image[0:upper_image.shape[0], 22:22+upper_image.shape[1]] = upper_image
plt.imshow(result_image)
OK, no dashed black line, but I admit it's not perfect either. So the next step is to align at least the street and the little path at the very right of the picture. For that I will need to shrink the picture to a non-integer size and turn that back into a grid. I will use a kNN-like method for that.
Edit: As requested in the comments, I'll explain the shrinking in a bit more detail, since it would have to be done by hand again for another stitching. The magic happens in the line (I replaced n by its value)
f = UnivariateSpline([0,290,510,685],[0,310,530,700])
I first tried to scale the lower picture in the x-direction to make the little path on the very right fit the upper image. Unfortunately, the street then wouldn't line up with the street. So what I do is scale down according to the above function: at pixel 0 I still want pixel zero, at 290 I want what used to be at 310, and so on.
Notice that 290, 510 and 310, 530 are the new and old x-coordinates, respectively, of the street and the path at the height of the gluing.
class Image_knn():
    def fit(self, image):
        self.image = image.astype('float')
    def predict(self, x, y):
        image = self.image
        weights_x = [(1-(x % 1)).reshape(*x.shape,1), (x % 1).reshape(*x.shape,1)]
        weights_y = [(1-(y % 1)).reshape(*x.shape,1), (y % 1).reshape(*x.shape,1)]
        start_x = np.floor(x).astype('int')
        start_y = np.floor(y).astype('int')
        return sum([image[np.clip(np.floor(start_x + x), 0, image.shape[0]-1).astype('int'),
                          np.clip(np.floor(start_y + y), 0, image.shape[1]-1).astype('int')] * weights_x[x]*weights_y[y]
                    for x,y in itertools.product(range(2),range(2))])
image_model = Image_knn()
image_model.fit(lower_image)
n = 685
f = UnivariateSpline([0,290,510,n],[0,310,530,700])
np.linspace(0,lower_image.shape[1],n)
yspace = f(np.arange(n))
result_image = np.zeros((466+139,700+22, 3))
a,b = np.meshgrid(np.arange(0,lower_image.shape[0]), yspace)
result_image[139:139+lower_image.shape[0],:n] = np.transpose(image_model.predict(a,b), [1,0,2])
result_image[0:upper_image.shape[0], 22:22+upper_image.shape[1]] = upper_image
plt.imshow(result_image, 'gray')
Much better: no black line, but maybe we can still smooth the cut a bit. I figured that if I take convex combinations of the upper and lower image at the cut, it would look much better.
result_image = np.zeros((466+139,700+22,3))
a,b = np.meshgrid(np.arange(0,lower_image.shape[0]), yspace)
result_image[139:139+lower_image.shape[0],:n] = np.transpose(image_model.predict(a,b), [1,0,2])
transition_range = 10
result_image[0:upper_image.shape[0]-transition_range, 22:22+upper_image.shape[1]] = upper_image[:-transition_range,:]
transition_pixcels = upper_image[-transition_range:,:]*np.linspace(1,0,transition_range).reshape(-1,1,1)
result_image[upper_image.shape[0]-transition_range:upper_image.shape[0], 22:22+upper_image.shape[1]] *= np.linspace(0,1,transition_range).reshape(-1,1,1)
result_image[upper_image.shape[0]-transition_range:upper_image.shape[0], 22:22+upper_image.shape[1]] += transition_pixcels
plt.imshow(result_image)
plt.savefig('text.jpg')
Tilted Gluing
For completeness, here is also a version gluing at the top with a tilted bottom. I attach the pictures at a point and rotate around that fixed point by a few degrees, and again I correct some very slight misalignments at the end. To get the coordinates for that I am using JupyterLab and %matplotlib widget.
fixed_point_upper = np.array([139,379])
fixed_point_lower = np.array([0,400])
angle = np.deg2rad(2)
down_dir = np.array([np.sin(angle+np.pi/2),np.cos(angle+np.pi/2)])
right_dir = np.array([np.sin(angle),np.cos(angle)])
result_image_height = np.ceil((fixed_point_upper+lower_image.shape[0]*down_dir+(lower_image.shape[1]-fixed_point_lower[1])*right_dir)[0]).astype('int')
right_shift = np.ceil(-(fixed_point_upper+lower_image.shape[0]*down_dir-fixed_point_lower[1]*right_dir)[1]).astype('int')
result_image_width = right_shift+upper_image.shape[1]
result_image = np.zeros([result_image_height, result_image_width,3])
fixed_point_result = np.array([fixed_point_upper[0],fixed_point_upper[1]+right_shift])
lower_top_left = fixed_point_result-fixed_point_lower[1]*right_dir
result_image[:upper_image.shape[0],-upper_image.shape[1]:] = upper_image
# calculate points in lower_image
result_coordinates = np.stack(np.where(np.ones(result_image.shape[:2],dtype='bool')),axis=1)
lower_coordinates = np.stack([(result_coordinates-lower_top_left)@down_dir,(result_coordinates-lower_top_left)@right_dir],axis=1)
mask = (0 <= lower_coordinates[:,0]) & (0 <= lower_coordinates[:,1]) \
& (lower_coordinates[:,0] <= lower_image.shape[0]) & (lower_coordinates[:,1] <= lower_image.shape[1])
result_coordinates = result_coordinates[mask]
lower_coordinates = lower_coordinates[mask]
# COORDINATES ON RESULT IMAGE
# left street 254
# left sides of houses 295, 420, 505
# right small street, both sides big street 590,635,664
# COORDINATES ON LOWER IMAGE
# left street 234
# left sides of houses 280, 399, 486
# right small street, both sides big street 571, 617, 642
def coord_transform(y):
    return (y-lower_top_left[1])/right_dir[1]
y = tuple(map(coord_transform, [lower_top_left[1], 254, 295, 420, 505, 589, 635, 664]))
f = UnivariateSpline(y,[0, 234, 280, 399, 486, 571, 617, 642])
result_image[result_coordinates[:,0],result_coordinates[:,1]] = image_model.predict(lower_coordinates[:,0],np.vectorize(f)(lower_coordinates[:,1]))

OpenCV has found an image that doesn't exist on screen

I am trying to use OpenCV to find a button's location on screen. If the button exists on screen, OpenCV works perfectly, but it returns some non-zero x,y even if the image doesn't exist. How can I fix it?
import cv2
from PIL import ImageGrab

def buttonlocation(image):
    im = ImageGrab.grab()
    im.save('screenshot.png')
    img = cv2.imread(image,0)
    img2 = img.copy()
    template = cv2.imread('screenshot.png',0)
    w,h = template.shape[::-1]
    meth = 'cv2.TM_SQDIFF'
    img = img2.copy()
    method = eval(meth)
    res = cv2.matchTemplate(img,template,method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    top_left = min_loc
    x,y = top_left
    return x,y
The documentation of OpenCV details the two steps of the template matching procedure.
R = cv2.matchTemplate(I, T, method) computes an image R. Each pixel x,y of this image holds a score for the similarity between the template T and the sub-image of I starting at x,y. For instance, if the method cv.TM_SQDIFF is applied, the score is computed as:
R(x,y) = \sum_{x',y'} (T(x',y') - I(x+x',y+y'))^2
If R[x,y] is zero, then the sub-image I[x:x+sxT, y:y+syT] is exactly identical to the template T. The smaller R[x,y] is, the closer the sub-image is to the template.
cv2.minMaxLoc(R) is applied to find the minimum of R. The corresponding sub-image of I is expected to be closer to the template than any other sub-image of I.
If the image I does not contain the template, the sub-image of I corresponding to the minimum of R can be very different from T. But the value of the minimum reflects this! Indeed, a threshold on R can be applied as a way to decide whether the template is in the image or not.
Choosing the value for the threshold is a tricky task. It could be a fraction of the maximum value of R or a fraction of the mean value of R. The influence of the template size can be discarded by dividing R by sxT*syT. The maximum value of R depends on the template size and the type of the image: for TM_SQDIFF on a CV_8UC3 image (unsigned char, 3 channels), the maximum value of R is 255^2*3*sxT*syT.
Here is an example:
import cv2
img = cv2.imread('image.jpg',eval('cv2.CV_LOAD_IMAGE_COLOR'))
template = cv2.imread('template.jpg',eval('cv2.CV_LOAD_IMAGE_COLOR'))
cv2.imshow('image',img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
meth = 'cv2.TM_SQDIFF'
method = eval(meth)
res = cv2.matchTemplate(img,template,method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = min_loc
x,y = top_left
h,w,c=template.shape
print 'R='+str( min_val)
if min_val< h*w*3*(20*20):
    cv2.rectangle(img,min_loc,(min_loc[0] + w,min_loc[1] + h),(0,255,0),3)
else:
    print 'first template not found'
template = cv2.imread('template2.jpg',eval('cv2.CV_LOAD_IMAGE_COLOR'))
res = cv2.matchTemplate(img,template,method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = min_loc
x,y = top_left
h,w,c=template.shape
print 'R='+str( min_val)
if min_val< h*w*3*(20*20):
    cv2.rectangle(img,min_loc,(min_loc[0] + w,min_loc[1] + h),(0,0,255),3)
else:
    print 'second template not found'
cv2.imwrite( "result.jpg", img);
cv2.namedWindow('res',0)
cv2.imshow('res',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
The image:
The first template is to be found:
The second template is not to be found:
The result:

Resolution Manipulation for Template Matching in OpenCV

I am trying to use template matching to find an equation inside a given PDF document that is generated from LaTeX. When I use the code over here, I only get a very good match when I crop the picture from the original page (converted to JPEG or PNG); however, when I compile the equation code separately and generate a JPG/PNG output of it, the matching goes tremendously wrong.
I believe the reason is related to the resolution, but since I am an amateur in this field, I cannot reasonably make the JPG generated from the standalone equation have the same pixel structure as the JPG of the whole page. Here is the code, copied (more or less) from the above-mentioned OpenCV page, which is a Python implementation:
import cv2
from PIL import Image
img = cv2.imread('location of the original image', 0)
img2 = img.copy()
template = cv2.imread('location of the patch I look for',0)
w, h = template.shape[::-1]
# All the 6 methods for comparison in a list
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
method = eval(methods[0])
# Apply template Matching
res = cv2.matchTemplate(img,template,method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
# If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
    top_left = min_loc
else:
    top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
print top_left, bottom_right
img = Image.open('location of the original image')
#cropping the original image with the found coordinates to make a qualitative comparison
cropped = img.crop((top_left[0], top_left[1], bottom_right[0], bottom_right[1]))
cropped.save('location to save the cropped image using the coordinates found by template matching')
Here is a sample page that I look for the first equation:
The code to generate a specific standalone equation is as follows:
\documentclass[preview]{standalone}
\usepackage{amsmath}
\begin{document}\begin{align*}
(\mu_1+\mu_2)(\emptyset) = \mu_1(\emptyset) + \mu_2(\emptyset) = 0 + 0 =0
\label{eq_0}
\end{align*}
\end{document}
I compile this and later trim the white space around the equation, either using pdfcrop or using the .image() method in PythonMagick. Template matching with this trimmed output against the original page does not give a reasonable result. Here is the trimmed/converted output using pdfcrop/Mac's Preview.app:
Cropping directly the equation from the above page works perfectly. I would appreciate some explanation and help.
EDIT:
I also found the following, which uses template matching by brute-forcing different possible scales:
http://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/
However, since I am willing to process as many as 1000 documents, this seems a very slow method to go with. Plus, I imagine there should be a more logical way of handling it, by somehow finding the relevant scales.
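For reference, a rough sketch of the brute-force multi-scale loop the linked article describes (the file names and the scale range are assumptions, not taken from the question):
import cv2
import numpy as np
page = cv2.imread('page.png', 0)          # full page rendered as an image
template = cv2.imread('equation.png', 0)  # standalone equation render
best = None  # (score, location, scale)
for scale in np.linspace(0.5, 1.5, 21):
    resized = cv2.resize(template, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
    th, tw = resized.shape[:2]
    if th > page.shape[0] or tw > page.shape[1]:
        continue
    res = cv2.matchTemplate(page, resized, cv2.TM_CCOEFF_NORMED)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    if best is None or max_val > best[0]:
        best = (max_val, max_loc, scale)
print(best)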
Instead of template matching you could use features, i.e. keypoints with descriptors. They are scale-invariant, so you do not need to iterate over differently scaled versions of the image.
The Python example find_obj.py provided with OpenCV works with ORB features for your given example.
python find_obj.py --feature=brisk rB4Yy_big.jpg ZjBAA.jpg
Note that I did not use the cropped version of the formula to search for, but a version with some white pixels around it, so the keypoint detection can work correctly. There needs to be some space around it, because keypoints have to be completely inside the image. Otherwise the descriptors can not be calculated.
The big image is the original from your post.
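If your formula image is tightly cropped, one way to add such a white margin, as a sketch (the 30-pixel border size is an arbitrary choice), is cv2.copyMakeBorder:
import cv2
formula = cv2.imread('ZjBAA.jpg', 0)
# pad white pixels on every side so keypoints near the edge can still be described
padded = cv2.copyMakeBorder(formula, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=255)
cv2.imwrite('ZjBAA_padded.jpg', padded)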
One additional remark: You will always get some matches. If the formula image you are searching for is not present in the big images, the matches will be nonsensical. If you need to sort out these false positives, you have the following options (a short sketch of both checks follows below):
Check if the average distance of the resulting DMatches is small enough.
Check if the transformation matrix can be calculated.
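A rough sketch of both checks (raw_matches, kp1 and kp2 are assumed to come from a knnMatch-based setup like the one in the edit below; the inlier threshold is an arbitrary choice):
import numpy as np
import cv2
good = [m[0] for m in raw_matches if len(m) == 2 and m[0].distance < 0.75 * m[1].distance]
# Check 1: average descriptor distance of the surviving matches
avg_distance = np.mean([m.distance for m in good]) if good else float('inf')
print('average match distance:', avg_distance)  # a large value hints at a false positive
# Check 2: can a homography be estimated, and with how many inliers?
if len(good) >= 4:
    p1 = np.float32([kp1[m.queryIdx].pt for m in good])
    p2 = np.float32([kp2[m.trainIdx].pt for m in good])
    H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
    if H is None or np.sum(status) < 10:
        print('too few inliers - probably a false positive')
else:
    print('not enough matches for a homography - probably a false positive')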
Edit: Since you asked for it, here is a version that draws the bounding box around the found formula instead of the matches:
#!/usr/bin/env python
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2
def init_feature():
    detector = cv2.BRISK_create()
    norm = cv2.NORM_HAMMING
    matcher = cv2.BFMatcher(norm)
    return detector, matcher
def filter_matches(kp1, kp2, matches, ratio = 0.75):
    mkp1, mkp2 = [], []
    for m in matches:
        if len(m) == 2 and m[0].distance < m[1].distance * ratio:
            m = m[0]
            mkp1.append( kp1[m.queryIdx] )
            mkp2.append( kp2[m.trainIdx] )
    p1 = np.float32([kp.pt for kp in mkp1])
    p2 = np.float32([kp.pt for kp in mkp2])
    kp_pairs = zip(mkp1, mkp2)
    return p1, p2, kp_pairs
def explore_match(win, img1, img2, kp_pairs, status = None, H = None):
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    vis = np.zeros((max(h1, h2), w1+w2), np.uint8)
    vis[:h1, :w1] = img1
    vis[:h2, w1:w1+w2] = img2
    vis = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)
    if H is not None:
        corners = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]])
        corners = np.int32( cv2.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2) + (w1, 0) )
        cv2.polylines(vis, [corners], True, (0, 0, 255))
    cv2.imshow(win, vis)
    return vis
if __name__ == '__main__':
    img1 = cv2.imread('rB4Yy_big.jpg' , 0)
    img2 = cv2.imread('ZjBAA.jpg', 0)
    detector, matcher = init_feature()
    kp1, desc1 = detector.detectAndCompute(img1, None)
    kp2, desc2 = detector.detectAndCompute(img2, None)
    raw_matches = matcher.knnMatch(desc1, trainDescriptors = desc2, k = 2)
    p1, p2, kp_pairs = filter_matches(kp1, kp2, raw_matches)
    if len(p1) >= 4:
        H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
        print('%d / %d inliers/matched' % (np.sum(status), len(status)))
        vis = explore_match('find_obj', img1, img2, kp_pairs, status, H)
        cv2.waitKey()
        cv2.destroyAllWindows()
    else:
        print('%d matches found, not enough for homography estimation' % len(p1))
The problem with template matching is that it only works in very controlled environments. It will work perfectly if you take the template from the actual image, but it won't work if the resolution is different or even if the image is rotated a little bit.
I would suggest finding another algorithm more suitable for this problem. In the OpenCV docs you can find some specific algorithms for your problem.
