Resolution Manipulation for Template Matching in OpenCV - python

I am trying to use template matching to find an equation inside a given PDF document that is generated from LaTeX. When I use the code from here, I only get a good match when I crop the picture from the original page (converted to jpeg or png); however, when I compile the equation code separately and generate a jpg/png output of it, the matching goes tremendously wrong.
I believe the reason is related to the resolution, but since I am an amateur in this field, I cannot reasonably make the jpg generated from the standalone equation have the same pixel structure as the jpg of the whole page. Here is the code, copied (more or less) from the above-mentioned OpenCV website, which is an implementation for Python:
import cv2
from PIL import Image

img = cv2.imread('location of the original image', 0)
img2 = img.copy()
template = cv2.imread('location of the patch I look for', 0)
w, h = template.shape[::-1]

# All the 6 methods for comparison in a list
methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
           'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
method = eval(methods[0])

# Apply template matching
res = cv2.matchTemplate(img, template, method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

# If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take the minimum
if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
    top_left = min_loc
else:
    top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
print top_left, bottom_right

# Crop the original image with the found coordinates to make a qualitative comparison
img = Image.open('location of the original image')
cropped = img.crop((top_left[0], top_left[1], bottom_right[0], bottom_right[1]))
cropped.save('location to save the cropped image using the coordinates found by template matching')
Here is a sample page in which I look for the first equation:
The code to generate a specific standalone equation is as follows:
\documentclass[preview]{standalone}
\usepackage{amsmath}
\begin{document}\begin{align*}
(\mu_1+\mu_2)(\emptyset) = \mu_1(\emptyset) + \mu_2(\emptyset) = 0 + 0 =0
\label{eq_0}
\end{align*}
\end{document}
I compile this and then trim the whitespace around the equation, either using pdfcrop or using the .image() method in PythonMagick. Template matching with this trimmed output on the original page does not give a reasonable result. Here is the trimmed/converted output using pdfcrop/Mac's Preview.app:
Cropping the equation directly from the above page works perfectly. I would appreciate some explanation and help.
EDIT:
I also found the following, which does template matching by brute-forcing different possible scales:
http://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/
However, since I am willing to process as many as 1000 documents, this seems a very slow method to go for. Plus, I imagine there should be a more logical way of handling it, by somehow finding the relevant scales first.
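For what it's worth, here is a minimal sketch of that idea: if you control the rasterization of both the page and the standalone equation, the relevant scale is just the ratio of the two DPI values, so the template can be resized once instead of brute-forcing scales. All file names and DPI values below are assumptions for illustration.
import cv2

# Hypothetical DPI values: the densities used when converting the PDF page and
# the standalone equation to png (e.g. with pdftoppm -r or ImageMagick -density)
dpi_page = 300
dpi_template = 96

page = cv2.imread('page.png', 0)          # hypothetical file names
template = cv2.imread('equation.png', 0)

# Resize the template so its pixels cover the same physical size as the page's
scale = dpi_page / float(dpi_template)
template = cv2.resize(template, None, fx=scale, fy=scale,
                      interpolation=cv2.INTER_AREA)

res = cv2.matchTemplate(page, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
print(max_val, max_loc)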

Instead of template matching you could use features, i.e. keypoints with descriptors. They are scale-invariant, so you do not need to iterate over differently scaled versions of the image.
The Python example find_obj.py provided with OpenCV works with BRISK features for your given example:
python find_obj.py --feature=brisk rB4Yy_big.jpg ZjBAA.jpg
Note that I did not use the cropped version of the formula to search for, but a version with some white pixels around it, so the keypoint detection can work correctly. There needs to be some space around it because keypoints have to be completely inside the image; otherwise the descriptors cannot be calculated.
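If your formula images are already tightly cropped, one way to add that space is a constant white border, for example (border size and file names are arbitrary here):
import cv2

# Pad the tightly cropped formula with 30 white pixels on each side so that
# keypoints near the border can still get descriptors computed
formula = cv2.imread('ZjBAA.jpg', 0)
padded = cv2.copyMakeBorder(formula, 30, 30, 30, 30,
                            cv2.BORDER_CONSTANT, value=255)
cv2.imwrite('ZjBAA_padded.jpg', padded)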
The big image is the original from your post.
One additional remark: you will always get some matches. If the formula image you are searching for is not present in the big image, the matches will be nonsensical. If you need to sort out these false positives, you have the following options:
Check if the average distance of the resulting DMatches is small enough.
Check if the transformation matrix can be calculated.
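A rough sketch of both checks, assuming good_matches is the list of ratio-test-filtered DMatch objects (filter_matches in the code below could be extended to return it) and p1/p2 are the corresponding point arrays; the distance threshold is a made-up value you would tune on your own data:
import numpy as np
import cv2

def is_plausible(good_matches, p1, p2, max_avg_distance=60.0):
    # Too few correspondences: a homography cannot be estimated
    if len(p1) < 4:
        return False
    # Check 1: average descriptor distance of the surviving matches
    if np.mean([m.distance for m in good_matches]) > max_avg_distance:
        return False
    # Check 2: a RANSAC homography must exist and keep enough inliers
    H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
    return H is not None and np.sum(status) >= 4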
Edit: Since you asked for it, here is a version that draws the bounding box around the found formula instead of the matches:
#!/usr/bin/env python

# Python 2/3 compatibility
from __future__ import print_function

import numpy as np
import cv2

def init_feature():
    detector = cv2.BRISK_create()
    norm = cv2.NORM_HAMMING
    matcher = cv2.BFMatcher(norm)
    return detector, matcher

def filter_matches(kp1, kp2, matches, ratio=0.75):
    mkp1, mkp2 = [], []
    for m in matches:
        if len(m) == 2 and m[0].distance < m[1].distance * ratio:
            m = m[0]
            mkp1.append(kp1[m.queryIdx])
            mkp2.append(kp2[m.trainIdx])
    p1 = np.float32([kp.pt for kp in mkp1])
    p2 = np.float32([kp.pt for kp in mkp2])
    kp_pairs = zip(mkp1, mkp2)
    return p1, p2, kp_pairs

def explore_match(win, img1, img2, kp_pairs, status=None, H=None):
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    vis = np.zeros((max(h1, h2), w1 + w2), np.uint8)
    vis[:h1, :w1] = img1
    vis[:h2, w1:w1 + w2] = img2
    vis = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)
    if H is not None:
        corners = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]])
        corners = np.int32(cv2.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2) + (w1, 0))
        cv2.polylines(vis, [corners], True, (0, 0, 255))
    cv2.imshow(win, vis)
    return vis

if __name__ == '__main__':
    img1 = cv2.imread('rB4Yy_big.jpg', 0)
    img2 = cv2.imread('ZjBAA.jpg', 0)
    detector, matcher = init_feature()
    kp1, desc1 = detector.detectAndCompute(img1, None)
    kp2, desc2 = detector.detectAndCompute(img2, None)
    raw_matches = matcher.knnMatch(desc1, trainDescriptors=desc2, k=2)
    p1, p2, kp_pairs = filter_matches(kp1, kp2, raw_matches)
    if len(p1) >= 4:
        H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
        print('%d / %d inliers/matched' % (np.sum(status), len(status)))
        vis = explore_match('find_obj', img1, img2, kp_pairs, status, H)
        cv2.waitKey()
        cv2.destroyAllWindows()
    else:
        print('%d matches found, not enough for homography estimation' % len(p1))

The problem with template matching is that it only works in very controlled environments. It will work perfectly if you take the template from the actual image, but it won't work if the resolution is different or even if the image is slightly rotated.
I would suggest finding another algorithm more suitable for this problem; in the OpenCV docs you can find some specific algorithms for this kind of task.
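As a minimal sketch of one such alternative, ORB keypoints with a ratio-test match are robust to moderate scale and rotation changes (file names are placeholders):
import cv2

page = cv2.imread('page.png', 0)
equation = cv2.imread('equation.png', 0)

orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(equation, None)
kp2, des2 = orb.detectAndCompute(page, None)

# Brute-force Hamming matcher with a Lowe-style ratio test
matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
matches = matcher.knnMatch(des1, des2, k=2)
good = [m for m, n in matches if m.distance < 0.75 * n.distance]
print(len(good), 'good matches')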

Related

Using Keypoint feature matching + Homography to straighten document (Aadhaar)

Hi, I'm trying to create an OCR where the model should be able to read an uploaded document. However, a lot of the time the documents uploaded are skewed or tilted. I plan to straighten and/or resize the document based on a template.
To achieve this, I intend to use feature matching and homography. However, whenever I calculate my keypoints and descriptors (using ORB) and try to match them using brute-force matching, none of the features seem to match. Here's the code that I've used so far and the results with it. Can someone point me in the right direction if I'm missing something or doing it in a certain incorrect way?
import cv2
import matplotlib.pyplot as plt
import numpy as np

def straighten_image(ORIG_IMG, IMG2):
    # Read both the images:
    orig_image = cv2.imread(ORIG_IMG)
    img_input = cv2.imread(IMG2)
    orig_gray_scale = cv2.cvtColor(orig_image, cv2.COLOR_BGR2GRAY)
    gray_scale_img = cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY)

    # Detect ORB features and compute descriptors
    MAX_NUM_FEATURES = 100
    orb = cv2.ORB_create(MAX_NUM_FEATURES)
    keypoints1, descriptors1 = orb.detectAndCompute(orig_gray_scale, None)
    keypoints2, descriptors2 = orb.detectAndCompute(gray_scale_img, None)

    # Display images with keypoints
    orig_wid_decriptors = cv2.drawKeypoints(orig_gray_scale, keypoints1, outImage=np.array([]), color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    inp_wid_decriptors = cv2.drawKeypoints(img_input, keypoints2, outImage=np.array([]), color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

    # Match features
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = matcher.match(descriptors1, descriptors2, None)
    print(type(matches))

    # Sort matches by distance (sorted() also works when match() returns a tuple)
    matches = sorted(matches, key=lambda x: x.distance)

    # Remove not-so-good matches
    numGoodMatches = int(len(matches) * 0.1)
    matches = matches[:numGoodMatches]

    # Draw top matches
    im_matches = cv2.drawMatches(orig_gray_scale, keypoints1, gray_scale_img, keypoints2, matches, None)
    cv2.imshow("", im_matches)
    cv2.waitKey(0)

    # Homography
    points1 = np.zeros((len(matches), 2), dtype=np.float32)
    points2 = np.zeros((len(matches), 2), dtype=np.float32)
    for i, match in enumerate(matches):
        points1[i, :] = keypoints1[match.queryIdx].pt
        points2[i, :] = keypoints2[match.trainIdx].pt

    # Find homography:
    h, mask = cv2.findHomography(points2, points1, cv2.RANSAC)

    # Use the homography to warp the input image onto the template's frame
    height, width = orig_gray_scale.shape
    inp_reg = cv2.warpPerspective(gray_scale_img, h, (width, height), borderValue=255)
    return inp_reg

template = "template_aadhaar.jpg"
test = "test.jpeg"
str_img = straighten_image(template, test)
cv2.imshow("", str_img)
cv2.waitKey(0)
EDIT: If I use my own ID card (perfectly straight) as the template and try to align the same ID card when it is tilted, it matches the features and re-aligns the tilted image perfectly. However, I need the model to be able to re-align any other ID card based on the template. By any ID, I mean the details could be different, but the location and font would be exactly the same.
EDIT #2: As suggested by @Olli, I tried using a template with only those features that are the same for all Aadhaar cards. Image attached. But the feature matching is still a bit arbitrary.
Feature matching tries to detect the most significant features on an image and tries to match them. This only works if the features really are the same. If they are similar but different, it will fail.
If you have some features that are always the same (e.g. the logo on the top left), you could try to create a template with only these features and blank out all other areas, i.e. remove the person, the name, the QR code and so on.
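A hypothetical sketch of that idea: start from a straight reference card and paint the varying regions white before using it as a template (all coordinates below are made up):
import cv2

template = cv2.imread('aadhaar_reference.jpg')
# White out the regions that differ between cards; coordinates are placeholders
for (x, y, w, h) in [(20, 80, 120, 150),    # photo
                     (160, 90, 250, 60),    # name and details
                     (420, 60, 150, 150)]:  # QR code
    cv2.rectangle(template, (x, y), (x + w, y + h), (255, 255, 255), -1)
cv2.imwrite('aadhaar_features_only.jpg', template)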
But because there are more differences ("Government of India" sits inside the green area in one image and above it in the other, ...) than similarities, I would try to find the rotation based on the corners and/or the edges of the shape.
For example:
convert to grayscale
perform canny edge detection
detect corners, e.g. using cv2.goodFeaturesToTrack. If some corners are hidden, try finding the sides using Hough lines instead.
undistort
If some images are rotated 90, 180 or 270 degrees after undistortion, you could use a filter to find the orange and green areas and rotate so that this area is at the top again.
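A rough sketch of the first three steps in the list above (the file name is hypothetical and all parameters are guesses that would need tuning):
import cv2
import numpy as np

img = cv2.imread('aadhaar_photo.jpg')  # hypothetical input

# 1. Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 2. Canny edge detection
edges = cv2.Canny(gray, 50, 150)

# 3. Corner candidates on the edge map
corners = cv2.goodFeaturesToTrack(edges, maxCorners=50,
                                  qualityLevel=0.01, minDistance=30)

# Fallback: straight sides via a probabilistic Hough transform
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                        minLineLength=100, maxLineGap=10)

print(0 if corners is None else len(corners),
      0 if lines is None else len(lines))
The four outer corners found this way would then feed cv2.getPerspectiveTransform and cv2.warpPerspective for the undistortion step.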

How to properly filter an image using OpenCV, to read and extract the text with the highest possible effectiveness

This is what I do: I take some images, and some of them contain information that I need. These are the images:
How do I find that information? I use a template that contains two symbols (Euro and Dollar); when these symbols are found in any of the images, I can process the image and try to extract the data that I need.
How do I extract the data? I take the dimensions of the found match, and since I know that the information to extract will always be to the right of the match, I draw a box towards the right edge of my image, which ensures I have a box with the data to extract.
Here is the code, I will divide it into several sections to explain the process a little better:
1) Initial settings for the code (imports, a list of images to be processed, a couple of functions to filter the image, and finally the configuration set for reading data with Tesseract):
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
from pytesseract import Output

imagenes = ["monitor1.jpg", "monitor2.jpg", "monitor3.jpg"]

# Get grayscale image
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Noise removal (this is the filter I am using)
def remove_noise(image):
    return cv2.medianBlur(image, 5)

# The configuration we will use to read the images:
my_config = r"--psm 11 --oem 3"
2) Next, the template we will try to match is read, and we take its dimensions (w = width and h = height).
We define the methods used to find the matches and enter a loop, reviewing image by image and trying to find a match:
# Reading the template (Euro and Dollar):
# template_simbolo = cv2.imread('template_euro_dolar.jpg', 0)
template = cv2.imread('template_simbolos.jpg', 0)
w, h = template.shape[::-1]

# The methods we will use to find the matches (this should be a list of 6
# methods, but working with a big list of images it takes an eternity, so for
# now we only use one for the tests):
methods = ['cv2.TM_CCOEFF']

# A loop where we filter every image, find the matches if there are any,
# and extract the data:
for img in imagenes:
    print("**************************")
    # Image to read:
    img_rgb = cv2.imread(img)
    # The image filtered in gray:
    gray = get_grayscale(img_rgb)
    img_gray = remove_noise(gray)
    # With res we will find the matches, but we only take the accurate ones (80% or more)
    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    threshold = 0.8
    loc = np.where(res >= threshold)
    print(loc)
    print(len(loc[0]))
3) In this part of the code, we first enclose the match in a box, filter the original image, and look for the coordinates of the match. Once this is done, we enclose in a box the section where the desired information is found.
    # If loc contains values, it is because there is a match
    # (still inside the loop over imagenes):
    if len(loc[0]) > 0:
        print("Match Found")
        # We enclose the found matches in a box and save the result:
        for pt in zip(*loc[::-1]):
            cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 2)
        cv2.imwrite('res_monitor.png', img_rgb)
        # A loop over the matching methods:
        for meth in methods:
            # We filter the image and change it to gray
            gray = get_grayscale(img_rgb)
            img_gray = remove_noise(gray)
            # We evaluate the method to use, and according to it we have some
            # default coordinates
            method = eval(meth)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            print("min_val:", min_val)
            print("max_val:", max_val)
            print("min_loc:", min_loc)
            print("max_loc:", max_loc)
            # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take the minimum
            if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
                top_left = min_loc
            else:
                top_left = max_loc
            # To know the bottom-right coordinate, we take the value of
            # top_left and add the template's width w and height h.
            w, h = template.shape[::-1]
            bottom_right = (top_left[0] + w, top_left[1] + h)
            print("top_left:", top_left)
            print("bottom_right:", bottom_right)
            print("x:", top_left[0])
            print("y:", top_left[1])
            # Now, in our original image, which we previously filtered, we
            # place a box or rectangle according to the dimensions established
            # before (top_left and bottom_right).
            w, h = img_gray.shape[::-1]
            print("w:", w)
            print("h:", bottom_right[1])
            cv2.rectangle(img_gray, top_left, bottom_right, 255, 2)
            imagen = cv2.rectangle(img_gray, top_left, (w, bottom_right[1]), 255, 2)
            x = top_left[0]
            y = top_left[1]
            h = bottom_right[1]
            # Finally, we crop the area established above, and with pytesseract
            # we extract the data that we can obtain from the cropped image.
            crop_image = img_gray[y:h, x:w]
            cv2.imwrite("croped.jpg", crop_image)
            data = pytesseract.image_to_data(crop_image, config=my_config,
                                             output_type=Output.DICT)
            print(data, "\n")
4) Finally, we create a dictionary to save the rates of the Euro and the Dollar; if everything goes well, they will be saved correctly.
At the end of this process, a plot is shown to verify that the information was extracted correctly.
            # We create a dictionary to store the values of Euro and Dollar
            i = 0
            currencies = {}
            for value in data["text"]:
                print(value)
                try:
                    currency = value.replace(".", "").replace(",", ".")
                    currency = float(currency)
                    i = i + 1
                    if i == 1:
                        currencies["Euro"] = currency
                    elif i == 2 and currency < currencies["Euro"]:
                        currencies["Dolar"] = currency
                except ValueError:
                    pass
            # We pass the image to string to obtain the rates of the currencies
            text = pytesseract.image_to_string(crop_image, config=my_config)
            print(text)
            print(currencies)
            # We plot the results and confirm that the data extraction and the
            # demarcated area are correct.
            plt.subplot(121), plt.imshow(res, cmap='gray')
            plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
            plt.subplot(122), plt.imshow(img_gray, cmap='gray')
            plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
            plt.suptitle(meth)
            plt.show()
    else:
        print("DOES NOT MATCH")
The results:
With the code and all the logic presented above, it usually works very well on these images, but for some reason it sometimes doesn't read the image properly and doesn't save the information in the desired way.
As can be seen, the area that the code takes is the desired one, but the currency dictionary does not record any information:
Which is strange, because if I run the code on a longer list of images, that same image is recognized perfectly.
So the problem is that sometimes it works and sometimes it doesn't, and I'm not quite sure why. Does anyone know what I can polish? What am I doing wrong? Any advice?

Using opencv to find the most similar image that contains another image

If the title isn't clear: let's say I have a list of images (10k+), and I have a target image I am searching for.
Here's an example of the target image:
Here's an example of images I will want to be searching to find something 'similar' (ex1, ex2, and ex3):
Here's the matching I do (I use KAZE):
from matplotlib import pyplot as plt
import numpy as np
import cv2
from typing import List
import os
import imutils

def calculate_matches(des1: List[cv2.KeyPoint], des2: List[cv2.KeyPoint]):
    """
    does a matching algorithm to match if keypoints 1 and 2 are similar
    :param des1: a numpy array of floats that are the descriptors of the keypoints
    :param des2: a numpy array of floats that are the descriptors of the keypoints
    :return:
    """
    # BF matcher with default params
    bf = cv2.BFMatcher(cv2.NORM_L2)
    matches = bf.knnMatch(des1, des2, k=2)
    topResults = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            topResults.append([m])
    return topResults

def compare_images_kaze():
    cwd = os.getcwd()
    target = os.path.join(cwd, 'opencv_target', 'target.png')
    images_list = os.listdir('opencv_images')
    for image in images_list:
        # Get my 2 images
        img2 = cv2.imread(target)
        img1 = cv2.imread(os.path.join(cwd, 'opencv_images', image))
        for i in range(0, 360, int(360 / 8)):
            # Rotate my image by i
            img_target_rotation = imutils.rotate_bound(img2, i)
            # Initiate KAZE object with default values
            kaze = cv2.KAZE_create()
            kp1, des1 = kaze.detectAndCompute(img1, None)
            kp2, des2 = kaze.detectAndCompute(img2, None)
            matches = calculate_matches(des1, des2)
            try:
                score = 100 * (len(matches) / min(len(kp1), len(kp2)))
            except ZeroDivisionError:
                score = 0
            print(image, score)
            img3 = cv2.drawMatchesKnn(img1, kp1, img_target_rotation, kp2, matches,
                                      None, flags=2)
            img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2RGB)
            plt.imshow(img3)
            plt.show()
            plt.clf()

if __name__ == '__main__':
    compare_images_kaze()
Here's the result of my code:
ex1.png 21.052631578947366
ex2.png 0.0
ex3.png 42.10526315789473
It does alright! It was able to tell that ex1 is similar and ex2 is not; however, it states that ex3 is similar (even more similar than ex1). Is there any extra pre-processing or post-processing (maybe ML, assuming ML is actually useful), or just a change to my method, that would keep only ex1 as similar and not ex3?
(Note: this score I compute is something I found online. I'm not sure if it's an accurate way to go about it.)
ADDED MORE EXAMPLES BELOW
Another set of examples:
Here's what I am searching for
I want the above image to be similar to the middle and bottom images. (NOTE: I rotate my target image by 45 degrees and compare it to the images below.)
Feature matching (as described in the answers below) was useful in finding similarity with the second image, but not with the third image (even after rotating it properly).
Detecting The Most Similar Image
The Code
You can use template matching, where the image you want to detect is the template. I have that small image saved in template.png, and the other three images in img1.png, img2.png and img3.png.
I defined a function that uses cv2.matchTemplate to calculate the confidence that the template appears in an image. Using the function on every image, the one that results in the highest confidence is the image that contains the template:
import cv2

template = cv2.imread("template.png", 0)
files = ["img1.png", "img2.png", "img3.png"]

for name in files:
    img = cv2.imread(name, 0)
    print(f"Confidence for {name}:")
    print(cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED).max())
The Output:
Confidence for img1.png:
0.8906427
Confidence for img2.png:
0.4427919
Confidence for img3.png:
0.5933967
The Explanation:
Import the opencv module, and read in the template image as grayscale by setting the second parameter of the cv2.imread method to 0:
import cv2
template = cv2.imread("template.png", 0)
Define the list of images among which you want to determine which one contains the template:
files = ["img1.png", "img2.png", "img3.png"]
Loop through the filenames and read in each one as a grayscale image:
for name in files:
    img = cv2.imread(name, 0)
Finally, you can use cv2.matchTemplate to detect the template in each image. There are many detection methods you can use, but for this I decided to use the cv2.TM_CCOEFF_NORMED method:
print(f"Confidence for {name}:")
print(cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED).max())
The output of the cv2.TM_CCOEFF_NORMED method ranges between -1 and 1, and as you can see, the function successfully detected that the first image is most likely to contain the template image (it has the highest confidence).
The Visualization
The Code
If detecting which image contains the template isn't enough, and you want a visualization, you can try the code below:
import cv2
import numpy as np

def confidence(img, template):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
    conf = res.max()
    return np.where(res == conf), conf

files = ["img1.png", "img2.png", "img3.png"]
template = cv2.imread("template.png")
h, w, _ = template.shape

for name in files:
    img = cv2.imread(name)
    ([y], [x]), conf = confidence(img, template)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    text = f'Confidence: {round(float(conf), 2)}'
    cv2.putText(img, text, (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
    cv2.imshow(name, img)

cv2.imshow('Template', template)
cv2.waitKey(0)
The Output:
The Explanation:
Import the necessary libraries:
import cv2
import numpy as np
Define a function that will take in a full image and a template image. As the cv2.matchTemplate method requires grayscale images, convert the 2 images into grayscale:
def confidence(img, template):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
Use the cv2.matchTemplate method to detect the template in the image, and return the position of the point with the highest confidence together with that confidence:
    res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
    conf = res.max()
    return np.where(res == conf), conf
Define the list of images among which you want to determine the one containing the template, and read in the template image:
files = ["img1.png", "img2.png", "img3.png"]
template = cv2.imread("template.png")
Get the size of the template image to later use for drawing a rectangle on the images:
h, w, _ = template.shape
Loop through the filenames and read in each image. Using the confidence function we defined before, get the x, y position of the top-left corner of the detected template and the confidence of the detection:
for name in files:
    img = cv2.imread(name)
    ([y], [x]), conf = confidence(img, template)
Draw a rectangle on the image at the corner and put the text on the image. Finally, show the image:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    text = f'Confidence: {round(float(conf), 2)}'
    cv2.putText(img, text, (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
    cv2.imshow(name, img)
Also, show the template for comparison:
cv2.imshow('Template', template)
cv2.waitKey(0)
I'm not sure if the given images resemble your actual task or data, but for this kind of image you could try simple template matching, cf. this OpenCV tutorial.
Basically, I just implemented the tutorial with some modifications:
import cv2
import matplotlib.pyplot as plt

# Read images
examples = [cv2.imread(img) for img in ['ex1.png', 'ex2.png', 'ex3.png']]
target = cv2.imread('target.png')
h, w = target.shape[:2]

# Iterate examples
for i, img in enumerate(examples):

    # Template matching
    # cf. https://docs.opencv.org/4.5.2/d4/dc6/tutorial_py_template_matching.html
    res = cv2.matchTemplate(img, target, cv2.TM_CCOEFF_NORMED)

    # Get location of maximum
    _, max_val, _, top_left = cv2.minMaxLoc(res)

    # Set up threshold for deciding whether the target was found or not
    thr = 0.7
    if max_val > thr:

        # Show found target in example
        bottom_right = (top_left[0] + w, top_left[1] + h)
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

    # Visualization
    plt.figure(i, figsize=(10, 5))
    plt.subplot(1, 2, 1), plt.imshow(img[..., [2, 1, 0]]), plt.title('Example')
    plt.subplot(1, 2, 2), plt.imshow(res, vmin=0, vmax=1, cmap='gray')
    plt.title('Matching result'), plt.colorbar(), plt.tight_layout()

plt.show()
These are the results:
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.9.1
PyCharm: 2021.1.1
Matplotlib: 3.4.1
OpenCV: 4.5.1
----------------------------------------
EDIT: To emphasize the information from the different colors, one might use the hue channel from the HSV color space for the template matching:
import cv2
import matplotlib.pyplot as plt

# Read images
examples = [
    [cv2.imread(img) for img in ['ex1.png', 'ex2.png', 'ex3.png']],
    [cv2.imread(img) for img in ['ex12.png', 'ex22.png', 'ex32.png']]
]
targets = [
    cv2.imread('target.png'),
    cv2.imread('target2.png')
]

# Iterate examples and targets
for i, (ex, target) in enumerate(zip(examples, targets)):
    for j, img in enumerate(ex):

        # Rotate last image from second data set
        if (i == 1) and (j == 2):
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        h, w = target.shape[:2]

        # Get hue channel from HSV color space
        target_h = cv2.cvtColor(target, cv2.COLOR_BGR2HSV)[..., 0]
        img_h = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[..., 0]

        # Template matching
        # cf. https://docs.opencv.org/4.5.2/d4/dc6/tutorial_py_template_matching.html
        res = cv2.matchTemplate(img_h, target_h, cv2.TM_CCOEFF_NORMED)

        # Get location of maximum
        _, max_val, _, top_left = cv2.minMaxLoc(res)

        # Set up threshold for deciding whether the target was found or not
        thr = 0.6
        if max_val > thr:

            # Show found target in example
            bottom_right = (top_left[0] + w, top_left[1] + h)
            cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

        # Visualization
        plt.figure(i * 10 + j, figsize=(10, 5))
        plt.subplot(1, 2, 1), plt.imshow(img[..., [2, 1, 0]]), plt.title('Example')
        plt.subplot(1, 2, 2), plt.imshow(res, vmin=0, vmax=1, cmap='gray')
        plt.title('Matching result'), plt.colorbar(), plt.tight_layout()
        plt.savefig('{}.png'.format(i * 10 + j))

plt.show()
New results:
The Concept
We can use the cv2.matchTemplate method to detect where an image is within another image, but your second set of images involves rotation, and we'll also need to take the colors into account.
cv2.matchTemplate takes in an image, a template (the other image) and a template detection method, and returns a grayscale array in which the brightest point is the point with the most confidence that the template is there.
We can use the template at 4 different angles and take the one that results in the highest confidence. Whenever we detect a possible point that matches the template, we use a function (that we will define ourselves) to check whether the most frequent colors in the template are present in the detected patch of the image. If not, the patch is ignored, regardless of the confidence returned.
The Code
import cv2
import numpy as np

def frequent_colors(img, vals=3):
    colors, count = np.unique(np.vstack(img), return_counts=True, axis=0)
    sorted_by_freq = colors[np.argsort(count)]
    return sorted_by_freq[-vals:]

def get_templates(img):
    template = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    yield template  # original orientation
    for i in range(3):  # 90 clockwise, 180, 90 counterclockwise
        yield cv2.rotate(template, i)

def detect(img, template, min_conf=0.45):
    colors = frequent_colors(template)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    conf_max = min_conf
    shape = 0, 0, 0, 0
    for tmp in get_templates(template):
        h, w = tmp.shape
        res = cv2.matchTemplate(img_gray, tmp, cv2.TM_CCOEFF_NORMED)
        for y, x in zip(*np.where(res > conf_max)):
            conf = res[y, x]
            if conf > conf_max:
                seg = img[y:y + h, x:x + w]
                if all(np.any(np.all(seg == color, -1)) for color in colors):
                    conf_max = conf
                    shape = x, y, w, h
    return shape

files = ["img1_2.png", "img2_2.png", "img3_2.png"]
template = cv2.imread("template2.png")

for name in files:
    img = cv2.imread(name)
    x, y, w, h = detect(img, template)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    cv2.imshow(name, img)

cv2.imshow('Template', template)
cv2.waitKey(0)
The Output
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
Define a function, frequent_colors, that will take in an image and return the most frequent colors in it. An optional parameter, vals, is how many colors to return; if vals is 3, the 3 most frequent colors are returned:
def frequent_colors(img, vals=3):
    colors, count = np.unique(np.vstack(img), return_counts=True, axis=0)
    sorted_by_freq = colors[np.argsort(count)]
    return sorted_by_freq[-vals:]
Define a function, get_templates, that will take in an image, and yield the image (in grayscale) at 4 different angles - original, 90 clockwise, 180, and 90 counterclockwise:
def get_templates(img):
    template = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    yield template  # original orientation
    for i in range(3):  # 90 clockwise, 180, 90 counterclockwise
        yield cv2.rotate(template, i)
Define a function, detect, that will take in an image and a template image, and return the x, y, w, h of the bounding box of the detected template on the image, and for this function we will be utilizing the frequent_colors and get_templates functions defined earlier. The min_conf parameter will be the minimum amount of confidence needed to classify a detection as an actual detection:
def detect(img, template, min_conf=0.45):
Detect the three most frequent colors in the template and store them in a variable, colors. Also, define a grayscale version of the main image:
    colors = frequent_colors(template)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
Define the initial value for the greatest confidence detected, and initial values for the detected patch:
    conf_max = min_conf
    shape = 0, 0, 0, 0
Loop through the grayscale templates at the 4 angles, get the shape of each template (as rotation changes the shape), and use the cv2.matchTemplate method to get the grayscale array of detection confidences on the image:
    for tmp in get_templates(template):
        h, w = tmp.shape
        res = cv2.matchTemplate(img_gray, tmp, cv2.TM_CCOEFF_NORMED)
Loop through the x, y coordinates of the detected locations where the confidence is greater than conf_max, and store the confidence in a variable, conf. If conf is greater than the running greatest confidence (conf_max), proceed to check whether all three most frequent colors of the template are present in that patch of the image:
        for y, x in zip(*np.where(res > conf_max)):
            conf = res[y, x]
            if conf > conf_max:
                seg = img[y:y + h, x:x + w]
                if all(np.any(np.all(seg == color, -1)) for color in colors):
                    conf_max = conf
                    shape = x, y, w, h
At the end we can return the shape. If no template is detected in the image, the shape will be the initial values defined for it, 0, 0, 0, 0:
    return shape
Finally, loop through each image and use the detect function we defined to get the x, y, w, h of the bounding box. Use the cv2.rectangle method to draw the bounding box onto the images:
files = ["img1_2.png", "img2_2.png", "img3_2.png"]
template = cv2.imread("template2.png")

for name in files:
    img = cv2.imread(name)
    x, y, w, h = detect(img, template)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    cv2.imshow(name, img)

cv2.imshow('Template', template)
cv2.waitKey(0)
First, since the data appears in graphs, aren't you able to get the overlapping values from their underlying numerical data?
And have you tried performing edge detection for the change in color from white to blue and then from blue to red, fitting circles to those edges, and then checking whether they overlap?
Since the input data is quite controlled (no organic photography or video), perhaps you won't have to go the ML route.
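As a rough sketch of the circle-fitting idea (the file name and all Hough parameters are guesses that would need tuning on your data):
import cv2
import numpy as np

img = cv2.imread('graph.png')  # hypothetical input
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Fit circles with a Hough transform
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1.5, minDist=20,
                           param1=150, param2=40, minRadius=10, maxRadius=200)

if circles is not None:
    circles = np.round(circles[0]).astype(int)
    # Two circles overlap if their centers are closer than the sum of the radii
    for i in range(len(circles)):
        for j in range(i + 1, len(circles)):
            (x1, y1, r1), (x2, y2, r2) = circles[i], circles[j]
            if np.hypot(x2 - x1, y2 - y1) < r1 + r2:
                print('circles', i, 'and', j, 'overlap')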

How to perform automated detection and cropping of region of interest in an image in python?

I want to perform an operation on my region of interest, that is, the central rectangular table which you can see in the image.
I am able to give the coordinates of my region of interest manually and crop that part:
img = cv2.imread('test12.jpg', 0)
box = img[753:1915, 460:1315]
but I want to crop that part automatically, without giving the pixels or coordinates manually. Can anyone please help me with this?
http://picpaste.com/test12_-_Copy-BXqHMAnd.jpg is my original image.
http://picpaste.com/boxdemo-zHz57dBM.jpg is my cropped image.
To do this, I entered the coordinates of the desired region and cropped it.
But now I have to deal with many similar images where the coordinates of my region of interest will vary slightly. I want a method which will detect the table (my region of interest) and crop it.
Currently I'm using
img = cv2.imread('test12.jpg', 0)
box = img[753:1915, 460:1315]
to crop my image.
You could try using OpenCV template matching to find the coordinates of your rectangular table within the image.
Template Matching
The following is a test program that finds the coordinates of the images I am trying to find:
from __future__ import print_function
import cv2
import numpy as np
from matplotlib import pyplot as plt

try:
    img = cv2.imread(r'new_webcam_image.jpg', 0)
    template = cv2.imread(r'table_template.jpg', 0)
except IOError as e:
    print("({})".format(e))
else:
    img2 = img.copy()
    w, h = template.shape[::-1]
    # All the 6 methods for comparison in a list
    methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
               'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
    for meth in methods:
        img = img2.copy()
        method = eval(meth)
        # Apply template matching
        res = cv2.matchTemplate(img, template, method)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        print("Method: %s" % meth)
        print("min_val: ", min_val)
        print("max_val: ", max_val)
        print("min_loc: ", min_loc)
        print("max_loc: ", max_loc)
        print(" ")
        # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take the minimum
        if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
            top_left = min_loc
        else:
            top_left = max_loc
        bottom_right = (top_left[0] + w, top_left[1] + h)
        cv2.rectangle(img, top_left, bottom_right, 255, 2)
        plt.subplot(121), plt.imshow(res, cmap='gray')
        plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
        plt.subplot(122), plt.imshow(img, cmap='gray')
        plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
        plt.suptitle(meth)
        plt.show()
        # Crop the matched region (rows first, then columns)
        box = img[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
        cv2.imshow("cropped", box)
        cv2.waitKey(0)
I don't have a full solution for you. The code shown below is based on some code I used to fix output from a scanner. The template solution sounds like a better approach to me, but the following should give you something else to work with:
import cv2

imageSrc = cv2.imread("test12.jpg")

# First cut the source down slightly
h = imageSrc.shape[0]
w = imageSrc.shape[1]
cropInitial = 50
imageSrc = imageSrc[cropInitial:h - cropInitial, cropInitial:w - cropInitial]

# Threshold the image and find edges (to reduce the amount of pixels to count);
# convert to grayscale first, since findNonZero expects a single-channel image
imageGray = cv2.cvtColor(imageSrc, cv2.COLOR_BGR2GRAY)
ret, imageDest = cv2.threshold(imageGray, 220, 255, cv2.THRESH_BINARY_INV)
imageDest = cv2.Canny(imageDest, 100, 100, 3)

# Create a list of remaining pixels
points = cv2.findNonZero(imageDest)

# Calculate a bounding rectangle for these points
hull = cv2.convexHull(points)
x, y, w, h = cv2.boundingRect(hull)

# Crop the original image to the bounding rectangle
imageResult = imageSrc[y:y + h, x:x + w]
cv2.imwrite("test12 cropped.jpg", imageResult)
The output does not crop as tightly as you need; playing with the various threshold parameters should improve your results.
I suggest using imshow at various points on imageSrc and imageDest so you can see what is happening at each stage in the code. Hopefully this helps you progress.
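For example, a quick way to do that (reusing the variable names from the snippet above, so these lines assume imageSrc, imageDest and imageResult are still in scope):
# Show each intermediate stage; press any key to advance to the next window
for title, im in [("cropped source", imageSrc),
                  ("threshold + Canny", imageDest),
                  ("bounding-box crop", imageResult)]:
    cv2.imshow(title, im)
    cv2.waitKey(0)
cv2.destroyAllWindows()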

How to visualize descriptor matching using opencv module in python

I am trying to use OpenCV with Python. I wrote descriptor (SIFT, SURF, or ORB) matching code in the C++ version of OpenCV 2.4, and I want to convert it to OpenCV with Python. I found some documents on how to use OpenCV functions in C++, but for many of the OpenCV functions in Python I could not find out how to use them. Here is my Python code; my current problem is that I don't know how to use the C++ "drawMatches" from Python. I found cv2.DRAW_MATCHES_FLAGS_DEFAULT, but I have no idea how to use it. Here is my Python code of matching using ORB descriptors:
im1 = cv2.imread(r'C:\boldt.jpg')
im2 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im3 = cv2.imread(r'C:\boldt_resize50.jpg')
im4 = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
orbDetector2 = cv2.FeatureDetector_create("ORB")
orbDescriptorExtractor2 = cv2.DescriptorExtractor_create("ORB")
orbDetector4 = cv2.FeatureDetector_create("ORB")
orbDescriptorExtractor4 = cv2.DescriptorExtractor_create("ORB")
keypoints2 = orbDetector2.detect(im2)
(keypoints2, descriptors2) = orbDescriptorExtractor2.compute(im2,keypoints2)
keypoints4 = orbDetector4.detect(im4)
(keypoints4, descriptors4) = orbDescriptorExtractor4.compute(im4,keypoints4)
matcher = cv2.DescriptorMatcher_create('BruteForce-Hamming')
raw_matches = matcher.match(descriptors2, descriptors4)
img_matches = cv2.DRAW_MATCHES_FLAGS_DEFAULT(im2, keypoints2, im4, keypoints4, raw_matches)
cv2.namedWindow("Match")
cv2.imshow( "Match", img_matches);
Error message of the line "img_matches = cv2.DRAW_MATCHES_FLAGS_DEFAULT(im2, keypoints2, im4, keypoints4, raw_matches)"
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'long' object is not callable
I spent much time searching for documentation and examples of using OpenCV functions with Python. However, I am very frustrated because there is very little information about using OpenCV functions in Python. It would be extremely helpful if anyone could point me to documentation on how to use every function of the OpenCV module in Python. I appreciate your time and help.
I've also written something myself that just uses the OpenCV Python interface, and I didn't use scipy. drawMatches is part of OpenCV 3.0.0 and isn't part of OpenCV 2, which is what I'm currently using. Even though I'm late to the party, here's my own implementation that mimics drawMatches to the best of my ability.
I've provided my own images where one is of a camera man, and the other one is the same image but rotated by 55 degrees counter-clockwise.
The basic premise of what I wrote is that I allocate an output RGB image where the number of rows is the maximum of the two images, to accommodate placing both of the images in the output image, and the number of columns is simply the sum of both images' columns. I place each image in its corresponding spot, then run through a loop over all of the matched keypoints. I extract which keypoints matched between the two images, then extract their (x, y) coordinates. I then draw circles at each of the detected locations and draw a line connecting these circles together.
Bear in mind that the detected keypoint in the second image is with respect to its own coordinate system. If you want to place it in the final output image, you need to offset the column coordinate by the number of columns of the first image, so that the column coordinate is with respect to the coordinate system of the output image.
Without further ado:
import numpy as np
import cv2

def drawMatches(img1, kp1, img2, kp2, matches):
    """
    My own implementation of cv2.drawMatches as OpenCV 2.4.9
    does not have this function available but it's supported in
    OpenCV 3.0.0

    This function takes in two images with their associated
    keypoints, as well as a list of DMatch data structures (matches)
    that contains which keypoints matched in which images.

    An image will be produced where a montage is shown with
    the first image followed by the second image beside it.

    Keypoints are delineated with circles, while lines are connected
    between matching keypoints.

    img1,img2 - Grayscale images
    kp1,kp2 - Detected list of keypoints through any of the OpenCV keypoint
              detection algorithms
    matches - A list of matches of corresponding keypoints through any
              OpenCV keypoint matching algorithm
    """

    # Create a new output image that concatenates the two images together
    # (a.k.a) a montage
    rows1 = img1.shape[0]
    cols1 = img1.shape[1]
    rows2 = img2.shape[0]
    cols2 = img2.shape[1]

    out = np.zeros((max([rows1, rows2]), cols1 + cols2, 3), dtype='uint8')

    # Place the first image to the left
    out[:rows1, :cols1, :] = np.dstack([img1, img1, img1])

    # Place the next image to the right of it
    out[:rows2, cols1:cols1 + cols2, :] = np.dstack([img2, img2, img2])

    # For each pair of points we have between both images
    # draw circles, then connect a line between them
    for mat in matches:

        # Get the matching keypoints for each of the images
        img1_idx = mat.queryIdx
        img2_idx = mat.trainIdx

        # x - columns
        # y - rows
        (x1, y1) = kp1[img1_idx].pt
        (x2, y2) = kp2[img2_idx].pt

        # Draw a small circle at both co-ordinates
        # radius 4, colour blue, thickness = 1
        cv2.circle(out, (int(x1), int(y1)), 4, (255, 0, 0), 1)
        cv2.circle(out, (int(x2) + cols1, int(y2)), 4, (255, 0, 0), 1)

        # Draw a line in between the two points
        # thickness = 1, colour blue
        cv2.line(out, (int(x1), int(y1)), (int(x2) + cols1, int(y2)), (255, 0, 0), 1)

    # Show the image
    cv2.imshow('Matched Features', out)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
To illustrate that this works, here are the two images that I used:
I used OpenCV's ORB detector to detect the keypoints, and used the normalized Hamming distance as the distance measure for similarity as this is a binary descriptor. As such:
import numpy as np
import cv2

img1 = cv2.imread('cameraman.png', 0)        # Original image - read as grayscale
img2 = cv2.imread('cameraman_rot55.png', 0)  # Rotated image - read as grayscale

# Create ORB detector with 1000 keypoints with a scaling pyramid factor
# of 1.2
orb = cv2.ORB(1000, 1.2)

# Detect keypoints of original image
(kp1, des1) = orb.detectAndCompute(img1, None)

# Detect keypoints of rotated image
(kp2, des2) = orb.detectAndCompute(img2, None)

# Create matcher
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# Do matching
matches = bf.match(des1, des2)

# Sort the matches based on distance. Least distance
# is better
matches = sorted(matches, key=lambda val: val.distance)

# Show only the top 10 matches
drawMatches(img1, kp1, img2, kp2, matches[:10])
This is the image I get:
You can visualize the feature matching in Python as follows. Note the use of the scipy library.
# matching features of two images
import cv2
import sys
import scipy as sp

if len(sys.argv) < 3:
    print 'usage: %s img1 img2' % sys.argv[0]
    sys.exit(1)

img1_path = sys.argv[1]
img2_path = sys.argv[2]

img1 = cv2.imread(img1_path, cv2.CV_LOAD_IMAGE_GRAYSCALE)
img2 = cv2.imread(img2_path, cv2.CV_LOAD_IMAGE_GRAYSCALE)

detector = cv2.FeatureDetector_create("SURF")
descriptor = cv2.DescriptorExtractor_create("BRIEF")
matcher = cv2.DescriptorMatcher_create("BruteForce-Hamming")

# detect keypoints
kp1 = detector.detect(img1)
kp2 = detector.detect(img2)

print '#keypoints in image1: %d, image2: %d' % (len(kp1), len(kp2))

# descriptors
k1, d1 = descriptor.compute(img1, kp1)
k2, d2 = descriptor.compute(img2, kp2)

print '#keypoints in image1: %d, image2: %d' % (len(d1), len(d2))

# match the keypoints
matches = matcher.match(d1, d2)

# visualize the matches
print '#matches:', len(matches)
dist = [m.distance for m in matches]

print 'distance: min: %.3f' % min(dist)
print 'distance: mean: %.3f' % (sum(dist) / len(dist))
print 'distance: max: %.3f' % max(dist)

# threshold: half the mean
thres_dist = (sum(dist) / len(dist)) * 0.5

# keep only the reasonable matches
sel_matches = [m for m in matches if m.distance < thres_dist]

print '#selected matches:', len(sel_matches)

# #####################################
# visualization of the matches
h1, w1 = img1.shape[:2]
h2, w2 = img2.shape[:2]
view = sp.zeros((max(h1, h2), w1 + w2, 3), sp.uint8)
# copy the grayscale images into channel 0, then replicate to the others
view[:h1, :w1, 0] = img1
view[:h2, w1:, 0] = img2
view[:, :, 1] = view[:, :, 0]
view[:, :, 2] = view[:, :, 0]

for m in sel_matches:
    # draw the matches with a random color per line
    # print m.queryIdx, m.trainIdx, m.distance
    color = tuple([sp.random.randint(0, 255) for _ in xrange(3)])
    cv2.line(view, (int(k1[m.queryIdx].pt[0]), int(k1[m.queryIdx].pt[1])), (int(k2[m.trainIdx].pt[0] + w1), int(k2[m.trainIdx].pt[1])), color)

cv2.imshow("view", view)
cv2.waitKey()
As the error message says, DRAW_MATCHES_FLAGS_DEFAULT is of type 'long': it is a constant defined by the cv2 module, not a function. Unfortunately, the function you want, drawMatches, only exists in OpenCV's C++ interface in the 2.x series.
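For reference, a minimal sketch of what the call looks like on OpenCV 3.0+ (reusing the variable names from the question above; this is not available on the asker's OpenCV 2.x build):
# Draw the ten best matches; drawMatches exists in the Python bindings
# from OpenCV 3.0 onwards
raw_matches = sorted(raw_matches, key=lambda m: m.distance)
img_matches = cv2.drawMatches(im2, keypoints2, im4, keypoints4,
                              raw_matches[:10], None)
cv2.imshow("Match", img_matches)
cv2.waitKey(0)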
