This is what I do: I take some images, and some of them contain information that I need. These are the images:
How do I find that information? I use a template that contains two symbols (Euro and Dollar). When these symbols are found in any of the images, I can process that image and try to extract the data that I need.
How do I extract the data? I take the dimensions of the found match, and since I know that the information to extract will always be contained to the right of the match, I dimension a box towards the right edge of my image, with which I make sure I have a box with the data to extract.
Here is the code, I will divide it into several sections to explain the process a little better:
1) Initial Settings for the Code (Imports, a list of images which will be processed, a couple of functions to filter the image and finally the configuration set for reading data from Tesseract):
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
from pytesseract import Output
imagenes = ["monitor1.jpg", "monitor2.jpg", "monitor3.jpg"]
# Grayscale conversion
def get_grayscale(image):
    """Return *image* converted from BGR to a single-channel grayscale image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
# Noise Removal (This is the filter I am using)
def remove_noise(image, ksize=5):
    """Return *image* smoothed with a median blur.

    ksize is the aperture size handed to ``cv2.medianBlur``. It defaults to
    5, the value that used to be hard-coded here, so existing callers are
    unaffected.
    """
    return cv2.medianBlur(image, ksize)
# The configuration we will use to read the images:
my_config = r"--psm 11 --oem 3"
2) Next, the template with which we will try to match is read and we take its dimensions (w=width and h=height).
We present the methods to find the matches and enter a loop reviewing image by image, trying to find a matching image:
# Reading the Template (Euro and Dollar):
# template_simbolo = cv2.imread('template_euro_dolar.jpg', 0)
template = cv2.imread('template_simbolos.jpg', 0)
# cv2.imread returns None instead of raising when a file cannot be read,
# so fail here with a clear message rather than on template.shape below.
if template is None:
    raise FileNotFoundError("Could not read template image 'template_simbolos.jpg'")
w, h = template.shape[::-1]
# The methods we will use to find the matches (this should be a list of 6
# methods, but with a big list of images it takes an eternity, so only one
# is used for the tests for now):
methods = ['cv2.TM_CCOEFF']
# A loop where we filter every image, find the matches (if there are any)
# and extract the data:
for img in imagenes:
    print("**************************")
    # Image to read:
    img_rgb = cv2.imread(img)
    if img_rgb is None:
        # Skip unreadable files instead of crashing inside cv2.cvtColor.
        print("Could not read image:", img)
        continue
    # The image filtered in gray:
    gray = get_grayscale(img_rgb)
    img_gray = remove_noise(gray)
    # With res we find the matches, keeping only the accurate ones (80% or more)
    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    threshold = 0.8
    loc = np.where(res >= threshold)
    print(loc)
    print(len(loc[0]))
3) In this part of the code, first we enclose the match in a box, we filter the original image, and we look for the coordinates of the match, once this is done, we proceed to enclose in a box the section where the desired information is found.
# If loc contains values it is because there is a match:
if len(loc[0]) > 0:
    print("Match Found")
    # Template size under its own names. Reusing w/h for image-based values
    # (as this section used to) made the boxes drawn for the *next* image
    # use the wrong size.
    tpl_h, tpl_w = template.shape[:2]
    # Draw the match boxes on a copy, so the red rectangles never end up in
    # the image that is filtered and handed to Tesseract below.
    annotated = img_rgb.copy()
    for pt in zip(*loc[::-1]):
        cv2.rectangle(annotated, pt, (pt[0] + tpl_w, pt[1] + tpl_h), (0, 0, 255), 2)
    cv2.imwrite('res_monitor.png', annotated)
    # A loop of matching methods:
    for meth in methods:
        # We filter the (clean) image and change it to gray
        gray = get_grayscale(img_rgb)
        img_gray = remove_noise(gray)
        # Resolve the 'cv2.TM_...' name to its constant without eval().
        method = getattr(cv2, meth.split('.')[-1])
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        print("min_val:", min_val)
        print("max_val:", max_val)
        print("min_loc:", min_loc)
        print("max_loc:", max_loc)
        # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
        if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
            top_left = min_loc
        else:
            top_left = max_loc
        # The bottom right corner of the match is top_left plus the
        # template width and height.
        bottom_right = (top_left[0] + tpl_w, top_left[1] + tpl_h)
        print("top_left:", top_left)
        print("bottom_right:", bottom_right)
        print("x:", top_left[0])
        print("y:", top_left[1])
        # The area to read starts at the match and extends to the right
        # edge of the image, keeping the height of the match.
        img_w = img_gray.shape[1]
        print("w:", img_w)
        print("h:", bottom_right[1])
        x, y = top_left
        # Crop BEFORE drawing the boxes: the 2 px white frame would otherwise
        # be part of the cropped image that is passed to the OCR.
        crop_image = img_gray[y:bottom_right[1], x:img_w].copy()
        cv2.imwrite("croped.jpg", crop_image)
        # Mark the match and the reading area on img_gray (plotted later).
        cv2.rectangle(img_gray, top_left, bottom_right, 255, 2)
        imagen = cv2.rectangle(img_gray, top_left, (img_w, bottom_right[1]), 255, 2)
        # With pytesseract we look for the data in the cropped image.
        data = pytesseract.image_to_data(crop_image, config=my_config,
                                         output_type=Output.DICT)
        print(data, "\n")
4) Finally we create a dictionary to save the rate of the euro and the dollar, if everything goes well they will be saved correctly.
At the end of this process, a plot is shown to verify that the information was extracted correctly.
# We create a dictionary to store the values of Euro and Dollar
i = 0
currencies = {}
for value in data["text"]:
    print(value)
    try:
        # OCR text uses '.' as thousands separator and ',' as decimal mark.
        currency = float(value.replace(".", "").replace(",", "."))
    except ValueError:
        # Not a number (empty string, label, OCR noise): ignore it. This
        # replaces a bare `except:` that swallowed every kind of error.
        continue
    i = i + 1
    if i == 1:
        # The first number found is taken as the Euro rate.
        currencies["Euro"] = currency
    elif i == 2 and currency < currencies["Euro"]:
        # The second number is accepted as the Dollar rate only when it is
        # lower than the Euro rate.
        currencies["Dolar"] = currency
# We pass the image to string to obtain the rates of the currencies
text = pytesseract.image_to_string(crop_image, config=my_config)
print(text)
print(currencies)
# We graph the results and confirm that the data extraction and the
# demarcated area are correct.
plt.subplot(121)
plt.imshow(res, cmap='gray')
plt.title('Matching Result')
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(img_gray, cmap='gray')
plt.title('Detected Point')
plt.xticks([])
plt.yticks([])
plt.suptitle(meth)
plt.show()
else:
print("DOES NOT MATCH")
The results:
With the code and all the logic presented above, it usually works very well on these images, but for some reason sometimes it doesn't read the image properly, it doesn't save the information in the desired way.
As can be seen, the area that the code takes is the desired one, but the currency dictionary does not record any information:
Which is strange because if I run the code on a longer list of images, that same image is recognized perfectly.
So the problem here is that sometimes it worked and sometimes it didn't, and I'm not quite sure why. Does anyone know what I can polish? What am I doing wrong? Any advice?
I am stitching multiple images. While stitching two images it is showing dashed black line in between stitching like below.
Does anyone know how I can remove or get rid of this black dashed line?
This is the main part of the stitching code. It stitches two images and is then called again with the next image and the result of the previously stitched images, until all images have been processed:
detector = cv2.xfeatures2d.SURF_create(400)
gray1 = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
ret1, mask1 = cv2.threshold(gray1,1,255,cv2.THRESH_BINARY)
kp1, descriptors1 = detector.detectAndCompute(gray1,mask1)
gray2 = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
ret2, mask2 = cv2.threshold(gray2,1,255,cv2.THRESH_BINARY)
kp2, descriptors2 = detector.detectAndCompute(gray2,mask2)
keypoints1Im = cv2.drawKeypoints(image1, kp1, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints1Im)
keypoints2Im = cv2.drawKeypoints(image2, kp2, outImage = cv2.DRAW_MATCHES_FLAGS_DEFAULT, color=(0,0,255))
util.display("KEYPOINTS",keypoints2Im)
matcher = cv2.BFMatcher()
matches = matcher.knnMatch(descriptors2,descriptors1, k=2)
good = []
for m, n in matches:
if m.distance < 0.55 * n.distance:
good.append(m)
print (str(len(good)) + " Matches were Found")
if len(good) <= 10:
return image1
matches = copy.copy(good)
matchDrawing = util.drawMatches(gray2,kp2,gray1,kp1,matches)
util.display("matches",matchDrawing)
src_pts = np.float32([ kp2[m.queryIdx].pt for m in matches ]).reshape(-1,1,2)
dst_pts = np.float32([ kp1[m.trainIdx].pt for m in matches ]).reshape(-1,1,2)
A = cv2.estimateRigidTransform(src_pts,dst_pts,fullAffine=False)
if A is None:
HomogResult = cv2.findHomography(src_pts,dst_pts,method=cv2.RANSAC)
H = HomogResult[0]
height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners2 = np.zeros((4,2))
for i in range(0,4):
cornerX = corners2[i,0]
cornerY = corners2[i,1]
if A is not None: #check if we're working with affine transform or perspective transform
warpedCorners2[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
warpedCorners2[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
else:
warpedCorners2[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
warpedCorners2[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
allCorners = np.concatenate((corners1, warpedCorners2), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
Please help me out. Thanks.
Edit:
Input image1(resized)
Input image2(resized)
Result(resized)
Update:
Result after @fmw42's answer:
The problem arises because when you do the warping, the border pixels of the image get resampled/interpolated with black background pixels. This leaves a non-zero border around your warped image of varying values that show as your dashed dark line when merged with the other image. This happens because your merge test is binary, tested with != 0.
So one simple thing you can do is mask the warped image in Python/OpenCV to get its bounds from the black background outside the image, and then erode the mask. Then use the mask to erode the image boundary. This can be achieved by the following changes to the last lines of your code:
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
# Mask of every non-zero value in the warped image.
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
# Shrink the mask with a 3x3 elliptical kernel so the interpolated border
# pixels fall outside it.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
# Blank out everything outside the eroded mask.
warpedImage2[mask2==0] = 0
# Same binary merge as before, now without the dark border pixels.
result = np.where(warpedImage2 != 0, warpedImage2, warpedResImg)
I simply added the following code lines to your code:
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
You can increase the kernel size if desired to erode more.
Here is the before and after. Note that I did not have SURF and tried to use ORB, which did not align well. So your roads do not align. But the mismatch due to misalignment emphasizes the issue as it shows the dashed jagged black border line. The fact that ORB did not work or I do not have proper code from above to make it align is not important. The masking does what I think you want and is extendable to the processing of all your images.
The other thing that can be done in combination with the above is to feather the mask and then ramp blend the two images using the mask. This is done by blurring the mask (a bit more) and then stretching the values over the inside half of the blurred border and making the ramp only on the outside half of the blurred border. Then blend the two images with the ramped mask and its inverse as follows for the same code as above.
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
    warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
    warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
    warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
# Binary mask of the warped image, eroded to drop the interpolated border.
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
# Feather the mask, then stretch 127.5..255 to 0..255 so the ramp only
# covers one half of the blurred border.
mask2 = cv2.blur(mask2, (5,5))
mask2 = skimage.exposure.rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
# Blend the two images with the ramped mask and its inverse.
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged3.png", result)
The result when compared to the original composite is as follows:
ADDITION
I have corrected my ORB code to reverse the use of images and now it aligns. So here are all 3 techniques: the original, the one that only uses a binary mask and the one that uses a ramped mask for blending (all as described above).
ADDITION2
Here are the 3 requested images: original, binary masked, ramped mask blending.
Here is my ORB code for the last version above
I tried to change as little as possible from your code, except I had to use ORB and I had to swap the names image1 and image2 near the end.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
from skimage.exposure import rescale_intensity
image1 = cv2.imread("image1.jpg")
image2 = cv2.imread("image2.jpg")
gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

# Detect ORB features and compute descriptors.
MAX_FEATURES = 500
GOOD_MATCH_PERCENT = 0.15
orb = cv2.ORB_create(MAX_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(gray1, None)
keypoints2, descriptors2 = orb.detectAndCompute(gray2, None)

# Match features.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
matches = matcher.match(descriptors1, descriptors2, None)

# Sort matches by score (best first). sorted() is used instead of
# list.sort() so this also works when match() returns a tuple.
matches = sorted(matches, key=lambda x: x.distance)

# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]

# Draw top matches
imMatches = cv2.drawMatches(image1, keypoints1, image2, keypoints2, matches, None)
cv2.imwrite("/Users/fred/desktop/image1_image2_matches.png", imMatches)

# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
    points1[i, :] = keypoints1[match.queryIdx].pt
    points2[i, :] = keypoints2[match.trainIdx].pt
print(points1)
print("")
print(points2)

# A (or H as fallback) maps image1 coordinates onto image2 coordinates,
# so image2 stays fixed and image1 is the image that gets warped.
A = cv2.estimateRigidTransform(points1,points2,fullAffine=False)
#print(A)
if A is None:
    HomogResult = cv2.findHomography(points1,points2,method=cv2.RANSAC)
    H = HomogResult[0]

height1,width1 = image1.shape[:2]
height2,width2 = image2.shape[:2]
corners1 = np.float32(([0,0],[0,height1],[width1,height1],[width1,0]))
corners2 = np.float32(([0,0],[0,height2],[width2,height2],[width2,0]))
warpedCorners1 = np.zeros((4,2))
# Project the corners of image1 (the warped image) into the domain of
# image2 using the affine transform A or the homography H.
for i in range(0,4):
    cornerX = corners1[i,0]
    cornerY = corners1[i,1]
    if A is not None: #check if we're working with affine transform or perspective transform
        warpedCorners1[i,0] = A[0,0]*cornerX + A[0,1]*cornerY + A[0,2]
        warpedCorners1[i,1] = A[1,0]*cornerX + A[1,1]*cornerY + A[1,2]
    else:
        warpedCorners1[i,0] = (H[0,0]*cornerX + H[0,1]*cornerY + H[0,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])
        warpedCorners1[i,1] = (H[1,0]*cornerX + H[1,1]*cornerY + H[1,2])/(H[2,0]*cornerX + H[2,1]*cornerY + H[2,2])

# The canvas must contain the fixed image2 and the warped image1.
allCorners = np.concatenate((corners2, warpedCorners1), axis=0)
[xMin, yMin] = np.int32(allCorners.min(axis=0).ravel() - 0.5)
[xMax, yMax] = np.int32(allCorners.max(axis=0).ravel() + 0.5)
canvasSize = (xMax-xMin, yMax-yMin)

# The translation shifts everything so that no coordinate is negative.
translation = np.float32(([1,0,-1*xMin],[0,1,-1*yMin],[0,0,1]))
warpedResImg = cv2.warpPerspective(image2, translation, canvasSize)

# image1 is warped in BOTH branches (the homography branch used to warp
# image2, which blended image2 with itself). The alignment is applied
# first and the canvas translation second, i.e. translation . A.
if A is None:
    fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
else:
    fullTransformation = np.dot(translation, np.vstack([A, [0, 0, 1]]))
warpedImage2 = cv2.warpPerspective(image1, fullTransformation, canvasSize)

# Eroded binary mask of the warped image, used to drop its interpolated border.
mask2 = cv2.threshold(warpedImage2, 0, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
mask2 = cv2.morphologyEx(mask2, cv2.MORPH_ERODE, kernel)
warpedImage2[mask2==0] = 0
# Feather the mask and blend the two images with it and its inverse.
mask2 = cv2.blur(mask2, (5,5))
mask2 = rescale_intensity(mask2, in_range=(127.5,255), out_range=(0,255)).astype(np.float64)
result = (warpedImage2 * mask2 + warpedResImg * (255 - mask2))/255
result = result.clip(0,255).astype(np.uint8)
cv2.imwrite("image1_image2_merged2.png", result)
You had the following. Note where the names, image1 and image2 are being used compared to my code above.
warpedResImg = cv2.warpPerspective(image1, translation, (xMax-xMin, yMax-yMin))
if A is None:
fullTransformation = np.dot(translation,H) #again, images must be translated to be 100% visible in new canvas
warpedImage2 = cv2.warpPerspective(image2, fullTransformation, (xMax-xMin, yMax-yMin))
else:
warpedImageTemp = cv2.warpPerspective(image2, translation, (xMax-xMin, yMax-yMin))
warpedImage2 = cv2.warpAffine(warpedImageTemp, A, (xMax-xMin, yMax-yMin))
Horizontal Gluing
I will focus on one of the cuts as a proof of concept. I agree with the comments that your code is a bit lengthy and hard to work with. So step one is to glue the pictures myself.
import cv2
import matplotlib.pyplot as plt
import numpy as np
import itertools
from scipy.interpolate import UnivariateSpline
# Load both pictures as RGB with values scaled to 0..1.
upper_bgr = cv2.imread('yQv6W.jpg')
lower_bgr = cv2.imread('zoWJv.jpg')
upper_image = cv2.cvtColor(upper_bgr, cv2.COLOR_BGR2RGB)/255
lower_image = cv2.cvtColor(lower_bgr, cv2.COLOR_BGR2RGB)/255
# Paste the lower picture 139 rows down, then the upper picture 22 columns
# to the right; the upper picture overwrites the overlap.
result_image = np.zeros((466+139,700+22,3))
lower_h, lower_w = lower_image.shape[:2]
upper_h, upper_w = upper_image.shape[:2]
result_image[139:139+lower_h, :lower_w] = lower_image
result_image[0:upper_h, 22:22+upper_w] = upper_image
plt.imshow(result_image)
Ok no dashed black line but I admit not perfect either. So the next step is to align at least the street and the little way at the very right of the picture. For that I will need to shrink the picture to non integer size and turn that back into a grid. I will use a knn like method for that.
Edit: As requested in the comments, I'll explain the shrinking in a bit more detail, since it would have to be done by hand again for another stitching. The magic happens in the following line (I replaced n by its value):
f = UnivariateSpline([0,290,510,685],[0,310,530,700])
I tried first to scale the lower picture in the x-direction to make the little way on the very right fit the upper image. Unfortunately then the street wouldn't fit the street. So what I do is scale down according to the above function. At pixel 0 I still want to have pixel zero, at 290 I want to have what used to be at 310 and so on.
Notice that 290, 510 and 310, 530 are the new and the old x-coordinates, respectively, of the street and the way at the height of the gluing.
class Image_knn():
    """Sample an image at fractional (row, column) positions.

    ``predict`` blends the four pixels surrounding each requested position,
    weighted by the fractional parts of the coordinates (bilinear
    interpolation). Neighbours outside the image are clamped to the border.
    """

    def fit(self, image):
        """Store *image*, converted to float, as the array to sample from."""
        self.image = image.astype('float')

    def predict(self, x, y):
        """Return the interpolated pixels at rows *x* and columns *y*.

        x and y are array-likes of equal shape. Because the weights get a
        trailing axis of length 1, the stored image is expected to have a
        channel axis, i.e. shape (rows, columns, channels). The result has
        shape ``x.shape + (channels,)``.
        """
        image = self.image
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        # Fractional parts, with a trailing axis to broadcast over channels.
        frac_x = (x % 1).reshape(*x.shape, 1)
        frac_y = (y % 1).reshape(*y.shape, 1)
        weights_x = [1 - frac_x, frac_x]
        weights_y = [1 - frac_y, frac_y]
        start_x = np.floor(x).astype('int')
        start_y = np.floor(y).astype('int')
        last_row = image.shape[0] - 1
        last_col = image.shape[1] - 1
        # dx/dy select the lower or upper neighbour. They are deliberately
        # not called x/y, which would shadow the coordinate arguments.
        return sum(
            image[np.clip(start_x + dx, 0, last_row),
                  np.clip(start_y + dy, 0, last_col)]
            * weights_x[dx] * weights_y[dy]
            for dx, dy in itertools.product(range(2), range(2))
        )
image_model = Image_knn()
image_model.fit(lower_image)
# New width of the lower picture after shrinking.
n = 685
# Maps a new x-coordinate to the old x-coordinate it should show:
# 0 -> 0, 290 -> 310, 510 -> 530, n -> 700.
f = UnivariateSpline([0,290,510,n],[0,310,530,700])
# Old x-coordinate to sample for every new column 0..n-1.
# (A discarded np.linspace(...) call that used to sit here was removed.)
yspace = f(np.arange(n))
result_image = np.zeros((466+139,700+22, 3))
a,b = np.meshgrid(np.arange(0,lower_image.shape[0]), yspace)
# predict() returns (columns, rows, channels) here, hence the transpose.
result_image[139:139+lower_image.shape[0],:n] = np.transpose(image_model.predict(a,b), [1,0,2])
result_image[0:upper_image.shape[0], 22:22+upper_image.shape[1]] = upper_image
plt.imshow(result_image, 'gray')
Much better, no black line but maybe we can still smoothen the cut a bit. I figured if I take convex combinations of upper and lower image at the cut it would look much better.
result_image = np.zeros((466+139, 700+22, 3))
a, b = np.meshgrid(np.arange(0, lower_image.shape[0]), yspace)
result_image[139:139+lower_image.shape[0], :n] = np.transpose(image_model.predict(a, b), [1, 0, 2])
# Number of rows over which the upper picture fades into the lower one.
transition_range = 10
upper_h, upper_w = upper_image.shape[:2]
upper_cols = slice(22, 22 + upper_w)
seam_rows = slice(upper_h - transition_range, upper_h)
# Everything above the seam comes straight from the upper picture.
result_image[0:upper_h - transition_range, upper_cols] = upper_image[:-transition_range, :]
# Inside the seam take a convex combination: the upper picture fades out
# (weights 1 -> 0) while the lower picture already in place fades in (0 -> 1).
fade_out = np.linspace(1, 0, transition_range).reshape(-1, 1, 1)
fade_in = np.linspace(0, 1, transition_range).reshape(-1, 1, 1)
transition_pixcels = upper_image[-transition_range:, :] * fade_out
result_image[seam_rows, upper_cols] *= fade_in
result_image[seam_rows, upper_cols] += transition_pixcels
plt.imshow(result_image)
plt.savefig('text.jpg')
Tilted Gluing
For completeness, here is also a version that glues at the top with a tilted bottom. I attach the pictures at a point and rotate around that fixed point by a few degrees. And again I correct some very slight misalignments at the end. To get the coordinates for that, I am using Jupyter Lab and %matplotlib widget.
# Points (row, column) at which the two pictures are attached to each other.
fixed_point_upper = np.array([139,379])
fixed_point_lower = np.array([0,400])
# The lower picture is rotated by this angle around the fixed point.
angle = np.deg2rad(2)
# Unit vectors (row, column) of the lower picture's "down" and "right"
# axes, expressed in result-image coordinates.
down_dir = np.array([np.sin(angle+np.pi/2),np.cos(angle+np.pi/2)])
right_dir = np.array([np.sin(angle),np.cos(angle)])
result_image_height = np.ceil((fixed_point_upper+lower_image.shape[0]*down_dir+(lower_image.shape[1]-fixed_point_lower[1])*right_dir)[0]).astype('int')
right_shift = np.ceil(-(fixed_point_upper+lower_image.shape[0]*down_dir-fixed_point_lower[1]*right_dir)[1]).astype('int')
result_image_width = right_shift+upper_image.shape[1]
result_image = np.zeros([result_image_height, result_image_width,3])
fixed_point_result = np.array([fixed_point_upper[0],fixed_point_upper[1]+right_shift])
lower_top_left = fixed_point_result-fixed_point_lower[1]*right_dir
# The upper picture goes into the top-right corner of the result.
result_image[:upper_image.shape[0],-upper_image.shape[1]:] = upper_image
# calculate points in lower_image
# Every (row, column) position of the result image ...
result_coordinates = np.stack(np.where(np.ones(result_image.shape[:2],dtype='bool')),axis=1)
# ... projected onto the lower picture's axes with the matrix product `@`.
lower_coordinates = np.stack([(result_coordinates-lower_top_left)@down_dir,(result_coordinates-lower_top_left)@right_dir],axis=1)
# Keep only the positions that fall inside the lower picture.
mask = (0 <= lower_coordinates[:,0]) & (0 <= lower_coordinates[:,1]) \
    & (lower_coordinates[:,0] <= lower_image.shape[0]) & (lower_coordinates[:,1] <= lower_image.shape[1])
result_coordinates = result_coordinates[mask]
lower_coordinates = lower_coordinates[mask]
# COORDINATES ON RESULT IMAGE
# left street 254
# left sides of houses 295, 420, 505
# right small street, both sides big street 590,635,664
# COORDINATES ON LOWER IMAGE
# left street 234
# left sides of houses 280, 399, 486
# right small street, both sides big street 571, 617, 642
def coord_transform(y):
    """Convert a result-image column *y* into a distance along the lower picture's right axis."""
    offset = y - lower_top_left[1]
    return offset / right_dir[1]


# Spline knots: hand-measured columns on the result image, converted with
# coord_transform, paired with the matching columns on the lower picture.
result_columns = [lower_top_left[1], 254, 295, 420, 505, 589, 635, 664]
y = tuple(coord_transform(column) for column in result_columns)
f = UnivariateSpline(y,[0, 234, 280, 399, 486, 571, 617, 642])
# Sample the lower picture at the spline-corrected columns and write the
# pixels into the result image.
corrected_columns = np.vectorize(f)(lower_coordinates[:,1])
result_image[result_coordinates[:,0],result_coordinates[:,1]] = image_model.predict(lower_coordinates[:,0], corrected_columns)
I am trying to use OpenCV to find the location of a button on the screen. If the button exists on the screen, OpenCV works perfectly, but it still returns some non-zero x, y even if the image doesn't exist on the screen. How can I fix it?
import cv2
def buttonlocation(image, max_sqdiff=None):
    """Locate the button image file *image* on a fresh screenshot.

    The screen is grabbed, saved as 'screenshot.png', and searched for the
    button with ``cv2.TM_SQDIFF`` template matching.

    max_sqdiff: optional upper bound for the mean squared grey-level
        difference per pixel of the best match (0 means identical). When
        given and exceeded, the button is considered absent and None is
        returned. With the default None the best position is always
        returned, as before.

    Returns the (x, y) top-left corner of the best match, or None when
    *max_sqdiff* is given and no match is good enough.
    """
    # NOTE(review): ImageGrab is not imported in this snippet; presumably
    # `from PIL import ImageGrab` lives in the surrounding module - confirm.
    im = ImageGrab.grab()
    im.save('screenshot.png')
    button = cv2.imread(image, 0)
    screenshot = cv2.imread('screenshot.png', 0)
    # Search the (larger) screenshot for the (smaller) button.
    res = cv2.matchTemplate(screenshot, button, cv2.TM_SQDIFF)
    min_val, _max_val, min_loc, _max_loc = cv2.minMaxLoc(res)
    if max_sqdiff is not None:
        # TM_SQDIFF always has a minimum somewhere, even when the button is
        # not on screen. Dividing by the button area makes the score
        # independent of the button size before it is compared.
        button_h, button_w = button.shape[:2]
        if min_val / (button_h * button_w) > max_sqdiff:
            return None
    x, y = min_loc
    return x, y
The OpenCV documentation details the two steps of the template matching procedure.
R=cv2.matchTemplate(I,T,method) computes an image R. Each pixel x,y of this image represents a mark depending on the similarity between the template T and the sub-image of I starting at x,y. For instance, if the method cv2.TM_SQDIFF is applied, the mark is computed as: $R(x,y) = \sum_{x',y'} \left( T(x',y') - I(x+x',\, y+y') \right)^2$
If R[x,y] is zero, then the sub-image I[x:x+sxT,y:y+syT] is exactly identical to the template T. The smaller R[x,y] is, the closer the sub-image is to the template.
cv2.minMaxLoc(R) is applied to find the minimum of R. The corresponding sub-image of I is expected to be closer to the template than any other sub-image of I.
If the image I does not contain the template, the sub-image of I corresponding to the minimum of R can be very different from T. But the value of the minimum reflects this ! Indeed, a threshold on R can be applied as a way to decide whether the template is in the image or not.
Choosing the value for the threshold is a tricky task. It could be a fraction of the maximum value of R or a fraction of the mean value of R. The influence of the size of the template can be discarded by dividing R by sxT*syT. For instance, the maximum value of R depends on the template size and the type of the image: for CV_8UC3 (unsigned char, 3 channels) with TM_SQDIFF, the maximum value of R is 255*255*3*sxT*syT, since the differences are squared.
Here is an example:
import cv2
# cv2.IMREAD_COLOR replaces the removed cv2.CV_LOAD_IMAGE_COLOR constant.
img = cv2.imread('image.jpg', cv2.IMREAD_COLOR)
cv2.imshow('image',img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
method = cv2.TM_SQDIFF
# (template file, box colour, message printed when it is not found)
searches = [
    ('template.jpg', (0,255,0), 'first template not found'),
    ('template2.jpg', (0,0,255), 'second template not found'),
]
for template_path, box_color, not_found_msg in searches:
    template = cv2.imread(template_path, cv2.IMREAD_COLOR)
    res = cv2.matchTemplate(img,template,method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    h,w,c = template.shape
    print('R='+str( min_val))
    # Accept the match when the summed squared difference stays below
    # 20*20 per channel and pixel of the template.
    if min_val< h*w*3*(20*20):
        cv2.rectangle(img,min_loc,(min_loc[0] + w,min_loc[1] + h),box_color,3)
    else:
        print(not_found_msg)
cv2.imwrite( "result.jpg", img)
cv2.namedWindow('res',0)
cv2.imshow('res',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
The image:
The first template is to be found:
The second template is not to be found:
The result:
I am trying to use template matching to find an equation inside a given pdf document that is generated from LaTeX. When I use the code over here, I get only a very good matching when I crop the picture from the original page (converted to jpeg or png), however when I compile the equation code separately and generate an jpg/png output of it the matching goes wrong tremendously.
I believe the reason is relevant to the resolution, but since I am an amateur in this field, I cannot reasonably make the jpg generated out of standalone equation to have the same pixel structure of the jpg for the whole page. Here is the code that is copied (more or less) from the above-mentioned website of OpenCV, which is an implementation for python:
import cv2
from PIL import Image
img = cv2.imread('location of the original image', 0)
img2 = img.copy()
template = cv2.imread('location of the patch I look for',0)
w, h = template.shape[::-1]
# All the 6 methods for comparison in a list (the constants themselves,
# so no eval() is needed)
methods = [cv2.TM_CCOEFF, cv2.TM_CCOEFF_NORMED, cv2.TM_CCORR,
           cv2.TM_CCORR_NORMED, cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]
method = methods[0]
# Apply template Matching
res = cv2.matchTemplate(img,template,method)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
# If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
    top_left = min_loc
else:
    top_left = max_loc
bottom_right = (top_left[0] + w, top_left[1] + h)
print(top_left, bottom_right)
img = Image.open('location of the original image')
#cropping the original image with the found coordinates to make a qualitative comparison
cropped = img.crop((top_left[0], top_left[1], bottom_right[0], bottom_right[1]))
cropped.save('location to save the cropped image using the coordinates found by template matching')
Here is a sample page that I look for the first equation:
The code to generate a specific standalone equation is as follows:
\documentclass[preview]{standalone}
\usepackage{amsmath}
\begin{document}\begin{align*}
(\mu_1+\mu_2)(\emptyset) = \mu_1(\emptyset) + \mu_2(\emptyset) = 0 + 0 =0
\label{eq_0}
\end{align*}
\end{document}
Which I compile and later trim the white space around the equation either using pdfcrop or using .image() method in PythonMagick. Template matching generated with this trimmed output on the original page does not give a reasonable result. Here is the trimmed/converted output using pdfcrop/Mac's Preview.app:
.
Cropping directly the equation from the above page works perfectly. I would appreciate some explanation and help.
EDIT:
I also found the following which uses template matching by bruteforcing different possible scales:
http://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/
However since I am willing to process as many as 1000 of documents, this seems a very slow method to go for. Plus I imagine there should be a more logical way of handling it, by somehow finding the relevant scales.
Instead of template matching you could use features, i.e. keypoints with descriptors. They are scale-invariant, so you do not need to iterate over different scaled versions of the image.
The python example find_obj.py
provided with OpenCV works with ORB features for your given example.
python find_obj.py --feature=brisk rB4Yy_big.jpg ZjBAA.jpg
Note that I did not use the cropped version of the formula to search for, but a version with some white pixels around it, so the keypoint detection can work correctly. There needs to be some space around it, because keypoints have to be completely inside the image. Otherwise the descriptors can not be calculated.
The big image is the original from your post.
One additional remark: You will always get some matches. If the formula image you are searching for is not present in the big images, the matches will be nonsensical. If you need to sort out these false positives, you have the following options:
Check if the average distance of the resulting DMatches is small enough.
Check if the transformation matrix can be calculated.
Edit: Since you asked for it, here is a version that draws the bounding box around the found formula instead of the matches:
#!/usr/bin/env python
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2
def init_feature():
    """Return a BRISK detector and a brute-force matcher using the Hamming norm."""
    return cv2.BRISK_create(), cv2.BFMatcher(cv2.NORM_HAMMING)
def filter_matches(kp1, kp2, matches, ratio = 0.75):
    """Keep the k-NN matches that pass the ratio test.

    kp1, kp2: keypoint sequences indexed by ``queryIdx`` / ``trainIdx``.
    matches: iterable of match groups; only groups of exactly two matches
        whose best distance is below ``ratio`` times the second distance
        are kept.

    Returns ``(p1, p2, kp_pairs)``: float32 arrays with the matched point
    coordinates in each image, and a list of ``(keypoint1, keypoint2)``
    pairs.
    """
    mkp1, mkp2 = [], []
    for m in matches:
        if len(m) == 2 and m[0].distance < m[1].distance * ratio:
            best = m[0]
            mkp1.append( kp1[best.queryIdx] )
            mkp2.append( kp2[best.trainIdx] )
    p1 = np.float32([kp.pt for kp in mkp1])
    p2 = np.float32([kp.pt for kp in mkp2])
    # list(): on Python 3 zip() is a one-shot iterator, so callers could
    # neither take its length nor iterate over it twice.
    kp_pairs = list(zip(mkp1, mkp2))
    return p1, p2, kp_pairs
def explore_match(win, img1, img2, kp_pairs, status = None, H = None):
    """Show img1 and img2 side by side in window *win* and return the picture.

    When a homography *H* is given, the outline of img1 is projected through
    it, shifted by img1's width onto the img2 half, and drawn in red.
    kp_pairs and status are accepted but not used here.
    """
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    # Grayscale canvas wide enough for both images next to each other.
    canvas = np.zeros((max(h1, h2), w1 + w2), np.uint8)
    canvas[:h1, :w1] = img1
    canvas[:h2, w1:w1 + w2] = img2
    vis = cv2.cvtColor(canvas, cv2.COLOR_GRAY2BGR)
    if H is not None:
        outline = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]]).reshape(1, -1, 2)
        projected = cv2.perspectiveTransform(outline, H).reshape(-1, 2)
        corners = np.int32(projected + (w1, 0))
        cv2.polylines(vis, [corners], True, (0, 0, 255))
    cv2.imshow(win, vis)
    return vis
if __name__ == '__main__':
    # Both images are read as grayscale.
    img1 = cv2.imread('rB4Yy_big.jpg' , 0)
    img2 = cv2.imread('ZjBAA.jpg', 0)
    detector, matcher = init_feature()
    kp1, desc1 = detector.detectAndCompute(img1, None)
    kp2, desc2 = detector.detectAndCompute(img2, None)
    # Two nearest neighbours per descriptor, as needed by the ratio test.
    raw_matches = matcher.knnMatch(desc1, trainDescriptors = desc2, k = 2)
    p1, p2, kp_pairs = filter_matches(kp1, kp2, raw_matches)
    if len(p1) < 4:
        # Fewer than four point pairs: no homography is estimated.
        print('%d matches found, not enough for homography estimation' % len(p1))
    else:
        H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
        print('%d / %d inliers/matched' % (np.sum(status), len(status)))
        vis = explore_match('find_obj', img1, img2, kp_pairs, status, H)
        cv2.waitKey()
        cv2.destroyAllWindows()
The problem with template matching is that it only works in very controlled environments. Meaning that it will work perfectly if you take the template from the actual image, but it won't work if the resolution is different or even if the image is a little bit turned.
I would suggest you finding another algorithm more suitable for this problem. In OpenCV docs you can find some specific algorithms for your problem.