AttributeError in Python problems - python

import numpy as np
import cv2
import imutils
import argparse

class Stitcher:
    def __init__(self):
        # determine if we are using OpenCV v3.X
        self.isv3 = imutils.is_cv3()

    def stitch(self, images, ratio=0.75, reprojThresh=4.0,
               showMatches=False):
        (imageB, imageA) = images
        (kpsA, featuresA) = self.detectAndDescribe(imageA)
        (kpsB, featuresB) = self.detectAndDescribe(imageB)

        # match features between the two images
        M = self.matchKeypoints(kpsA, kpsB,
                                featuresA, featuresB, ratio, reprojThresh)
        if M is None:
            return None

        # apply a perspective warp to stitch the images
        (matches, H, status) = M
        result = cv2.warpPerspective(imageA, H,
                                     (imageA.shape[1] + imageB.shape[1], imageA.shape[0]))
        result[0:imageB.shape[0], 0:imageB.shape[1]] = imageB

        # check to see if the keypoint matches should be visualized
        if showMatches:
            vis = self.drawMatches(imageA, imageB, kpsA, kpsB, matches,
                                   status)
            # return a tuple of the stitched image and the visualization
            return (result, vis)

        # return the stitched image
        return result

    def detectAndDescribe(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        if self.isv3:
            orb = cv2.ORB_create()
            (kps, features) = orb.detectAndCompute(image, None)
        # if we are using OpenCV 2.4.X
        else:
            # detect keypoints in the image
            detector = cv2.FeatureDetector_create("ORB")
            kps = detector.detect(gray)
            # extract features from the image
            extractor = cv2.DescriptorExtractor_create("ORB")
            (kps, features) = extractor.compute(gray, kps)

        # convert the keypoints to NumPy arrays
        kps = np.float32([kp.pt for kp in kps])

        # return a tuple of keypoints and features
        return (kps, features)

    def matchKeypoints(self, kpsA, kpsB, featuresA, featuresB,
                       ratio, reprojThresh):
        # compute the raw matches and initialize the list of actual matches
        matcher = cv2.DescriptorMatcher_create("BruteForce")
        rawMatches = matcher.knnMatch(featuresA, featuresB, 2)
        matches = []

        # loop over the raw matches
        for m in rawMatches:
            # ensure the distance is within a certain ratio of each
            # other (i.e. Lowe's ratio test)
            if len(m) == 2 and m[0].distance < m[1].distance * ratio:
                matches.append((m[0].trainIdx, m[0].queryIdx))

        # computing a homography requires at least 4 matches
        if len(matches) > 4:
            # construct the two sets of points
            ptsA = np.float32([kpsA[i] for (_, i) in matches])
            ptsB = np.float32([kpsB[i] for (i, _) in matches])

            # homography between the two sets of points
            (H, status) = cv2.findHomography(ptsA, ptsB, cv2.RANSAC,
                                             reprojThresh)

            # return the matches along with the homography matrix
            # and status of each matched point
            return (matches, H, status)

        return None

    def drawMatches(self, imageA, imageB, kpsA, kpsB, matches, status):
        # initialize the output visualization image
        (hA, wA) = imageA.shape[:2]
        (hB, wB) = imageB.shape[:2]
        vis = np.zeros((max(hA, hB), wA + wB, 3), dtype="uint8")
        vis[0:hA, 0:wA] = imageA
        vis[0:hB, wA:] = imageB

        # loop over the matches
        for ((trainIdx, queryIdx), s) in zip(matches, status):
            # only draw the match if the keypoint was successfully matched
            if s == 1:
                ptA = (int(kpsA[queryIdx][0]), int(kpsA[queryIdx][1]))
                ptB = (int(kpsB[trainIdx][0]) + wA, int(kpsB[trainIdx][1]))
                cv2.line(vis, ptA, ptB, (240, 255, 100), 1)

        # return the visualization
        return vis
Output: AttributeError: 'module' object has no attribute 'is_cv3'
Please check the code and send back the edited code that solves the problem.

You can access the OpenCV version with cv2.__version__:
def is_cv3():
    return cv2.__version__[0] == '3'
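For reference, here is a minimal sketch of wiring that check into the Stitcher class above, assuming you would rather drop the imutils dependency than upgrade it (the rest of the class stays unchanged):

import cv2

def is_cv3():
    # the major version is the first dotted component, e.g. "3.4.2" -> 3
    return int(cv2.__version__.split(".")[0]) == 3

class Stitcher:
    def __init__(self):
        # use the local helper instead of imutils.is_cv3()
        self.isv3 = is_cv3()

Upgrading imutils (pip install -U imutils) should also restore imutils.is_cv3(), since recent releases expose that helper.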

Related

How do I determine the number of successfully matched keypoints when using keypoint detection?

I've got three images I'm trying to run object detection on. It's either 1, 2 or 3 of the same pentagon; the scale will vary, but only by a few %.
I tried using different sections of the screen (they always appear in the same location) for matchTemplate, i.e. the first full pentagon and the center section of the second pentagon in a loop, to try and catch the correct scale. This worked some of the time, but rarely, and it felt a bit hacky.
Instead I've moved over to keypoint detection. I figured I'm likely to get roughly double the number of matches on the second image and triple the number of matches on the third. Then I can check whether the count is >= x to determine which has been found.
I've written the keypoint detection code and it works, but I can't figure out which variable to use to determine how many keypoints were matched.
When I print good I get this: [<DMatch 0000018FDE162B90>, <DMatch 0000018FDE162ED0>, <DMatch 0000018FDE180150>, <DMatch 0000018FDE180210>, <DMatch 0000018FDE1807D0>, <DMatch 0000018FDE180810>, <DMatch 0000018FDE1820D0>], which I assumed meant 7 keypoints were matched, but there are vastly more being shown in the cv.imshow window.
Which variable can I use to identify the number of matched keypoints?
N.B. this is functional MVP code.
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob

crewstars = glob.glob(r"C:\Users\images\crewstars\*.png")

def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[..., :3]
    img = np.ascontiguousarray(img)
    return img

def loadImages(directory):
    # Initialise empty array
    image_list = []
    # Add images to array
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list

def preProcessNeedle(image_list):
    needle_kp1_desc = []
    for i in image_list:
        img = i[0]
        orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc

def match_keypoints(descriptors_needle, keypoint_haystack, min_match_count):
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(keypoint_haystack, None)
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        # return empty results so the caller's three-value unpack still works
        return None, [], []
    good = []
    points = []
    for pair in matches:
        if len(pair) == 2:
            if pair[0].distance < 0.7 * pair[1].distance:
                good.append(pair[0])
    if len(good) > min_match_count:
        for match in good:
            points.append(keypoints_haystack[match.trainIdx].pt)
    return keypoints_haystack, good, points

def keypointDetection(needle_kp1_desc):
    res = False
    # Object Detection
    for i, img in enumerate(needle_kp1_desc):
        kp1 = img[0]
        descriptors_needle = img[1]
        needle_img = img[2]
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()
        keypoint_haystack = keypoint_haystack[170:230, 800:1200]
        kp2, matches, match_points = match_keypoints(descriptors_needle, keypoint_haystack, min_match_count=40)
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search", 1940, 30)
        cv.waitKey(0)
        if match_points:
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(0)
            res = True
            break
    return res

while True:
    crewstarsdir = loadImages(crewstars)
    needle_kp1_desc = preProcessNeedle(crewstarsdir)
    if keypointDetection(needle_kp1_desc):
        pass
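The good list printed above holds one DMatch per ratio-test survivor, so len(good) is the number of matched keypoints; by default cv.drawMatches also draws the unmatched single keypoints, which is likely why the window shows far more marks than len(good). A short sketch, reusing the names returned by match_keypoints above:

kp2, good, match_points = match_keypoints(descriptors_needle,
                                          keypoint_haystack,
                                          min_match_count=40)

# number of keypoints that survived Lowe's ratio test
print(len(good), "matched keypoints,", len(match_points), "recorded above the threshold")

# hide the unmatched keypoints so the visualisation agrees with len(good)
match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, good, None,
                             flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)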

Using OpenCV homography with python error in method findHomography

I have a goal to do homography on live video by capturing my screen and processing it.
In order to do so, I took the code from this link and adapted it inside a while loop as follows:
from __future__ import print_function
import cv2 as cv
import numpy as np
from windowcapture import WindowCapture

# initialize the WindowCapture class
capture = WindowCapture('My Window')

bar_img = cv.imread('hammer.jpg', cv.IMREAD_GRAYSCALE)

while(True):
    # get an updated image of the game
    screenshot = capture.get_screenshot()
    screenshot = cv.cvtColor(screenshot, cv.IMREAD_GRAYSCALE)
    if bar_img is None or screenshot is None:
        print('Could not open or find the images!')
        exit(0)

    #-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors
    minHessian = 400
    detector = cv.SIFT_create()
    keypoints_obj, descriptors_obj = detector.detectAndCompute(bar_img, None)
    keypoints_scene, descriptors_scene = detector.detectAndCompute(screenshot, None)

    #-- Step 2: Matching descriptor vectors with a FLANN based matcher
    # Since SURF is a floating-point descriptor NORM_L2 is used
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
    knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)

    #-- Filter matches using the Lowe's ratio test
    ratio_thresh = 0.75
    good_matches = []
    for m, n in knn_matches:
        if m.distance < ratio_thresh * n.distance:
            good_matches.append(m)

    #-- Draw matches
    img_matches = np.empty((max(bar_img.shape[0], screenshot.shape[0]), bar_img.shape[1] + screenshot.shape[1], 3), dtype=np.uint8)
    cv.drawMatches(bar_img, keypoints_obj, screenshot, keypoints_scene, good_matches, img_matches, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

    #-- Localize the object
    obj = np.empty((len(good_matches), 2), dtype=np.float32)
    scene = np.empty((len(good_matches), 2), dtype=np.float32)
    for i in range(len(good_matches)):
        #-- Get the keypoints from the good matches
        obj[i, 0] = keypoints_obj[good_matches[i].queryIdx].pt[0]
        obj[i, 1] = keypoints_obj[good_matches[i].queryIdx].pt[1]
        scene[i, 0] = keypoints_scene[good_matches[i].trainIdx].pt[0]
        scene[i, 1] = keypoints_scene[good_matches[i].trainIdx].pt[1]

    H, _ = cv.findHomography(obj, scene, cv.RANSAC)

    #-- Get the corners from the image_1 ( the object to be "detected" )
    obj_corners = np.empty((4, 1, 2), dtype=np.float32)
    obj_corners[0, 0, 0] = 0
    obj_corners[0, 0, 1] = 0
    obj_corners[1, 0, 0] = bar_img.shape[1]
    obj_corners[1, 0, 1] = 0
    obj_corners[2, 0, 0] = bar_img.shape[1]
    obj_corners[2, 0, 1] = bar_img.shape[0]
    obj_corners[3, 0, 0] = 0
    obj_corners[3, 0, 1] = bar_img.shape[0]

    scene_corners = cv.perspectiveTransform(obj_corners, H)

    #-- Draw lines between the corners (the mapped object in the scene - image_2 )
    cv.line(img_matches, (int(scene_corners[0, 0, 0] + bar_img.shape[1]), int(scene_corners[0, 0, 1])),
            (int(scene_corners[1, 0, 0] + bar_img.shape[1]), int(scene_corners[1, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[1, 0, 0] + bar_img.shape[1]), int(scene_corners[1, 0, 1])),
            (int(scene_corners[2, 0, 0] + bar_img.shape[1]), int(scene_corners[2, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[2, 0, 0] + bar_img.shape[1]), int(scene_corners[2, 0, 1])),
            (int(scene_corners[3, 0, 0] + bar_img.shape[1]), int(scene_corners[3, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[3, 0, 0] + bar_img.shape[1]), int(scene_corners[3, 0, 1])),
            (int(scene_corners[0, 0, 0] + bar_img.shape[1]), int(scene_corners[0, 0, 1])), (0, 255, 0), 4)

    #-- Show detected matches
    cv.imshow('Good Matches & Object detection', img_matches)
    cv.waitKey()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break

print('Done.')
The WindowCapture class that I used relies on win32gui to capture the window (maybe it makes a difference that I grab the frame this way instead of using imread?).
I get the following error when I run the code:
C:\Users\Tester\AppData\Local\Temp\pip-req-build-1i5nllza\opencv\modules\calib3d\src\fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'cv::findHomography'
Any idea why it happens?
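For context, cv.findHomography needs at least four point correspondences, so on frames where the ratio test leaves fewer than four entries in good_matches the call fails with exactly this error. A minimal guard, sketched with the variable names used in the loop above:

MIN_MATCH_COUNT = 4  # findHomography needs at least 4 correspondences

if len(good_matches) >= MIN_MATCH_COUNT:
    H, _ = cv.findHomography(obj, scene, cv.RANSAC)
    if H is not None:
        scene_corners = cv.perspectiveTransform(obj_corners, H)
        # ... draw the outline exactly as above ...
else:
    # not enough matches on this frame, so skip the homography step
    print('only {} good matches, skipping this frame'.format(len(good_matches)))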

OpenCV Error: Expected Ptr<cv::UMat> for argument '%s'

I am new to OpenCV and am attempting to use OpenCV with Python 3.7 to stitch five images together into one single image. The source code I originally used only allowed two images to be stitched together, so I had to modify it to allow five webcam images to be stitched. However, I am presented with this error:
Traceback (most recent call last):
  File "C:\Users\colby\OneDrive\Desktop\New folder\photomosaic.py", line 80, in <module>
    img3 = cv2.drawMatches(kpsA,secondImg,kpsB,thirdImg, kpsC, forthImg, kpsD, fifthImg, kpsE, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
TypeError: Expected Ptr<cv::UMat> for argument '%s'
My Code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import imageio
import imutils
import cameracapture

cv2.ocl.setUseOpenCL(False)

feature_extractor = 'orb'  # one of 'sift', 'surf', 'brisk', 'orb'
feature_matching = 'bf'

firstImg = imageio.imread('cap1.jpg')
firstImg_gray = cv2.cvtColor(firstImg, cv2.COLOR_RGB2GRAY)
secondImg = imageio.imread('cap2.jpg')
secondImg_gray = cv2.cvtColor(secondImg, cv2.COLOR_RGB2GRAY)
thirdImg = imageio.imread('cap3.jpg')
thirdImg_gray = cv2.cvtColor(thirdImg, cv2.COLOR_RGB2GRAY)
forthImg = imageio.imread('cap4.jpg')
forthImg_gray = cv2.cvtColor(forthImg, cv2.COLOR_RGB2GRAY)
fifthImg = imageio.imread('cap5.jpg')
fifthImg_gray = cv2.cvtColor(fifthImg, cv2.COLOR_RGB2GRAY)
plt.show()

def detectAndDescribe(image, method=None):
    assert method is not None, "You need to define a feature detection method. Values are: 'sift', 'surf'"
    if method == 'sift':
        descriptor = cv2.xfeatures2d.SIFT_create()
    elif method == 'surf':
        descriptor = cv2.xfeatures2d.SURF_create()
    elif method == 'brisk':
        descriptor = cv2.BRISK_create()
    elif method == 'orb':
        descriptor = cv2.ORB_create()
    # get keypoints and descriptors
    (kps, features) = descriptor.detectAndCompute(image, None)
    return (kps, features)

kpsA, featuresA = detectAndDescribe(firstImg_gray, method=feature_extractor)
kpsB, featuresB = detectAndDescribe(secondImg_gray, method=feature_extractor)
kpsC, featuresC = detectAndDescribe(thirdImg_gray, method=feature_extractor)
kpsD, featuresD = detectAndDescribe(forthImg_gray, method=feature_extractor)
kpsE, featuresE = detectAndDescribe(fifthImg_gray, method=feature_extractor)

def createMatcher(method, crossCheck):
    if method == 'sift' or method == 'surf':
        bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=crossCheck)
    elif method == 'orb' or method == 'brisk':
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=crossCheck)
    return bf

def matchKeyPointsBF(featuresA, featuresB, featuresC, featuresD, featuresE, method):
    bf = createMatcher(method, crossCheck=True)
    # Match descriptors.
    best_matches = bf.match(featuresA, featuresB)
    best_matchs = bf.match(featuresA, featuresB + featuresC + featuresD + featuresE)
    # Sort the features in order of distance.
    # The points with small distance (more similarity) are ordered first in the vector
    rawMatches = sorted(best_matches, key=lambda x: x.distance)
    print("Raw matches (Brute force):", len(rawMatches))
    return rawMatches

def matchKeyPointsKNN(featuresA, featuresB, featuresC, featuresD, featuresE, ratio, method):
    bf = createMatcher(method, crossCheck=False)
    # compute the raw matches and initialize the list of actual matches
    rawMatches = bf.knnMatch(featuresA, featuresB, featuresC, featuresD, featuresE, 2)
    print("Raw matches (knn):", len(rawMatches))
    matches = []
    # loop over the raw matches
    for m, n in rawMatches:
        # ensure the distance is within a certain ratio of each
        # other (i.e. Lowe's ratio test)
        if m.distance < n.distance * ratio:
            matches.append(m)
    return matches

print("Using: {} feature matcher".format(feature_matching))
fig = plt.figure(figsize=(20, 8))
if feature_matching == 'bf':
    matches = matchKeyPointsBF(featuresA, featuresB, featuresC, featuresD,
                               featuresE, method=feature_extractor)
    img3 = cv2.drawMatches(kpsA, secondImg, kpsB, thirdImg, kpsC, forthImg,
                           kpsD, fifthImg, kpsE, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
elif feature_matching == 'knn':
    matches = matchKeyPointsKNN(featuresA, featuresB, featuresC, featuresD,
                                featuresE, ratio=0.75, method=feature_extractor)
    img3 = cv2.drawMatches(firstImg, kpsA, secondImg, kpsB, thirdImg, kpsC,
                           forthImg, kpsD, fifthImg, kpsE, np.random.choice(matches, 100),
                           None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
plt.imshow(img3)
plt.show()

def getHomography(kpsA, kpsB, kpsC, kpsD, kpsE, featuresA, featuresB,
                  featuresC, featuresD, featuresE, matches, reprojThresh):
    # convert the keypoints to numpy arrays
    kpsA = np.float32([kp.pt for kp in kpsA])
    kpsB = np.float32([kp.pt for kp in kpsB])
    kpsC = np.float32([kp.pt for kp in kpsC])
    kpsD = np.float32([kp.pt for kp in kpsD])
    kpsE = np.float32([kp.pt for kp in kpsE])
    if len(matches) > 4:
        # construct the two sets of points
        ptsA = np.float32([kpsA[m.firstIdx] for m in matches])
        ptsB = np.float32([kpsB[m.secondIdx] for m in matches])
        ptsC = np.float32([kpsC[m.thirdIdx] for m in matches])
        ptsD = np.float32([kpsD[m.forthIdx] for m in matches])
        ptsE = np.float32([kpsE[m.fifthIdx] for m in matches])
        # estimate the homography between the sets of points
        (H, status) = cv2.findHomography(ptsA, ptsB, ptsC, ptsD, ptsE, cv2.RANSAC, reprojThresh)
        return (matches, H, status)
    else:
        return None

M = getHomography(kpsA, kpsB, kpsC, kpsD, kpsE, featuresA, featuresB,
                  featuresC, featuresD, featuresE, matches, reprojThresh=4)
if M is None:
    print("Error!")
(matches, H, status) = M
print(H)

# Apply panorama correction
width = trainImg.shape[1] + queryImg.shape[1]
height = trainImg.shape[0] + queryImg.shape[0]
result = cv2.warpPerspective(trainImg, H, (width, height))
result[0:queryImg.shape[0], 0:queryImg.shape[1]] = queryImg

plt.figure(figsize=(20, 10))
plt.imshow(result)
plt.axis('off')
plt.show()

# transform the panorama image to grayscale and threshold it
gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]

# Finds contours from the binary image
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# get the maximum contour area
c = max(cnts, key=cv2.contourArea)

# get a bbox from the contour area
(x, y, w, h) = cv2.boundingRect(c)

# crop the image to the bbox coordinates
result = result[y:y + h, x:x + w]

# show the cropped image
plt.figure(figsize=(20, 10))
plt.imshow(result)
The expected output:
I expect the program to be able to take five pictures with my webcam (which is imported through cameracapture), and then stitch them together to make one image. Any advice or fixes to this error would be greatly appreciated.
firstImg = imageio.imread('cap1.jpg')
print(type(firstImg))
If the output is
<class 'PIL.JpegImagePlugin.JpegImageFile'>
then convert it to an array using:
firstImg = np.asarray(firstImg)
and use this firstImg when converting to the grayscale image.
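A short sketch of that conversion folded into the loading step from the question (same file name assumed):

import numpy as np
import imageio
import cv2

firstImg = imageio.imread('cap1.jpg')
firstImg = np.asarray(firstImg)           # guarantee a plain numpy ndarray
print(type(firstImg), firstImg.shape)     # should now report <class 'numpy.ndarray'>

# cvtColor now receives the array type it expects
firstImg_gray = cv2.cvtColor(firstImg, cv2.COLOR_RGB2GRAY)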

Calculating the distance and yaw between ArUco marker and camera?

I am trying to calculate the exact distance (a 3 cm error is acceptable) between an ArUco marker and the camera. I use Python, OpenCV and the aruco module. I can detect the markers (the marker side is 0.023 meters, which is 2.3 cm), but I can't interpret the distance, because at a real distance of 40 cm the norm of the translation vector gives 1 meter. I am quite confused about this. Can anyone help? Full code (sorry, not documented well):
import numpy as np
import cv2
import cv2.aruco as aruco
import glob
import argparse
import math

# Marker id infos. Global to access everywhere. It is unnecessary to change it to local.
firstMarkerID = None
secondMarkerID = None

cap = cv2.VideoCapture(0)

image_width = 0
image_height = 0

# hyper parameters
distanceBetweenTwoMarkers = 0.0245  # in meters, 2.45 cm
oneSideOfTheMarker = 0.023  # in meters, 2.3 cm

# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

class Namespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

def calibrate(dirpath):
    """ Apply camera calibration operation for images in the given directory path. """
    # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(8,6,0)
    objp = np.zeros((6*9, 3), np.float32)
    objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

    # Arrays to store object points and image points from all the images.
    objpoints = []  # 3d point in real world space
    imgpoints = []  # 2d points in image plane.

    images = glob.glob(dirpath + '/*.jpg')

    for fname in images:
        img = cv2.imread(fname)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Find the chess board corners
        ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)

        # If found, add object points, image points (after refining them)
        if ret:
            objpoints.append(objp)

            corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
            imgpoints.append(corners2)

            # Draw and display the corners
            img = cv2.drawChessboardCorners(img, (9, 6), corners2, ret)

    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)

    return [ret, mtx, dist, rvecs, tvecs]

def saveCoefficients(mtx, dist, path):
    """ Save the camera matrix and the distortion coefficients to given path/file. """
    cv_file = cv2.FileStorage(path, cv2.FILE_STORAGE_WRITE)
    cv_file.write("camera_matrix", mtx)
    cv_file.write("dist_coeff", dist)
    # note you *release* you don't close() a FileStorage object
    cv_file.release()

def loadCoefficients(path):
    """ Loads camera matrix and distortion coefficients. """
    # FILE_STORAGE_READ
    cv_file = cv2.FileStorage(path, cv2.FILE_STORAGE_READ)

    # note we also have to specify the type to retrieve otherwise we only get a
    # FileNode object back instead of a matrix
    camera_matrix = cv_file.getNode("camera_matrix").mat()
    dist_matrix = cv_file.getNode("dist_coeff").mat()

    # Debug: print the values
    # print("camera_matrix : ", camera_matrix.tolist())
    # print("dist_matrix : ", dist_matrix.tolist())

    cv_file.release()
    return [camera_matrix, dist_matrix]

def inversePerspective(rvec, tvec):
    """ Applies perspective transform for given rvec and tvec. """
    rvec, tvec = rvec.reshape((3, 1)), tvec.reshape((3, 1))
    R, _ = cv2.Rodrigues(rvec)
    R = np.matrix(R).T
    invTvec = np.dot(R, np.matrix(-tvec))
    invRvec, _ = cv2.Rodrigues(R)
    invTvec = invTvec.reshape((3, 1))
    invTvec = invTvec.reshape((3, 1))
    return invRvec, invTvec

def make_1080p():
    global image_width
    global image_height
    image_width = 1920
    image_height = 1080
    change_res(image_width, image_height)

def make_720p():
    global image_width
    global image_height
    image_width = 1280
    image_height = 720
    change_res(image_width, image_height)

def make_480p():
    global image_width
    global image_height
    image_width = 640
    image_height = 480
    change_res(image_width, image_height)

def change_res(width, height):
    cap.set(3, width)
    cap.set(4, height)

def relativePosition(rvec1, tvec1, rvec2, tvec2):
    """ Get relative position for rvec2 & tvec2. Compose the returned rvec & tvec to use composeRT with rvec2 & tvec2 """
    rvec1, tvec1 = rvec1.reshape((3, 1)), tvec1.reshape((3, 1))
    rvec2, tvec2 = rvec2.reshape((3, 1)), tvec2.reshape((3, 1))

    # Inverse the second marker, the right one in the image
    invRvec, invTvec = inversePerspective(rvec2, tvec2)

    info = cv2.composeRT(rvec1, tvec1, invRvec, invTvec)
    composedRvec, composedTvec = info[0], info[1]

    composedRvec = composedRvec.reshape((3, 1))
    composedTvec = composedTvec.reshape((3, 1))
    return composedRvec, composedTvec

def euclideanDistanceOfTvecs(tvec1, tvec2):
    return math.sqrt(math.pow(tvec1[0]-tvec2[0], 2) + math.pow(tvec1[1]-tvec2[1], 2) + math.pow(tvec1[2]-tvec2[2], 2))

def euclideanDistanceOfTvec(tvec):
    return euclideanDistanceOfTvecs(tvec, [0, 0, 0])

def draw(img, imgpts, color):
    """ draw a line between given two points. """
    imgpts = np.int32(imgpts).reshape(-1, 2)
    for pointf in range(len(imgpts)):
        for points in range(len(imgpts)):
            img = cv2.line(img, tuple(imgpts[pointf]), tuple(imgpts[points]), color, 3)
    return img

def track(matrix_coefficients, distortion_coefficients):
    global image_width
    global image_height
    """ Real time ArUco marker tracking. """
    needleComposeRvec, needleComposeTvec = None, None  # Composed for needle
    ultraSoundComposeRvec, ultraSoundComposeTvec = None, None  # Composed for ultrasound
    savedNeedleRvec, savedNeedleTvec = None, None  # Pure Composed
    savedUltraSoundRvec, savedUltraSoundTvec = None, None  # Pure Composed
    TcomposedRvecNeedle, TcomposedTvecNeedle = None, None
    TcomposedRvecUltrasound, TcomposedTvecUltrasound = None, None

    make_480p()
    while True:
        isFirstMarkerDetected = False
        isSecondMarkerDetected = False
        ret, frame = cap.read()

        # operations on the frame come here
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Change grayscale
        aruco_dict = aruco.Dictionary_get(aruco.DICT_5X5_250)  # Use 5x5 dictionary to find markers
        parameters = aruco.DetectorParameters_create()  # Marker detection parameters

        # lists of ids and the corners belonging to each id
        corners, ids, rejected_img_points = aruco.detectMarkers(gray, aruco_dict,
                                                                parameters=parameters,
                                                                cameraMatrix=matrix_coefficients,
                                                                distCoeff=distortion_coefficients)

        if np.all(ids is not None):  # If there are markers found by detector
            zipped = zip(ids, corners)
            ids, corners = zip(*(sorted(zipped)))
            # print(ids)
            for i in range(0, len(ids)):  # Iterate in markers
                # Estimate pose of each marker and return the values rvec and tvec---different from camera coefficients
                rvec, tvec, markerPoints = aruco.estimatePoseSingleMarkers(corners[i], oneSideOfTheMarker, matrix_coefficients,
                                                                           distortion_coefficients)
                if ids[i] == firstMarkerID:
                    firstRvec = rvec
                    firstTvec = tvec
                    isFirstMarkerDetected = True
                    firstMarkerCorners = corners[i]
                elif ids[i] == secondMarkerID:
                    secondRvec = rvec
                    secondTvec = tvec
                    isSecondMarkerDetected = True
                    secondMarkerCorners = corners[i]

                (rvec - tvec).any()  # get rid of that nasty numpy value array error
                # aruco.drawAxis(frame, matrix_coefficients, distortion_coefficients, rvec, tvec, 0.01)  # Draw Axis
                aruco.drawDetectedMarkers(frame, corners)  # Draw a square around the markers

            ''' First try
            if isFirstMarkerDetected and isSecondMarkerDetected:
                composedRvec, composedTvec = relativePosition(firstRvec, firstTvec, secondRvec, secondTvec)

                info = cv2.composeRT(composedRvec, composedTvec, secondRvec.T, secondTvec.T)
                composedRvec, composedTvec = info[0], info[1]
                composedRvec, composedTvec = composedRvec.T, composedTvec.T

                differenceRvec, differenceTvec = composedRvec - secondRvec, composedTvec - secondTvec

                # print infos
                print("composed Rvec: ", composedRvec)
                print("composed Tvec: ", composedTvec)
                print("Second marker Rvec: ", secondRvec)
                print("Second marker Tvec: ", secondTvec)
                print("differenceRvec: ", differenceRvec)
                print("differenceTvec: ", differenceTvec)
                print("real difference: ", euclideanDistanceOfTvecs(composedTvec[0], secondTvec[0][0]))

                # draw axis to estimated location
                aruco.drawAxis(frame, mtx, dist, composedRvec, composedTvec, 0.0115)

                realDistanceInTvec = euclideanDistanceOfTvec(secondTvec[0][0])
                difference = euclideanDistanceOfTvecs(composedTvec[0], secondTvec[0][0])
                calculatedDistance = realDistanceInTvec * (distanceBetweenTwoMarkers / difference)
                calculatedDistance = realDistanceInTvec * (distanceBetweenTwoMarkers / (secondTvec[0][0][2] - firstTvec[0][0][2]))
                print(calculatedDistance)
            '''

            if isFirstMarkerDetected and isSecondMarkerDetected:
                composedRvec, composedTvec = relativePosition(firstRvec, firstTvec, secondRvec, secondTvec)

                camerafirstRvec, cameraFirstTvec = inversePerspective(firstRvec, firstTvec)
                camerasecondRvec, camerasecondTvec = inversePerspective(secondRvec, secondTvec)

                differenceRvec, differenceTvec = camerafirstRvec - camerasecondRvec, cameraFirstTvec - camerasecondTvec

                # print infos
                print("first Rvec: ", camerafirstRvec)
                print("first Tvec: ", cameraFirstTvec)
                print("Second marker Rvec: ", camerasecondRvec)
                print("Second marker Tvec: ", camerasecondTvec)
                # print("differenceRvec: ", differenceRvec)
                # print("differenceTvec: ", differenceTvec)

                realDistanceInTvec = euclideanDistanceOfTvec(secondTvec[0][0])
                print(cv2.norm(secondTvec[0][0]))
                difference = euclideanDistanceOfTvecs(composedTvec.T[0], secondTvec[0][0])
                calculatedDistance = realDistanceInTvec * (distanceBetweenTwoMarkers / difference)
                # print(calculatedDistance)

        # Display the resulting frame
        cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('frame', image_width, image_height)
        cv2.imshow('frame', frame)

        # Wait 3 milliseconds for an interaction. Check the key and do the corresponding job.
        key = cv2.waitKey(3) & 0xFF
        if key == ord('q'):  # Quit
            break
        elif key == ord('p'):  # print necessary information here
            pass  # Insert necessary print here

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Aruco Marker Tracking')
    parser.add_argument('--coefficients', metavar='bool', required=True,
                        help='File name for matrix coefficients and distortion coefficients')
    parser.add_argument('--firstMarker', metavar='int', required=True,
                        help='first')
    parser.add_argument('--secondMarker', metavar='int', required=True,
                        help='second')

    # Parse the arguments and take action for that.
    args = parser.parse_args()
    firstMarkerID = int(args.firstMarker)
    secondMarkerID = int(args.secondMarker)

    if args.coefficients == '1':
        mtx, dist = loadCoefficients("test.yaml")
        ret = True
    else:
        ret, mtx, dist, rvecs, tvecs = calibrate("calib_images")
        saveCoefficients(mtx, dist, "calibrationCoefficients.yaml")
    print("Calibration is completed. Starting tracking sequence.")
    if ret:
        track(mtx, dist)
I got the answer. The problem is in the calibration. When calibrating with the chessboard, I gave the object points like this:
(0,0,0), (1,0,0) and so on.
The thing is, for pose estimation the camera has to be calibrated properly. My chessboard square size was 1.5 centimeters, which is 0.015 meters, so I changed the object point matrix to:
(0,0,0), (0.015,0,0) and so on.
That tells the program that the calibration is in meters. If you do the calibration with a different object-points matrix than the real one, the pose estimation fails. This was included in the OpenCV documentation, but I missed it; the docs just say "you can pass them like that", and I didn't realize that pose estimation would then fail.
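A minimal sketch of that change inside the calibrate() function above, assuming the 1.5 cm square size mentioned (expressed in meters):

square_size = 0.015  # chessboard square edge in meters (1.5 cm)

# object points in metric units: (0,0,0), (0.015,0,0), (0.030,0,0), ...
objp = np.zeros((6*9, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2) * square_size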

Digit recognition with Tesseract OCR and python

I use Tesseract and Python to read digits (from an energy meter).
Everything works well except for the number "1".
Tesseract cannot read the "1" digit.
This is the picture I send to Tesseract:
And Tesseract reads "0000027".
How can I tell Tesseract that the vertical rod is a "1"?
This is my Tesseract initialisation:
import tesseract

TESSERACT_LIBRARY_PATH = "C:\\Program Files (x86)\\Tesseract-OCR"
LANGUAGE = "eng"
CHARACTERS = "0123456789"
FALSE = "0"
TRUE = "1"

def init_ocr():
    """
    .. py:function:: init_ocr()

    Utilize the Tesseract-OCR library to create a tesseract_ocr that
    predicts the numbers to be read off of the meter.

    :return: tesseract_ocr Tesseract's OCR API.
    :rtype: Class
    """
    # Initialize the tesseract_ocr with the English language package.
    tesseract_ocr = tesseract.TessBaseAPI()
    tesseract_ocr.Init(TESSERACT_LIBRARY_PATH, LANGUAGE,
                       tesseract.OEM_DEFAULT)

    # Limit the characters being searched for to numerics.
    tesseract_ocr.SetVariable("tessedit_char_whitelist", CHARACTERS)

    # Set the tesseract_ocr to predict for only one character.
    tesseract_ocr.SetPageSegMode(tesseract.PSM_AUTO)

    # Tesseract's Directed Acyclic Graph.
    # Not necessary for number recognition.
    tesseract_ocr.SetVariable("load_system_dawg", FALSE)
    tesseract_ocr.SetVariable("load_freq_dawg", FALSE)
    tesseract_ocr.SetVariable("load_number_dawg", TRUE)

    tesseract_ocr.SetVariable("classify_enable_learning", FALSE)
    tesseract_ocr.SetVariable("classify_enable_adaptive_matcher", FALSE)

    return tesseract_ocr
Slightly irrelevant answer, though it may serve your original goal.
I had a similar problem with Tesseract, and I had very strict performance requirements as well. I found this simple solution on SO and crafted a simple recogniser with OpenCV.
It boils down to finding bounding rectangles (from edges) on the very clear image that you have and then trying to match the found objects against templates. I believe the solution in your case will be both simple and precise, though it will require slightly more code than you have now.
I will follow this question, since it would be nice to have a working solution with Tesseract.
I have limited time, but the following seems to be a working solution:
import os
import cv2
import numpy

KNN_SQUARE_SIDE = 50  # Square 50 x 50 px.


def resize(cv_image, factor):
    new_size = tuple(map(lambda x: x * factor, cv_image.shape[::-1]))
    return cv2.resize(cv_image, new_size)


def crop(cv_image, box):
    x0, y0, x1, y1 = box
    return cv_image[y0:y1, x0:x1]


def draw_box(cv_image, box):
    x0, y0, x1, y1 = box
    cv2.rectangle(cv_image, (x0, y0), (x1, y1), (0, 0, 255), 2)


def draw_boxes_and_show(cv_image, boxes, title='N'):
    temp_image = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
    for box in boxes:
        draw_box(temp_image, box)
    cv2.imshow(title, temp_image)
    cv2.waitKey(0)


class BaseKnnMatcher(object):
    distance_threshold = 0

    def __init__(self, source_dir):
        self.model, self.label_map = self.get_model_and_label_map(source_dir)

    @staticmethod
    def get_model_and_label_map(source_dir):
        responses = []
        label_map = []
        samples = numpy.empty((0, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE), numpy.float32)
        for label_idx, filename in enumerate(os.listdir(source_dir)):

            label = filename[:filename.index('.png')]
            label_map.append(label)
            responses.append(label_idx)

            image = cv2.imread(os.path.join(source_dir, filename), 0)
            suit_image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
            sample = suit_image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE))
            samples = numpy.append(samples, sample, 0)

        responses = numpy.array(responses, numpy.float32)
        responses = responses.reshape((responses.size, 1))
        model = cv2.KNearest()
        model.train(samples, responses)

        return model, label_map

    def predict(self, image):
        image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
        image_standard_size = numpy.float32(image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
        closest_class, results, neigh_resp, distance = self.model.find_nearest(image_standard_size, k=1)

        if distance[0][0] > self.distance_threshold:
            return None

        return self.label_map[int(closest_class)]


class DigitKnnMatcher(BaseKnnMatcher):
    distance_threshold = 10 ** 10


class MeterValueReader(object):
    def __init__(self):
        self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')

    @classmethod
    def get_symbol_boxes(cls, cv_image):
        ret, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        symbol_boxes = []
        for contour in contours:
            x, y, width, height = cv2.boundingRect(contour)

            # You can test here for box size, though not required in your example:
            # if cls.is_size_of_digit(width, height):
            #     symbol_boxes.append((x, y, x+width, y+height))

            symbol_boxes.append((x, y, x + width, y + height))
        return symbol_boxes

    def get_value(self, meter_cv2_image):
        symbol_boxes = self.get_symbol_boxes(meter_cv2_image)
        symbol_boxes.sort()  # x is first in tuple
        symbols = []
        for box in symbol_boxes:
            symbol = self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
            symbols.append(symbol)
        return symbols


if __name__ == '__main__':
    # If you want to see how boxes detection works, uncomment these:
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # draw_boxes_and_show(img_bw, boxes)

    # Uncomment to generate templates from image
    # import random
    # TEMPLATE_DIR = 'templates'
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # for box in boxes:
    #     # You need to label templates manually after extraction
    #     cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)), crop(img_bw, box))

    img_bw = cv2.imread(os.path.join('original.png'), 0)
    vr = MeterValueReader()
    print vr.get_value(img_bw)
