When I use ArUco markers, I get very low FPS (10~15). At first I drew the FPS info, marker position, marker attitude, camera position, and camera attitude on the detection screen; that way I got ~10 FPS. Then I removed all the text except the FPS info. That was better (~13 FPS) but still not enough. I need at least 25 FPS. How can I increase the FPS?
Resolution: 640x480
aruco_pose_estimation.py
import numpy as np
import cv2
import cv2.aruco as aruco
import sys, time, math

#--- Define Tag
id_to_find = 3
marker_size = 10  #- [cm]

#------------------------------------------------------------------------------
#------- ROTATIONS https://www.learnopencv.com/rotation-matrix-to-euler-angles/
#------------------------------------------------------------------------------
# Checks if a matrix is a valid rotation matrix.
def isRotationMatrix(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

# Calculates rotation matrix to euler angles.
# The result is the same as MATLAB except the order
# of the euler angles (x and z are swapped).
def rotationMatrixToEulerAngles(R):
    assert (isRotationMatrix(R))
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0
    return np.array([x, y, z])

#--- Get the camera calibration path
calib_path = ""
camera_matrix = np.loadtxt(calib_path + 'cameraMatrix_raspi.txt', delimiter=',')
camera_distortion = np.loadtxt(calib_path + 'cameraDistortion_raspi.txt', delimiter=',')

#--- 180 deg rotation matrix around the x axis
R_flip = np.zeros((3, 3), dtype=np.float32)
R_flip[0, 0] = 1.0
R_flip[1, 1] = -1.0
R_flip[2, 2] = -1.0

#--- Define the aruco dictionary
aruco_dict = aruco.getPredefinedDictionary(aruco.DICT_ARUCO_ORIGINAL)
#aruco_dict = aruco.getPredefinedDictionary(aruco.DICT_4X4_250)
parameters = aruco.DetectorParameters_create()

#--- Capture the videocamera (this may also be a video or a picture)
cap = cv2.VideoCapture(0)
#-- Set the camera size as the one it was calibrated with
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cap.set(cv2.CAP_PROP_FPS, 40)

#-- Font for the text in the image
font = cv2.FONT_HERSHEY_PLAIN

prev_frame_time = time.time()
while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  #-- remember, OpenCV stores color images in Blue, Green, Red

    #-- Find all the aruco markers in the image
    corners, ids, rejected = aruco.detectMarkers(image=gray, dictionary=aruco_dict, parameters=parameters,
                                                 cameraMatrix=camera_matrix, distCoeff=camera_distortion)

    if ids is not None:
        ret = aruco.estimatePoseSingleMarkers(corners, marker_size, camera_matrix, camera_distortion)
        #-- Unpack the output, get only the first
        rvec, tvec = ret[0][0, 0, :], ret[1][0, 0, :]

        #-- Draw the detected marker and put a reference frame over it
        aruco.drawDetectedMarkers(frame, corners)
        aruco.drawAxis(frame, camera_matrix, camera_distortion, rvec, tvec, 10)

        #-- Print the tag position in the camera frame
        str_position = "MARKER Position x=%4.0f y=%4.0f z=%4.0f" % (tvec[0], tvec[1], tvec[2])
        cv2.putText(frame, str_position, (0, 400), font, 1.3, (0, 255, 0), 2, cv2.LINE_AA)

        #-- Obtain the rotation matrix tag->camera
        R_ct = np.matrix(cv2.Rodrigues(rvec)[0])
        R_tc = R_ct.T

        #-- Get the attitude in terms of euler 321 (needs to be flipped first)
        roll_marker, pitch_marker, yaw_marker = rotationMatrixToEulerAngles(R_flip * R_tc)

        #-- Print the marker's attitude with respect to the camera frame
        str_attitude = "MARKER Attitude r=%4.0f p=%4.0f y=%4.0f" % (math.degrees(roll_marker),
                                                                    math.degrees(pitch_marker),
                                                                    math.degrees(yaw_marker))
        cv2.putText(frame, str_attitude, (0, 420), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

        #-- Now get the position and attitude of the camera with respect to the marker
        pos_camera = -R_tc * np.matrix(tvec).T
        str_position = "CAMERA Position x=%4.0f y=%4.0f z=%4.0f" % (pos_camera[0], pos_camera[1], pos_camera[2])
        cv2.putText(frame, str_position, (0, 440), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

        #-- Get the attitude of the camera with respect to the marker frame
        roll_camera, pitch_camera, yaw_camera = rotationMatrixToEulerAngles(R_flip * R_tc)
        str_attitude = "CAMERA Attitude r=%4.0f p=%4.0f y=%4.0f" % (math.degrees(roll_camera),
                                                                    math.degrees(pitch_camera),
                                                                    math.degrees(yaw_camera))
        cv2.putText(frame, str_attitude, (0, 460), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

    #-- Calculate the FPS and display it on the frame
    new_frame_time = time.time()
    fps = 1 / (new_frame_time - prev_frame_time)
    prev_frame_time = new_frame_time
    cv2.putText(frame, "FPS " + str(int(fps)), (0, 360), font, 1.3, (100, 255, 0), 2, cv2.LINE_AA)

    #--- Display the frame
    cv2.imshow('frame', frame)

    #--- Use 'q' to quit
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        cap.release()
        cv2.destroyAllWindows()
        break
After removing the texts
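One common way to raise the frame rate, independent of the text overlays, is to decouple frame grabbing from detection so that cap.read() never blocks the processing loop. Below is a minimal, hypothetical sketch of such a threaded reader; the ThreadedCapture class name and structure are illustrative, not from the original code. The main loop would call its read() instead of cap.read():

import threading
import cv2

class ThreadedCapture:
    """Grab frames on a background thread, keeping only the newest one."""
    def __init__(self, src=0, width=640, height=480):
        self.cap = cv2.VideoCapture(src)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        self.ret, self.frame = self.cap.read()
        self.stopped = False
        threading.Thread(target=self._reader, daemon=True).start()

    def _reader(self):
        # Continuously overwrite the stored frame; stale frames are dropped,
        # so the detection loop always processes the most recent image.
        while not self.stopped:
            self.ret, self.frame = self.cap.read()

    def read(self):
        return self.ret, self.frame

    def release(self):
        self.stopped = True
        self.cap.release()

Drawing text with cv2.putText is usually cheap compared to marker detection, so decoupling capture from detection tends to help more than removing overlays.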
So what I want to do is loop through two image crops and then see how many white pixels there are in each of them. If a certain amount is detected in one image you print out something, and if another amount is detected in the other you print out something else, etc.
I currently have this:
import numpy as np
import cv2
#img = cv2.imread('FIFA_Full_Crop_2_button.jpg')
img = cv2.imread('FIFA2.jpg')
#img = cv2.imread('FIFA_Full_Crop_2_button_3.jpg')
mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
rect = (1512, 20, 180, 185) # boundary of interest
cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
img = img * mask2[:, :, np.newaxis]
cv2.imwrite('Image_mask.jpg', img)
mx = (1510, 22, 110, 185)
x, y, h, w = mx
# x,y coordinates for specified "fixed" location
# Left button
# mx = (1525, 58, 27, 22)
# x, y, h, w = mx
# Circle button 2
# mz = (1664, 58, 27, 22)
# x, y, h, w = mz
# Output to files
crop = img[y:y+h, x:x+w]
cv2.imwrite('Image_crop.jpg', crop)
cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imwrite('Image_cont.jpg', img)
# Detect white pixels from cropped image
img = cv2.imread('Image_crop.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,gray = cv2.threshold(gray, 150,255,0)
gray2 = gray.copy()
cv2.imshow('IMG',gray2)
# Example left button detection 72 pixels
# n_white_pix = np.sum(gray2 == 255)
# print('Number of white pixels:', n_white_pix)
# if crop.sum() >= 72:
# print("Left")
# else:
# print("No button detected")
n_white_pix = np.sum(gray2 == 255)
print('Number of white pixels:', n_white_pix)
if n_white_pix > 0:
    print("White pixel detected")
else:
    print("Nothing detected")
cv2.waitKey(0)
cv2.destroyAllWindows()
As seen above, I have commented out the left button and circle parts because I can only do one image crop at a time. How would I go about checking both crops at the same time?
An ideal solution would be looping through the button crops and then checking them with n_white_pix to see if they exceed a certain amount, as in the sketch below.
So the main problem at hand is: how do I loop through the two image crops, # Left button and # Circle button 2?
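A minimal sketch of that loop, assuming the same fixed rectangles and the (x, y, h, w) unpacking order used above; the region names and thresholds are illustrative:

import cv2
import numpy as np

# (x, y, h, w) rectangles as in the commented-out code above;
# the thresholds are example values, tune them for your buttons
regions = {
    "Left": ((1525, 58, 27, 22), 72),
    "Circle 2": ((1664, 58, 27, 22), 72),
}

img = cv2.imread('Image_mask.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, gray = cv2.threshold(gray, 150, 255, 0)

for name, ((x, y, h, w), min_white) in regions.items():
    # crop the fixed region and count white pixels in it
    crop = gray[y:y+h, x:x+w]
    n_white_pix = np.sum(crop == 255)
    print(name, 'white pixels:', n_white_pix)
    if n_white_pix >= min_white:
        print(name, "button detected")
    else:
        print("No", name, "button detected")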
I'm working on a project using Python (3.7) and OpenCV in which I have an image (captured with a camera) of a document with a QR code placed on it.
This QR code has 6 variables respectively as:
Size of QR code image
Top
Right
Bottom
Left
Unit
Latest Update:
Here are the steps I need to perform, in this order:
Detect the QR code and decode it to read the size values.
If the size of the QR code in the image is not equal to the size mentioned inside it, scale the image so the two sizes match.
Then crop the image on all sides, relative to the QR code, according to the values mentioned inside the QR code.
I have tried this code:
def decodeAndCrop(inputImage):
    print(str(inputImage))
    image = cv2.imread(str(inputImage))
    qrCodeDetector = cv2.QRCodeDetector()
    decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
    qr_data = decodedText.split(",")
    print("qr data from function: {}".format(qr_data))
    if points is not None:
        pts = len(points)
        # print(pts)
        for i in range(pts):
            nextPointIndex = (i + 1) % pts
            if str(inputImage) == "scaled_img.jpg":
                cv2.line(
                    image,
                    tuple(points[i][0]),
                    tuple(points[nextPointIndex][0]),
                    (255, 0, 0),
                    5,
                )
            print(points[i][0])
        width = int(
            math.sqrt(
                (points[0][0][0] - points[1][0][0]) ** 2
                + (points[0][0][1] - points[1][0][1]) ** 2
            )
        )
        height = int(
            math.sqrt(
                (points[1][0][0] - points[2][0][0]) ** 2
                + (points[1][0][1] - points[2][0][1]) ** 2
            )
        )
        print("height and width after scaling: {} {}".format(height, width))
        if not str(inputImage) == "scaled_img.jpg":
            scaled_img = None
            if width == qr_data[0] and height == qr_data[0]:
                print("Sizes are equal")
                # Add the extension values to points and crop
                y = int(points[0][0][1]) - int(qr_data[1])
                x = int(points[0][0][0]) - int(qr_data[4])
                roi = image[
                    y : y + height + int(qr_data[3]), x : x + width + int(qr_data[2])
                ]
                scaled_img = cv2.imwrite("scaled_img.jpg", roi)
                return scaled_img
            else:
                print(
                    "Width and height "
                    + str(width)
                    + "x"
                    + str(height)
                    + " not equal to "
                    + str(qr_data[0])
                    + "x"
                    + str(qr_data[0])
                )
                if height > int(qr_data[0]):
                    scale_width = int(width) - int(qr_data[0])
                    scale_height = int(height) - int(qr_data[0])
                    print(f"scaled width: {scale_width} scaled height: {scale_height}")
                    dimension = (scale_width, scale_height)
                    scaled_img = cv2.resize(
                        image, dimension, interpolation=cv2.INTER_AREA
                    )
                    print("new img dims: {}".format(scaled_img.shape))
                    cv2.imshow("scaled image:", scaled_img)
                    cv2.imwrite("scaled_img.jpg", scaled_img)
                elif height < int(qr_data[0]):
                    scale_width = int(qr_data[0]) - width
                    scale_height = int(qr_data[0]) - height
                    print(f"scaled width: {scale_width} scaled height: {scale_height}")
                    dimension = (scale_width, scale_height)
                    scaled_img = cv2.resize(
                        image, dimension, interpolation=cv2.INTER_AREA
                    )
                    print("new img dims: {}".format(scaled_img.shape))
                    cv2.imshow("scaled image:", scaled_img)
                    cv2.imwrite("scaled_img.jpg", scaled_img)
                cv2.imshow("final output:", roi)
                return scaled_img
        else:
            y = int(points[0][0][1]) - int(qr_data[1])
            x = int(points[0][0][0]) - int(qr_data[4])
            print(" x and y")
            print(x)
            print(y)
            roi = image[
                y : y + height + int(qr_data[3]), x : x + width + int(qr_data[2])
            ]
            final_img = cv2.imwrite("finalized_image.jpg", roi)
            cv2.imshow("finalized image:", final_img)
            return final_img


if __name__ == "__main__":
    image_to_crop = decodeAndCrop("example_input_1.jpg")
    final_image = decodeAndCrop("scaled_img.jpg")
    cv2.imshow("Cropped:", image_to_crop)
    # cv2.imshow("Final: ", final_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
The code above gives the following error:
final_img = cv2.imwrite("finalized_image.jpg", roi)
cv2.error: OpenCV(4.2.0) /Users/travis/build/skvark/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp:715: error: (-215:Assertion failed) !_img.empty() in function 'imwrite'
End of Latest Update:
An example of the decoded information from a QR code is: 100, 20, 40, 60, 20, px
Now, I need to detect the QR code in this document image. As a first step, I need to compare the size of the QR code in the captured image with the size mentioned in the decoded information. For example, if in the captured image the QR code is 90x90 px and the size from the decoded info is 100x100 px, we need to compare those.
Then, in the second step, I have to crop the complete image using the Top, Right, Bottom and Left variables accordingly. According to the example above, we need to crop the image, from the position of the detected QR code, 20 px up, 40 px right, 60 px down and 20 px left. I have added an example image below.
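For concreteness, with the example values (size, top, right, bottom, left) = (100, 20, 40, 60, 20) and the QR code's top-left corner at (x, y), the crop arithmetic would look like this hedged one-line sketch (the variable names are illustrative):

# Hypothetical crop arithmetic for the example values above:
# 20 px above, 40 px right of, 60 px below, and 20 px left of the QR code
size, top, right, bottom, left = 100, 20, 40, 60, 20
roi = image[y - top : y + size + bottom, x - left : x + size + right]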
I have managed to decode the QR code information, but how can I take the detected QR code area as a separate image, compare its size with the mentioned size, and then crop the image accordingly?
Here's what I have tried so far:
import cv2
image = cv2.imread('/Users/abdul/PycharmProjects/QScanner/images/second.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(',')
qr_size = qr_data[0]
top = qr_data[1]
right = qr_data[2]
bottom = qr_data[3]
left = qr_data[4]
print(f'Size: {qr_size}' + str(qr_data[5]))
print(f'Top: {top}')
print(f'Right: {right}')
print(f'Bottom: {bottom}')
print(f'Left: {left}')
if points is not None:
    pts = len(points)
    print(pts)
    for i in range(pts):
        nextPointIndex = (i+1) % pts
        cv2.line(image, tuple(points[i][0]), tuple(points[nextPointIndex][0]), (255,0,0), 5)
        print(points[i][0])
    print(decodedText)
    cv2.imshow("Image", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("QR code not detected")
Here's an example image:
And here's a sample of the input image:
Here's a simple approach using thresholding, morphological operations, and contour filtering.
Obtain binary image. Load image, grayscale, Gaussian blur, Otsu's threshold
Connect individual QR contours. Create a rectangular structuring kernel with cv2.getStructuringElement() then perform morphological operations with cv2.MORPH_CLOSE.
Filter for QR code. Find contours and filter using contour approximation, contour area, and aspect ratio.
Detected QR code
Extracted QR code
From here you can compare the QR code with your reference information.
Code
import cv2
import numpy as np

# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph close
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)

# Find contours and filter for QR code
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.04 * peri, True)
    x,y,w,h = cv2.boundingRect(approx)
    area = cv2.contourArea(c)
    ar = w / float(h)
    if len(approx) == 4 and area > 1000 and (ar > .85 and ar < 1.3):
        cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
        ROI = original[y:y+h, x:x+w]
        cv2.imwrite('ROI.png', ROI)

cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('image', image)
cv2.imshow('ROI', ROI)
cv2.waitKey()
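From here, one way to do the size comparison (a sketch, not part of the answer above) is to run cv2.QRCodeDetector on the extracted ROI and compare its pixel width with the size declared in the decoded string:

# Hypothetical follow-up: decode the extracted ROI and compare sizes
detector = cv2.QRCodeDetector()
data, pts, _ = detector.detectAndDecode(ROI)
if data:
    declared_size = int(data.split(',')[0])  # first field is the declared size
    measured_size = ROI.shape[1]             # ROI width in pixels
    print('declared:', declared_size, 'measured:', measured_size)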
I got the width and height using points and compared them with the qr_data size. Then I cropped the QR code as needed.
import cv2
import math

image = cv2.imread('/ur/image/directory/qr.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)

qr_data = decodedText.split(',')
qr_size = qr_data[0]
top = qr_data[1]
right = qr_data[2]
bottom = qr_data[3]
left = qr_data[4]

if points is not None:
    pts = len(points)
    print(pts)
    for i in range(pts):
        nextPointIndex = (i+1) % pts
        cv2.line(image, tuple(points[i][0]), tuple(points[nextPointIndex][0]), (255,0,0), 5)
        print(points[i][0])

    width = int(math.sqrt((points[0][0][0]-points[1][0][0])**2 + (points[0][0][1]-points[1][0][1])**2))
    height = int(math.sqrt((points[1][0][0]-points[2][0][0])**2 + (points[1][0][1]-points[2][0][1])**2))

    # Compare the size (cast the decoded string to int before comparing)
    if width == int(qr_data[0]) and height == int(qr_data[0]):
        print("Sizes are equal")
    else:
        print("Width and height " + str(width) + "x" + str(height) + " not equal to "
              + str(qr_data[0]) + "x" + str(qr_data[0]))

    # Add the extension values to points and crop
    y = int(points[0][0][1]) - int(qr_data[1])
    x = int(points[0][0][0]) - int(qr_data[4])
    roi = image[y:y+height + int(qr_data[3]), x:x+width + int(qr_data[2])]

    print(decodedText)
    cv2.imshow("Image", image)
    cv2.imshow("Crop", roi)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("QR code not detected")
Result:
So, you mainly have 3 problems here:
If the image is rotated by an angle θ.
If the sheet is not flat, i.e., not on a plane. (In the images, the top edge doesn't look like a straight line, but that shouldn't be a big deal.)
The black borders. Will you always have those, or may there be a different background? This is important because without cropping those out, you won't be able to get a reasonable result.
I improved your code a little bit and removed the border pixels:
import cv2
import matplotlib.pyplot as plt
import math
import numpy as np
image = cv2.imread('/Users/samettaspinar/Public/im.jpg')
qrCodeDetector = cv2.QRCodeDetector()
decodedText, points, _ = qrCodeDetector.detectAndDecode(image)
qr_data = decodedText.split(',')
qr_size = int(qr_data[0])
top = int(qr_data[1])
right = int(qr_data[2])
bottom = int(qr_data[3])
left = int(qr_data[4])
print(f'Size: {qr_size}' + str(qr_data[5]))
print(f'Top: {top}')
print(f'Right: {right}')
print(f'Bottom: {bottom}')
print(f'Left: {left}')
plt.imshow(image)
plt.show()
# Estimate distances between corner points.
# I will average them to find the ratio of pixels in the image vs qr_size.
# In the optimal case, all dists should be equal.
dists = []
if points is not None:
    pts = len(points)
    for i in range(pts):
        p1 = points[i][0]
        p2 = points[(i+1) % pts][0]
        dists.append(math.sqrt((p1[0]-p2[0])**2 + (p1[1]-p2[1])**2))
        print('line', tuple(p1), tuple(p2))
        image = cv2.line(image, tuple(p1), tuple(p2), (255,0,0), 5)
else:
    print("QR code not detected")

print('distances: ', dists)

# Remove the black border pixels. I had a simple idea for this:
# get the average intensity of the gray image, then count how many
# row/column averages in the first half are less than intensity/2.
# That approximately gives the number of black border pixels on the left, etc.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
row, col, dim = image.shape  # image dimensions (needed below for the halves)
inten = np.mean(gray)

x = np.mean(gray, axis=0)  # vertical average (one value per column)
y = np.mean(gray, axis=1)  # horizontal average (one value per row)

bl_left = np.sum(x[:int(col/2)] < inten/2)
bl_right = np.sum(x[int(col/2)+1:] < inten/2)
bl_top = np.sum(y[:int(row/2)] < inten/2)
bl_bottom = np.sum(y[int(row/2)+1:] < inten/2)

print('black margins: ', bl_left, bl_right, bl_top, bl_bottom)

# Estimate how many pixels to crop out
ratio = np.mean(dists) / int(qr_size)
print('actual px / qr_size in px: ', ratio)

top, left, right, bottom = int(top*ratio), int(left*ratio), int(right*ratio), int(bottom*ratio)
top += bl_top
left += bl_left
right += bl_right
bottom += bl_bottom
print('num pixels to be cropped: ', top, left, right, bottom)

image2 = image[top:row-bottom, left:col-right, :]
plt.imshow(image2)
plt.show()
Notice that I ignored the rotation issue. If there is rotation, you can find the angle by calculating the tangent/arctangent where I calculated the distances, as in the sketch below.
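A hedged sketch of that idea, using the same points array as above and assuming points[0] and points[1] are the top two corners of the QR code:

# Estimate the in-plane rotation from the QR code's top edge
p1 = points[0][0]  # assumed top-left corner
p2 = points[1][0]  # assumed top-right corner
theta = math.degrees(math.atan2(p2[1] - p1[1], p2[0] - p1[0]))
print('estimated rotation:', theta, 'degrees')

The angle could then be undone with cv2.getRotationMatrix2D and cv2.warpAffine before cropping.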
For QR detection and parsing:
import cv2
import sys

filename = sys.argv[1]

# read the QR code image
# if the QR code is not black/white it is better to convert it to grayscale
img = cv2.imread(filename, 0)  # zero means grayscale
img_origin = cv2.imread(filename)

# initialize the cv2 QRCode detector
detector = cv2.QRCodeDetector()

# detect and decode
data, bbox, straight_qrcode = detector.detectAndDecode(img)

# if there is a QR code
if bbox is not None:
    print(f"QRCode data:\n{data}")
    # bbox has shape (1, N, 2) with float coordinates, so convert to int
    # and loop over the first element of the array
    n_lines = len(bbox[0])
    bbox1 = bbox.astype(int)  # float to int conversion
    for i in range(n_lines):
        # draw all lines of the bounding box
        point1 = tuple(bbox1[0, i])
        point2 = tuple(bbox1[0, (i + 1) % n_lines])
        cv2.line(img_origin, point1, point2, color=(255, 0, 0), thickness=2)
    # display the result (the original image with the lines drawn on it)
    cv2.imshow("img", img_origin)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("QR code not detected")
I am working on a sensor for a PID controller, so basically I have a camera recording a scene. In order for the control to work, I need to extract the tennis ball's position in the scene in each frame. The scene is set up with a white background and an orange tennis ball, as you can see in the image:
When I run it on my Lenovo IdeaPad 100 (which is quite a slow PC), I get the ball's position about once every 1.2 seconds.
I think this could be done much faster, but I don't know what to do.
Any tips and suggestions are welcome.
I'm expecting a system that can generate at least 2 positions per second.
Here is my code:
import numpy as np
import cv2
import time

cap = cv2.VideoCapture(0)

lower_red = np.array([0, 100, 100])
upper_red = np.array([20, 255, 255])

while True:
    start = time.time()
    # Read a color frame from the camera
    _, img = cap.read()
    print("read")
    # Blur the frame and convert it to HSV
    blurred = cv2.GaussianBlur(img, (11, 11), 0)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hsv2 = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
    mask2 = cv2.inRange(hsv2, lower_red, upper_red)
    mask2 = cv2.erode(mask2, None, iterations=2)
    mask2 = cv2.dilate(mask2, None, iterations=2)
    # Get the mask's width and height
    imageWidth = mask2.shape[1]
    imageHeight = mask2.shape[0]
    # Average the coordinates of all white pixels in the mask
    sum_i_white = 1
    num_white = 1
    sum_j_white = 1
    for i in range(0, imageHeight):
        for j in range(0, imageWidth):
            if mask2[i][j] == 255:
                sum_i_white += i
                sum_j_white += j
                num_white += 1
    cv2.circle(img, (int(sum_j_white/num_white), int(sum_i_white/num_white)), 3, (0, 0, 255), -1)
    cv2.putText(img, 'here', (int(sum_j_white/num_white), int(sum_i_white/num_white)),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 1, cv2.LINE_AA)
    cv2.imshow("final", img)
    print(time.time() - start)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
Do you know which part is slow?
I know you can call a C program from Python.
So, if your double for loop is very slow, you could write a C program that does it.
If you have just an array and an int as input, and an int as output, I think it shouldn't be hard.
But I don't know if this would be useful.
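Before reaching for C, it's worth noting that the same centroid can be computed without the Python-level double loop by vectorizing it in NumPy (a swapped-in alternative, not part of the answer above):

import numpy as np

# Replaces the double for loop: get the row/column indices of all
# white pixels at once, then average them to find the centroid
ys, xs = np.nonzero(mask2 == 255)
if len(xs) > 0:
    center = (int(xs.mean()), int(ys.mean()))  # (j, i) order, as used by cv2.circle

Since np.nonzero and .mean() run in compiled code, this typically turns a per-frame cost of around a second into milliseconds for a 640x480 mask.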
Newb question here... I have been following this guide to detect the distance between an object and the camera.
Here is the code I am currently running:
# import the necessary packages
import numpy as np
import cv2

def find_marker(image):
    # convert the image to grayscale, blur it, and detect edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 35, 125)
    # find the contours in the edged image and keep the largest one;
    # we'll assume that this is our piece of paper in the image
    (cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    c = max(cnts, key=cv2.contourArea)
    # compute the bounding box of the paper region and return it
    return cv2.minAreaRect(c)

def distance_to_camera(knownWidth, focalLength, perWidth):
    # compute and return the distance from the marker to the camera
    return (knownWidth * focalLength) / perWidth

# initialize the known distance from the camera to the object, which
# in this case is 24 inches
KNOWN_DISTANCE = 24.0
# initialize the known object width, which in this case, the piece of
# paper is 11 inches wide
KNOWN_WIDTH = 11.0
# initialize the list of images that we'll be using
IMAGE_PATHS = ["images/2ft.png", "images/3ft.png", "images/4ft.png"]

# load the first image that contains an object that is KNOWN TO BE 2 feet
# from our camera, then find the paper marker in the image, and initialize
# the focal length
image = cv2.imread(IMAGE_PATHS[0])
marker = find_marker(image)
focalLength = (marker[1][0] * KNOWN_DISTANCE) / KNOWN_WIDTH

# loop over the images
for imagePath in IMAGE_PATHS:
    # load the image, find the marker in the image, then compute the
    # distance to the marker from the camera
    image = cv2.imread(imagePath)
    marker = find_marker(image)
    inches = distance_to_camera(KNOWN_WIDTH, focalLength, marker[1][0])
    # draw a bounding box around the image and display it
    box = np.int0(cv2.cv.BoxPoints(marker))
    cv2.drawContours(image, [box], -1, (0, 255, 0), 2)
    cv2.putText(image, "%.2fft" % (inches / 12),
                (image.shape[1] - 200, image.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX,
                2.0, (0, 255, 0), 3)
    cv2.imshow("image", image)
    cv2.waitKey(0)
It works. However, I am unsure how to use the code to detect distances between an object and the camera in real time (from video) instead of from a picture taken beforehand.
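A minimal sketch of the real-time version, assuming the same find_marker, distance_to_camera, KNOWN_WIDTH, and focalLength from the calibration step above, and assuming a paper marker is visible in every frame:

import cv2

cap = cv2.VideoCapture(0)  # webcam instead of still images
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # same per-image pipeline, now applied to each video frame
    marker = find_marker(frame)
    inches = distance_to_camera(KNOWN_WIDTH, focalLength, marker[1][0])
    cv2.putText(frame, "%.2fft" % (inches / 12), (30, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

Note that find_marker as written assumes at least one contour exists; in a live feed you may want to guard against frames where Canny finds no edges.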
I am currently using a Tello drone with similar code. The difference is that I use a video, interpolate contours into rectangles, and track a model rocket launch. I think what you are looking for is the OpenCV code to process the video frames. This YouTube video uses the Tello video feed and OpenCV to calculate a contour box for a face: https://www.youtube.com/watch?v=LmEcyQnfpDA&t=7253s.
import cv2
from tracker import *
import math, time, numpy as np

# global variables
_w, _h = 0, 0
pid = [.01, .01, 0]
pError = 0

cap = cv2.VideoCapture("rocketVideo.mp4")
loop = True

rocketPositionList = []
rocketPositionListArea = []

# initialize the known distance from the camera to the object,
# which in this case is 480 inches
KNOWN_DISTANCE = 480
# initialize the known object width: the rocket is 5 inches wide
KNOWN_WIDTH = 5
inches = 0

# distance tracker from tracker.py
tracker = EuclideanDistTracker()

# object detector
object_detector = cv2.createBackgroundSubtractorMOG2(history=4000, varThreshold=330)

# calculate frame data to get rocket positions in the frame
def getFrameCalculation(frame):
    if len(rocketPositionListArea) != 0:
        i = rocketPositionListArea.index(max(rocketPositionListArea))
        return frame, [rocketPositionList[i], rocketPositionListArea[i]]
    else:
        return frame, [[0, 0], 0]

def distance_to_camera(perWidth):
    # compute and return the distance from the marker to the camera
    return (KNOWN_WIDTH * focalLength) / perWidth

while loop:
    # start reading OpenCV video frames
    ret, frame = cap.read()
    if not ret:
        break
    height, width, _ = frame.shape
    w = width
    h = height

    # extract region of interest
    roi = frame[0:1110, 0:720]

    # object detection
    mask = object_detector.apply(roi)
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    detections = []
    for cnt in contours:
        # calculate area and remove small elements
        area = cv2.contourArea(cnt)
        # get relative area of rocket contour
        if area > 100:
            x, y, w, h = cv2.boundingRect(cnt)
            if x > 210:
                detections.append([x, y, w, h])

    # update box ids from tracker->detections
    boxes_ids = tracker.update(detections)

    # create visual data for boxIds
    for boxes_id in boxes_ids:
        x, y, w, h, id = boxes_id
        cv2.putText(roi, str(id), (x, y - 15), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 2)
        cv2.rectangle(roi, (x, y), (x + w, y + h), (0, 255, 0), 3)
        # get circle center and area
        cx = x + w // 2
        cy = y + h // 2
        area = w * h
        cv2.circle(roi, (cx, cy), 4, (0, 0, 255), cv2.FILLED)
        rocketPositionList.append([cx, cy])
        rocketPositionListArea.append(area)

    # get the first known focal length (needs at least one detection)
    if rocketPositionListArea:
        focalLength = (rocketPositionListArea[0] * KNOWN_DISTANCE) / KNOWN_WIDTH

    # calculate img frame in video
    frame, info = getFrameCalculation(frame)
    feetOut = 0
    if info[1] > 0:
        # get inches from distance to camera
        inches = distance_to_camera(info[1])
        # give data to Tello to operate movement action
        feetOut = inches / 12
    cv2.putText(roi, str(int(feetOut)) + "ft.", (50, 50), cv2.FONT_HERSHEY_PLAIN, 4, (255, 0, 0), 2)

    # display cv2 videos (resized copies so they fit on screen;
    # the original resize definitions were not shown, so these are assumed)
    resizeR = cv2.resize(roi, None, fx=0.5, fy=0.5)
    resizeM = cv2.resize(mask, None, fx=0.5, fy=0.5)
    resizeF = cv2.resize(frame, None, fx=0.5, fy=0.5)
    cv2.imshow("ROI", resizeR)
    cv2.imshow("Mask", resizeM)
    cv2.imshow("Frame", resizeF)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# clean up cv2 and exit
cap.release()
cv2.destroyAllWindows()
exit()