I have a project that combines data from two different sensors, a TF-Luna LiDAR and a Raspberry Pi Camera Module V2, for an object-detection self-driving vehicle. I've tried threading both sensors so they can run concurrently, but I have trouble displaying the data together. Sometimes it works, but the LiDAR reading won't continuously update itself. Here's the code I'm using for the LiDAR:
def read_tfluna_data():
    while True:
        counter = ser.in_waiting  # count the number of bytes of the serial port
        if counter > 8:
            bytes_serial = ser.read(9)  # read 9 bytes
            ser.reset_input_buffer()  # reset buffer
            if bytes_serial[0] == 0x59 and bytes_serial[1] == 0x59:  # check first two bytes
                distance = bytes_serial[2] + bytes_serial[3]*256  # distance in next two bytes
                return distance

class lidar:
    def update(self):
        # Keep looping indefinitely until the thread is stopped
        while True:
            distance = read_tfluna_data()
            return distance

    def __init__(self):
        distance = Thread(target=self.update, args=())
        distance.start()
and this is the code for the display:
while True:
    # Grab frame from video stream
    frame1 = videostream.read()

    # Acquire frame and resize to expected shape [1xHxWx3]
    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]  # Confidence of detected objects

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions,
            # need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            # Draw label
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

    # Draw LiDAR distance in corner of frame
    cv2.putText(frame, 'Distance: %s cm' % dist_data, (900, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

    # Draw rudder direction in corner of frame
    cv2.putText(frame, 'Rudder dir: forward!', (50, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

    # All the results have been drawn on the frame, so it's time to display it
    cv2.imshow('Object detector', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
I've tried putting .update() inside the while True loop, but it stalls the program and eventually crashes it. I've also tried turning the serial port on and off, editing the boot config to enable UART, and turning off the USB serial.
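From what I've read, the usual pattern seems to be to keep the latest reading in an attribute that the display loop polls, rather than returning it from the thread. Below is a rough sketch of what I mean, reusing read_tfluna_data() from above (self.distance, self.stopped and the daemon flag are my own naming/choices, and I haven't confirmed this solves the problem):

from threading import Thread

class Lidar:
    """Reader thread that keeps the latest distance in an attribute."""
    def __init__(self):
        self.distance = 0      # latest reading, polled by the display loop
        self.stopped = False
        Thread(target=self.update, daemon=True).start()

    def update(self):
        # keep overwriting the latest distance instead of returning once
        while not self.stopped:
            self.distance = read_tfluna_data()

    def stop(self):
        self.stopped = True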
I am new to YOLOv3 and trying to do object detection with it, but I'm getting the error shown below. If possible, please tell me what I am doing wrong.
This is the traceback from the screenshot:
Traceback (most recent call last):
File "c:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-3-video.py", line 359, in
print('FPS:', round((f / t), 1))
ZeroDivisionError: division by zero
# Importing needed libraries
import numpy as np
import cv2
import time
video = cv2.VideoCapture(r'videos\traffic-cars.mp4')
# Preparing variable for writer
# that we will use to write processed frames
writer = None
# Preparing variables for spatial dimensions of the frames
h, w = None, None
with open(r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\coco.names') as f:
    # Getting labels reading every line
    # and putting them into the list
    labels = [line.strip() for line in f]

network = cv2.dnn.readNetFromDarknet(r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\yolov3.cfg',
                                     r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\yolov3.weights')

layers_names_all = network.getLayerNames()

# Getting only output layers' names that we need from YOLO v3 algorithm
# with function that returns indexes of layers with unconnected outputs
layers_names_output = \
    [layers_names_all[i - 1] for i in network.getUnconnectedOutLayers()]
# Setting minimum probability to eliminate weak predictions
probability_minimum = 0.5

# Setting threshold for non-maximum suppression, used by cv2.dnn.NMSBoxes below
# (this variable is referenced later but never defined in the snippet; 0.3 is an assumed value)
threshold = 0.3
colours = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')
f = 0
# Defining variable for counting total time
# At the end we will show time spent for processing all frames
t = 0
# Defining loop for catching frames
while True:
    # Capturing frame-by-frame
    ret, frame = video.read()
    if not ret:
        break

    if w is None or h is None:
        # Slicing from tuple only first two elements
        h, w = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)

    network.setInput(blob)  # setting blob as input to the network
    start = time.time()
    output_from_network = network.forward(layers_names_output)
    end = time.time()

    f += 1
    t += end - start
    print('Frame number {0} took {1:.5f} seconds'.format(f, end - start))

    bounding_boxes = []
    confidences = []
    class_numbers = []

    # Going through all output layers after feed forward pass
    for result in output_from_network:
        # Going through all detections from current output layer
        for detected_objects in result:
            # Getting 80 classes' probabilities for current detected object
            scores = detected_objects[5:]
            # Getting index of the class with the maximum value of probability
            class_current = np.argmax(scores)
            # Getting value of probability for defined class
            confidence_current = scores[class_current]

            # Eliminating weak predictions with minimum probability
            if confidence_current > probability_minimum:
                # Scaling bounding box coordinates to the initial frame size.
                # YOLO keeps coordinates for the center of the bounding box
                # together with its width and height, so multiplying them
                # elementwise by the width and height of the original frame
                # gives the center coordinates, width and height in the original frame
                box_current = detected_objects[0:4] * np.array([w, h, w, h])

                # Now, from the YOLO data format, we can get the top left corner
                # coordinates, x_min and y_min
                x_center, y_center, box_width, box_height = box_current
                x_min = int(x_center - (box_width / 2))
                y_min = int(y_center - (box_height / 2))

                # Adding results into prepared lists
                bounding_boxes.append([x_min, y_min,
                                       int(box_width), int(box_height)])
                confidences.append(float(confidence_current))
                class_numbers.append(class_current)

    results = cv2.dnn.NMSBoxes(bounding_boxes, confidences,
                               probability_minimum, threshold)

    if len(results) > 0:
        # Going through indexes of results
        for i in results.flatten():
            # Getting current bounding box coordinates,
            # its width and height
            x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
            box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]

            # Preparing colour for current bounding box
            # and converting from numpy array to list
            colour_box_current = colours[class_numbers[i]].tolist()

            # # # Check point
            # print(type(colour_box_current))  # <class 'list'>
            # print(colour_box_current)  # [172, 10, 127]

            # Drawing bounding box on the original current frame
            cv2.rectangle(frame, (x_min, y_min),
                          (x_min + box_width, y_min + box_height),
                          colour_box_current, 2)

            # Preparing text with label and confidence for current bounding box
            text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
                                                   confidences[i])

            # Putting text with label and confidence on the original image
            cv2.putText(frame, text_box_current, (x_min, y_min - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)

    if writer is None:
        # Constructing code of the codec
        # to be used in the function VideoWriter
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        # Writing current processed frame into the video file
        # Pay attention! If you're using Windows, your path might look like:
        # r'videos\result-traffic-cars.mp4'
        # or:
        # 'videos\\result-traffic-cars.mp4'
        writer = cv2.VideoWriter('videos/result-traffic-cars.mp4', fourcc, 30,
                                 (frame.shape[1], frame.shape[0]), True)

    # Write processed current frame to the file
    writer.write(frame)

    """
    End of:
    Writing processed frame into the file
    """

"""
End of:
Reading frames in the loop
"""

# Printing final results
print()
print('Total number of frames', f)
print('Total amount of time {:.5f} seconds'.format(t))
print('FPS:', round((f / t), 1))

# Releasing video reader and writer
video.release()
writer.release()
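I'm guessing the division fails because no frames are ever read (for example if the video path is wrong), so f and t both stay 0. A guard like the following would at least avoid the crash, though I don't think it fixes the underlying issue:

# only print FPS if at least one frame was actually processed
if t > 0:
    print('FPS:', round((f / t), 1))
else:
    print('No frames were processed - check the path passed to cv2.VideoCapture')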
I have a script in Python which acts as a motion detector. I read a video file using cv2, convert it to grayscale, and do simple background subtraction against the current frame to detect motion, which I draw a rectangle over. The video is eventually saved as a new file, where I can finally view it.
This works fine, except that sometimes the starting (background) frame already has motion in it, or there are features in the background which move but that I don't want to detect (e.g. if I was detecting people, I wouldn't be interested in a flag blowing in the breeze). So I want to somehow disregard 'stationary' movement (i.e. motion which does not move vertically/horizontally over the course of the video). However, I'm having trouble with my approach, and there don't seem to be any functions or scripts on the internet that solve this.
One idea I had was to draw a larger rectangle over the original, and then, if the original rectangle never leaves the outer rectangle (which stays put) over the course of the video, that motion can be cancelled altogether. I have no idea how to implement this. I have managed to draw a larger rectangle, but it follows the original and doesn't stay in place (see the rough sketch after my code below).
Does anyone have any idea how I might be able to do this? Or any resources they could point me to? Thank you. Below is my code, starting from where I draw the rectangles.
for c in cnts:
    # if the contour is too small, ignore it
    if cv2.contourArea(c) < min_area:
        continue

    # compute the bounding box for the contour, draw it on the frame, and update the text
    (x, y, w, h) = cv2.boundingRect(c)
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    text = "Occupied"  # frame is occupied

    half_w = int(w/2)  # get 50% sizing width
    half_h = int(h/2)  # get 50% sizing height
    x_surr = int(x - (half_w/2))
    y_surr = int(y - (half_h/2))
    w_surr = (w + half_w)
    h_surr = (h + half_h)
    cv2.rectangle(frame, (x_surr, y_surr), (x_surr + w_surr, y_surr + h_surr), (255, 255, 255), 2)
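What I'm imagining is roughly the following kind of check, where the outer rectangle is remembered from the first frame it was drawn in instead of being recomputed every frame (the names here are placeholders, not working code from my script):

# placeholder sketch: outer_rect starts as None before the frame loop,
# gets frozen on the first detection, then later detections are tested against it
if outer_rect is None:
    outer_rect = (x_surr, y_surr, w_surr, h_surr)

ox, oy, ow, oh = outer_rect
stayed_inside = (x >= ox and y >= oy and
                 x + w <= ox + ow and y + h <= oy + oh)
# if stayed_inside is True for the whole video, treat the motion as 'stationary'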
I think this code might help you. Basically, it compares the value of each pixel in the current frame to the corresponding pixel in the average of the previous n frames. When no motion is present, the result is all black; when there is motion, it shows the colour of the moving object. Since it keeps a running average of recent frames, you should be able to filter out slight movements such as flags fluttering. You will probably need to play around with some thresholding on the final image to get the result you want (a rough starting point is sketched after the code below).
Stillness:
Motion:
import cv2

def main():
    # define the length of the list of recent frames to keep track of
    NUMBER_FRAMES_TO_TRACK = 30

    # start the webcam
    cap = cv2.VideoCapture(1)
    ret, frame = cap.read()
    if ret == False:
        print("No webcam detected.")
        return

    # generate a list of recent frames
    recent_frames = [frame for n in range(NUMBER_FRAMES_TO_TRACK)]

    # start the video loop
    while True:
        ret, frame = cap.read()
        if ret == False:
            break

        # update the list of recent frames with the most recent frame
        recent_frames = recent_frames[1:]
        recent_frames.append(frame)

        # calculate the average of all recent frames
        average = recent_frames[0]
        for i in range(len(recent_frames)):
            if i == 0:
                pass
            else:
                alpha = 1.0/(i + 1)
                beta = 1.0 - alpha
                average = cv2.addWeighted(recent_frames[i], alpha, average, beta, 0.0)

        # find the difference between the current frame and the average of recent frames
        difference = cv2.subtract(frame, average)

        # show the results
        cv2.imshow("video", frame)
        cv2.imshow("average", average)
        cv2.imshow("difference", difference)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    cv2.destroyAllWindows()
    cap.release()

if __name__ == "__main__":
    main()
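As a rough starting point for the thresholding mentioned above, you could reduce the difference image to a binary motion mask like this (the value 25 is arbitrary and will need tuning):

import cv2

def motion_mask(difference_bgr, thresh=25):
    """Turn the BGR difference image into a binary motion mask.
    The threshold value is arbitrary and needs tuning for your footage."""
    gray = cv2.cvtColor(difference_bgr, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
    return mask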
This is rather a theoretical question than asking for specific code issue.
I have done a bit of facial landmark detection using Haar Cascades, but this time I have a different type of video on my hands. It's a side view of a horse's eye (camera is mounted to the side of the head) so essentially what I see is a giant eye. I tried using Haar Cascades but it's no use, since there is no face to be detected in my video.
I was wondering what the best way to detect the eye and blinks would be on this horse? Do I try and customize a dlib facial mark detector? I didn't find much information on animal landmarks.
Thanks in advance! :)
I used an object tracker to continue locating the eye after drawing a bounding box around it on the first frame.
I created a set width and height bounding box since we can roughly assume that the eye isn't growing or shrinking relative to the camera. When drawing the bounding box for the tracker, we have to include more than just the eye since it would otherwise lose track of the object whenever they blink.
I looked for whether the saturation of the bounded area dropped below a threshold in each frame as a check for whether or not they blinked. The blue box is the bounding box returned by the tracker, the green box is the area I'm cropping and checking the saturation level of.
Here's a graph of the saturation level over the course of the video
You can clearly see the areas where they blinked
Here's a (heavily compressed to make the 2mb limit) gif of the result
import cv2
import numpy as np
import math

# tuplifies things for opencv
def tup(p):
    return (int(p[0]), int(p[1]))

# returns the center of the box
def getCenter(box):
    x = box[0]
    y = box[1]
    x += box[2] / 2.0
    y += box[3] / 2.0
    return [x, y]

# rescales image by percent
def rescale(img, scale):
    h, w = img.shape[:2]
    h = int(h*scale)
    w = int(w*scale)
    return cv2.resize(img, (w, h))

# load video
cap = cv2.VideoCapture("blinking.mov")
scale = 0.5

# font stuff
font = cv2.FONT_HERSHEY_SIMPLEX
org = (50, 50)
fontScale = 1
font_color = (255, 255, 0)
thickness = 2

# set up tracker
tracker = cv2.TrackerCSRT_create()  # I'm using OpenCV 3.4
backup = cv2.TrackerCSRT_create()

# grab the first frame
_, frame = cap.read()
frame = rescale(frame, scale)

# init tracker
box = cv2.selectROI(frame, False)
tracker.init(frame, box)
backup.init(frame, box)
cv2.destroyAllWindows()

# set center bounds
width = 75
height = 60

# save numbers
file_index = 0

# blink counter
blinks = 0
blink_thresh = 35
blink_trigger = True

# show video
done = False
while not done:
    # get frame
    ret, frame = cap.read()
    if not ret:
        break
    frame = rescale(frame, scale)

    # choose a color space
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    channel = s

    # grab tracking box
    ret, box = tracker.update(frame)
    if ret:
        # get the center
        center = getCenter(box)
        x, y = center

        # make box on center
        tl = [x - width, y - height]
        br = [x + width, y + height]
        tl = tup(tl)
        br = tup(br)

        # get top left and bottom right
        p1 = [box[0], box[1]]
        p2 = [p1[0] + box[2], p1[1] + box[3]]
        p1 = tup(p1)
        p2 = tup(p2)

        # draw a roi around the image
        cv2.rectangle(frame, p1, p2, (255, 0, 0), 3)
        cv2.rectangle(frame, tl, br, (0, 255, 0), 3)
        cv2.circle(frame, tup(center), 6, (0, 0, 255), -1)

        # get the channel average in the box
        slc = channel[tl[1]:br[1], tl[0]:br[0]]
        ave = np.mean(slc)

        # if it dips below a set value, then trigger a blink
        if ave < blink_thresh:
            if blink_trigger:
                blinks += 1
                blink_trigger = False
        else:
            blink_trigger = True

    # draw blink count
    frame = cv2.putText(frame, "Blinks: " + str(blinks), org, font, fontScale,
                        font_color, thickness, cv2.LINE_AA)

    # show
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1)

    # check keypress
    done = key == ord('q')
def run(self):
    while True:
        _ret, frame = self.cam.read()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        vis = frame.copy()

        if len(self.tracks) > 0:
            img0, img1 = self.prev_gray, frame_gray
            p0 = np.float32([tr[-1] for tr in self.tracks]).reshape(-1, 1, 2)
            p1, _st, _err = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
            p0r, _st, _err = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
            d = abs(p0-p0r).reshape(-1, 2).max(-1)
            good = d < 1
            new_tracks = []

            for i in range(len(p1)):
                A.append(math.sqrt((p1[i][0][0])**2 + (p1[i][0][1])**2))
            counts, bins, bars = plt.hist(A)

            for tr, (x, y), good_flag in zip(self.tracks, p1.reshape(-1, 2), good):
                if not good_flag:
                    continue
                tr.append((x, y))
                if len(tr) > self.track_len:
                    del tr[0]
                new_tracks.append(tr)
                cv2.circle(vis, (x, y), 2, (0, 255, 0), -1)

            self.tracks = new_tracks
            cv2.polylines(vis, [np.int32(tr) for tr in self.tracks], False, (0, 255, 0))
            draw_str(vis, (20, 20), 'track count: %d' % len(self.tracks))

        if self.frame_idx % self.detect_interval == 0:
            mask = np.zeros_like(frame_gray)
            mask[:] = 255
            for x, y in [np.int32(tr[-1]) for tr in self.tracks]:
                cv2.circle(mask, (x, y), 5, 0, -1)
            p = cv2.goodFeaturesToTrack(frame_gray, mask=mask, **feature_params)
            if p is not None:
                for x, y in np.float32(p).reshape(-1, 2):
                    self.tracks.append([(x, y)])

        self.frame_idx += 1
        self.prev_gray = frame_gray
        cv2.imshow('lk_track', vis)

        ch = cv2.waitKey(1)
        if ch == 27:
            break
I am using lk_track.py from the OpenCV samples to try and detect a moving object. I am trying to find the camera motion using the histogram of magnitudes of the optical flow vectors, and then calculate the average of the similar values, which should be directly proportional to the camera motion. I have calculated the magnitude of the vectors and saved it in a list A. Can someone suggest how to find the highest similar values from it and calculate the average for only those values?
I created a toy problem to model the approach of binarizing the images by optical flow. This is a massively simplified view of the problem, but gives the general idea well. I'll split the problem up into a few chunks and give functions for them. If you're working directly with video, there will be a lot of additional code needed of course, and I just hardcoded a lot of values that you'll need to turn into parameters.
The first function is just for generating the image sequence. The camera view translates through a scene that contains an object; the object appears stationary in the sequence, which of course means the object is actually moving in the opposite direction of the camera.
import numpy as np
import cv2

def gen_seq():
    """Generate motion sequence with an object"""
    scene = cv2.GaussianBlur(np.uint8(255*np.random.rand(400, 500)), (21, 21), 3)
    h, w = 400, 400
    step = 4
    obj_mask = np.zeros((h, w), bool)  # np.bool in the original; plain bool on newer NumPy
    obj_h, obj_w = 50, 50
    obj_x, obj_y = 175, 175
    obj_mask[obj_y:obj_y+obj_h, obj_x:obj_x+obj_w] = True
    obj_data = np.uint8(255*np.random.rand(obj_h, obj_w)).ravel()
    imgs = []
    for i in range(0, 1+w//step, step):
        img = scene[:, i:i+w].copy()
        img[obj_mask] = obj_data
        imgs.append(img)
    return imgs

# generate image sequence
imgs = gen_seq()

# display images
for img in imgs:
    cv2.imshow('Image', img)
    k = cv2.waitKey(100) & 0xFF
    if k == ord('q'):
        break
cv2.destroyWindow('Image')
So here's the basic image sequence visualized. I just used a random scene, translated through, and added a random object in the center.
Great! Now we need to calculate the flow between each frame. I used dense flow here, but sparse flow would be more robust for actual images.
def find_flows(imgs):
    """Finds the dense optical flows"""
    optflow_params = [0.5, 3, 15, 3, 5, 1.2, 0]
    prev = imgs[0]
    flows = []
    for img in imgs[1:]:
        flow = cv2.calcOpticalFlowFarneback(prev, img, None, *optflow_params)
        flows.append(flow)
        prev = img
    return flows

# find optical flows between images
flows = find_flows(imgs)

# display flows
h, w = imgs[0].shape[:2]
hsv = np.zeros((h, w, 3), dtype=np.uint8)
hsv[..., 1] = 255
for flow in flows:
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang*180/np.pi/2
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    cv2.imshow('Flow', rgb)
    k = cv2.waitKey(100) & 0xFF
    if k == ord('q'):
        break
cv2.destroyWindow('Flow')
Here I colorized the flow based on its angle and magnitude: the angle determines the color, and the magnitude determines the intensity/brightness of that color. This is the same view the OpenCV tutorial on dense optical flow uses.
Then, we need to binarize this flow so that we get two distinct sets of pixels based on how they're moving. In the sparse case, this works out the same except you will get two distinct sets of features.
def label_flows(flows):
    """Binarizes the flows by direction and magnitude"""
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    flags = cv2.KMEANS_RANDOM_CENTERS
    h, w = flows[0].shape[:2]
    labeled_flows = []
    for flow in flows:
        flow = flow.reshape(h*w, -1)
        comp, labels, centers = cv2.kmeans(flow, 2, None, criteria, 10, flags)
        n = np.sum(labels == 1)
        camera_motion_label = np.argmax([labels.size-n, n])
        labeled = np.uint8(255*(labels.reshape(h, w) == camera_motion_label))
        labeled_flows.append(labeled)
    return labeled_flows

# binarize the flows
labeled_flows = label_flows(flows)

# display binarized flows
for labeled_flow in labeled_flows:
    cv2.imshow('Labeled Flow', labeled_flow)
    k = cv2.waitKey(100) & 0xFF
    if k == ord('q'):
        break
cv2.destroyWindow('Labeled Flow')
The annoying thing here is that the labels are assigned randomly, i.e. they will be different for each frame; if you visualized the binary image, it would flip between black and white at random. Since I'm only using binary labels, 0 and 1, what I did was consider the label assigned to more pixels to be the "camera motion label", set that label to white in the resulting images, and set the other label to black. That way the camera motion label is always the same in each frame. This may need to be much more sophisticated for working on a video feed.
But here we have it, a binarized flow where the color is just showing the two distinct sets of flow vectors.
Now if we wanted to find the target in this flow, we could invert the image and find the connected components of the binary image. The inversion will make the camera motion the background label (0). Then each of the black blobs will be white and will be labeled, and we could find the blob relating to the largest component which, in this case, will be the target. That will give a mask around the target, and we can draw the contours of that mask on the original images to see the target being detected. I'll also cut the borders of the image off before finding the connected components so edge effects from dense flow are ignored.
def find_target_in_labeled_flow(labeled_flow):
    labeled_flow = cv2.bitwise_not(labeled_flow)
    bw = 10
    h, w = labeled_flow.shape[:2]
    border_cut = labeled_flow[bw:h-bw, bw:w-bw]
    conncomp, stats = cv2.connectedComponentsWithStats(border_cut, connectivity=8)[1:3]
    target_label = np.argmax(stats[1:, cv2.CC_STAT_AREA]) + 1
    img = np.zeros_like(labeled_flow)
    img[bw:h-bw, bw:w-bw] = 255*(conncomp == target_label)
    return img

for labeled_flow, img in zip(labeled_flows, imgs[:-1]):
    target_mask = find_target_in_labeled_flow(labeled_flow)
    display_img = cv2.merge([img, img, img])
    contours = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[1]
    display_img = cv2.drawContours(display_img, contours, -1, (0, 255, 0), 2)
    cv2.imshow('Detected Target', display_img)
    k = cv2.waitKey(100) & 0xFF
    if k == ord('q'):
        break
And of course this could get some cleaning up, and you won't be doing exactly this for sparse flow. You could just define a region of interest around the tracked points.
Now, there is still a lot of work to do. You have a binarized flow... you can probably safely assume, as I did, that the label which occurs most frequently is the camera motion. However, you'll have to make sure that the other label is the object you're interested in tracking. You'll have to keep track of it between flows so that if it stops moving, you'll know where it is while the camera is moving. When you do the k-means step, you'll want to make sure that the centers from k-means are "far enough" apart so that you know whether the object is moving or not (a rough sketch of this check follows the steps below).
The basic steps for that would be, from the starting frame of the video:
If the two centers are "close", then you can assume your object is either not in the scene or not moving in the scene.
Once the centers are split enough apart, you'll have found the object to track. Keep track of the location of the object.
During tracking of the object, verify the location is nearby a prediction. You can use the optical flow velocity vectors from the previous frame to predict the location each pixel/feature in the new frame, so make sure your predictions agree with your tracking result.
If the object stops moving, the centers from k-means should be close. Keep track of the optical flow vectors around the object location and follow them to have a prediction of where the object is again once it resumes moving, and again verify the detected location with this prediction.
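A rough sketch of that "far enough" check on the two cluster centers returned by cv2.kmeans (the minimum distance is an arbitrary value in flow units that you'd need to tune):

import numpy as np

def centers_far_enough(centers, min_dist=1.0):
    """Check whether the two k-means cluster centers of the flow vectors are
    separated enough to treat one as camera motion and the other as the object.
    min_dist is in flow units (pixels per frame) and needs tuning."""
    c0, c1 = np.asarray(centers[0], float), np.asarray(centers[1], float)
    return np.linalg.norm(c0 - c1) >= min_dist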
I've never used these methods before so I'm not sure how robust they are. The typical approach for HOOF or "Histogram of oriented optical flow" is much more advanced than this (see the seminal paper here). Instead of just binarizing, the idea is to use histograms from each frame as a probability distribution, and the way this probability distribution changes over time can be analyzed with the tools from time series analysis, which I assume give a more robust framework to this approach.
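I haven't implemented the method from that paper, but the per-frame histogram idea might look roughly like this (the bin count and normalization are my own choices, not the paper's exact formulation):

import numpy as np

def flow_histogram(flow, bins=8):
    """Histogram of flow orientations for one frame, weighted by magnitude
    and normalized so it can be read as a probability distribution."""
    fx, fy = flow[..., 0].ravel(), flow[..., 1].ravel()
    mag = np.hypot(fx, fy)
    ang = np.arctan2(fy, fx)  # orientation in [-pi, pi]
    hist, _ = np.histogram(ang, bins=bins, range=(-np.pi, np.pi), weights=mag)
    total = hist.sum()
    return hist / total if total > 0 else hist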
When using @alkasm's answer, to avoid the following error:
(-215:Assertion failed) npoints > 0 in function 'drawContours'
simply replace:
contours = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[1]
with
contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
I can't post this as a comment under that answer because my account is new and has low reputation.
I'm still hacking together a book scanning script, and for now, all I need is to be able to automagically detect a page turn. The book fills up 90% of the screen (I'm using a cruddy webcam for the motion detection), so when I turn a page, the direction of motion is basically in that same direction.
I have modified a motion-tracking script, but derivatives are getting me nowhere:
#!/usr/bin/env python
import cv, numpy

class Target:
    def __init__(self):
        self.capture = cv.CaptureFromCAM(0)
        cv.NamedWindow("Target", 1)

    def run(self):
        # Capture first frame to get size
        frame = cv.QueryFrame(self.capture)
        frame_size = cv.GetSize(frame)
        grey_image = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_8U, 1)
        moving_average = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_32F, 3)
        difference = None
        movement = []

        while True:
            # Capture frame from webcam
            color_image = cv.QueryFrame(self.capture)

            # Smooth to get rid of false positives
            cv.Smooth(color_image, color_image, cv.CV_GAUSSIAN, 3, 0)

            if not difference:
                # Initialize
                difference = cv.CloneImage(color_image)
                temp = cv.CloneImage(color_image)
                cv.ConvertScale(color_image, moving_average, 1.0, 0.0)
            else:
                cv.RunningAvg(color_image, moving_average, 0.020, None)

            # Convert the scale of the moving average.
            cv.ConvertScale(moving_average, temp, 1.0, 0.0)

            # Minus the current frame from the moving average.
            cv.AbsDiff(color_image, temp, difference)

            # Convert the image to grayscale.
            cv.CvtColor(difference, grey_image, cv.CV_RGB2GRAY)

            # Convert the image to black and white.
            cv.Threshold(grey_image, grey_image, 70, 255, cv.CV_THRESH_BINARY)

            # Dilate and erode to get object blobs
            cv.Dilate(grey_image, grey_image, None, 18)
            cv.Erode(grey_image, grey_image, None, 10)

            # Calculate movements
            storage = cv.CreateMemStorage(0)
            contour = cv.FindContours(grey_image, storage, cv.CV_RETR_CCOMP, cv.CV_CHAIN_APPROX_SIMPLE)
            points = []

            while contour:
                # Draw rectangles
                bound_rect = cv.BoundingRect(list(contour))
                contour = contour.h_next()
                pt1 = (bound_rect[0], bound_rect[1])
                pt2 = (bound_rect[0] + bound_rect[2], bound_rect[1] + bound_rect[3])
                points.append(pt1)
                points.append(pt2)
                cv.Rectangle(color_image, pt1, pt2, cv.CV_RGB(255, 0, 0), 1)

            num_points = len(points)
            if num_points:
                x = 0
                for point in points:
                    x += point[0]
                x /= num_points
                movement.append(x)

            if len(movement) > 0 and numpy.average(numpy.diff(movement[-30:-1])) > 0:
                print 'Left'
            else:
                print 'Right'

            # Display frame to user
            cv.ShowImage("Target", color_image)

            # Listen for ESC or ENTER key
            c = cv.WaitKey(7) % 0x100
            if c == 27 or c == 10:
                break

if __name__ == "__main__":
    t = Target()
    t.run()
It detects the average motion of the average center of all of the boxes, which is extremely inefficient. How would I go about detecting such motions quickly and accurately (i.e. within a threshold)?
I'm using Python, and I plan to stick with it, as my whole framework is based on Python.
Any help is appreciated, so thank you all in advance. Cheers.
I haven't used OpenCV in Python before, just a bit in C++ with openframeworks.
For this I presume OpticalFlow's velx,vely properties would work.
For more on how Optical Flow works check out this paper.
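I haven't used the Python bindings myself, but with the newer cv2 API I'd expect the same idea to look roughly like this (untested sketch): average the horizontal flow component and read its sign as the page-turn direction.

import cv2
import numpy as np

def horizontal_motion(prev_gray, curr_gray):
    """Untested sketch: dense optical flow between two grayscale frames,
    reduced to the mean horizontal component. Its sign should indicate the
    direction of the page turn (sign convention and threshold need checking)."""
    flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    return float(np.mean(flow[..., 0]))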
HTH
Why don't you use cv.GoodFeaturesToTrack? It may improve the script's runtime and shorten the code.
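On newer OpenCV versions the equivalent call is cv2.goodFeaturesToTrack; a minimal sketch (the parameter values are just typical defaults to tune):

import cv2

def page_corners(frame_bgr, max_corners=100):
    """Pick strong corners on the page; tracking how they move between
    frames gives the direction of the page turn."""
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    return cv2.goodFeaturesToTrack(gray, maxCorners=max_corners,
                                   qualityLevel=0.01, minDistance=10)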