I am new to YOLOv3 and am trying to do object detection with it, but I am getting the error shown below. If possible, please tell me what I am doing wrong.
This is the traceback:
Traceback (most recent call last):
File "c:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-3-video.py", line 359, in
print('FPS:', round((f / t), 1))
ZeroDivisionError: division by zero
# Importing needed libraries
import numpy as np
import cv2
import time
video = cv2.VideoCapture(r'videos\traffic-cars.mp4')
# Preparing variable for writer
# that we will use to write processed frames
writer = None
# Preparing variables for spatial dimensions of the frames
h, w = None, None
with open(r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\coco.names') as f:
# Getting labels reading every line
# and putting them into the list
labels = [line.strip() for line in f]
network = cv2.dnn.readNetFromDarknet(r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\yolov3.cfg',
r'C:\Yolo\YOLO-3-OpenCV\YOLO-3-OpenCV\yolo-coco-data\yolov3.weights')
layers_names_all = network.getLayerNames()
# Getting only output layers' names that we need from YOLO v3 algorithm
# with function that returns indexes of layers with unconnected outputs
layers_names_output = \
[layers_names_all[i - 1] for i in network.getUnconnectedOutLayers()]
# Setting minimum probability to eliminate weak predictions
probability_minimum = 0.5
colours = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')
f = 0
# Defining variable for counting total time
# At the end we will show time spent for processing all frames
t = 0
# Defining loop for catching frames
while True:
# Capturing frame-by-frame
ret, frame = video.read()
if not ret:
break
if w is None or h is None:
# Slicing from tuple only first two elements
h, w = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
network.setInput(blob) # setting blob as input to the network
start = time.time()
output_from_network = network.forward(layers_names_output)
end = time.time()
f += 1
t += end - start
print('Frame number {0} took {1:.5f} seconds'.format(f, end - start))
bounding_boxes = []
confidences = []
class_numbers = []
# Going through all output layers after feed forward pass
for result in output_from_network:
# Going through all detections from current output layer
for detected_objects in result:
# Getting 80 classes' probabilities for current detected object
scores = detected_objects[5:]
# Getting index of the class with the maximum value of probability
class_current = np.argmax(scores)
# Getting value of probability for defined class
confidence_current = scores[class_current]
# Eliminating weak predictions with minimum probability
if confidence_current > probability_minimum:
# Scaling bounding box coordinates to the initial frame size
# YOLO data format keeps coordinates for center of bounding box
# and its current width and height
# That is why we can just multiply them elementwise
# to the width and height
# of the original frame and in this way get coordinates for center
# of bounding box, its width and height for original frame
box_current = detected_objects[0:4] * np.array([w, h, w, h])
# Now, from YOLO data format, we can get top left corner coordinates
# that are x_min and y_min
x_center, y_center, box_width, box_height = box_current
x_min = int(x_center - (box_width / 2))
y_min = int(y_center - (box_height / 2))
# Adding results into prepared lists
bounding_boxes.append([x_min, y_min,
int(box_width), int(box_height)])
confidences.append(float(confidence_current))
class_numbers.append(class_current)
results = cv2.dnn.NMSBoxes(bounding_boxes, confidences,
probability_minimum, threshold)
if len(results) > 0:
# Going through indexes of results
for i in results.flatten():
# Getting current bounding box coordinates,
# its width and height
x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
# Preparing colour for current bounding box
# and converting from numpy array to list
colour_box_current = colours[class_numbers[i]].tolist()
# # # Check point
# print(type(colour_box_current)) # <class 'list'>
# print(colour_box_current) # [172 , 10, 127]
# Drawing bounding box on the original current frame
cv2.rectangle(frame, (x_min, y_min),
(x_min + box_width, y_min + box_height),
colour_box_current, 2)
# Preparing text with label and confidence for current bounding box
text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
confidences[i])
# Putting text with label and confidence on the original image
cv2.putText(frame, text_box_current, (x_min, y_min - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)
if writer is None:
# Constructing code of the codec
# to be used in the function VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Writing current processed frame into the video file
# Pay attention! If you're using Windows, your path might look like:
# r'videos\result-traffic-cars.mp4'
# or:
# 'videos\\result-traffic-cars.mp4'
writer = cv2.VideoWriter('videos/result-traffic-cars.mp4', fourcc, 30,
(frame.shape[1], frame.shape[0]), True)
# Write processed current frame to the file
writer.write(frame)
"""
End of:
Writing processed frame into the file
"""
"""
End of:
Reading frames in the loop
"""
# Printing final results
print()
print('Total number of frames', f)
print('Total amount of time {:.5f} seconds'.format(t))
print('FPS:', round((f / t), 1))
# Releasing video reader and writer
video.release()
writer.release()
I am trying to get this to work: https://circuitdigest.com/tutorial/real-life-object-detection-using-opencv-python-detecting-objects-in-live-video (scroll to the 'Object detection using ORB' part)
import cv2
import numpy as np
def ORB_detector(new_image, image_template):
# Function that compares input image to template
# It then returns the number of ORB matches between them
image1 = cv2.cvtColor(new_image, cv2.COLOR_BGR2GRAY)
# Create ORB detector with 1000 keypoints with a scaling pyramid factor of 1.2
orb = cv2.ORB_create(1000, 1.2)
# Detect keypoints of original image
(kp1, des1) = orb.detectAndCompute(image1, None)
# Detect keypoints of rotated image
(kp2, des2) = orb.detectAndCompute(image_template, None)
# Create matcher
# Note we're no longer using Flannbased matching
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# Do matching
matches = bf.match(des1,des2)
# Sort the matches based on distance. Least distance
# is better
matches = sorted(matches, key=lambda val: val.distance)
return len(matches)
cap = cv2.VideoCapture(0)
# Load our image template, this is our reference image
image_template = cv2.imread("phone.jpg", 0)
# image_template = cv2.imread('images/kitkat.jpg', 0)
while True:
# Get webcam images
ret, frame = cap.read()
# Get height and width of webcam frame
height, width = frame.shape[:2]
# Define ROI Box Dimensions (Note some of these things should be outside the loop)
top_left_x = int(width / 3)
top_left_y = int((height / 2) + (height / 4))
bottom_right_x = int((width / 3) * 2)
bottom_right_y = int((height / 2) - (height / 4))
# Draw rectangular window for our region of interest
cv2.rectangle(frame, (top_left_x,top_left_y), (bottom_right_x,bottom_right_y), 255, 3)
# Crop window of observation we defined above
cropped = frame[bottom_right_y:top_left_y , top_left_x:bottom_right_x]
# Flip frame orientation horizontally
frame = cv2.flip(frame,1)
# Get number of ORB matches
matches = ORB_detector(cropped, image_template)
# Display status string showing the current no. of matches
output_string = "Matches = " + str(matches)
cv2.putText(frame, output_string, (50,450), cv2.FONT_HERSHEY_COMPLEX, 2, (250,0,150), 2)
# Our threshold to indicate object detection
# For new images or lighting conditions you may need to experiment a bit
# Note: the ORB detector gets the top 1000 matches, so 350 would essentially be a minimum 35% match
threshold = 250
# If matches exceed our threshold then object has been detected
if matches > threshold:
cv2.rectangle(frame, (top_left_x,top_left_y), (bottom_right_x,bottom_right_y), (0,255,0), 3)
cv2.putText(frame,'Object Found',(50,50), cv2.FONT_HERSHEY_COMPLEX, 2 ,(0,255,0), 2)
cv2.imshow('Object Detector using ORB', frame)
if cv2.waitKey(1) == 13: #13 is the Enter Key
break
cap.release()
cv2.destroyAllWindows()
For some reason I am getting an error on line 41, at 'height, width = frame.shape[:2]', and I cannot get it to work (the error is the one in the title: 'NoneType' object has no attribute 'shape'). I have tried fixing the image path and that has not worked. I am running this in Jupyter Lab.
I tried fixing the image path and checking whether its .shape works (so image_template.shape), which is what many other Stack Overflow answers suggested.
Your read is not returning a frame. The following code:
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
should check whether you actually managed to read a frame. cap.read() returns False as its first value when it fails, so you need to:
if not ret:
    print("Couldn't read a frame!")
    # ... do something to handle this failure
Because the read failed, frame is None, so when you call frame.shape, Python complains that a 'NoneType' object has no attribute 'shape'.
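For completeness, a minimal read loop with that check in place might look like this (a sketch, assuming the webcam at index 0 is the intended source):
import cv2
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open the capture device")
while True:
    ret, frame = cap.read()
    if not ret:
        # nothing was read: handle it instead of letting frame stay None
        print("Couldn't read a frame!")
        break
    height, width = frame.shape[:2]  # safe now, frame is a valid image
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == 13:  # 13 is the Enter key, as in the original loop
        break
cap.release()
cv2.destroyAllWindows()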
This code generates an error:
IndexError: invalid index to scalar variable.
at the line: results.append(RMSPE(np.expm1(y_train[testcv]), [y[1] for y in y_test]))
How do I fix it?
import pandas as pd
import numpy as np
from sklearn import ensemble
from sklearn import cross_validation
def ToWeight(y):
w = np.zeros(y.shape, dtype=float)
ind = y != 0
w[ind] = 1./(y[ind]**2)
return w
def RMSPE(y, yhat):
w = ToWeight(y)
rmspe = np.sqrt(np.mean( w * (y - yhat)**2 ))
return rmspe
forest = ensemble.RandomForestRegressor(n_estimators=10, min_samples_split=2, n_jobs=-1)
print ("Cross validations")
cv = cross_validation.KFold(len(train), n_folds=5)
results = []
for traincv, testcv in cv:
y_test = np.expm1(forest.fit(X_train[traincv], y_train[traincv]).predict(X_train[testcv]))
results.append(RMSPE(np.expm1(y_train[testcv]), [y[1] for y in y_test]))
testcv is:
[False False False ..., True True True]
You are trying to index into a scalar (non-iterable) value:
[y[1] for y in y_test]
# ^ this is the problem
When you call [y for y in y_test] you are iterating over the values already, so you get a single value in y.
Your code is the same as trying to do the following:
y_test = [1, 2, 3]
y = y_test[0] # y = 1
print(y[0]) # this line will fail
I'm not sure what you're trying to get into your results array, but you need to get rid of [y[1] for y in y_test].
If you want to append each y in y_test to results, you'll need to expand your list comprehension out further to something like this:
[results.append(..., y) for y in y_test]
Or just use a for loop:
for y in y_test:
results.append(..., y)
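If the goal is simply to compare the back-transformed training targets with the predictions, the call probably reduces to passing y_test directly (a sketch under that assumption):
results.append(RMSPE(np.expm1(y_train[testcv]), y_test))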
YOLO Object Detection
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
You don't need to index i in layer_names[i[0] - 1]. Just remove the [0] and use layer_names[i - 1]:
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
It worked for me.
YOLO Object Detection
With older OpenCV builds, getUnconnectedOutLayers() returns Nx1 arrays of indices:
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
With newer OpenCV builds, it returns a flat array of scalar indices:
ln = net.getLayerNames()
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
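A variant that should work regardless of the OpenCV build (a sketch, assuming net is the loaded network) is to flatten whatever getUnconnectedOutLayers() returns, since older builds give Nx1 arrays and newer builds give a flat array:
import numpy as np
ln = net.getLayerNames()
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]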
Basically, 1 is not a valid index of y. If you came here from your own code, check whether your y actually contains the index you are trying to access (in this case, the index is 1).
In the for loop you iterate over the values, so each element of that loop is probably a scalar, which has no index. When an element is an empty array, a single variable, or a scalar rather than a list or array, you cannot use indices on it.
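A quick way to perform that check before indexing (illustrative only, assuming the elements may be either scalars or sequences):
import numpy as np
for y in y_test:
    if np.ndim(y) > 0 and len(y) > 1:
        results.append(y[1])  # element is indexable and has an index 1
    else:
        results.append(y)  # scalar: use the value itself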
For those who are using the darknet code, editing the yolo_video.py file in the repo is required. This file works, with the required edits applied:
# import the necessary packages
import numpy as np
import argparse
import imutils
import time
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
help="path to input video")
ap.add_argument("-o", "--output", required=True,
help="path to output video")
ap.add_argument("-y", "--yolo", required=True,
help="base path to YOLO directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
help="threshold when applying non-maxima suppression")
args = vars(ap.parse_args())
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([args["yolo"], "biscuits.names"])
LABELS = open(labelsPath).read().strip().split("\n")
# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
dtype="uint8")
# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([args["yolo"], "yolov4-custom_best.weights"])
configPath = os.path.sep.join([args["yolo"], "yolov4-custom.cfg"])
# load our YOLO object detector trained on COCO dataset (80 classes)
# and determine only the *output* layer names that we need from YOLO
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
ln = net.getLayerNames()
print("ln",net)
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]
# initialize the video stream, pointer to output video file, and
# frame dimensions
vs = cv2.VideoCapture(args["input"])
writer = None
(W, H) = (None, None)
# try to determine the total number of frames in the video file
try:
prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2()\
else cv2.CAP_PROP_FRAME_COUNT
total = int(vs.get(prop))
print("[INFO] {} total frames in video".format(total))
# an error occurred while trying to determine the total
# number of frames in the video file
except:
print("[INFO] could not determine # of frames in video")
print("[INFO] no approx. completion time can be provided")
total = -1
# loop over frames from the video file stream
while True:
# read the next frame from the file
(grabbed, frame) = vs.read()
# if the frame was not grabbed, then we have reached the end
# of the stream
if not grabbed:
break
# if the frame dimensions are empty, grab them
if W is None or H is None:
(H, W) = frame.shape[:2]
# construct a blob from the input frame and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes
# and associated probabilities
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
end = time.time()
# initialize our lists of detected bounding boxes, confidences,
# and class IDs, respectively
boxes = []
confidences = []
classIDs = []
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability)
# of the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# scale the bounding box coordinates back relative to
# the size of the image, keeping in mind that YOLO
# actually returns the center (x, y)-coordinates of
# the bounding box followed by the boxes' width and
# height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top
# and and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates,
# confidences, and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# apply non-maxima suppression to suppress weak, overlapping
# bounding boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"],
args["threshold"])
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# draw a bounding box rectangle and label on the frame
color = [int(c) for c in COLORS[classIDs[i]]]
cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
text = "{}: {:.4f}".format(LABELS[classIDs[i]],
confidences[i])
cv2.putText(frame, text, (x, y - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
# check if the video writer is None
if writer is None:
# initialize our video writer
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
writer = cv2.VideoWriter(args["output"], fourcc, 30,
(frame.shape[1], frame.shape[0]), True)
# some information on processing single frame
if total > 0:
elap = (end - start)
print("[INFO] single frame took {:.4f} seconds".format(elap))
print("[INFO] estimated total time to finish: {:.4f}".format(
elap * total))
# write the output frame to disk
writer.write(frame)
# release the file pointers
print("[INFO] cleaning up...")
writer.release()
vs.release()
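For reference, with the argument parser above, the script is invoked roughly like this (paths are placeholders):
# Usage: python yolo_video.py --input videos/input.mp4 --output output/result.avi --yolo yolo-coco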
I have a project that combines data from two different sensors, a TF-Luna LiDAR and a Raspberry Pi Camera Module V2, for an object-detection self-driving vehicle. I've tried threading both sensors to allow concurrent operation, but I have trouble displaying the data together. Sometimes it works, but the LiDAR data won't continuously update itself. Here's the code I'm using for the LiDAR:
def read_tfluna_data():
    while True:
        counter = ser.in_waiting  # count the number of bytes of the serial port
        if counter > 8:
            bytes_serial = ser.read(9)  # read 9 bytes
            ser.reset_input_buffer()  # reset buffer
            if bytes_serial[0] == 0x59 and bytes_serial[1] == 0x59:  # check first two bytes
                distance = bytes_serial[2] + bytes_serial[3]*256  # distance in next two bytes
                return distance
class lidar:
    def update(self):
        # Keep looping indefinitely until the thread is stopped
        while True:
            distance = read_tfluna_data()
            return distance
    def __init__(self):
        distance = Thread(target=self.update, args=())
        distance.start()
and this is the code for the display:
while True:
# Grab frame from video stream
frame1 = videostream.read()
# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
# Retrieve detection results
boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0] # Confidence of detected objects
# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
# Get bounding box coordinates and draw box
# Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
# Draw label
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
#Draw LiDAR Distance in corner of frame
cv2.putText(frame,'Distance: %s cm' %dist_data,(900, 700),cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,0),2,cv2.LINE_AA)
#Draw Rudder in corner of frame
cv2.putText(frame,'Rudder dir: forward!',(50, 700),cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,0),2,cv2.LINE_AA)
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
I've tried putting the .update() call in the while True loop, but it stalls the program and ends up crashing it. I've also tried turning the serial port on/off, editing boot.config to enable UART, and turning off the USB serial.
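For reference, a common pattern for this kind of reader thread is to keep the latest reading in an attribute instead of returning from update(), so the loop keeps running; a minimal sketch (reusing read_tfluna_data() from above, class and attribute names are illustrative):
from threading import Thread
class Lidar:
    def __init__(self):
        self.distance = None
        # daemon=True so the reader thread does not block program exit
        self._thread = Thread(target=self._update, daemon=True)
        self._thread.start()
    def _update(self):
        # store every new reading on the object instead of returning once
        while True:
            self.distance = read_tfluna_data()
The display loop can then read lidar.distance on every iteration to get the latest value.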
Help me, please. I received an error in my code for a social distancing detection system using a webcam. I searched for the error, but I found nothing different from my code. I write my code using Notepad++ and run it from the command prompt. Below is my error:
C:\Users\User\Downloads\Social_Distancing_Detection_Real_Time>python Run.py
[INFO] loading YOLO from disk...
[INFO] setting preferable backend and target to CUDA...
[INFO] accessing video stream...
[ WARN:0] global D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\dnn.cpp (1447) cv::dnn::dnn4_v20211004::Net::Impl::setUpNet DNN module was not built with CUDA backend; switching to CPU
Traceback (most recent call last):
File "C:\Users\User\Downloads\Social_Distancing_Detection_Real_Time\Run.py", line 77, in <module>
results = detect_people(frame, net, ln,
File "C:\Users\User\Downloads\Social_Distancing_Detection_Real_Time\mylib\detection.py", line 58, in detect_people
idxs = cv2.dnn.NMSBoxes(boxes, confidence, MIN_CORP, NMS_THRESH)
TypeError: Can't parse 'scores'. Input argument doesn't provide sequence protocol
[ WARN:1] global D:\a\opencv-python\opencv-python\opencv\modules\videoio\src\cap_msmf.cpp (438) `anonymous-namespace'::SourceReaderCB::~SourceReaderCB terminating async callback
That is my error. Below is my full code for the file detection.py:
#import the necessary packages
from .config import NMS_THRESH, MIN_CORP, People_Counter
import numpy as np
import cv2
def detect_people(frame, net, In, personIdx = 0):
#grab the dimensions of the frame and initialize the list of results
(H, W) = frame.shape[:2]
results = []
#construct a blob from the input frame and then perform a forward
#pass of the YOLO object detector, giving us our bounding boxes
#and associated probabilities
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
layerOutputs = net.forward(In)
#initialize our lists of detected bounding boxes, centroids, and
#confidences, respectively
boxes = []
centroids = []
confidences = []
#loop over each of the layer outputs
for output in layerOutputs:
#loop over each of the detections
for detection in output:
#extract the class ID and confidence (i.e., probability)
#of the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
#filter detections by (1) ensuring that the object
#detected was a person and (2) that the minimum
#confidence is met
if classID == personIdx and confidence > MIN_CORP:
#scale the bounding box coordinates back relative to
#the size of the image, keeping in mind that YOLO
#actually returns the center (x,y) -coordinates of
#the bounding box followed by the boxes' width and height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
#use the center (x,y) -coordinates to derive the top
#and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
#update our list of bounding box coordinates,
#centroids and confidences
boxes.append([x, y, int(width), int(height)])
centroids.append((centerX, centerY))
confidences.append(float(confidence))
#apply non-maxima suppression to suppress weak, overlapping bounding boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidence, MIN_CORP, NMS_THRESH)
#print('Total people count:', len(idxs))
#compute the total people counter
#if People_Counter:
#human_count = "Human count: {}".format(len(idxs))
#cv2.putText(frame, human_count, (470, frame.shape[0] - 75), cv2.FONT_HERSHEY_SIMPLEX, 0.70, (0, 0, 0), 2)
#ensure at least one detection exists
if len(idxs) > 0:
#loop over the indexes we are keeping
for i in idxs.flatten():
#extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
#update our results list to consist of the person
#prediction probability, bounding box coordinates,
#and the centroids
r = (confidences[i], (x, y, x + w, y + h), centroids[i])
results.append(r)
#return the list of the results
return results
The answer to your problem (as usual) lies in the response from the interpreter:
TypeError: Can't parse 'scores'. Input argument doesn't provide sequence protocol
scores is the second argument to cv2.dnn.NMSBoxes, which in your case is confidence. confidence is a single number; you can't iterate over it. You've made a typo: you probably wanted to pass confidences, which is a list.
Change your code to:
idxs = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CORP, NMS_THRESH)
Here is code to get the optical flow output from a stabilized video (no camera movement) and save it as a set of frames:
import cv2 as cv
import numpy as np
# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")
# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()
# Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)
# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
count = 0
while(cap.isOpened()):
# ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
ret, frame = cap.read()
# Opens a new window and displays the input frame
cv.imshow("input", frame)
# Converts each frame to grayscale - we previously only converted the first frame to grayscale
gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
# Calculates dense optical flow by Farneback method
flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
# Computes the magnitude and angle of the 2D vectors
magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])
# Sets image hue according to the optical flow direction
mask[..., 0] = angle * 180 / np.pi / 2
# Sets image value according to the optical flow magnitude (normalized)
mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)
# Converts HSV to RGB (BGR) color representation
rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)
# Opens a new window and displays the output frame
cv.imshow("dense optical flow", rgb[40:150,120:220])
cv.imwrite("frames_modified_2/%d.png" % count, rgb[40:150,120:220])
count +=1
# Updates previous frame
prev_gray = gray
# Frames are read by intervals of 1 millisecond. The programs breaks out of the while loop when the user presses the 'q' key
if cv.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv.destroyAllWindows()
Can someone please suggest how to quantify the difference between the frames, i.e. how to estimate speed/velocity?
Here's an example to obtain the pixel translation magnitude from .bsq frames. You can modify the code to take a video file as input instead. You are probably most interested in the get_translation() function. Example:
Graph displaying pixel translation from frame-to-frame
Code
import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random
# Usage: python translate_analyzer.py -p <filename.bsq>
# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image
# Calculate grayscale histogram
hist = cv2.calcHist([gray],[0],None,[256],[0,256])
hist_size = len(hist)
# Calculate cumulative distribution from the histogram
accumulator = []
accumulator.append(float(hist[0]))
for index in range(1, hist_size):
accumulator.append(accumulator[index -1] + float(hist[index]))
# Locate points to clip
maximum = accumulator[-1]
clip_hist_percent *= (maximum/100.0)
clip_hist_percent /= 2.0
# Locate left cut
minimum_gray = 0
while accumulator[minimum_gray] < clip_hist_percent:
minimum_gray += 1
# Locate right cut
maximum_gray = hist_size -1
while accumulator[maximum_gray] >= (maximum - clip_hist_percent):
maximum_gray -= 1
# Calculate alpha and beta values
alpha = 255 / (maximum_gray - minimum_gray)
beta = -minimum_gray * alpha
auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
return (auto_result, alpha, beta)
# Draw flow
def draw_flow(img, flow, step=30):
h, w = img.shape[:2]
y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
fx, fy = flow[y,x].T
lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.polylines(vis, lines, 1, (36, 255, 12))
for (x1, y1), (_x2, _y2) in lines:
cv2.circle(vis, (x1, y1), 2, (36, 255, 12), -1)
return vis
# Return translation value
def get_translation(img, flow, step=30):
return (np.median(flow[:,:,0].T), flow[:, :, 0].T)
# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the directory")
args = vars(ap.parse_args())
if not args['path']:
print('Usage: python translate_analyzer.py -p <directory>')
exit(1)
# Extract file name
bsq_fname = os.path.split(args['path'])[-1]
if '.bsq' not in bsq_fname:
print('ERROR: Invalid bsq file. Select correct file.')
exit(1)
width = 640
height = 512
frame_count = int(os.path.getsize(bsq_fname)/(2*height*width))
x,y,w,h = 0,0,100,512
# Simulates calibrated frames to display on video frame
data_file = np.fromfile(bsq_fname, dtype=np.uint16, count=-1)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)
print(bsq_fname)
fname = bsq_fname.split()[0]
prev = data_file[:,:,0].copy()
prev //= 64
prev = automatic_brightness_and_contrast(prev)[0]
prev = prev[y:y+h, x:x+w]
translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
data = data_file[:,:,index].copy()
data //= 64
data = automatic_brightness_and_contrast(data)[0]
data = data[y:y+h, x:x+w]
flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
translation, pixel_direction = get_translation(data, flow)
prev = data
cv2.imshow('flow', draw_flow(data, flow))
cv2.waitKey(1)
translation_data.append(translation)
frame_direction = pixel_direction
index = (index+1) % frame_count
end = time.time()
print('Time:', end - start)
plt.figure()
plt.title(bsq_fname)
plt.xlabel("Frames")
plt.ylabel("Magnitude")
plt.plot(translation_data)
plt.figure()
plt.title("Pixel Direction")
plt.xlabel("Width")
plt.ylabel("Height")
plt.imshow(frame_direction.T)
plt.colorbar(orientation='vertical')
plt.show()
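If the end goal is an actual speed estimate, the per-frame pixel translation still has to be combined with a spatial scale and the frame rate; a rough conversion sketch (PIXELS_PER_METER and FPS are assumed calibration values, not part of the code above):
# Assumed calibration values -- replace with values measured for your setup
PIXELS_PER_METER = 50.0  # how many pixels correspond to one metre in the scene
FPS = 30.0  # capture frame rate of the sequence
# translation_data holds the median per-frame pixel displacement from the loop above
speeds_m_per_s = [abs(t) * FPS / PIXELS_PER_METER for t in translation_data]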