Error while loading YOLOv5 custom model using OpenCV Python

I was trying to predict defects on a metal plate using YOLOv5 pre-trained weights, and it throws this error:

File "C:\Users\acer.spyder-py3\metallic surface defect detection\untitled3.py", line 59, in post_process
    if confidence >= CONFIDENCE_THRESHOLD:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
import cv2
import numpy as np

# Constants.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45

# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# Colors.
BLACK = (0,0,0)
BLUE = (255,178,50)
YELLOW = (0,255,255)

def draw_label(im, label, x, y):
    """Draw text onto image at location."""
    # Get text size.
    text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
    dim, baseline = text_size[0], text_size[1]
    # Use text size to create a BLACK rectangle.
    cv2.rectangle(im, (x, y), (x + dim[0], y + dim[1] + baseline), BLACK, cv2.FILLED)
    # Display text inside the rectangle.
    cv2.putText(im, label, (x, y + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

def pre_process(input_image, net):
    # Create a 4D blob from a frame.
    blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WIDTH, INPUT_HEIGHT), [0,0,0], 1, crop=False)
    # Set the input to the network.
    net.setInput(blob)
    # Run the forward pass to get output of the output layers.
    outputs = net.forward(net.getUnconnectedOutLayersNames())
    return outputs

def post_process(input_image, outputs):
    # Lists to hold respective values while unwrapping.
    class_ids = []
    confidences = []
    boxes = []
    # Rows.
    rows = outputs[0].shape[1]
    image_height, image_width = input_image.shape[:2]
    # Resizing factor.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT
    # Iterate through detections.
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]
        # Discard bad detections and continue.
        if confidence >= CONFIDENCE_THRESHOLD:
            classes_scores = row[5:]
            # Get the index of max class score.
            class_id = np.argmax(classes_scores)
            # Continue if the class score is above threshold.
            if (classes_scores[class_id] > SCORE_THRESHOLD):
                confidences.append(confidence)
                class_ids.append(class_id)
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                left = int((cx - w/2) * x_factor)
                top = int((cy - h/2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)
    # Perform non maximum suppression to eliminate redundant, overlapping boxes with lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    for i in indices:
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        # Draw bounding box.
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        # Class label.
        label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
        # Draw label.
        draw_label(input_image, label, left, top)
    return input_image

if __name__ == '__main__':
    # Load class names.
    classesFile = "defects.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')
    # Load image.
    frame = cv2.imread('img_02_3436787300_00007_jpg.rf.e9923d3a70d1aeb92e45896b9c12cfa3.jpg')
    # Give the weight files to the model and load the network using them.
    modelWeights = "models_train/best.onnx"
    net = cv2.dnn.readNet(modelWeights)
    # Process image.
    detections = pre_process(frame, net)
    img = post_process(frame.copy(), detections)
    """
    Put efficiency information. The function getPerfProfile returns the overall time for inference(t)
    and the timings for each of the layers(in layersTimes).
    """
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    print(label)
    cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, (0, 0, 255), THICKNESS, cv2.LINE_AA)
    cv2.imshow('Output', img)
    cv2.waitKey(0)
I have only a little experience with deploying models for commercial use. If you find any other errors, please let me know. Thanks in advance.

A simple search led me to this SO post, which highlights a common issue with recent OpenCV versions.
Following this blog got me close, but I faced the issue above.
net.getUnconnectedOutLayers() returns an array of index values. The output layers are obtained from net.getLayerNames() based on these index values.
In the following case net.getUnconnectedOutLayers() returns:
array([200, 227, 254])
We get the output layers from output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()], which returns:
['yolo_82', 'yolo_94', 'yolo_106']
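As a hedged sketch of just that mapping (assuming OpenCV >= 4.5.4, where getUnconnectedOutLayers() returns a flat array of 1-based indices; older builds returned nested [[i]] arrays, which flatten() also tolerates):

import cv2
import numpy as np

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')  # example files, same as below
layer_names = net.getLayerNames()
idxs = np.asarray(net.getUnconnectedOutLayers())  # e.g. array([200, 227, 254])
# Indices are 1-based into getLayerNames(); flatten() covers both old and new shapes.
output_layers = [layer_names[i - 1] for i in idxs.flatten()]
print(output_layers)  # e.g. ['yolo_82', 'yolo_94', 'yolo_106']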
Code:
The following is the complete working code for OpenCV version 4.5.5 (CPU):
import os
import cv2
import numpy as np

path = '.'  # adjust: directory holding yolov3.weights, yolov3.cfg, coco.names, horse.jpg

image = cv2.imread(os.path.join(path, 'horse.jpg'))
Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

classes = None
with open(os.path.join(path, 'coco.names'), 'r') as f:
    classes = [line.strip() for line in f.readlines()]
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

net = cv2.dnn.readNet(os.path.join(path, 'yolov3.weights'), os.path.join(path, 'yolov3.cfg'))
blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False)
net.setInput(blob)

def get_output_layers(net):
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    return output_layers

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(classes[class_id])
    color = COLORS[class_id]
    img = cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2)
    img = cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# run inference through the network
# and gather predictions from output layers
outs = net.forward(get_output_layers(net))

# initialization
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

image2 = image.copy()
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

# apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
for i in indices:
    # OpenCV 4.5.4+ returns flat indices; older versions needed i = i[0]
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    draw_bounding_box(image2, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))
Result: (sample output image omitted)

The problem may be related to an incompatible version of your installed modules.
Download the .pt model:
wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt
And export it to ONNX on your own machine:
git clone https://github.com/ultralytics/yolov5
cd yolov5
pip install -r requirements.txt
pip install onnx
python3 export.py --weights models/yolov5s.pt --include onnx
Using the newly converted .onnx file should solve the problem.
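As a quick sanity check (a hedged sketch: the exact 25200-row shape assumes the standard 640x640 YOLOv5s export, and the column count is 5 plus the number of classes in your custom model), you can confirm the re-exported model yields a single (1, N, 5 + num_classes) output. If the shape differs, row[4] in post_process is no longer a scalar, which is exactly what triggers the ambiguous-truth-value error above:

import cv2
import numpy as np

net = cv2.dnn.readNet('best.onnx')  # your re-exported model
dummy = np.zeros((640, 640, 3), np.uint8)  # blank test input
blob = cv2.dnn.blobFromImage(dummy, 1/255.0, (640, 640), swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
print([o.shape for o in outs])  # expect something like [(1, 25200, 5 + num_classes)]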

Related

How do I make detection only on a specific area

I'm working on a project doing object detection and text detection using both YOLO and EasyOCR. Since I'm a beginner and really new to computer vision, I would be glad if someone could help me.
Here's the code:
import cv2
import numpy as np
import easyocr

# Load Yolo
net = cv2.dnn.readNet('yolov4-tiny-custom_3000.weights', 'yolov4-tiny-custom.cfg')
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
cap = cv2.VideoCapture('car1.mp4')

# Declare Ocr
cascade_src = 'haarcascade_russian_plate_number.xml'
cascade = cv2.CascadeClassifier(cascade_src)
reader = easyocr.Reader(['en'], gpu=False)
# Declare Ocr

while True:
    _, frame = cap.read()
    height, width, channels = frame.shape
    #frame = cv2.resize(frame, (800, 600))

    # Yolo Detection
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing informations on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3)
            print("Car type: " + label)

    # Text Reader Using Ocr
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    plate = cascade.detectMultiScale(gray, 1.1, 5)
    for x, y, w, h in plate:
        wT, hT, cT = frame.shape
        a, b = (int(0.02*wT), int(0.02*hT))
        plate2 = frame[y+a:y+h-a, x+b:x+w-b, :]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (60, 60, 255), 2)
        cv2.rectangle(frame, (x-1, y-40), (x+w+1, y), (60, 60, 255), -1)
        result = reader.readtext(plate2)
        for detek in result:
            top_left = (int(detek[0][0][0]), int(detek[0][0][1]))
            bottom_right = (int(detek[0][2][0]), int(detek[0][2][1]))
            text = detek[1]
            cv2.putText(frame, text, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
            print("Vehicle number: " + text)
    # Text Reader Using Ocr

    cv2.imshow("Detection", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
I am trying to use ROI to detect the object but I am not able to do it.
Any advice please?
Crop the image before it is fed to the model:
while True:
    _, frame = cap.read()
    frame_crop = frame[y1:y2, x1:x2]  # set x1, x2, y1, y2 based on your ROI
    blob = cv2.dnn.blobFromImage(frame_crop, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
This will also speed up inference, since there is less data for the model to process.
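One caveat, as a hedged sketch (x1, y1, x2, y2 are hypothetical ROI bounds): the detections come back relative to the crop, so offset them by the ROI origin before drawing on the full frame:

x1, y1, x2, y2 = 100, 200, 800, 600  # example ROI, adjust to your scene
frame_crop = frame[y1:y2, x1:x2]
blob = cv2.dnn.blobFromImage(frame_crop, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# ...decode boxes against frame_crop's width/height as usual, then shift back:
# cv2.rectangle(frame, (x + x1, y + y1), (x + w + x1, y + h + y1), color, 2)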

How to convert cv2 pictures into a video frame

I have the following code which I am trying to use to detect objects. I have made some changes so that it uses a camera, but right now it takes a picture in a loop instead of displaying a continuous video feed.
Here is my code.
import numpy as np
import cv2

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layers_names = net.getLayerNames()
outputlayers = [layers_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture(0)
while True:
    _, image = cap.read()
    height, width, channels = image.shape
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(outputlayers)
    # Reset per frame so detections don't accumulate across iterations.
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence >= 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    objects_detected = len(boxes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[i]
            print(label)
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            cv2.putText(image, label, (x, y + 30), font, 4, color, 3)
    cv2.imshow("Image", image)
    if cv2.waitKey(5) & 0xFF == 27:  # Esc to quit
        break
cv2.destroyAllWindows()
cap.release()

yolov7 Object Detector

I used YOLOv5 for my object detection. Now I am trying to upgrade to YOLOv7. I trained my dataset and converted it from .pt to .onnx, but I cannot get it to work in my code (shared below). I got this error:
cv2.error: OpenCV(4.5.5) /Users/runner/work/opencv-python/opencv-python/opencv/modules/dnn/src/onnx/onnx_importer.cpp:928: error: (-2:Unspecified error) in function 'handleNode'
> Node [NonMaxSuppression#ai.onnx]:(onnx::Gather_626) parse error: OpenCV(4.5.5) /Users/runner/work/opencv-python/opencv-python/opencv/modules/dnn/src/dnn.cpp:621: error: (-2:Unspecified error) Can't create layer "onnx::Gather_626" of type "NonMaxSuppression" in function 'getLayerInstance'
import cv2
import numpy as np
from PIL import Image
import webcolors
import time
import requests

start = time.time()
path = "/Users/admin/Desktop/ML/"
productsArray = []
products = []
classNames = []
allProductsArray = []

def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result

# Loading image
image = cv2.imread(path + "Images/2.jpg")
img = format_yolov5(image)  # making the image square

####### DETECTION ###########
def Detect():
    net = cv2.dnn.readNet(path + "Config/data.onnx")
    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 1/255.0, (640, 640), swapRB=True)
    net.setInput(blob)
    predictions = net.forward()

    class_list = []
    with open(path + "Config/obj.names", "r") as f:
        class_list = [cname.strip() for cname in f.readlines()]

    # Showing informations on the screen
    class_ids = []
    confidences = []
    boxes = []
    output_data = predictions[0]
    image_height, image_width, _ = img.shape  # shape is (rows, cols, channels)
    x_factor = image_width / 640
    y_factor = image_height / 640
    for r in range(25200):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.55:
            classes_scores = row[5:]
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):
                confidences.append(confidence)
                class_ids.append(class_id)
                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.4)
    result_class_ids = []
    result_confidences = []
    result_boxes = []
    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])
    for i in range(len(result_class_ids)):
        box = result_boxes[i]
        class_id = result_class_ids[i]
        label = (class_list[class_id])
        allProductsArray.append(label)
        cv2.rectangle(img, box, (0, 255, 255), 2)
        cv2.rectangle(img, (box[0], box[1] - 20), (box[0] + box[2], box[1]), (0, 255, 255), -1)
        cv2.putText(img, class_list[class_id], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
        cv2.putText(img, str(result_confidences[i]), (box[0]+60, box[1]), cv2.FONT_HERSHEY_SIMPLEX, .6, (0, 0, 0))

Detect()
print(allProductsArray)
I'm not sure I understand exactly what you want to do (run YOLOv7 or YOLOv5 inference), but I hope this repo can help:
https://github.com/ibaiGorordo/ONNX-YOLOv7-Object-Detection
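For context, the error says that cv2.dnn in OpenCV 4.5.5 cannot construct the NonMaxSuppression node that end-to-end YOLOv7 exports embed in the graph. Two hedged workarounds: re-export the model without the NMS step, or run the same .onnx through onnxruntime, which does support that node. A minimal onnxruntime sketch (assuming pip install onnxruntime, and the same 640x640 model and squared img from the code above):

import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("data.onnx")  # same model file as above
img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # img: the squared image from format_yolov5
img_in = cv2.resize(img_in, (640, 640)).astype(np.float32) / 255.0
img_in = img_in.transpose(2, 0, 1)[None, ...]  # HWC -> NCHW with a batch dimension
input_name = session.get_inputs()[0].name
outputs = session.run(None, {input_name: img_in})
print([o.shape for o in outputs])  # inspect what the exported graph actually returns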

Python, Pillow grab not loading the image frames correctly

I'm trying to use Pillow's ImageGrab to capture frames of my screen, so that I can later detect objects in every frame.
import numpy as np
import cv2
from PIL import ImageGrab as ig
import time

# Load Yolo
net = cv2.dnn.readNet("yolov3_training_last.weights", "yolov3_testing.cfg")
# Name custom object
classes = ["amongus"]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
last_time = time.time()

while(True):
    img_ = ig.grab(bbox=None)
    open_cv_image = np.array(img_)
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    img = cv2.resize(open_cv_image, None, fx=0.4, fy=0.4)
    height, width, channels = img.shape

    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing informations on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Object detected
                print(class_id)
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    print(indexes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            #cv2.putText(img, label, (x, y + 30), font, 3, color, 2)
    cv2.imshow("Image", np.array(img_))
    #key = cv2.waitKey(0)

cv2.destroyAllWindows()
If I uncomment the #key = cv2.waitKey(0) line it works, because I'm taking just one frame each time I press a key, but once I comment that part out I get this screen.
I'm assuming (not sure at all if it's because of that) that this is happening because a lot of frames are coming in, but if I sleep(n) inside the while loop the FPS will be very low. (I guess?)
What's happening, and how do I fix it?
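A hedged guess at the failure mode: cv2.imshow only repaints when the HighGUI event loop runs, and cv2.waitKey is what drives that loop, so with the call commented out the window never refreshes properly. A 1 ms wait inside the loop keeps the display live without the full stop of waitKey(0); a minimal sketch:

while True:
    img_ = ig.grab(bbox=None)
    frame = cv2.cvtColor(np.array(img_), cv2.COLOR_RGB2BGR)
    # ...run the detection code above on frame...
    cv2.imshow("Image", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # 1 ms event-loop tick; press q to quit
        break
cv2.destroyAllWindows()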

How to combine two yolov3 models for object detection?

We have two yolov3-tiny models for license plate detection. The first model detects the license plate on cars and crops it; these cropped images should then be sent to the second yolov3-tiny model as input, which detects the country region.
We are having trouble combining the two models. Any advice and help is appreciated.
The code is below:
# This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html
# Usage example: python3 object_detection_yolo.py --video=run.mp4
#                python3 object_detection_yolo.py --image=bird.jpg
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path

# Initialize the parameters
confThreshold = 0.5   # Confidence threshold
nmsThreshold = 0.4    # Non-maximum suppression threshold
inpWidth = 608        # Width of network's input image
inpHeight = 608       # Height of network's input image

parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()

# Load names of classes
classesFile = "obj.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Load names of classes
classesFile1 = "obj1.names"
classes1 = None
with open(classesFile1, 'rt') as f:
    classes1 = f.read().rstrip('\n').split('\n')

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "yolov3-tiny_obj.cfg"
modelWeights = "yolov3-tiny_obj_final.weights"

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration1 = "yolov3-tiny_obj1.cfg"
modelWeights1 = "yolov3-tiny_obj_final1.weights"

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

net1 = cv.dnn.readNetFromDarknet(modelConfiguration1, modelWeights1)
net1.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net1.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Get the names of the output layers
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

#====================================================================================
# Get the names of the output layers
def getOutputsNames1(net1):
    # Get the names of all the layers in the network
    layersNames = net1.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net1.getUnconnectedOutLayers()]
#====================================================================================

# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
    label = '%.2f' % conf
    # Get the label for the class name and its confidence
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)
    # Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(frame, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

#=====================================================================================================================================
def drawPred1(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(crop, (left, top), (right, bottom), (255, 178, 50), 3)
    label = '%.2f' % conf
    # Get the label for the class name and its confidence
    if classes1:
        assert(classId < len(classes1))
        label = '%s:%s' % (classes1[classId], label)
    # Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(crop, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(crop, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

# Remove the bounding boxes with low confidence using non-maxima suppression
crop = ''
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    global crop
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
                print('this is: ', classIds, confidence, boxes)
    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
        # print(left, top, width, height)
        # coordinates: y1 = top, y2 = top + height, x1 = left, x2 = left + width
        # slice the frame at those coordinates and copy it to a folder
        crop = frame[top:top + height+1, left:left + width+1].copy()
        # print("//////////////////////", type(crop))
        cv.imwrite("/DISK/KrisServis/CNN/dark/darknet/detect_yolov3_spp/1.jpg", crop)

#=======================================================================================================================================================
def post(crop, outs1):
    cropHeight = crop.shape[0]
    cropWidth = crop.shape[1]
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds_for_crop = []
    confidences_crop = []
    boxes_crop = []
    for out in outs1:
        # print('this is out: ', out)
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            print('scores', classId)
            confidence_crop = scores[classId]
            if confidence_crop > confThreshold:
                center_x = int(detection[0] * cropWidth)
                center_y = int(detection[1] * cropHeight)
                width = int(detection[2] * cropWidth)
                height = int(detection[3] * cropHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds_for_crop.append(classId)
                confidences_crop.append(float(confidence_crop))
                boxes_crop.append([left, top, width, height])
    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    print('all ', confidence_crop, boxes_crop, classIds_for_crop)
    indices = cv.dnn.NMSBoxes(boxes_crop, confidences_crop, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes_crop[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred1(classIds_for_crop[i], confidences_crop[i], left, top, left + width, top + height)
        # print('this is: l t w h', left, top, width, height)

# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
outputFile = "yolo_out_py.avi"
if (args.image):
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    # =========================================================================================================
    # Load the input
    cap = cv.VideoCapture(args.image)
    outputFile = args.image[:-4]+'_yolo_out_py.jpg'
elif (args.video):
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = args.video[:-4]+'_yolo_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
if (not args.image):
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

while cv.waitKey(1) < 0:
    # Get frame from the video
    hasFrame, frame = cap.read()
    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        # Release device
        cap.release()
        break
    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
    # blob1 = cv.dnn.blobFromImage(, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
    # Set the input to the network
    net.setInput(blob)
    # net1.setInput(blob1)
    # Run the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))
    # outs1 = net1.forward(getOutputsNames1(net1))
    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)
    blob1 = cv.dnn.blobFromImage(crop, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
    # print('this is blob1', blob1)
    net1.setInput(blob1)
    outs1 = net1.forward(getOutputsNames1(net1))
    # print('this is outs1', outs1)
    post(crop, outs1)
    # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
    t, _ = net.getPerfProfile()
    # t1, _1 = net1.getPerfProfile()
    # label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    # cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    # Write the frame with the detection boxes
    if (args.image):
        cv.imwrite(outputFile, crop.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))
    cv.imshow(winName, frame)
    # cv.imshow(winName, crop)

cv.waitKey()
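As a hedged sketch of the two-stage pattern (decode_plate_boxes is a hypothetical helper standing in for the box-decoding loop already inside postprocess above): run the plate detector, crop each surviving detection, and feed every crop to the region network, instead of passing a single global crop between the stages:

def detect(net, image):
    # One forward pass through either network.
    blob = cv.dnn.blobFromImage(image, 1/255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    return net.forward(getOutputsNames(net))

outs = detect(net, frame)
for left, top, width, height in decode_plate_boxes(outs, frame):  # hypothetical helper
    plate_crop = frame[top:top + height, left:left + width].copy()
    if plate_crop.size:  # skip boxes clipped entirely off-frame
        post(plate_crop, detect(net1, plate_crop))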
