How to combine two yolov3 models for object detection? - python

We have two yolov3-tiny models for license plate detection. The first model detects the licence plate on a car and crops it; the cropped images should then be sent to the second yolov3-tiny model as input. The second model receives these cropped images and detects the country region.
We are having trouble combining the two models. Any advice and help is appreciated.
Here is the code:
# This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html
# Usage example: python3 object_detection_yolo.py --video=run.mp4
# python3 object_detection_yolo.py --image=bird.jpg
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path

# Initialize the parameters
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4   # Non-maximum suppression threshold
inpWidth = 608       # Width of network's input image
inpHeight = 608      # Height of network's input image

parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()

# Load names of classes for the first (plate) model
classesFile = "obj.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Load names of classes for the second (country region) model
classesFile1 = "obj1.names"
classes1 = None
with open(classesFile1, 'rt') as f:
    classes1 = f.read().rstrip('\n').split('\n')

# Give the configuration and weight files for the first model and load the network using them.
modelConfiguration = "yolov3-tiny_obj.cfg"
modelWeights = "yolov3-tiny_obj_final.weights"

# Give the configuration and weight files for the second model and load the network using them.
modelConfiguration1 = "yolov3-tiny_obj1.cfg"
modelWeights1 = "yolov3-tiny_obj_final1.weights"

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
net1 = cv.dnn.readNetFromDarknet(modelConfiguration1, modelWeights1)
net1.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net1.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Get the names of the output layers
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]
#====================================================================================
# Get the names of the output layers (second network)
def getOutputsNames1(net1):
    # Get the names of all the layers in the network
    layersNames = net1.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net1.getUnconnectedOutLayers()]
#====================================================================================
# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
    label = '%.2f' % conf
    # Get the label for the class name and its confidence
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)
    # Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(frame, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)
#=====================================================================================================================================
# Draw the predicted bounding box on the cropped plate image
def drawPred1(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(crop, (left, top), (right, bottom), (255, 178, 50), 3)
    label = '%.2f' % conf
    # Get the label for the class name and its confidence
    if classes1:
        assert(classId < len(classes1))
        label = '%s:%s' % (classes1[classId], label)
    # Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(crop, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(crop, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)
# Remove the bounding boxes with low confidence using non-maxima suppression
crop = ''
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    global crop
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
                print('this is: ', classIds, confidence, boxes)
    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
        # coordinates: y1 = top, y2 = top + height, x1 = left, x2 = left + width
        # slice the frame at these coordinates and save the crop to disk
        crop = frame[top:top + height+1, left:left + width+1].copy()
        cv.imwrite("/DISK/KrisServis/CNN/dark/darknet/detect_yolov3_spp/1.jpg", crop)
#=======================================================================================================================================================
def post(crop, outs1):
    cropHeight = crop.shape[0]
    cropWidth = crop.shape[1]
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds_for_crop = []
    confidences_crop = []
    boxes_crop = []
    for out in outs1:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            print('scores', classId)
            confidence_crop = scores[classId]
            if confidence_crop > confThreshold:
                center_x = int(detection[0] * cropWidth)
                center_y = int(detection[1] * cropHeight)
                width = int(detection[2] * cropWidth)
                height = int(detection[3] * cropHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds_for_crop.append(classId)
                confidences_crop.append(float(confidence_crop))
                boxes_crop.append([left, top, width, height])
    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    print('all ', confidence_crop, boxes_crop, classIds_for_crop)
    indices = cv.dnn.NMSBoxes(boxes_crop, confidences_crop, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes_crop[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred1(classIds_for_crop[i], confidences_crop[i], left, top, left + width, top + height)
# print('this is: l t w h', left,top,width,height)
# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
outputFile = "yolo_out_py.avi"
if (args.image):
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    # Load the input image
    cap = cv.VideoCapture(args.image)
    outputFile = args.image[:-4] + '_yolo_out_py.jpg'
elif (args.video):
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = args.video[:-4] + '_yolo_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
if (not args.image):
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

while cv.waitKey(1) < 0:
    # get frame from the video
    hasFrame, frame = cap.read()
    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        # Release device
        cap.release()
        break
    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
    # Sets the input to the first network
    net.setInput(blob)
    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))
    # Remove the bounding boxes with low confidence (this also sets the global crop)
    postprocess(frame, outs)
    # Feed the cropped plate to the second network
    blob1 = cv.dnn.blobFromImage(crop, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
    net1.setInput(blob1)
    outs1 = net1.forward(getOutputsNames1(net1))
    post(crop, outs1)
    # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
    t, _ = net.getPerfProfile()
    # label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    # cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    # Write the frame with the detection boxes
    if (args.image):
        cv.imwrite(outputFile, crop.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))
    cv.imshow(winName, frame)
    cv.waitKey()
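In outline, the chaining we are trying to achieve looks like this (a simplified sketch only; detect_plate() stands for the first-stage postprocessing above that returns the cropped plate, or None when nothing was found, and net1, getOutputsNames1, inpWidth, inpHeight are the names defined above):

import cv2 as cv
import numpy as np

def two_stage_detect(frame):
    # Stage 1: yolov3-tiny plate detector (detect_plate wraps net + postprocess above)
    crop = detect_plate(frame)
    if not isinstance(crop, np.ndarray) or crop.size == 0:
        # No plate found, so there is nothing to feed to the second network
        return None
    # Stage 2: country-region detector, run only on the cropped plate
    blob1 = cv.dnn.blobFromImage(crop, 1/255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
    net1.setInput(blob1)
    outs1 = net1.forward(getOutputsNames1(net1))
    post(crop, outs1)  # draw country-region detections on the crop
    return crop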

Related

How do I make detection only on a specific area

I'm working on a project doing object detection and text detection using both YOLO and EasyOCR. Since I'm a beginner and really new to computer vision, I would be glad if someone could help me.
Here's the code:
import cv2
import numpy as np
import easyocr

# Load Yolo
net = cv2.dnn.readNet('yolov4-tiny-custom_3000.weights', 'yolov4-tiny-custom.cfg')
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
cap = cv2.VideoCapture('car1.mp4')

# Declare Ocr
cascade_src = 'haarcascade_russian_plate_number.xml'
cascade = cv2.CascadeClassifier(cascade_src)
reader = easyocr.Reader(['en'], gpu=False)

while True:
    _, frame = cap.read()
    height, width, channels = frame.shape
    #frame = cv2.resize(frame, (800, 600))

    # Yolo Detection: detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3)
            print("Vehicle type: " + label)  # originally "Jenis Mobil"

    # Text Reader Using Ocr
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    plate = cascade.detectMultiScale(gray, 1.1, 5)
    for x, y, w, h in plate:
        wT, hT, cT = frame.shape
        a, b = (int(0.02 * wT), int(0.02 * hT))
        plate2 = frame[y + a:y + h - a, x + b:x + w - b, :]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (60, 60, 255), 2)
        cv2.rectangle(frame, (x - 1, y - 40), (x + w + 1, y), (60, 60, 255), -1)
        result = reader.readtext(plate2)
        for detek in result:
            top_left = (int(detek[0][0][0]), int(detek[0][0][1]))
            bottom_right = (int(detek[0][2][0]), int(detek[0][2][1]))
            text = detek[1]
            cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
            print("Vehicle number: " + text)  # originally "Nomor Kendaran"

    cv2.imshow("Detection", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
I am trying to use an ROI to detect the object, but I am not able to do it.
Any advice please?
Crop the image before it is fed to the model
while True:
    _, frame = cap.read()
    frame_crop = frame[y1:y2, x1:x2]  # set x1, x2, y1, y2 based on your ROI
    blob = cv2.dnn.blobFromImage(frame_crop, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

This will also speed up inference, since there is less data for the model to process.
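One caveat worth adding (not in the original answer): the boxes the network returns are then relative to the crop, so to draw them on the full frame you must offset them by the ROI origin. A minimal sketch:

# Boxes detected on the crop are in crop coordinates; shift them back into
# full-frame coordinates by adding the ROI origin (x1, y1) used for the crop.
def to_frame_coords(box, x1, y1):
    x, y, w, h = box
    return [x + x1, y + y1, w, h]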

Error while loading Yolov5 custom model using OpenCV Python

I was trying to predict defects on a metal plate using yolov5 pre-trained weights. It was throwing this error:

File "C:\Users\acer.spyder-py3\metallic surface defect detection\untitled3.py", line 59, in post_process
    if confidence >= CONFIDENCE_THRESHOLD:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
import cv2
import numpy as np

# Constants.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45

# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# Colors.
BLACK = (0,0,0)
BLUE = (255,178,50)
YELLOW = (0,255,255)

def draw_label(im, label, x, y):
    """Draw text onto image at location."""
    # Get text size.
    text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
    dim, baseline = text_size[0], text_size[1]
    # Use text size to create a BLACK rectangle.
    cv2.rectangle(im, (x,y), (x + dim[0], y + dim[1] + baseline), (0,0,0), cv2.FILLED)
    # Display text inside the rectangle.
    cv2.putText(im, label, (x, y + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

def pre_process(input_image, net):
    # Create a 4D blob from a frame.
    blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WIDTH, INPUT_HEIGHT), [0,0,0], 1, crop=False)
    # Sets the input to the network.
    net.setInput(blob)
    # Run the forward pass to get output of the output layers.
    outputs = net.forward(net.getUnconnectedOutLayersNames())
    return outputs

def post_process(input_image, outputs):
    # Lists to hold respective values while unwrapping.
    class_ids = []
    confidences = []
    boxes = []
    # Rows.
    rows = outputs[0].shape[1]
    image_height, image_width = input_image.shape[:2]
    # Resizing factor.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT
    # Iterate through detections.
    for r in range(rows):
        row = outputs[0][0][r]
        confidence = row[4]
        # Discard bad detections and continue.
        if confidence >= CONFIDENCE_THRESHOLD:
            classes_scores = row[5:]
            # Get the index of max class score.
            class_id = np.argmax(classes_scores)
            # Continue if the class score is above threshold.
            if (classes_scores[class_id] > SCORE_THRESHOLD):
                confidences.append(confidence)
                class_ids.append(class_id)
                cx, cy, w, h = row[0], row[1], row[2], row[3]
                left = int((cx - w/2) * x_factor)
                top = int((cy - h/2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)
    # Perform non maximum suppression to eliminate redundant, overlapping boxes with lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    for i in indices:
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        # Draw bounding box.
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        # Class label.
        label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
        # Draw label.
        draw_label(input_image, label, left, top)
    return input_image

if __name__ == '__main__':
    # Load class names.
    classesFile = "defects.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')
    # Load image.
    frame = cv2.imread('img_02_3436787300_00007_jpg.rf.e9923d3a70d1aeb92e45896b9c12cfa3.jpg')
    # Give the weight files to the model and load the network using them.
    modelWeights = "models_train/best.onnx"
    net = cv2.dnn.readNet(modelWeights)
    # Process image.
    detections = pre_process(frame, net)
    img = post_process(frame.copy(), detections)
    # Put efficiency information. The function getPerfProfile returns the overall time
    # for inference (t) and the timings for each of the layers (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    print(label)
    cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, (0, 0, 255), THICKNESS, cv2.LINE_AA)
    cv2.imshow('Output', img)
    cv2.waitKey(0)
I have only a little experience with deploying models for commercial use. If you find any other errors as well, please let me know. Thanks in advance.
A simple search led me to this SO post, highlighting a common issue recently.
Following this blog got me close, but I faced the issue above.
net.getUnconnectedOutLayers() returns an array of index values. The output layers are obtained from net.getLayerNames() based on these index values.
In the following case net.getUnconnectedOutLayers() returns:
array([200, 227, 254])
We get the output layers from output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()], which returns:
['yolo_82', 'yolo_94', 'yolo_106']
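For reference, a version-tolerant variant of that helper (a sketch, not from the linked post): older OpenCV builds return the indices as an Nx1 array (hence the i[0] pattern), newer ones return a flat array, and flattening handles both.

import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns a flat array in OpenCV >= 4.5.4
    # and an Nx1 array in older releases; flatten() covers both cases.
    out_idxs = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in out_idxs]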
Code:
The following is the complete working code for OpenCV version 4.5.5 (CPU):
import os
import cv2
import numpy as np

path = '.'  # folder containing the model and image files (set as appropriate)

image = cv2.imread(os.path.join(path, 'horse.jpg'))
Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

classes = None
with open(os.path.join(path, 'coco.names'), 'r') as f:
    classes = [line.strip() for line in f.readlines()]

COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
net = cv2.dnn.readNet(os.path.join(path, 'yolov3.weights'), os.path.join(path, 'yolov3.cfg'))
blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False)
net.setInput(blob)

def get_output_layers(net):
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    return output_layers

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(classes[class_id])
    color = COLORS[class_id]
    img = cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2)
    img = cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# run inference through the network
# and gather predictions from output layers
outs = net.forward(get_output_layers(net))

# initialization
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4
image2 = image.copy()

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

# apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
for i in indices:
    i = i  # in OpenCV versions before 4.5.4 this was i[0]
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    draw_bounding_box(image2, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))
Result: [sample output image]
The problem may be related to incompatible versions of your installed modules.
Download the .pt model:
wget https://github.com/ultralytics/YOLOv5/releases/download/v6.1/YOLOv5s.pt
And export it to ONNX on your machine:
git clone https://github.com/ultralytics/YOLOv5
cd YOLOv5
pip install -r requirements.txt
pip install onnx
python3 export.py --weights models/YOLOv5s.pt --include onnx
Using the newly converted .onnx file should solve the problem.
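As a quick sanity check (my sketch; it assumes export.py wrote yolov5s.onnx next to the weights), the converted model should now load without errors:

import cv2

# Loading should succeed if the export produced a valid ONNX graph.
net = cv2.dnn.readNet('yolov5s.onnx')  # path to the exported model (assumed)
print('ONNX model loaded successfully')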

'NoneType' object has no attribute 'shape' in OpenCV recursion [duplicate]

This question already has answers here: Why do I get AttributeError: 'NoneType' object has no attribute 'something'? (11 answers). Closed 1 year ago.
I ran into a problem: the code is set up so that when a car passes the camera, a video of its passage is saved and the function then starts again through recursion, but on exactly the 4th cycle the code throws an error:
(H, W) = image.shape[:2]
AttributeError: 'NoneType' object has no attribute 'shape'
Here is the code itself:
import cv2 as cv
import numpy as np
import uuid
import os.path
import os

def camera():
    print("new cycle")
    confThreshold = 0.6
    nmsThreshold = 0.1
    inpWidth = 416
    inpHeight = 416
    classesFile = "car.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')
    modelConf = "car.cfg"
    modelWeights = "cars_last.weights"
    cap = cv.VideoCapture("rtsp://.../user=admin_password=tlJwpbo6_channel=1_stream=0.sdp?real_stream:")
    ret, image = cap.read()
    (H, W) = image.shape[:2]
    global empty
    global newname
    newname = str(uuid.uuid4())
    global output
    output = cv.VideoWriter(newname + '.avi', cv.VideoWriter_fourcc('M','J','P','G'), 10, (W, H))
    print(newname)
    empty = []

    def postprocess(frame, outs):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        classIDs = []
        confidences = []
        boxes = []
        boxes1 = []
        cv.cvtColor(frame, cv.COLOR_RGB2BGR)
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                if confidence > confThreshold:
                    centerX = int(detection[0] * frameWidth)
                    centerY = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(centerX - width / 2)
                    top = int(centerY - height / 2)
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes1.append([x, y, int(width), int(height)])
                    classIDs.append(classID)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])
        idxs = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        if len(idxs) > 0:
            empty.clear()
            empty.append(0)
        else:
            empty.append(1)
        if len(empty) > 100:
            empty.clear()
            if (os.path.isfile(newname + ".avi")) == True and os.path.getsize(newname + ".avi") != 0:
                camera()
        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        for i in indices:
            i = i[0]
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            drawPred(classIDs[i], confidences[i], left, top, left + width, top + height)
        output.write(frame)

    def drawPred(classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
        label = '%.2f' % conf
        # Get the label for the class name and its confidence
        if classes:
            assert (classId < len(classes))
            label = '%s:%s' % (classes[classId], label)
        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
        cv.rectangle(frame, (left, top), (right, bottom), (255, 255, 255), 1)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)

    def getOutputsNames(net):
        layersNames = net.getLayerNames()
        return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    net = cv.dnn.readNetFromDarknet(modelConf, modelWeights)
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    winName = 'DL OD with OpenCV'
    cv.namedWindow(winName, cv.WINDOW_NORMAL)
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outs = net.forward(getOutputsNames(net))
        postprocess(frame, outs)
        cv.imshow(winName, frame)
    cap.release()
    output.release()

camera()
As noted in the documentation for the read function: the video frame is returned in the output image parameter, and if no frame has been grabbed the image will be empty; the function returns false when no frame has been grabbed. You should check the return status to know whether you actually have image data.
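In practice that means guarding every read() before touching the frame; a minimal sketch:

import cv2 as cv

cap = cv.VideoCapture(0)  # or the RTSP URL from the question
while True:
    ret, image = cap.read()
    if not ret or image is None:
        # No frame was grabbed (stream hiccup or end of stream);
        # skip instead of calling image.shape on None.
        continue
    (H, W) = image.shape[:2]
    # ... process the frame ...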
All I had to do was move the camera connection out of the recursion; I put it outside the camera() function. Thanks all for the help.
cap = cv.VideoCapture("rtsp:///user=admin_password=tlJwpbo6_channel=1_stream=0.sdp?real_stream:")
ret, image = cap.read()

How to count an object after it passes through the line?

Hey, I am new to Python at this level, but I am trying my best.
I have detected the objects in the video frames, labelled them, and counted the total objects in each frame, but my question is: how can I count an object once it has passed the line shown in the image, and keep track of the object category as well?
Here is my code; please answer in detail and include code if you can.
In the image I count the total objects in the frame, but I want to count them only when they cross the line.
Thanks in advance :)
import cv2
import numpy as np

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open('coco.names', 'r') as f:
    classes = f.read().splitlines()
# printing the data which is loaded from the names file
#print(classes)

cap = cv2.VideoCapture('video.mp4')
while True:
    _, img = cap.read()
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layer_names = net.getUnconnectedOutLayersNames()
    layerOutput = net.forward(output_layer_names)

    boxes = []
    person = 0
    truck = 0
    car = 0
    confidences = []
    class_ids = []
    for output in layerOutput:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w/2)
                y = int(center_y - h/2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.QT_FONT_NORMAL
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))

    for i in indexes.flatten():
        labelsss = str(classes[class_ids[i]])
        if labelsss == 'person':
            person += 1
        if labelsss == 'car':
            car += 1
        if labelsss == 'truck':
            truck += 1

    for i in indexes.flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 1))
        color = colors[i]
        cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
        cv2.line(img, (1000, 250), (5, 250), (0,0,0), 2)
        cv2.putText(img, label + " ", (x, y+20), font, 0.5, (255,255,255), 2)

    cv2.putText(img, 'Car' + ":" + str(car), (20, 20), font, 0.8, (0,0,0), 2)
    cv2.putText(img, 'Person' + ":" + str(person), (20, 50), font, 0.8, (0,0,0), 2)
    cv2.putText(img, 'Truck' + ":" + str(truck), (20, 80), font, 0.8, (0,0,0), 2)
    cv2.imshow('Image', img)
    key = cv2.waitKey(1)
    if key == 10:
        break

cap.release()
cv2.destroyAllWindows()
I did a project just like this during my internship. You can check out the code here: https://github.com/sarimmehdi/nanonets_object_tracking/blob/master/test_on_video.py
In a nutshell: instead of a line, draw a narrow rectangle and count a tracked ID the first time it passes through the rectangle. If the rectangle is narrow enough, you also avoid the problem of re-identification.
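To sketch the counting logic itself (an illustration, not code from the linked repo; it assumes you already have a tracking ID and a box center for each object in each frame):

LINE_Y = 250          # y position of the counting line
counted_ids = set()   # IDs already counted, so each object is counted once
counts = {}           # per-class totals

def update_counts(track_id, class_name, prev_cy, cy):
    # Count the object the first time its center crosses the line downward.
    if track_id not in counted_ids and prev_cy < LINE_Y <= cy:
        counted_ids.add(track_id)
        counts[class_name] = counts.get(class_name, 0) + 1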

How can I capture detected image of object Yolov3 and display in flask

I am working on real-time object detection using YOLOv3 with OpenCV and Python, and it works well. Currently I am trying to capture an image of a detected object and display it in Flask. Does someone know how to implement this feature? I hope someone can help. I followed the tutorial at this link: https://medium.com/analytics-vidhya/real-time-object-detection-using-yolov3-with-opencv-and-python-64c985e14786
import cv2
import numpy as np
import os
import time
import detect as dt
from PIL import Image

labelsPath = os.path.sep.join(["yolo-coco", "coco.names"])
weightsPath = os.path.sep.join(["yolo-coco", "yolov3.weights"])
configPath = os.path.sep.join(["yolo-coco", "yolov3.cfg"])
LABELS = open(labelsPath).read().strip().split("\n")
net = cv2.dnn.readNet(configPath, weightsPath)
layer_names = net.getLayerNames()
outputlayers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(LABELS), 3))

# loading video
cap0 = cv2.VideoCapture(0)  # 0 for 1st webcam
cap1 = cv2.VideoCapture(1)
font = cv2.FONT_HERSHEY_PLAIN
starting_time = time.time()
frame_id = 0
count = 0
use_cuda = 1

while True:
    ret0, frame0 = cap0.read()
    ret1, frame1 = cap1.read()
    image = cv2.cvtColor(frame0, cv2.COLOR_BGR2RGB)
    im_pil = Image.fromarray(image)
    im_pil = im_pil.resize((200, 200))
    boxes = dt.do_detect(image, im_pil, 0.5, 0.4, use_cuda)
    if (ret0):
        frame_id += 1
        height, width, channels = frame0.shape
        # detecting objects
        blob = cv2.dnn.blobFromImage(frame0, 0.00392, (320, 320), (0,0,0), True, crop=False)  # reduce 416 to 320
        net.setInput(blob)
        outs = net.forward(outputlayers)
        print(outs[1])
        # Showing info on screen / get confidence score of the algorithm detecting an object in the blob
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                print(confidence)
                if confidence > 0.3:
                    # object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # rectangle coordinates
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boxes.append([x, y, w, h])          # put all rectangle areas
                    confidences.append(float(confidence))  # how confident the detection was
                    class_ids.append(class_id)          # name of the object that was detected
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)
        result = open('C:/Users/HP/Miniconda3/envs/count_vechicle/coding/images/frame%04d.txt' % (count), 'w')
        for i in range(len(boxes)):
            result.write(boxes[i])
            count = count + 1
            success, image = vidcap.read()
        result.close()
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(LABELS[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame0, (x, y), (x+w, y+h), color, 2)
            cv2.putText(frame0, label + " " + str(round(confidence, 2)), (x, y+30), font, 1, (255,255,255), 2)
        elapsed_time = time.time() - starting_time
        fps = frame_id / elapsed_time
        cv2.putText(frame0, "FPS:" + str(round(fps, 2)), (10, 50), font, 2, (0,0,0), 1)
        cv2.imshow("Image0", frame0)
        key = cv2.waitKey(1)  # wait 1 ms; the loop will start again and we will process the next frame
    if (ret1):
        cv2.imshow("Image1", frame1)
        key = cv2.waitKey(1)
    if key == 27:  # Esc key stops the process
        break

cap0.release()
cap1.release()
cv2.destroyAllWindows()
Using the bounding box coordinates of the detected object, you can crop a new image out of it and then save it to display.
Try this:
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
crop_img = frame[y:y + h, x:x + w] #frame of video you are looping through
cv2.imwrite(<filename>, crop_img)
Update, adapted to your code:
if i in indexes:
    x, y, w, h = boxes[i]
    crop_img = frame0[y:y + h, x:x + w]
    cv2.imwrite(<filename>, crop_img)
Using the box coordinates you can crop that part.
Hope it helps.
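For the Flask part of the question, a minimal sketch of serving the saved crop (the route name and the crop.jpg path are assumptions, not from the original answer):

from flask import Flask, send_file

app = Flask(__name__)

@app.route('/detected')
def detected():
    # Serve the most recently saved crop written by the detection loop.
    return send_file('crop.jpg', mimetype='image/jpeg')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)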
