How do I make detection only on a specific area - python

I'm working on a project doing object detection and text detection using both YOLO and EasyOCR. Since I'm a beginner and really new to computer vision, I would be glad if someone could help me.
Here's the code:
import cv2
import numpy as np
import easyocr

# Load Yolo
net = cv2.dnn.readNet('yolov4-tiny-custom_3000.weights', 'yolov4-tiny-custom.cfg')
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture('car1.mp4')

# Declare Ocr
cascade_src = 'haarcascade_russian_plate_number.xml'
cascade = cv2.CascadeClassifier(cascade_src)
reader = easyocr.Reader(['en'], gpu=False)
# Declare Ocr

while True:
    _, frame = cap.read()
    height, width, channels = frame.shape
    #frame = cv2.resize(frame, (800, 600))

    # Yolo Detection
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3)
            print("Jenis Mobil: " + label)  # Indonesian: "Car type"

    # Text Reader Using Ocr
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    plate = cascade.detectMultiScale(gray, 1.1, 5)
    for x, y, w, h in plate:
        wT, hT, cT = frame.shape
        a, b = (int(0.02 * wT), int(0.02 * hT))
        plate2 = frame[y + a:y + h - a, x + b:x + w - b, :]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (60, 60, 255), 2)
        cv2.rectangle(frame, (x - 1, y - 40), (x + w + 1, y), (60, 60, 255), -1)
        result = reader.readtext(plate2)
        for detek in result:
            top_left = (int(detek[0][0][0]), int(detek[0][0][1]))
            bottom_right = (int(detek[0][2][0]), int(detek[0][2][1]))
            text = detek[1]
            cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
            print("Nomor Kendaran: " + text)  # Indonesian: "Vehicle number"
    # Text Reader Using Ocr

    cv2.imshow("Detection", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
I am trying to use an ROI so that detection runs only on a specific area, but I have not been able to get it working.
Any advice, please?

Crop the frame before it is fed to the model:
while True:
    _, frame = cap.read()
    frame_crop = frame[y1:y2, x1:x2]  # set x1, x2, y1, y2 based on your ROI
    blob = cv2.dnn.blobFromImage(frame_crop, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
This will also speed up inference, since there is less data for the model to process.
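If you still need the boxes drawn on the full frame, remember that the network now sees only the crop, so its normalized coordinates are relative to the ROI. A minimal sketch of mapping them back (assuming the same x1, y1, x2, y2 as above and the net and output_layers from the question):

roi = frame[y1:y2, x1:x2]
roi_h, roi_w = roi.shape[:2]
blob = cv2.dnn.blobFromImage(roi, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Scale by the crop size, then add the ROI offset
            # to get coordinates in the full frame.
            center_x = int(detection[0] * roi_w) + x1
            center_y = int(detection[1] * roi_h) + y1
            w = int(detection[2] * roi_w)
            h = int(detection[3] * roi_h)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)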

Related

How to convert cv2 pictures into a video frame

I have the following code which I am trying to use to detect objects. I have made some changes and am now trying to make it work with a camera. What it does now is take a single picture in a loop instead of showing continuous video.
Here is my code.
import numpy as np
import cv2

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layers_names = net.getLayerNames()
outputlayers = [layers_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

class_ids = []
confidences = []
boxes = []

cap = cv2.VideoCapture(0)
while True:
    _, image = cap.read()
    height, width, channels = image.shape
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(outputlayers)
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence >= 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[3] * width)
                h = int(detection[3] * height)
                x = int(center_x - width / 2)
                y = int(center_y - height / 2)
                boxes.append([x, y, width, height])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    objects_detecetd = len(boxes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, width, height = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[i]
            print(label)
            cv2.rectangle(image, (x, y), (x + width, y + height), color, 2)
            cv2.putText(image, label, (x, y + 30), font, 4, color, 3)
    cv2.imshow("Image", image)
    cv2.waitKey(5)
    cv2.destroyAllWindows()
cv2.destroyAllWindows()
cap.release
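A likely cause, judging by the flattened indentation above, is that cv2.destroyAllWindows() runs inside the while loop, so the preview window is torn down right after every frame. A minimal sketch of the usual display loop (assuming the detection and drawing code stays as-is):

while True:
    ret, image = cap.read()
    if not ret:
        break
    # ... run detection and draw boxes on `image` here ...
    cv2.imshow("Image", image)
    if cv2.waitKey(1) == 27:  # Esc quits
        break
cap.release()  # note the parentheses: `cap.release` without them does nothing
cv2.destroyAllWindows()

The boxes, confidences, and class_ids lists should also be reset at the top of each iteration; as written they are created once and accumulate detections across frames.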

Detection or display problem with YOLO / OpenCV on python

I need to use the YOLO algorithm for schoolwork.
I have to know the center of any object detected by YOLO, which is why I don't use an existing YOLO GitHub repo: I wouldn't know how to get that data out of one.
I just followed a YouTube video (https://www.youtube.com/watch?v=xKK2mkJ-pHU&t=161s&ab_channel=Pysource) and copied the code into Spyder (my Python launcher).
Here is the code (I'm French, which explains my standard English level!):
import cv2
import numpy as np
import time

#--------------------- Load YOLOv3 ---------------------
# Load the YOLOv3 model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Load the class names from the COCO dataset
class_names = []
with open("coco.names", "r") as f:
    class_names = [line.strip() for line in f.readlines()]

# Load the layers (??)
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(class_names), 3))

#--------------------- Load the image ---------------------
# Take the camera.py code
cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_PLAIN
starting_time = time.time()
frame_id = 0

while True:
    _, frame = cap.read()
    frame_id += 1
    height, width, channels = frame.shape

    #------------------- Detect the objects -------------------
    # Break the frame down into the 3 RGB colors
    blob = cv2.dnn.blobFromImage(frame, 0.004, (320, 320), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    #--------------- Information about the objects ----------------
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # The object is now detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
                # cv2.circle(img, (center_x, center_y), 10, (0, 255, 0), 2)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.3)
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(class_names[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label + " " + str(round(confidence, 2)), (x, y + 30), font, 3, color, 3)

    time_past = time.time() - starting_time
    fps = frame_id / time_past
    cv2.putText(frame, "FPS: " + str(round(fps, 1)), (150, 50), font, 3, (0, 255, 0), 4)
    cv2.imshow("Image", frame)

    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
When I run this script, in the cv2.imshow(...) window I can see my phone's webcam feed (thanks to the iVCam app, over a cable link), but the problem is the following: no objects are detected at all; I mean, YOLO doesn't detect anything. :/
I don't think it's an installation issue, because I set that up properly (I think...), and I can see the cv2 version in Python:
>>> print(cv2.__version__)
4.6.0
I don't know whether going through the iVCam app can cause some trouble or not.
If someone can fix the problem, I would be really happy!
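One way to narrow this down (a generic debugging step, not something from the video): print the best raw class score the network produces each frame, before the 0.5 threshold, to see whether the model outputs anything at all.

# Inside the while loop, right after outs = net.forward(output_layers):
# report the highest raw class score this frame, before any thresholding.
best = 0.0
for out in outs:
    for detection in out:
        scores = detection[5:]
        best = max(best, float(scores[np.argmax(scores)]))
print("best raw confidence this frame:", best)

If that stays near zero, it may also be worth trying the usual 1/255 scale (about 0.00392) and a 416x416 blob, which most YOLOv3 examples use; 0.004 at 320x320 deviates slightly from that.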

yolov7 Object Detector

I used YOLOv5 for my object detection. Now I am trying to move up to YOLOv7. I trained on my dataset and converted the model from .pt to .onnx, but I cannot load it in my code, which I have shared below. I got this error:
cv2.error: OpenCV(4.5.5) /Users/runner/work/opencv-python/opencv-python/opencv/modules/dnn/src/onnx/onnx_importer.cpp:928: error: (-2:Unspecified error) in function 'handleNode'
> Node [NonMaxSuppression#ai.onnx]:(onnx::Gather_626) parse error: OpenCV(4.5.5) /Users/runner/work/opencv-python/opencv-python/opencv/modules/dnn/src/dnn.cpp:621: error: (-2:Unspecified error) Can't create layer "onnx::Gather_626" of type "NonMaxSuppression" in function 'getLayerInstance'
import cv2
import numpy as np
from PIL import Image
import webcolors
import time
import requests

start = time.time()
path = "/Users/admin/Desktop/ML/"
productsArray = []
products = []
classNames = []
allProductsArray = []

def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result

# Loading image
image = cv2.imread(path + "Images/2.jpg")
img = format_yolov5(image)  # making the image square

####### DETECTION ###########
def Detect():
    net = cv2.dnn.readNet(path + "Config/data.onnx")
    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (640, 640), swapRB=True)
    net.setInput(blob)
    predictions = net.forward()

    class_list = []
    with open(path + "Config/obj.names", "r") as f:
        class_list = [cname.strip() for cname in f.readlines()]

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    output_data = predictions[0]
    image_width, image_height, _ = img.shape
    x_factor = image_width / 640
    y_factor = image_height / 640
    for r in range(25200):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.55:
            classes_scores = row[5:]
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if classes_scores[class_id] > .25:
                confidences.append(confidence)
                class_ids.append(class_id)
                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.4)
    result_class_ids = []
    result_confidences = []
    result_boxes = []
    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])
    for i in range(len(result_class_ids)):
        box = result_boxes[i]
        class_id = result_class_ids[i]
        label = class_list[class_id]
        allProductsArray.append(label)
        cv2.rectangle(img, box, (0, 255, 255), 2)
        cv2.rectangle(img, (box[0], box[1] - 20), (box[0] + box[2], box[1]), (0, 255, 255), -1)
        cv2.putText(img, class_list[class_id], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
        cv2.putText(img, str(result_confidences[i]), (box[0] + 60, box[1]), cv2.FONT_HERSHEY_SIMPLEX, .6, (0, 0, 0))

Detect()
print(allProductsArray)
I'm not sure I fully understand what you want to do (run YOLOv7 inference or YOLOv5), but I hope this repo can help:
https://github.com/ibaiGorordo/ONNX-YOLOv7-Object-Detection
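As a side note, and this is an inference from the error text rather than something confirmed in the thread: OpenCV's DNN module cannot build a NonMaxSuppression layer, which suggests the model was exported with NMS baked into the ONNX graph (YOLOv7's export.py does this when its end-to-end option is used). Re-exporting without embedded NMS, and doing NMS in your own code with cv2.dnn.NMSBoxes as your snippet already does, may let cv2.dnn.readNet load the file. A hypothetical export command, with flags assumed from yolov7's export.py (check your local copy):

# Re-export from the yolov7 repo, leaving NMS out of the ONNX graph.
python export.py --weights best.pt --grid --simplify --img-size 640 640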

'NoneType' object has no attribute 'shape' in OpenCV recursion [duplicate]

This question already has answers here:
Why do I get AttributeError: 'NoneType' object has no attribute 'something'?
(11 answers)
Closed 1 year ago.
I came across a problem: the code works by saving a video of a car's passage whenever a car passes the camera, then the function starts again through recursion, but exactly on the 4th cycle the code gives an error:
(H, W) = image.shape[:2]
AttributeError: 'NoneType' object has no attribute 'shape'
Here is the code itself:
import cv2 as cv
import numpy as np
import uuid
import os.path
import os

def camera():
    print("new cycle")
    confThreshold = 0.6
    nmsThreshold = 0.1
    inpWidth = 416
    inpHeight = 416
    classesFile = "car.names"
    classes = None
    with open(classesFile, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')
    modelConf = "car.cfg"
    modelWeights = "cars_last.weights"
    cap = cv.VideoCapture("rtsp://.../user=admin_password=tlJwpbo6_channel=1_stream=0.sdp?real_stream:")
    ret, image = cap.read()
    (H, W) = image.shape[:2]
    global empty
    global newname
    newname = str(uuid.uuid4())
    global output
    output = cv.VideoWriter(newname + '.avi', cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (W, H))
    print(newname)
    empty = []

    def postprocess(frame, outs):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        classIDs = []
        confidences = []
        boxes = []
        boxes1 = []
        cv.cvtColor(frame, cv.COLOR_RGB2BGR)
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                if confidence > confThreshold:
                    centerX = int(detection[0] * frameWidth)
                    centerY = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(centerX - width / 2)
                    top = int(centerY - height / 2)
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes1.append([x, y, int(width), int(height)])
                    classIDs.append(classID)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])
        idxs = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        if len(idxs) > 0:
            empty.clear()
            empty.append(0)
        else:
            empty.append(1)
        if len(empty) > 100:
            empty.clear()
            if (os.path.isfile(newname + ".avi")) == True and os.path.getsize(newname + ".avi") != 0:
                camera()
        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        for i in indices:
            i = i[0]
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            drawPred(classIDs[i], confidences[i], left, top, left + width, top + height)
        output.write(frame)

    def drawPred(classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
        label = '%.2f' % conf
        # Get the label for the class name and its confidence
        if classes:
            assert (classId < len(classes))
            label = '%s:%s' % (classes[classId], label)
        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
        cv.rectangle(frame, (left, top), (right, bottom), (255, 255, 255), 1)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)

    def getOutputsNames(net):
        layersNames = net.getLayerNames()
        return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    net = cv.dnn.readNetFromDarknet(modelConf, modelWeights)
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    winName = 'DL OD with OpenCV'
    cv.namedWindow(winName, cv.WINDOW_NORMAL)
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outs = net.forward(getOutputsNames(net))
        postprocess(frame, outs)
        cv.imshow(winName, frame)
    cap.release()
    output.release()

camera()
As spotted in the doc about the read function:

Parameters:
[out] image — the video frame is returned here. If no frames has been grabbed the image will be empty.
Returns:
false if no frames has been grabbed

You should check the return status to know if you have image data.
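A minimal sketch of that check, applied to the lines where the crash occurs:

ret, image = cap.read()
if not ret or image is None:
    # No frame was grabbed (stream hiccup or end of stream):
    # bail out instead of calling image.shape on None.
    raise RuntimeError("could not read a frame from the stream")
(H, W) = image.shape[:2]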
All I had to do was move the camera connection out of the recursion; I have put it outside the camera() function. Thanks all for the help!
cap = cv.VideoCapture("rtsp:///user=admin_password=tlJwpbo6_channel=1_stream=0.sdp?real_stream:")
ret, image = cap.read()

Python: Pillow grab does not load the image frames correctly

I'm trying to use Pillow's ImageGrab to capture frames of my screen, so that I can later detect objects in every frame.
import numpy as np
import cv2
from PIL import ImageGrab as ig
import time

# Load Yolo
net = cv2.dnn.readNet("yolov3_training_last.weights", "yolov3_testing.cfg")

# Name custom object
classes = ["amongus"]

layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

last_time = time.time()
while True:
    img_ = ig.grab(bbox=None)
    open_cv_image = np.array(img_)
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    img = cv2.resize(open_cv_image, None, fx=0.4, fy=0.4)
    height, width, channels = img.shape

    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Object detected
                print(class_id)
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    print(indexes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            #cv2.putText(img, label, (x, y + 30), font, 3, color, 2)
    cv2.imshow("Image", np.array(img_))
    #key = cv2.waitKey(0)

cv2.destroyAllWindows()
If I uncomment the key = cv2.waitKey(0) line it works, because I grab just one frame each time I press a key; but once I comment that part out, I get this screen.
I'm assuming (not sure at all whether this is the reason) that it happens because a lot of frames are coming in, but if I sleep(n) inside the while loop the FPS will be very low (I guess?).
What's happening, and how can I fix it?
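A likely explanation, based on how OpenCV's HighGUI works rather than anything specific to Pillow: cv2.imshow only repaints the window when cv2.waitKey gives the GUI event loop time to run, so with the call commented out the window never updates properly. A 1 ms wait keeps the stream live without blocking for a key press; a minimal sketch:

cv2.imshow("Image", np.array(img_))
# waitKey(1) pumps the GUI event loop for about 1 ms per frame;
# without any waitKey call, imshow may never actually repaint.
if cv2.waitKey(1) & 0xFF == 27:  # Esc quits
    break

Note also that the boxes are drawn on the resized img, while the window shows the raw grab np.array(img_), so even successful detections would never appear in the displayed image.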
