np.argmax always returns 0 - python

To determine the class name of a detected object, I need to get its class_id. The problem is that np.argmax always returns 0, so I always get the first class name. When I detect another object it should print class_id 1, but it prints 0, and I can't get the proper label name to display.
When I look at my .txt files, I see this:
0 0.170103 0.449807 0.319588 0.521236
1 0.266791 0.148936 0.496269 0.287234
2 0.265464 0.422780 0.510309 0.420849
def detect_img(self, img):
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    self.net.setInput(blob)
    output = self.net.forward(self.output)
    height, width, channel = img.shape
    boxes = []
    trusts = []
    class_ids = []
    for out in output:
        for detect in out:
            total_scores = detect[5:]  # class scores start at index 5
            class_id = np.argmax(total_scores)
            print(np.argmax(detect))  # note: argmax over the full row (box + objectness + scores), not just the class scores
            trust_factor = total_scores[class_id]
            if trust_factor > 0.2:
                x_center = int(detect[0] * width)
                y_center = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                x = int(x_center - w / 2)
                y = int(y_center - h / 2)
                boxes.append([x, y, w, h])
                trusts.append(float(trust_factor))
                class_ids.append(class_id)
    for index in range(len(boxes)):
        # if index in indexes:
        x, y, w, h = boxes[index]
        label = self.classes[class_ids[index]]
        # always self.classes[0]; self.classes = ['samsung', 'iphone'], so the result is always samsung
        trust = round(trusts[index], 2)
        text = f"{label}, Trust: {trust}"
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(img, text, (x - 20, y + 40), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2)
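One thing worth checking here is which slice argmax runs over. A toy detection row, assuming the usual YOLO layout [cx, cy, w, h, objectness, class scores...] (the values are made up for illustration):
import numpy as np

detect = np.array([0.5, 0.5, 0.2, 0.3, 0.9, 0.1, 0.8])
print(np.argmax(detect[5:]))  # 1 -> a class id; with self.classes above this picks 'iphone'
print(np.argmax(detect))      # 4 -> position of the objectness score in the full row, not a class id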

Related

Saving IDs for cars with the help of centroids; the IDs don't get removed or updated

I am using OpenCV and a trained model, trying to assign IDs to the cars with the help of centroids. There are the following scenarios:
at the beginning, all recognized cars get a centroid and an ID
cars that leave the frame should have their IDs removed
new cars that enter the frame later should get new IDs
I have the ID code from a YouTube video, but it doesn't work as it should: the remove step removes everything, and if I leave the update step on, the cars get new IDs every frame, which should not happen.
I am new to programming and would appreciate it if someone could help me out here.
import cv2
import numpy as np
import pandas as pd
import math

video_name = "videos.mp4"
cap = cv2.VideoCapture(video_name)
net = cv2.dnn.readNetFromONNX("best.onnx")
classes = ['car', 'free_space']

count = 0
center_points_prev_frame = []
tracking_objects = {}
track_id = 0

while True:
    ret, img = cap.read()
    # ret, frame1 = cap.read()
    count += 1
    if img is None:
        break

    img = cv2.resize(img, (1500, 1000))
    # frame1 = cv2.resize(frame1, (1500, 1000))

    blob = cv2.dnn.blobFromImage(img, scalefactor=1 / 255,
                                 size=(640, 640),
                                 mean=[0, 0, 0, 0],
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    detections = net.forward()[0]
    # print(detections.shape)
    # Layout: cx, cy, w, h, confidence, class_score

    classes_ids = []
    confidences = []
    boxes = []
    rows = detections.shape[0]
    img_width, img_height = img.shape[1], img.shape[0]
    x_scale = img_width / 640
    y_scale = img_height / 640

    for i in range(rows):
        row = detections[i]
        confidence = row[4]
        if confidence > 0.3:
            classes_score = row[5:]
            ind = np.argmax(classes_score)
            if classes_score[ind] > 0.3:
                classes_ids.append(ind)
                confidences.append(confidence)
                cx, cy, w, h = row[:4]
                x1 = int((cx - w / 2) * x_scale)
                y1 = int((cy - h / 2) * y_scale)
                # print("X1:", x1, "Y1", y1)
                width = int(w * x_scale)
                height = int(h * y_scale)
                box = np.array([x1, y1, width, height])
                boxes.append(box)

    # apply Non-Maximum Suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.3)

    # Center points of the current frame
    center_points_cur_frame = []
    for i in indices:
        x1, y1, w, h = boxes[i]
        label = classes[classes_ids[i]]
        conf = confidences[i]
        text = label + "{:.2f}".format(conf)
        if label == 'car':
            car_coordinates = [(x1, y1), (x1 + w, y1 + h)]
            # cv2.rectangle(img, (x1, y1), (x1 + w, y1 + h), (51, 51, 255), 2)
            # center points
            cx = int((x1 + x1 + w) / 2)
            cy = int((y1 + y1 + h) / 2)
            cv2.circle(img, (cx, cy), 3, (255, 0, 255), -1)
            cv2.putText(img, str(track_id), (cx, cy), cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 0, 255), 1)
            center_points_cur_frame.append((cx, cy))

    # Only at the beginning do we compare previous and current frame
    if count <= 2:
        for pt in center_points_cur_frame:
            for pt2 in center_points_prev_frame:
                distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])
                if distance < 20:
                    tracking_objects[track_id] = pt
                    track_id += 1
    else:
        tracking_objects_copy = tracking_objects.copy()
        center_points_cur_frame_copy = center_points_cur_frame.copy()
        for object_id, pt2 in tracking_objects_copy.items():
            object_exists = False
            for pt in center_points_cur_frame_copy:
                distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])
                # Update IDs position
                if distance < 20:
                    tracking_objects[object_id] = pt
                    object_exists = True
                    if pt in center_points_cur_frame:
                        center_points_cur_frame.remove(pt)
                    continue
            ############################### Problem ##########################################
            # Remove IDs lost
            if not object_exists:
                tracking_objects.pop(object_id)
        # Add new IDs found
        for pt in center_points_cur_frame:
            tracking_objects[track_id] = pt
            track_id += 1
        ############################### Problem ##########################################

    for object_id, pt in tracking_objects.items():
        cv2.circle(img, pt, 3, (255, 0, 255), -1)
        cv2.putText(img, str(object_id), (pt[0], pt[1] - 2), cv2.FONT_HERSHEY_COMPLEX, 0.3, (255, 0, 255), 1)

    print("Tracking objects")
    print(tracking_objects)
    print("CUR FRAME LEFT PTS")
    print(center_points_cur_frame)

    # Make a copy of the points
    center_points_prev_frame = center_points_cur_frame.copy()

    cv2.imshow("Video", img)
    cv2.waitKey(1)

# After the loop release the cap object
cap.release()
# Destroy all the windows
cv2.destroyAllWindows()
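For reference, here is a minimal sketch of the matching lifecycle the question describes (the helper name update_tracks and its structure are my own, not from the original code): match each existing ID to the nearest current centroid, drop only the IDs that found no match, and hand fresh IDs to the leftover centroids.
import math

def update_tracks(tracking_objects, cur_points, next_id, max_dist=20):
    unmatched = list(cur_points)
    for object_id, prev_pt in list(tracking_objects.items()):
        # find the closest current centroid for this ID
        best = None
        for pt in unmatched:
            d = math.hypot(prev_pt[0] - pt[0], prev_pt[1] - pt[1])
            if d < max_dist and (best is None or d < best[0]):
                best = (d, pt)
        if best is not None:
            tracking_objects[object_id] = best[1]  # update the ID's position
            unmatched.remove(best[1])              # consume the matched centroid
        else:
            tracking_objects.pop(object_id)        # the car left the frame
    for pt in unmatched:                           # new cars get new IDs
        tracking_objects[next_id] = pt
        next_id += 1
    return next_id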

How to convert cv2 pictures into a video frame

I have the following code, which I am trying to use to detect objects. I have made some changes, and now I am trying to make it work with a camera so it detects objects in a live stream. What it does now is take a single picture in a loop instead of showing video.
Here is my code.
import numpy as np
import cv2

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layers_names = net.getLayerNames()
outputlayers = [layers_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
cap = cv2.VideoCapture(0)

while True:
    _, image = cap.read()
    height, width, channels = image.shape
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True,
                                 crop=False)
    net.setInput(blob)
    outs = net.forward(outputlayers)
    # reset per frame, otherwise old detections accumulate across frames
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence >= 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    objects_detected = len(boxes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            print(label)
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            cv2.putText(image, label, (x, y + 30), font, 4, color, 3)
    cv2.imshow("Image", image)
    cv2.waitKey(5)

cap.release()
cv2.destroyAllWindows()
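For comparison, the usual shape of a camera display loop looks like this (a minimal sketch; the quit key and camera index are my assumptions):
import cv2

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # per-frame processing goes here; any lists that collect results
    # must be reset inside this loop, not once before it
    cv2.imshow("Image", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # ~1 ms delay keeps the window updating
        break
cap.release()
cv2.destroyAllWindows()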

Draw the bounding box of a second class on the main image, which was cropped to get the detection of the second class

I have a problem. I have an object detection model that detects two classes. What I want to do is:
Detect class 1 (say c1) on the source image (640x640), draw its bounding box, crop that box out to a c1 image, and resize it to 640x640. (DONE)
Detect class 2 (say c2) on the c1 image (640x640). (DONE)
Now I want to draw the bounding box of c2 on the source image.
I have tried to explain it here by visualizing it.
How can I do it? Please help.
Code:
frame = self.REC.ImgResize(frame)
frame, score1, self.FLAG1, x, y, w, h = self.Detect(frame, "c1")
if self.FLAG1 and x > 0 and y > 0:
    x1, y1 = w, h
    cv2.rectangle(frame, (x, y), (w, h), self.COLOR1, 1)
    c1Img = frame[y:h, x:w]
    c1Img = self.REC.ImgResize(c1Img)
    ratio = c2Img.shape[1] / float(frame.shape[1])
    if ratio > 0.35:
        c2Img, score2, self.FLAG2, xN, yN, wN, hN = self.Detect(c1Img, "c2")
        if self.FLAG2 and xN > 0 and yN > 0:
            # What should be the values for these => (__, __), (__, __)
            cv2.rectangle(frame, (__, __), (__, __), self.COLOR2, 1)
I had tried an approach that could only solve the (x, y) coordinates; the width and height were a mess.
What I tried was: first find the rate at which the cropped c1 image grew after being resized.
For example:
x1 = 329
y1 = 102
h1 = 637
w1 = 630
r_w = 630 / 640  # 0.9843
r_h = 637 / 640  # 0.9953

x2 = 158
y2 = 393
h2 = 499
w2 = 588
new_x2 = 158 * 0.9843  # 156
new_y2 = 393 * 0.9953  # 391
new_x2 = x1 + new_x2
new_y2 = y1 + new_y2
This works to find (x, y), but I am still trying to find a way to get the (w, h) of the bounding box.
EDIT
The complete code is:
import cv2
import random
import numpy as np
import onnxruntime as ort

cuda = False
w = "models/model.onnx"
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = ort.InferenceSession(w, providers=providers)
names = ['face', 'glasses']
colors = {name: [random.randint(0, 255) for _ in range(3)] for name in names}
img = cv2.imread("test.jpg")

def ImgResize(image, width=640, height=640, inter=cv2.INTER_CUBIC):
    if image is not None:
        resized = cv2.resize(image, (width, height), interpolation=inter)
    return resized

def Detect(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    flag = False
    w, h = 0, 0
    x, y = 0, 0
    score = 0
    try:
        if im is None:
            raise Exception(IOError())
        shape = im.shape[:2]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        ratio = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:
            ratio = min(ratio, 1.0)
        new_unpad = int(round(shape[1] * ratio)), int(round(shape[0] * ratio))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        if auto:
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)
        dw /= 2
        dh /= 2
        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        image_ = im.transpose((2, 0, 1))
        image_ = np.expand_dims(image_, 0)
        image_ = np.ascontiguousarray(image_)
        im = image_.astype(np.float32)
        im /= 255
        outname = [i.name for i in session.get_outputs()]
        inname = [i.name for i in session.get_inputs()]
        inp = {inname[0]: im}
        outputs = session.run(outname, inp)[0]
        return im, outputs, ratio, (dw, dh)
    except IOError:
        print("Invalid Image File")

def Detection(img, c_name):
    score = 0
    name = ""
    flag = False
    a, b, c, d = 0, 0, 0, 0
    image_, outputs, ratio, dwdh = Detect(img)
    ori_images = [img.copy()]
    for batch_id, x0, y0, x1, y1, cls_id, score in outputs:
        img = ori_images[int(batch_id)]
        box = np.array([x0, y0, x1, y1])
        box -= np.array(dwdh * 2)
        box /= ratio
        box = box.round().astype(np.int32).tolist()
        cls_id = int(cls_id)
        score = round(float(score), 3)
        if score > 0.55:
            name = names[cls_id]
            if name != c_name:
                return img, 0, False, 0, 0, 0, 0, "Could Not Detect"
            flag = True
            a, b, c, d = tuple(box)
            score = round(score * 100, 0)
    return img, score, flag, a, b, c, d, name
COLORF = (212, 15, 24)
COLORG = (25, 240, 255)
nameW = "Det"
flagF, flagN = False, False
img = ImgResize(img)
c1_img, score, flagF, x1, y1, w1, h1, name = Detection(img, "face")
print(score, flagF, x1, y1, w1, h1, name)
if flagF:
    cv2.rectangle(img, (x1, y1), (w1, h1), COLORF, 1)
    cv2.putText(img, name, (x1, y1), cv2.FONT_HERSHEY_PLAIN, 2, COLORF, 2)
    cv2.imshow("face", img)
    c1_img = c1_img[y1:h1, x1:w1]
    c1_img_orig = c1_img.copy()
    c1_img = ImgResize(c1_img)
    c2_img, score, flagG, x2, y2, w2, h2, name = Detection(c1_img, "glasses")
    if flagG:
        c2_img = c2_img[y2:h2, x2:w2]
        cv2.rectangle(c1_img_orig, (x2, y2), (w2, h2), COLORG, 1)
        cv2.putText(c1_img_orig, name, (x1, y1), cv2.FONT_HERSHEY_PLAIN, 2, COLORG, 2)
        cv2.imshow("glasses", c2_img)
        x3 = x1 + int(x2 * w1 / 640)
        y3 = y1 + int(y2 * h1 / 640)
        w3 = int(w2 * w1 / 640)
        h3 = int(h2 * h1 / 640)
        cv2.rectangle(img, (x3, y3), (w3, h3), COLORG, 1)
cv2.imshow(nameW, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
For some images this code draws the bounding box as required, but for other images, and in the video stream, the box ends up in the wrong place.
Here is a complete programming example. Please keep in mind that for cv2.rectangle you need to pass the top-left and bottom-right corners of the rectangle. As you didn't share ImgResize and Detect, I made some assumptions:
import cv2
import numpy as np
COLOR1 = (0, 255, 0)
COLOR2 = (0, 0, 255)
DETECT_c1 = (40, 20, 120, 160)
DETECT_c2 = (20, 120, 160, 40)
RESIZE_x, RESIZE_y = 200, 200
frame = np.zeros((RESIZE_y, RESIZE_x, 3), np.uint8)
x1, y1, w1, h1 = DETECT_c1
c1Img = frame[y1:h1, x1:w1]
cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), COLOR1, 1)
c1Img = cv2.resize(c1Img, (RESIZE_x, RESIZE_y))
x2, y2, w2, h2 = DETECT_c2
x3 = x1 + int(x2 * w1 / RESIZE_x)
y3 = y1 + int(y2 * h1 / RESIZE_y)
w3 = int(w2 * w1 / RESIZE_x)
h3 = int(h2 * h1 / RESIZE_y)
cv2.rectangle(frame, (x3, y3), (x3 + w3, y3 + h3), COLOR2, 1)
cv2.imwrite('out.png', frame)
Output:
I suggest that you treat your bounding box coordinates relatively.
If I understand correctly, your problem is that you are working in different frames of reference. One way to bypass that is to normalize your bbox coordinates at each step.
c1_box is relative to your image, so:
c1_x = x / 640
c1_y = y / 640
When you crop, you can record the ratio values between the main image and your cropped object:
image_vs_c1_x = c1_x / img_x
image_vs_c1_y = c1_y / img_y
Then you need to multiply your c2 bounding box coordinates by those ratios.
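To make the idea concrete, here is a minimal sketch of the mapping (the helper name map_box_to_source is mine; it assumes, as in the question's code, that each box is stored as its top-left and bottom-right corners):
def map_box_to_source(c1_box, c2_box, crop_size=640):
    # c1_box: c1 corners in source-image coordinates
    # c2_box: c2 corners on the crop_size x crop_size resized crop
    x1, y1, X1, Y1 = c1_box
    x2, y2, X2, Y2 = c2_box
    sx = (X1 - x1) / crop_size  # crop -> source scale in x
    sy = (Y1 - y1) / crop_size  # crop -> source scale in y
    return (x1 + int(x2 * sx), y1 + int(y2 * sy),
            x1 + int(X2 * sx), y1 + int(Y2 * sy))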
This is how I was able to solve it:
rwf = round((w1 - x1) / 640, 2)
rhf = round((h1 - y1) / 640, 2)
x3 = int(x2 * rwf)
y3 = int(y2 * rhf)
w3 = int(w2 * rwf)
h3 = int(h2 * rhf)
# these are the top-left and bottom-right coordinates
x4 = x1 + x3
y4 = y1 + y3
w4 = x1 + w3
h4 = y1 + h3

Kalman filter with Python in function EuclideanDistTracker()

I have a car tracking project with YOLOv4 that tracks and counts cars with EuclideanDistTracker.
Sometimes it loses a car and re-finds it with a new count ID, so the counting comes out wrong.
So I think my problem is in the tracking, and I want to increase the tracking accuracy. Researching this, I found the Kalman filter.
I tried to add a Kalman filter to my code but couldn't.
How can I add a Kalman filter to my project?
Here is the code I used:
import cv2 as cv
import numpy as np
import time
import math

class EuclideanDistTracker:
    def __init__(self):
        # Store the center positions of the objects
        self.center_points = {}
        # Keep the count of the IDs;
        # each time a new object is detected, the count increases by one
        self.id_count = 1

    def update(self, objects_rect):
        # Object boxes and ids
        objects_bbs_ids = []
        # Get the center point of each new object
        for rect in objects_rect:
            x, y, w, h = rect
            cx = (x + x + w) // 2
            cy = (y + y + h) // 2
            # Find out if that object was detected already
            same_object_detected = False
            for id, pt in self.center_points.items():
                dist = math.hypot(cx - pt[0], cy - pt[1])
                if dist < 25:
                    self.center_points[id] = (cx, cy)
                    # print(self.center_points)
                    objects_bbs_ids.append([x, y, w, h, id])
                    same_object_detected = True
                    break
            # A new object is detected; we assign an ID to it
            if same_object_detected is False:
                self.center_points[self.id_count] = (cx, cy)
                objects_bbs_ids.append([x, y, w, h, self.id_count])
                self.id_count += 1
        # Clean the dictionary of center points to remove IDs not used anymore
        new_center_points = {}
        for obj_bb_id in objects_bbs_ids:
            _, _, _, _, object_id = obj_bb_id
            center = self.center_points[object_id]
            new_center_points[object_id] = center
        # Update the dictionary with unused IDs removed
        self.center_points = new_center_points.copy()
        return objects_bbs_ids
Toplam_filtered = []
KF = KalmanFilter(0.1, 1, 1, 1, 0.1, 0.1)
# Create tracker object
tracker = EuclideanDistTracker()
# Load Yolo
net = cv.dnn.readNet("yolov4-tiny_best.weights", "yolov4-tiny.cfg")
net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# Loading video
cap = cv.VideoCapture("test2.mp4")
cap.set(3, 1280)  # set video width
cap.set(4, 720)   # set video height
font = cv.FONT_HERSHEY_COMPLEX_SMALL
starting_time = time.time()
frame_id = 0
toplam = 0  # running total
print("\033[H\033[J")
say = 0
while True:
    # Get frame
    _, frame = cap.read()
    frame_id += 1
    if frame_id % 2 != 0:
        continue
    height, width, channels = frame.shape
    # Detecting objects
    blob = cv.dnn.blobFromImage(frame, 0.00261, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    result = []
    boxes = []
    liste = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                if 75 < center_x < 1150 and 300 < center_y < 500:
                    boxes.append([x, y, w, h])
    boxes_ids = tracker.update(boxes)
    eklenecekler = []
    filtered = []
    for i in range(len(boxes_ids)):
        if boxes_ids[i][4] not in eklenecekler:
            eklenecekler.append(boxes_ids[i][4])
            filtered.append(boxes_ids[i])
    for box_id in filtered:
        x, y, w, h, id = box_id
        cv.rectangle(frame, (x, y + h - 20), (x + 30, y + h), (0, 0, 255), -1)  # red corner square
        cv.putText(frame, str(id), (x, y + h - 5), font, 1, (255, 255, 255), 1)  # count label
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 0), 2)  # vehicle boxes
        (kalx, kaly) = KF.predict()
        print(kalx, kaly)
        cv.rectangle(frame, (int(kalx), int(kaly)), (int(kalx + w), int(kaly + h)), (0, 0, 255), 2)
    Toplam_filtered = [*Toplam_filtered, *filtered]
    # For fps
    elapsed_time = time.time() - starting_time
    fps = frame_id / elapsed_time
    # total car count
    try:
        _, _, _, _, value = max(filtered, key=lambda item: item[4])
        if value > toplam:
            toplam = value
    except:
        pass
    # blue counting line
    cv.line(frame, (0, 350), (1280, 350), (255, 0, 0), 2)
    # Some texts
    cv.putText(frame, "FPS: " + str(round(fps, 2)), (5, 156), font, 1, (0, 0, 255), 2)
    cv.putText(frame, "NAZIM CORAKLI", (600, 70), font, 1, (0, 0, 255), 2)
    cv.putText(frame, str(toplam), (1200, 70), font, 2, (0, 0, 255), 2)
    # Final
    cv.imshow("Awsome Car Counting", frame)
    # Press 'ESC' to exit the video
    k = cv.waitKey(1) & 0xff
    if k == 27:
        break
    if k == ord('p'):
        cv.waitKey(-1)  # wait until any key is pressed
cap.release()
cv.destroyAllWindows()
And here is the Kalman filter code I found:
'''
File name      : KalmanFilter.py
Description    : KalmanFilter class used for object tracking
Author         : Rahmad Sadli
Date created   : 20/02/2020
Python Version : 3.7
'''
import numpy as np
import matplotlib.pyplot as plt

class KalmanFilter(object):
    def __init__(self, dt, u_x, u_y, std_acc, x_std_meas, y_std_meas):
        """
        :param dt: sampling time (time for 1 cycle)
        :param u_x: acceleration in x-direction
        :param u_y: acceleration in y-direction
        :param std_acc: process noise magnitude
        :param x_std_meas: standard deviation of the measurement in x-direction
        :param y_std_meas: standard deviation of the measurement in y-direction
        """
        # Define sampling time
        self.dt = dt
        # Define the control input variables
        self.u = np.matrix([[u_x], [u_y]])
        # Initial state
        self.x = np.matrix([[0], [0], [0], [0]])
        # Define the state transition matrix A
        self.A = np.matrix([[1, 0, self.dt, 0],
                            [0, 1, 0, self.dt],
                            [0, 0, 1, 0],
                            [0, 0, 0, 1]])
        # Define the control input matrix B
        self.B = np.matrix([[(self.dt**2) / 2, 0],
                            [0, (self.dt**2) / 2],
                            [self.dt, 0],
                            [0, self.dt]])
        # Define the measurement mapping matrix
        self.H = np.matrix([[1, 0, 0, 0],
                            [0, 1, 0, 0]])
        # Initial process noise covariance
        self.Q = np.matrix([[(self.dt**4) / 4, 0, (self.dt**3) / 2, 0],
                            [0, (self.dt**4) / 4, 0, (self.dt**3) / 2],
                            [(self.dt**3) / 2, 0, self.dt**2, 0],
                            [0, (self.dt**3) / 2, 0, self.dt**2]]) * std_acc**2
        # Initial measurement noise covariance
        self.R = np.matrix([[x_std_meas**2, 0],
                            [0, y_std_meas**2]])
        # Initial covariance matrix
        self.P = np.eye(self.A.shape[1])

    def predict(self):
        # Refer to Eq.(9) and Eq.(10) in https://machinelearningspace.com/object-tracking-simple-implementation-of-kalman-filter-in-python/
        # Update time state: x_k = A*x_(k-1) + B*u_(k-1)    Eq.(9)
        self.x = np.dot(self.A, self.x) + np.dot(self.B, self.u)
        # Calculate error covariance: P = A*P*A' + Q        Eq.(10)
        self.P = np.dot(np.dot(self.A, self.P), self.A.T) + self.Q
        return self.x[0:2]

    def update(self, z):
        # Refer to Eq.(11), Eq.(12) and Eq.(13) in the article above
        # S = H*P*H' + R
        S = np.dot(self.H, np.dot(self.P, self.H.T)) + self.R
        # Calculate the Kalman gain: K = P*H'*inv(S)        Eq.(11)
        K = np.dot(np.dot(self.P, self.H.T), np.linalg.inv(S))
        self.x = np.round(self.x + np.dot(K, (z - np.dot(self.H, self.x))))  # Eq.(12)
        I = np.eye(self.H.shape[1])
        # Update error covariance matrix
        self.P = (I - (K * self.H)) * self.P  # Eq.(13)
        return self.x[0:2]
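For orientation, here is a minimal sketch of one way to wire this class into the tracker loop above (the kf_per_id dictionary and the parameter values are my assumptions, not a tested integration): keep one KalmanFilter per track ID, correct it with each matched centroid, and use predict() for the next frame's estimate.
kf_per_id = {}  # hypothetical glue: one filter per track ID

for x, y, w, h, obj_id in boxes_ids:
    cx, cy = x + w // 2, y + h // 2
    if obj_id not in kf_per_id:
        kf_per_id[obj_id] = KalmanFilter(0.1, 1, 1, 1, 0.1, 0.1)
    kf = kf_per_id[obj_id]
    kf.predict()                                     # propagate the state to this frame
    est = kf.update(np.matrix([[cx], [cy]]))         # correct with the measured centroid
    kal_cx, kal_cy = int(est[0, 0]), int(est[1, 0])  # filtered centroid estimate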

Python: Pillow grab does not load the image frames correctly

I'm trying to use Pillow's ImageGrab to capture frames of my screen, so I can later detect objects in every frame.
import numpy as np
import cv2
from PIL import ImageGrab as ig
import time

# Load Yolo
net = cv2.dnn.readNet("yolov3_training_last.weights", "yolov3_testing.cfg")
# Name custom object
classes = ["amongus"]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
last_time = time.time()

while True:
    img_ = ig.grab(bbox=None)
    open_cv_image = np.array(img_)
    open_cv_image = open_cv_image[:, :, ::-1].copy()  # RGB -> BGR
    img = cv2.resize(open_cv_image, None, fx=0.4, fy=0.4)
    height, width, channels = img.shape
    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Object detected
                print(class_id)
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    print(indexes)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            # cv2.putText(img, label, (x, y + 30), font, 3, color, 2)
    cv2.imshow("Image", np.array(img_))
    # key = cv2.waitKey(0)

cv2.destroyAllWindows()
If I uncomment key = cv2.waitKey(0), it works, because I'm taking just one frame each time I press a key; but once I comment that part out, I get this broken screen.
I'm assuming (not sure at all) that this happens because frames arrive faster than they are displayed, but if I sleep(n) inside the while loop the fps will be very low (I guess?).
What's happening, and how do I fix it?
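For reference, here is a minimal sketch of the usual fix (my suggestion, not from the question): cv2.imshow only refreshes its window while cv2.waitKey pumps the GUI event loop, so call it every frame with a short timeout instead of the blocking waitKey(0).
import cv2
import numpy as np
from PIL import ImageGrab as ig

while True:
    img_ = ig.grab(bbox=None)
    frame = cv2.cvtColor(np.array(img_), cv2.COLOR_RGB2BGR)  # PIL grabs RGB; OpenCV wants BGR
    cv2.imshow("Image", frame)
    if cv2.waitKey(1) & 0xFF == 27:  # ~1 ms GUI pump per frame; ESC quits
        break
cv2.destroyAllWindows()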
