i am currently workingon a project which requires body detection, so i created a file for it and when i tried to use it my main file but the body detection keeps on running and never stops making my rest of the program non executabale . I know it is because of the infinite for loop. But do i have any other ways where i could use in my main file
i have attached the body detection program below
kindly help
import cv2
import mediapipe as mp
import time
mpDraw = mp.solutions.drawing_utils
mppose = mp.solutions.pose
pose = mppose.Pose()
cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
cap.set(3, 400)
cap.set(4, 800)
ptime =0
while True:
succ, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = pose.process(imgRGB)
#print(results.pose_landmarks)
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)
lmlist= []
if results.pose_landmarks:
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape
cx , cy = int(lm.x*w), int(lm.y*h)
lmlist.append([id, cx, cy])
cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15 , (255, 0, 255), cv2.FILLED)
print(lmlist[0])
ctime = time.time()
fps = 1/(ctime-ptime)
ptime = ctime
cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255,255,0), 3)
cv2.imshow("image", img)
key= cv2.waitKey(1)
if key == ord('q'):
break
You can call the content in the file in function on a separate thread.
import cv2
import mediapipe as mp
import time
def func():
mpDraw = mp.solutions.drawing_utils
mppose = mp.solutions.pose
pose = mppose.Pose()
cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
cap.set(3, 400)
cap.set(4, 800)
ptime =0
while True:
succ, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = pose.process(imgRGB)
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)
lmlist= []
if results.pose_landmarks:
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape
cx, cy = int(lm.x*w), int(lm.y*h)
lmlist.append([id, cx, cy])
cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15 , (255, 0, 255), cv2.FILLED)
print(lmlist[0])
ctime = time.time()
fps = 1/(ctime-ptime)
ptime = ctime
cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255,255,0), 3)
cv2.imshow("image", img)
key= cv2.waitKey(1)
if key == ord('q'):
break
Main:
import threading
from file import func
x = threading.Thread(target=func, args=())
x.start()
Related
from scipy.spatial import distance as dist
from imutils import face_utils
import imutils
import dlib
import cv2
import winsound
from tkinter import Tk, Button
frequency = 2500
duration = 5000
def eyeAspectRatio(eye):
A = dist.euclidean(eye[1], eye[5])
B = dist.euclidean(eye[2], eye[4])
C = dist.euclidean(eye[0], eye[3])
ear = (A + B) / (2.0 * C)
return ear
def run():
count = 0
earThresh = 0.3 #distance between vertical eye coordinate Threshold
earFrames = 48 #consecutive frames for eye closure
#shapePredictor = "C:\\Users\\ATUL GHUMADE\\Downloads\\code\\code\\shape_predictor_68_face_landmarks.dat"
cam = cv2.VideoCapture(0)
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
#get the coord of left & right eye
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
while True:
_, frame = cam.read()
frame = imutils.resize(frame, width=450)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
rects = detector(gray, 0)
for rect in rects:
shape = predictor(gray, rect)
shape = face_utils.shape_to_np(shape)
leftEye = shape[lStart:lEnd]
rightEye = shape[rStart:rEnd]
leftEAR = eyeAspectRatio(leftEye)
rightEAR = eyeAspectRatio(rightEye)
ear = (leftEAR + rightEAR) / 2.0
leftEyeHull = cv2.convexHull(leftEye)
rightEyeHull = cv2.convexHull(rightEye)
cv2.drawContours(frame, [leftEyeHull], -1, (0, 0, 255), 1)
cv2.drawContours(frame, [rightEyeHull], -1, (0, 0, 255), 1)
if ear < earThresh:
count += 1
if count >= earFrames:
cv2.putText(frame, "DROWSINESS DETECTED", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
winsound.Beep(frequency, duration)
else:
count = 0
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
cam.release()
cv2.destroyAllWindows()
'''
tk=Tk()
tk.geometry('500x500')
sub=Button(tk,text="Inspect",command=run).pack()
tk.mainloop()
'''
run()
Traceback (most recent call last):
File "C:\Users\ASUS\Desktop\Drowssiness detection - Copy\drowsiness detection\code\main.py", line 82, in
run()
File "C:\Users\ASUS\Desktop\Drowssiness detection - Copy\drowsiness detection\code\main.py", line 35, in run
frame = imutils.resize(frame, width=450)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python39\lib\site-packages\imutils\convenience.py", line 69, in resize
(h, w) = image.shape[:2]
AttributeError: 'NoneType' object has no attribute 'shape'
So I have been watching a video online and was trying to replicate the same project with the same code. It worked in the beginning but after I added another script to my VS Code project folder it completely broke and throws an error every time I run the program. I tried reinstalling and upgrading all of the packages I used but still the same error. Can anyone please suggest what may be the result of this error?
import cv2
import mediapipe as mp
import time
class handDetector():
def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands,
self.detectionCon, self.trackCon)
self.mpDraw = mp.solutions.drawing_utils
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
# print(results.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
lmList = []
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myHand.landmark):
# print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
# print(id, cx, cy)
lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
return lmList
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(1)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
Error Message:
Traceback (most recent call last):
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 71, in <module>
main()
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 51, in main
detector = handDetector()
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 14, in __init__
self.hands = self.mpHands.Hands(self.mode, self.maxHands,
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solutions\hands.py", line 114, in __init__
super().__init__(
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 288, in __init__
self._input_side_packets = {
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 289, in <dictcomp>
name: self._make_packet(self._side_input_type_info[name], data)
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 591, in _make_packet
return getattr(packet_creator, 'create_' + packet_data_type.value)(data)
TypeError: create_int(): incompatible function arguments. The following argument types are supported:
1. (arg0: int) -> mediapipe.python._framework_bindings.packet.Packet
Invoked with: 0.5
The problem with your code lies in line 11-12:
self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.detectionCon, self.trackCon)
The class Hands() takes in 5 arguments, but it seems like you have missed the "complexity" parameter.
Your modified code:
import cv2
import mediapipe as mp
import time
class handDetector():
def __init__(self, mode=False, maxHands=2, complexity=1, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(mode, maxHands, complexity, detectionCon, trackCon)
self.mpDraw = mp.solutions.drawing_utils
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
# print(results.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
lmList = []
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myHand.landmark):
# print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
# print(id, cx, cy)
lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
return lmList
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(1)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
If this still doesn't solve your problem, then take note of the following things:
If you are using a virtual environment in vscode and try installing mediapipe or opencv-python, you may face a "numpy" version error. However, if you try running this code in PyCharm and installing these packages in the python interpreter, you won't face any errors.
You must be using python 3.7.0 or below as mediapipe doesn't support the newer versions.
Another tip to keep things simpler would be to use the "pydetection" module. You will need to install these packages:
pip install mediapipe
pip install opencv-python
pip install pydetetcion
The pydetection module will simplify the code a lot!
import pydetection as hand # The pydetection module
import cv2 # We will use this for reading and displaying webcam frames
detector = hand.HandRecogniser()
webcam = cv2.VideoCapture(0) # Change the number for a different camera. 0 is the default one.
while True:
_, frame = webcam.read() # Gets a frame of the current webcam feed
processed_image, hand_landmark_position = detector.findHands(frame, draw=True)
cv2.imshow(processed_image) # Show the image with the hands outlined
# Add a delay of 1 millisecond and check for the keystroke "q"
key = cv2.waitKey(1)
if key == ord("q"):
break
Github page to pydetection: https://github.com/Ayaan-Imran/pydetection
So I am working on a OpenCV project which would detect if a certain hand sign has taken place. That part works.
I also wanted to make it so that it waits for a few seconds so it does not count accidental movements. But when I use time.sleep() or start time end time methods it either freezes my program and always executes the program and stops following the if conditions, or worse it simply doesn't execute the if statement ever.
Here is the code snippet I used. Btw I have already tried time.sleep() It would just freeze the program and always play the clip regardless of whether it followed my if statement or not.
Is there any way to resolve this issue?
import cv2
import time
import os
import HandTrackingModule as htm
from playsound import playsound
wCam, hCam = 1920, 1080
cap = cv2.VideoCapture(2)
cap.set(3, wCam)
cap.set(4, hCam)
frame = cap.read()
pTime = 0
detector = htm.handDetector(detectionCon=1)
Player1 = []
Player2 = []
while 1 > 0:
success, img = cap.read()
img = detector.findHands(img)
lmlist = detector.findPosition(img, handNo=0, draw=False)
if len(lmlist) != 0:
Player1 = []
Player2 = []
if (lmlist[4][1] < lmlist[3][1] and lmlist[8][2] < lmlist[6][2] and lmlist[20][2] < lmlist[18][2] and lmlist[12][2] > lmlist[10][2] and lmlist[16][2] > lmlist[14][2]) == False:
StartTime = time.time()
if lmlist[4][1] < lmlist[3][1] and lmlist[8][2] < lmlist[6][2] and lmlist[20][2] < lmlist[18][2] and lmlist[12][2] > lmlist[10][2] and lmlist[16][2] > lmlist[14][2]:
EndTime = time.time()
Eyetime = EndTime - StartTime
if Eyetime > 5:
Player1.append("hello")
playsound(r'C:\Users\haris\Documents\GitHub\Haz3-jolt\Pong_with_opencv\venv\notw.mp3')
cTime = time.time()
fps = 1 / (cTime-pTime)
pTime = cTime
cv2.putText(img, f'FPS: {int(fps)}',(400,70), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 0, 0), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
I also have a bonus script Import called handtrackingmodule.
import cv2
import mediapipe as mp
import time
class handDetector():
def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.detectionCon, self.trackCon)
self.mpDraw = mp.solutions.drawing_utils
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
#print(results.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms,
self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
lmList = []
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myHand.landmark):
# print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
# print(id, cx, cy)
lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
return lmList
def Marks(self,frame):
myHands=[]
frameRGB=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results=self.hands.process(frameRGB)
if results.multi_hand_landmarks != None:
for handLandMarks in results.multi_hand_landmarks:
myHand=[]
for landMark in handLandMarks.landmark:
myHand.append((int(landMark.x*width), int(landMark.y*height)))
myHands.append(myHand)
return myHands
width=1920
height=1080
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(2)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
GUI (imshow) only works as long as waitKey/pollKey run continuously or frequently.
When you sleep(), you choke the GUI (imshow window). It can't update, or handle any events.
If you need to "sleep", give waitKey a suitable integer argument in milliseconds.
Be aware that waitKey can return before the time is up, e.g. if all windows were closed or if a key was pressed.
So, I was making a face detection program and everything was going right but as soon as I run it, it showed me as error saying:
line 40, in <module>
faceNet=cv2.dnn.readNet(faceModel, faceProto)
cv2.error: OpenCV(4.5.4-dev) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1138: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "opencv_face_detector_uint8.pb" in function 'cv::dnn::ReadProtoFromBinaryFile'
The code is:
import cv2
import math
import argparse
def highlightFace(net, frame, conf_thershold=0.7):
frameOpencvDnn=frame.copy()
frameHight=frameOpencvDnn.shape[0]
frameWidth=frameOpencvDnn.shape[1]
blob=cv2.dnnblobFromImage(frameOpencvDnn, 1.0, (300, 300), [104, 117, 123], True, False)
net.setInput(blob)
detections=net.forword()
faceBoxes=[]
for i in range(detections.shape[2]):
confidence=detections[0,0,1,2]
if confidence>conf_thershold:
x1=int(detections[0,0,i,3]*frameWidth)
y1=int(detections[0,0,i,4]*frameHight)
x2=int(detections[0,0,i,5]*frameWidth)
y2=int(detections[0,0,i,6]*frameHight)
faceBoxes.append([x1,y1,x2,y2])
cv2.rectangel(frameOpencvDnn, (x1,y1), (x2,y2), (0,225,0), int(round(frameHight/150)), 8)
return frameOpencvDnn,faceBoxes
parser=argparse.ArgumentParser()
parser.add_argument('--image')
args=parser.parse_args()
faceProto="opencv_face_detector.pbtxt"
faceModel="opencv_face_detector_uint8.pb"
ageProto="age_deploy.prototxt"
ageModel="age_net.caffemodel"
genderProto="gender_deploy.prototxt"
genderModel="gender_net.caffmodel"
MODEL_MEAN_VALUES=(78.4263377603, 87.7689143744, 144.895847746)
ageList=['(0-2)', '(4-6)', '(8-12)','(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genderList=['Male','Female']
faceNet=cv2.dnn.readNet(faceModel, faceProto)
ageNet=cv2.dnn.readNet(ageModel, ageProto)
genderNet=cv2.dnn.readNet(genderModel, genderProto)
video=cv2.VideoCapture(args.image if args.image else 0)
padding=20
while cv2.waitKey(1)<0 :
hasFrame,frame=video.read()
if not hasFrame:
cv2.waitKey()
break
resultImg,faceBoxes=highlightFace(faceNet,frame)
if not faceBoxes:
print("No Face is being Detected")
for faceBox in faceBoxes:
face=frame[max(0,faceBox[1]-padding):
min(faceBox[3]+padding,frame.shape[0]-1),max(0,faceBox[0]-padding)
:min(faceBox[2]+padding, frame.shape[1]-1)]
blob=cv2.dnn.blobFromImage(face, 1.0, (227.227), MODEL_MEAN_VALUES, swapRB=False)
genderNet.setInput(blob)
genderPreds=genderNet.forword()
gender=genderList[genderPreds[0].argmax()]
print(f'Gender: {gender}')
ageNet.setInput(blob)
agePreds=ageNet.forword()
age=ageList[agePreds[0].argmax()]
print(f'Age: {age[1:-1]} years')
cv2.putText(resultImg, f'{gender}, {age}', (faceBox[0], faceBox[1]-10),
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,225,225), 2, cv2.LINE_AA)
cv2.imshow("Detecting age and gender", resultImg)
The code is a little dodgy mate, it does not display the video. Try this.
import cv2
import math
import argparse
""" Identification """
faceProto = "opencv_face_detector.pbtxt"
faceModel = "opencv_face_detector_uint8.pb"
ageProto = "age_deploy.prototxt"
ageModel = "age_net.caffemodel"
genderProto = "gender_deploy.prototxt"
genderModel = "gender_net.caffemodel"
faceNet=cv2.dnn.readNet(faceModel, faceProto)
ageNet=cv2.dnn.readNet(ageModel,ageProto)
genderNet=cv2.dnn.readNet(genderModel,genderProto)
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
ageList = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genderList = ['Male', 'Female']
padding=20
""" Face highliting """
def faceBox(faceNet, frames):
frameHeight=frames.shape[0]
frameWidth=frames.shape[1]
blob=cv2.dnn.blobFromImage(frames, 1.0, (300,300), [104,117,123], swapRB=False)
faceNet.setInput(blob)
detection=faceNet.forward()
bboxs=[]
for i in range(detection.shape[2]):
confidence=detection[0,0,i,2]
if confidence>0.7:
x1=int(detection[0,0,i,3]*frameWidth)
y1=int(detection[0,0,i,4]*frameHeight)
x2=int(detection[0,0,i,5]*frameWidth)
y2=int(detection[0,0,i,6]*frameHeight)
bboxs.append([x1,y1,x2,y2])
cv2.rectangle(frames, (x1,y1),(x2,y2),(0,255,0), 1)
return frames, bboxs
""" Video display """
def DisplayVid():
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('testvideo', fourcc, 20.0, (640, 480))
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
while (True):
ret, frame = cap.read()
frameFace, bboxes = faceBox(faceNet, frame)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
out.write(frame)
for bbox in bboxes:
face = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
genderNet.setInput(blob)
genderPreds = genderNet.forward()
gender = genderList[genderPreds[0].argmax()]
ageNet.setInput(blob)
agePreds = ageNet.forward()
age = ageList[agePreds[0].argmax()]
label = "{},{}".format(gender, age)
cv2.rectangle(frameFace, (bbox[0], bbox[1] - 30), (bbox[2], bbox[1]), (0, 255, 0), -1)
cv2.putText(frameFace, label, (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2,
cv2.LINE_AA)
cv2.imshow("Age-Gender", frameFace)
k = cv2.waitKey(1)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if not (cap.isOpened()):
print("Could not open video device")
cap.release()
out.release()
cv2.destroyAllWindows()
DisplayVid()
I wrote a code for pose estimation using OpenCV and mediapipe library. The program was working well and I was getting around 30-35 fps. When I tried to convert the same program to a module so that I can use it easily in future for different projects, the fps of the new code(module) reduced drastically to 3-4 fps.
My original Program:
import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(1)
pTime = 0
cTime = 0
mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()
while True:
success, img1 = cap.read()
img = cv2.flip(img1, 1)
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = pose.process(imgRGB)
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape
cx, cy = int(lm.x*w), int(lm.y*h)
cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
cTime = time.time()
fps = 1/(cTime - pTime)
pTime = cTime
cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)
cv2.imshow("Live Feed", img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
My attempt at converting it into a module :
import cv2
import mediapipe as mp
import time
class poseDetector():
def __init__(self, mode=False, upBody=False, smooth=True, detectionCon = 0.5, trackingCon=0.5):
self.mode = mode
self.upBody = upBody
self.smooth = smooth
self.detectionCon = detectionCon
self.trackingCon = trackingCon
self.mpDraw = mp.solutions.drawing_utils
self.mpPose = mp.solutions.pose
self.pose =self.mpPose.Pose(self.mode, self.upBody, self.smooth, self.detectionCon, self.trackingCon)
def findPose(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.pose.process(imgRGB)
if self.results.pose_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
return img
def findPosition(self, img, draw=True):
lmList = []
if self.results.pose_landmarks:
for id, lm in enumerate(self.results.pose_landmarks.landmark):
h, w, c = img.shape
cx, cy = int(lm.x*w), int(lm.y*h)
lmList.append([id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
return lmList
def main():
cap = cv2.VideoCapture(1)
pTime = 0
cTime = 0
while True:
success, img1 = cap.read()
img = cv2.flip(img1, 1)
detector = poseDetector()
img = detector.findPose(img)
lmList = detector.findPosition(img)
cTime = time.time()
fps = 1/(cTime - pTime)
pTime = cTime
cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)
cv2.imshow("Live Feed", img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if __name__ == '__main__':
main()
According to me , both the code should have been working in the same manner, but they are not. Can anyone tell where am I making mistake ?
You need to place detector = poseDetector() to be before the while True::
detector = poseDetector()
while True:
success, img1 = cap.read()
...
Your "module" implementation creates a new poseDetector object every iteration of the main loop.
Each execution of detector = poseDetector() includes a call to poseDetector.__init__ that calls self.pose =self.mpPose.Pose...
There is a lot of overhead...
while True:
success, img1 = cap.read()
img = cv2.flip(img1, 1)
detector = poseDetector()
...
In your original ("non-module") implementation, you are executing pose = mpPose.Pose() only once (before the loop).
pose = mpPose.Pose()
while True:
success, img1 = cap.read()
...
I have tested your code before and after moving detector = poseDetector() outside the loop.
After moving the line above the loop, the frame rate is the same as the "non-module" implementation.