I wrote a program for pose estimation using the OpenCV and MediaPipe libraries. The program was working well and I was getting around 30-35 fps. When I tried to convert the same program into a module so that I could reuse it easily in future projects, the fps of the new code (the module) dropped drastically to 3-4 fps.
My original Program:
import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(1)
pTime = 0
cTime = 0
mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
# The pose estimator is created once, before the frame loop, and reused for
# every frame.
pose = mpPose.Pose()

while True:
    success, img1 = cap.read()
    if not success:
        # cap.read() delivered no frame; stop instead of handing None to
        # cv2.flip below.
        break
    img = cv2.flip(img1, 1)
    # The frame is converted from BGR to RGB before it is given to MediaPipe.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)
    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
        # The frame size is constant for the whole landmark loop.
        h, w = img.shape[:2]
        for lm in results.pose_landmarks.landmark:
            # lm.x / lm.y are scaled by the frame width / height to get pixels.
            cx, cy = int(lm.x * w), int(lm.y * h)
            cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
    # Frames per second = 1 / time elapsed since the previous frame.
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)
    cv2.imshow("Live Feed", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Free the camera and close the preview window once the loop ends.
cap.release()
cv2.destroyAllWindows()
My attempt at converting it into a module :
import cv2
import mediapipe as mp
import time
class poseDetector():
    """Wrapper around MediaPipe Pose that owns a single estimator instance.

    ``__init__`` builds the MediaPipe ``Pose`` object, so create one detector
    and reuse it for every frame.
    """

    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon=0.5, trackingCon=0.5):
        """Store the settings and build the MediaPipe ``Pose`` solution.

        Args:
            mode: forwarded as ``static_image_mode``.
            upBody: forwarded as ``upper_body_only`` when truthy.
            smooth: forwarded as ``smooth_landmarks``.
            detectionCon: forwarded as ``min_detection_confidence``.
            trackingCon: forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.upBody = upBody
        self.smooth = smooth
        self.detectionCon = detectionCon
        self.trackingCon = trackingCon
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        # Set by findPose(); None until the first frame has been processed so
        # that findPosition() can be called safely beforehand.
        self.results = None
        # Keyword arguments keep each value bound to the intended parameter
        # even if the positional order of Pose() differs between releases.
        poseKwargs = {
            "static_image_mode": self.mode,
            "smooth_landmarks": self.smooth,
            "min_detection_confidence": self.detectionCon,
            "min_tracking_confidence": self.trackingCon,
        }
        if self.upBody:
            # NOTE(review): upper_body_only is presumably only accepted by
            # older MediaPipe releases, so it is forwarded only on request.
            poseKwargs["upper_body_only"] = self.upBody
        self.pose = self.mpPose.Pose(**poseKwargs)

    def findPose(self, img, draw=True):
        """Run pose estimation on a BGR frame and optionally draw the skeleton.

        The result is kept in ``self.results`` for ``findPosition``. Returns
        the same ``img`` object (drawn on in place when ``draw`` is true).
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if draw and self.results.pose_landmarks:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img, draw=True):
        """Return ``[[index, x_px, y_px], ...]`` for the last processed frame.

        Returns an empty list when ``findPose`` has not run yet or found no
        pose. When ``draw`` is true a dot is drawn on ``img`` per landmark.
        """
        lmList = []
        if self.results is None or not self.results.pose_landmarks:
            return lmList
        # Landmark x / y are multiplied by the frame width / height.
        h, w = img.shape[:2]
        for idx, lm in enumerate(self.results.pose_landmarks.landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        return lmList
def main():
    """Show the mirrored feed of camera 1 with pose overlay and FPS; 'q' quits."""
    cap = cv2.VideoCapture(1)
    pTime = 0
    cTime = 0
    while True:
        success, img1 = cap.read()
        if not success:
            # cap.read() delivered no frame; stop instead of flipping None.
            break
        img = cv2.flip(img1, 1)
        # NOTE: this is the line the question is about. A new poseDetector is
        # constructed on every iteration, so poseDetector.__init__ (and the
        # Pose(...) construction inside it) runs once per frame. It is left in
        # place so the listing still reproduces the reported 3-4 fps.
        detector = poseDetector()
        img = detector.findPose(img)
        lmList = detector.findPosition(img)
        # Frames per second = 1 / time elapsed since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)
        cv2.imshow("Live Feed", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
As far as I can tell, both versions should behave the same way, but they do not. Can anyone tell me where I am making a mistake?
You need to move detector = poseDetector() so that it comes before the while True: loop:
# Construct the detector once, before entering the frame loop.
detector = poseDetector()
while True:
    success, img1 = cap.read()
    ...
Your "module" implementation creates a new poseDetector object every iteration of the main loop.
Each execution of detector = poseDetector() includes a call to poseDetector.__init__ that calls self.pose =self.mpPose.Pose...
There is a lot of overhead...
while True:
    success, img1 = cap.read()
    img = cv2.flip(img1, 1)
    detector = poseDetector()  # runs poseDetector.__init__ on every iteration
    ...
In your original ("non-module") implementation, you are executing pose = mpPose.Pose() only once (before the loop).
pose = mpPose.Pose()  # executed a single time, before the loop
while True:
    success, img1 = cap.read()
    ...
I have tested your code before and after moving detector = poseDetector() outside the loop.
After moving the line above the loop, the frame rate is the same as the "non-module" implementation.
Related
So I have been watching a video online and was trying to replicate the same project with the same code. It worked in the beginning, but after I added another script to my VS Code project folder it completely broke and now throws an error every time I run the program. I tried reinstalling and upgrading all of the packages I used, but I still get the same error. Can anyone please suggest what may be the cause of this error?
import cv2
import mediapipe as mp
import time
class handDetector():
    """Wrapper around MediaPipe Hands: detect hands and list landmark pixels."""

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Store the settings and build the MediaPipe ``Hands`` solution."""
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # Set by findHands(); None until a frame has been processed.
        self.results = None

        self.mpHands = mp.solutions.hands
        # NOTE: the four values are passed positionally. This is the call the
        # traceback quoted after this listing points at
        # ("create_int(): ... Invoked with: 0.5") - presumably the installed
        # MediaPipe expects an integer parameter in the third position, so
        # detectionCon ends up in it. Kept as posted so it matches the error.
        self.hands = self.mpHands.Hands(self.mode, self.maxHands,
                                        self.detectionCon, self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Process a BGR frame, optionally draw every detected hand, return ``img``.

        The MediaPipe result is kept in ``self.results`` for ``findPosition``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return ``[[index, x_px, y_px], ...]`` for hand number ``handNo``.

        Returns an empty list when no frame has been processed, no hand was
        found, or fewer than ``handNo + 1`` hands were found.
        """
        lmList = []
        detected = None if self.results is None else self.results.multi_hand_landmarks
        if not detected or handNo >= len(detected):
            return lmList
        # Landmark x / y are multiplied by the frame width / height.
        h, w = img.shape[:2]
        for idx, lm in enumerate(detected[handNo].landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return lmList
def main():
    """Show the feed of camera 1 with hand landmarks and an FPS counter."""
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(1)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            # cap.read() delivered no frame; stop instead of processing None.
            break
        img = detector.findHands(img)
        lmList = detector.findPosition(img)
        if lmList:
            # Print the [index, x, y] entry of landmark 4.
            print(lmList[4])

        # Frames per second = 1 / time elapsed since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
                    (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
Error Message:
Traceback (most recent call last):
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 71, in <module>
main()
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 51, in main
detector = handDetector()
File "d:\Hand Gensture Tracking AI\HandTrackingModule.py", line 14, in __init__
self.hands = self.mpHands.Hands(self.mode, self.maxHands,
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solutions\hands.py", line 114, in __init__
super().__init__(
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 288, in __init__
self._input_side_packets = {
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 289, in <dictcomp>
name: self._make_packet(self._side_input_type_info[name], data)
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\mediapipe\python\solution_base.py", line 591, in _make_packet
return getattr(packet_creator, 'create_' + packet_data_type.value)(data)
TypeError: create_int(): incompatible function arguments. The following argument types are supported:
1. (arg0: int) -> mediapipe.python._framework_bindings.packet.Packet
Invoked with: 0.5
The problem with your code lies in line 11-12:
self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.detectionCon, self.trackCon)
The class Hands() takes in 5 arguments, but it seems like you have missed the "complexity" parameter.
Your modified code:
import cv2
import mediapipe as mp
import time
class handDetector():
    """Wrapper around MediaPipe Hands: detect hands and list landmark pixels."""

    def __init__(self, mode=False, maxHands=2, complexity=1, detectionCon=0.5, trackCon=0.5):
        """Store the settings and build the MediaPipe ``Hands`` solution.

        Args:
            mode: forwarded as ``static_image_mode``.
            maxHands: forwarded as ``max_num_hands``.
            complexity: forwarded as ``model_complexity``.
            detectionCon: forwarded as ``min_detection_confidence``.
            trackCon: forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.complexity = complexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # Set by findHands(); None until a frame has been processed.
        self.results = None

        self.mpHands = mp.solutions.hands
        # Keyword arguments bind every value to the intended parameter, so a
        # float confidence can no longer land in an integer slot.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.complexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Process a BGR frame, optionally draw every detected hand, return ``img``.

        The MediaPipe result is kept in ``self.results`` for ``findPosition``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return ``[[index, x_px, y_px], ...]`` for hand number ``handNo``.

        Returns an empty list when no frame has been processed, no hand was
        found, or fewer than ``handNo + 1`` hands were found.
        """
        lmList = []
        detected = None if self.results is None else self.results.multi_hand_landmarks
        if not detected or handNo >= len(detected):
            return lmList
        # Landmark x / y are multiplied by the frame width / height.
        h, w = img.shape[:2]
        for idx, lm in enumerate(detected[handNo].landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return lmList
def main():
    """Show the feed of camera 1 with hand landmarks and an FPS counter."""
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(1)
    # One detector for the whole run; it is reused for every frame.
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            # cap.read() delivered no frame; stop instead of processing None.
            break
        img = detector.findHands(img)
        lmList = detector.findPosition(img)
        if lmList:
            # Print the [index, x, y] entry of landmark 4.
            print(lmList[4])

        # Frames per second = 1 / time elapsed since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
                    (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
If this still doesn't solve your problem, then take note of the following things:
If you are using a virtual environment in vscode and try installing mediapipe or opencv-python, you may face a "numpy" version error. However, if you try running this code in PyCharm and installing these packages in the python interpreter, you won't face any errors.
You must be using python 3.7.0 or below as mediapipe doesn't support the newer versions.
Another tip to keep things simpler would be to use the "pydetection" module. You will need to install these packages:
pip install mediapipe
pip install opencv-python
pip install pydetection
The pydetection module will simplify the code a lot!
import pydetection as hand # The pydetection module
import cv2 # We will use this for reading and displaying webcam frames
detector = hand.HandRecogniser()
webcam = cv2.VideoCapture(0)  # Change the number for a different camera. 0 is the default one.

while True:
    grabbed, frame = webcam.read()  # Gets a frame of the current webcam feed
    if not grabbed:
        # No frame was delivered; stop instead of passing None to the detector.
        break
    processed_image, hand_landmark_position = detector.findHands(frame, draw=True)
    # cv2.imshow needs a window name as its first argument and the image as
    # its second.
    cv2.imshow("Hands", processed_image)  # Show the image with the hands outlined

    # Add a delay of 1 millisecond and check for the keystroke "q"
    key = cv2.waitKey(1)
    if key == ord("q"):
        break

webcam.release()
cv2.destroyAllWindows()
Github page to pydetection: https://github.com/Ayaan-Imran/pydetection
Whenever I run this code, I get this error!
TypeError: create_bool(): incompatible function arguments. The following argument types are supported:
1. (arg0: bool) -> mediapipe.python._framework_bindings.packet.Packet
Invoked with: 0.5.
please help me to solve this problem. thank you in advance
here is my code....
import cv2
import mediapipe as mp
import time
class FaceMeshDetector():
    """Wrapper around MediaPipe FaceMesh returning per-face pixel landmarks."""

    def __init__(self, staticMode=False, maxFaces=2, minDetectionCon=0.5, minTrackCon=0.5):
        """Store the settings and build the MediaPipe ``FaceMesh`` solution."""
        self.staticMode = staticMode
        self.maxFaces = maxFaces
        self.minDetectionCon = minDetectionCon
        self.minTrackCon = minTrackCon

        self.mpDraw = mp.solutions.drawing_utils
        self.mpFaceMesh = mp.solutions.face_mesh
        # NOTE: the four values are passed positionally. The error reported
        # with this listing ("create_bool(): ... Invoked with: 0.5") is
        # presumably raised here: the third positional slot receives
        # minDetectionCon (0.5) where the installed MediaPipe expects a bool.
        # Kept as posted so the listing matches the reported error; passing
        # the values by keyword would avoid the mismatch - TODO confirm the
        # parameter names against the installed MediaPipe version.
        self.faceMesh = self.mpFaceMesh.FaceMesh(self.staticMode, self.maxFaces,
                                                 self.minDetectionCon, self.minTrackCon)
        self.drawSpec = self.mpDraw.DrawingSpec(thickness=1, circle_radius=2)

    def findFaceMesh(self, img, draw=True):
        """Detect face meshes in a BGR frame.

        Returns ``(img, faces)`` where ``faces`` holds one list of ``[x, y]``
        pixel pairs per detected face. When ``draw`` is true the contours are
        drawn on ``img`` in place.
        """
        self.imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.faceMesh.process(self.imgRGB)
        faces = []
        if self.results.multi_face_landmarks:
            # Landmark x / y are multiplied by the frame width / height.
            ih, iw = img.shape[:2]
            for faceLms in self.results.multi_face_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, faceLms, self.mpFaceMesh.FACEMESH_CONTOURS,
                                               self.drawSpec, self.drawSpec)
                face = [[int(lm.x * iw), int(lm.y * ih)] for lm in faceLms.landmark]
                faces.append(face)
        return img, faces
def main():
    """Play ``Videos/1.mp4`` with the face-mesh overlay and an FPS counter."""
    cap = cv2.VideoCapture("Videos/1.mp4")
    pTime = 0
    detector = FaceMeshDetector(maxFaces=2)
    while True:
        success, img = cap.read()
        if not success:
            # No frame was returned (e.g. the video has ended or could not be
            # opened); stop instead of passing None to the detector.
            break
        img, faces = detector.findFaceMesh(img)
        if faces:
            print(faces[0])
        # Frames per second = 1 / time elapsed since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN,
                    3, (0, 255, 0), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
"""
I am doing a project to visualise hand movement from a wearable device (gloves) to a screen. I am using Mediapipe to get hand landmarks. This is to get synthetic data for my project. Now I am planning to use Blender to visualise and animate the hand movement. My questions are as follows:
Are there any alternatives to this method?
How do I import these points (as a CSV file) and map them onto my Blender image?
Note: I am a novice in both Python and Blender.
Any and all help would be appreciated . Thanks in advance.
I used Mediapipe to get the coordinates.
my code for Mediapipe:
'''
import cv2
import mediapipe as mp
import time
import uuid
import os
import numpy as np
cap = cv2.VideoCapture(0)

mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1)
mpDraw = mp.solutions.drawing_utils
# The two drawing styles never change, so they are built once instead of on
# every frame.
landmarkSpec = mpDraw.DrawingSpec(color=(201, 122, 76), thickness=2, circle_radius=2)
connectionSpec = mpDraw.DrawingSpec(color=(121, 44, 250), thickness=4, circle_radius=4)

pTime = 0
cTime = 0

while True:
    success, img = cap.read()
    if not success:
        # cap.read() delivered no frame; stop instead of converting None.
        break
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The RGB copy is marked read-only while MediaPipe processes it. It is not
    # used afterwards: all drawing below happens on the BGR frame `img`.
    imgRGB.flags.writeable = False
    results = hands.process(imgRGB)
    print(results.multi_hand_landmarks)

    if results.multi_hand_landmarks:
        # Width and height of the frame, used to scale the landmark x / y.
        h, w = img.shape[:2]
        for handLms in results.multi_hand_landmarks:
            for idx, lm in enumerate(handLms.landmark):
                cx, cy = int(lm.x * w), int(lm.y * h)
                print(idx, cx, cy)
                if idx == 0:
                    # Highlight landmark 0 with a larger filled circle.
                    cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
            mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS,
                                  landmarkSpec, connectionSpec)

    # Frames per second = 1 / time elapsed since the previous frame.
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime

    cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 125, 0), 2)
    # Optional: uncomment to save every annotated frame under 'Output Images'.
    # cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), img)
    cv2.imshow('Image', img)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
''''
So I am working on a OpenCV project which would detect if a certain hand sign has taken place. That part works.
I also wanted to make it so that it waits for a few seconds so it does not count accidental movements. But when I use time.sleep() or start time end time methods it either freezes my program and always executes the program and stops following the if conditions, or worse it simply doesn't execute the if statement ever.
Here is the code snippet I used. Btw I have already tried time.sleep() It would just freeze the program and always play the clip regardless of whether it followed my if statement or not.
Is there any way to resolve this issue?
import cv2
import time
import os
import HandTrackingModule as htm
from playsound import playsound
wCam, hCam = 1920, 1080
cap = cv2.VideoCapture(2)
# Named property constants instead of the bare ids 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)
pTime = 0
detector = htm.handDetector(detectionCon=1)
Player1 = []
Player2 = []
# Initialised up front: if the gesture is already being shown in the first
# frame that contains a hand, the hold-time calculation below would otherwise
# read StartTime before it was ever assigned.
StartTime = time.time()

while True:
    success, img = cap.read()
    if not success:
        # cap.read() delivered no frame; stop instead of processing None.
        break
    img = detector.findHands(img)
    lmlist = detector.findPosition(img, handNo=0, draw=False)
    if lmlist:
        Player1 = []
        Player2 = []
        # The hand-sign test, evaluated once per frame: x of landmark 4 left
        # of landmark 3, landmarks 8 and 20 above 6 and 18, landmarks 12 and
        # 16 below 10 and 14 (image y grows downwards).
        signShown = (lmlist[4][1] < lmlist[3][1]
                     and lmlist[8][2] < lmlist[6][2]
                     and lmlist[20][2] < lmlist[18][2]
                     and lmlist[12][2] > lmlist[10][2]
                     and lmlist[16][2] > lmlist[14][2])
        if not signShown:
            # StartTime keeps tracking "now" while the sign is NOT shown, so
            # it ends up holding the last moment before the sign began.
            StartTime = time.time()
        else:
            EndTime = time.time()
            Eyetime = EndTime - StartTime
            if Eyetime > 5:
                Player1.append("hello")
                # NOTE(review): playsound presumably blocks until the clip has
                # finished, so no frames are shown while it plays - confirm.
                playsound(r'C:\Users\haris\Documents\GitHub\Haz3-jolt\Pong_with_opencv\venv\notw.mp3')
    # Frames per second = 1 / time elapsed since the previous frame.
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, f'FPS: {int(fps)}', (400, 70), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 0, 0), 3)
    cv2.imshow("Image", img)
    cv2.waitKey(1)

cap.release()
cv2.destroyAllWindows()
I also have a helper script that I import, called HandTrackingModule.
import cv2
import mediapipe as mp
import time
# Fixed output size used by handDetector.Marks to scale landmark coordinates.
width = 1920
height = 1080


class handDetector():
    """Wrapper around MediaPipe Hands: detect hands and list landmark pixels."""

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Store the settings and build the MediaPipe ``Hands`` solution.

        Args:
            mode: forwarded as ``static_image_mode``.
            maxHands: forwarded as ``max_num_hands``.
            detectionCon: forwarded as ``min_detection_confidence``.
            trackCon: forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # Set by findHands(); None until a frame has been processed.
        self.results = None

        self.mpHands = mp.solutions.hands
        # Keyword arguments bind every value to the intended parameter
        # regardless of the positional order of Hands().
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Process a BGR frame, optionally draw every detected hand, return ``img``.

        The MediaPipe result is kept in ``self.results`` for ``findPosition``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return ``[[index, x_px, y_px], ...]`` for hand number ``handNo``.

        Returns an empty list when no frame has been processed, no hand was
        found, or fewer than ``handNo + 1`` hands were found.
        """
        lmList = []
        detected = None if self.results is None else self.results.multi_hand_landmarks
        if not detected or handNo >= len(detected):
            return lmList
        # Landmark x / y are multiplied by the frame width / height.
        h, w = img.shape[:2]
        for idx, lm in enumerate(detected[handNo].landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return lmList

    def Marks(self, frame):
        """Process ``frame`` and return one list of ``(x, y)`` tuples per hand.

        Coordinates are scaled with the module-level ``width`` and ``height``
        rather than the size of ``frame``. This method runs its own
        ``process`` call and does not touch ``self.results``.
        """
        myHands = []
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(frameRGB)
        if results.multi_hand_landmarks is not None:
            for handLandMarks in results.multi_hand_landmarks:
                myHand = [(int(landMark.x * width), int(landMark.y * height))
                          for landMark in handLandMarks.landmark]
                myHands.append(myHand)
        return myHands
def main():
    """Show the feed of camera 2 with hand landmarks and an FPS counter."""
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(2)
    # One detector for the whole run; it is reused for every frame.
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:
            # cap.read() delivered no frame; stop instead of processing None.
            break
        img = detector.findHands(img)
        lmList = detector.findPosition(img)
        if lmList:
            # Print the [index, x, y] entry of landmark 4.
            print(lmList[4])

        # Frames per second = 1 / time elapsed since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
GUI (imshow) only works as long as waitKey/pollKey run continuously or frequently.
When you sleep(), you choke the GUI (imshow window). It can't update, or handle any events.
If you need to "sleep", give waitKey a suitable integer argument in milliseconds.
Be aware that waitKey can return before the time is up, e.g. if all windows were closed or if a key was pressed.
I am currently working on a project which requires body detection, so I created a separate file for it. When I try to use it from my main file, the body detection keeps running and never stops, so the rest of my program never gets executed. I know this is because of the infinite loop. Is there any other way I could use it from my main file?
i have attached the body detection program below
kindly help
import cv2
import mediapipe as mp
import time
mpDraw = mp.solutions.drawing_utils
mppose = mp.solutions.pose
pose = mppose.Pose()

cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
cap.set(3, 400)
cap.set(4, 800)
ptime = 0

while True:
    succ, img = cap.read()
    if not succ:
        # No frame was returned (e.g. the video has ended or could not be
        # opened); stop instead of passing None to cv2.cvtColor.
        break
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)

    lmlist = []
    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)
        # Landmark x / y are multiplied by the frame width / height.
        h, w = img.shape[:2]
        lmlist = [[idx, int(lm.x * w), int(lm.y * h)]
                  for idx, lm in enumerate(results.pose_landmarks.landmark)]

    if lmlist:
        # Only touch lmlist[0] when a pose was found; indexing the empty list
        # on a frame without landmarks would raise IndexError.
        cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15, (255, 0, 255), cv2.FILLED)
        print(lmlist[0])

    # Frames per second = 1 / time elapsed since the previous frame.
    ctime = time.time()
    fps = 1 / (ctime - ptime)
    ptime = ctime

    cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 255, 0), 3)
    cv2.imshow("image", img)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
You can wrap the contents of the file in a function and call that function on a separate thread.
import cv2
import mediapipe as mp
import time
def func():
    """Run the body-detection loop on the video file until it ends or 'q' is pressed.

    Everything the loop needs is created inside the function, so it can be
    used as a thread target.

    NOTE(review): whether OpenCV window calls (imshow / waitKey) work from a
    non-main thread is platform-dependent - confirm on the target system.
    """
    mpDraw = mp.solutions.drawing_utils
    mppose = mp.solutions.pose
    pose = mppose.Pose()

    cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
    cap.set(3, 400)
    cap.set(4, 800)
    ptime = 0

    while True:
        succ, img = cap.read()
        if not succ:
            # No frame was returned (e.g. the video has ended or could not be
            # opened); stop instead of passing None to cv2.cvtColor.
            break
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = pose.process(imgRGB)

        lmlist = []
        if results.pose_landmarks:
            mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)
            # Landmark x / y are multiplied by the frame width / height.
            h, w = img.shape[:2]
            lmlist = [[idx, int(lm.x * w), int(lm.y * h)]
                      for idx, lm in enumerate(results.pose_landmarks.landmark)]

        if lmlist:
            # Only touch lmlist[0] when a pose was found; indexing the empty
            # list on a frame without landmarks would raise IndexError.
            cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15, (255, 0, 255), cv2.FILLED)
            print(lmlist[0])

        # Frames per second = 1 / time elapsed since the previous frame.
        ctime = time.time()
        fps = 1 / (ctime - ptime)
        ptime = ctime

        cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 255, 0), 3)
        cv2.imshow("image", img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Main:
import threading
from file import func
# Run func on its own thread so the statements after start() execute without
# waiting for the detection loop to finish.
detectionThread = threading.Thread(target=func)
detectionThread.start()