Related
I'm working on a project that does object detection with YOLO and text detection with EasyOCR. Since I'm a beginner and really new to computer vision, I would be glad if someone could help me.
Here's the code:
import cv2
import numpy as np
import easyocr

# Load Yolo
net = cv2.dnn.readNet('yolov4-tiny-custom_3000.weights', 'yolov4-tiny-custom.cfg')

classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture('car1.mp4')

# Declare Ocr
cascade_src = 'haarcascade_russian_plate_number.xml'
cascade = cv2.CascadeClassifier(cascade_src)
reader = easyocr.Reader(['en'], gpu=False)
# Declare Ocr

while True:
    _, frame = cap.read()
    height, width, channels = frame.shape
    #frame = cv2.resize(frame, (800, 600))

    # Yolo Detection
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3)
            print("Jenis Mobil: " + label)

    # Text Reader Using Ocr
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    plate = cascade.detectMultiScale(gray, 1.1, 5)

    for x, y, w, h in plate:
        wT, hT, cT = frame.shape
        a, b = (int(0.02 * wT), int(0.02 * hT))
        plate2 = frame[y + a:y + h - a, x + b:x + w - b, :]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (60, 60, 255), 2)
        cv2.rectangle(frame, (x - 1, y - 40), (x + w + 1, y), (60, 60, 255), -1)
        result = reader.readtext(plate2)
        for detek in result:
            top_left = (int(detek[0][0][0]), int(detek[0][0][1]))
            bottom_right = (int(detek[0][2][0]), int(detek[0][2][1]))
            text = detek[1]
            cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
            print("Nomor Kendaran: " + text)
    # Text Reader Using Ocr

    cv2.imshow("Detection", frame)
    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
I am trying to use an ROI (region of interest) so that detection only runs on part of the frame, but I have not been able to get it working.
Any advice, please?
Crop the image before it is fed to the model
while True:
    _, frame = cap.read()
    frame_crop = frame[y1:y2, x1:x2]  # set x1, x2, y1, y2 based on your ROI
    blob = cv2.dnn.blobFromImage(frame_crop, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
This will also speed up inference, since there is less data for the model to process.
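Keep in mind that the network then returns coordinates relative to the crop, so if you still want to draw the boxes on the full frame you have to scale by the crop size and add the ROI offset back. A rough sketch of that idea (the ROI values and variable names below are placeholders for illustration, not from the original code):

x1, y1, x2, y2 = 100, 200, 700, 600          # hypothetical ROI - pick values for your scene

_, frame = cap.read()
roi = frame[y1:y2, x1:x2]
roi_h, roi_w = roi.shape[:2]

blob = cv2.dnn.blobFromImage(roi, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # detection[0..3] are relative to the crop, so scale by the crop size
            # and add the ROI origin to get full-frame coordinates
            center_x = int(detection[0] * roi_w) + x1
            center_y = int(detection[1] * roi_h) + y1
            w = int(detection[2] * roi_w)
            h = int(detection[3] * roi_h)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)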
I am currently working on a project that requires body detection, so I created a separate file for it. When I use it from my main file, the body detection keeps running and never stops, so the rest of my program never executes. I know this is because of the infinite loop, but is there another way I could use it from my main file?
I have attached the body detection program below.
Kindly help.
import cv2
import mediapipe as mp
import time

mpDraw = mp.solutions.drawing_utils
mppose = mp.solutions.pose
pose = mppose.Pose()

cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
cap.set(3, 400)
cap.set(4, 800)
ptime = 0

while True:
    succ, img = cap.read()
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)
    #print(results.pose_landmarks)
    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)

    lmlist = []
    if results.pose_landmarks:
        for id, lm in enumerate(results.pose_landmarks.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmlist.append([id, cx, cy])
        cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15, (255, 0, 255), cv2.FILLED)
        print(lmlist[0])

    ctime = time.time()
    fps = 1 / (ctime - ptime)
    ptime = ctime
    cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 255, 0), 3)

    cv2.imshow("image", img)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
You can wrap the content of the file in a function and run it on a separate thread.
import cv2
import mediapipe as mp
import time

def func():
    mpDraw = mp.solutions.drawing_utils
    mppose = mp.solutions.pose
    pose = mppose.Pose()

    cap = cv2.VideoCapture('open cv/squidgamee/3.mp4')
    cap.set(3, 400)
    cap.set(4, 800)
    ptime = 0

    while True:
        succ, img = cap.read()
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = pose.process(imgRGB)
        if results.pose_landmarks:
            mpDraw.draw_landmarks(img, results.pose_landmarks, mppose.POSE_CONNECTIONS)

        lmlist = []
        if results.pose_landmarks:
            for id, lm in enumerate(results.pose_landmarks.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmlist.append([id, cx, cy])
            cv2.circle(img, (lmlist[0][1], lmlist[0][2]), 15, (255, 0, 255), cv2.FILLED)
            print(lmlist[0])

        ctime = time.time()
        fps = 1 / (ctime - ptime)
        ptime = ctime
        cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 255, 0), 3)

        cv2.imshow("image", img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
Main:
import threading
from file import func
x = threading.Thread(target=func, args=())
x.start()
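One caveat: a plain thread like this keeps the process alive until its own loop ends, and the main file has no way to stop it. A sketch of one way around that, assuming you are willing to change func to accept a threading.Event (this parameter is not in the original file, it is just an illustration):

import threading
from file import func   # assumes func was changed to take a stop_event parameter

stop_event = threading.Event()

# daemon=True means this thread will not keep the process alive when the main program exits
worker = threading.Thread(target=func, args=(stop_event,), daemon=True)
worker.start()

# ... rest of the main program runs here ...

stop_event.set()   # ask the detection loop to stop
worker.join()

Inside func, the while True loop would then check if stop_event.is_set(): break on each iteration. Also note that on some platforms cv2.imshow may only work reliably from the main thread, so you may prefer to keep the display in the main file and run only the detection in the worker thread.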
I wrote a program for pose estimation using OpenCV and the MediaPipe library. The program was working well and I was getting around 30-35 fps. When I tried to convert the same program into a module so that I could easily reuse it in future projects, the fps of the new code (the module) dropped drastically to 3-4 fps.
My original program:
import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(1)
pTime = 0
cTime = 0
mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()

while True:
    success, img1 = cap.read()
    img = cv2.flip(img1, 1)
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)

    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
        for id, lm in enumerate(results.pose_landmarks.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x * w), int(lm.y * h)
            cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)

    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)

    cv2.imshow("Live Feed", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
My attempt at converting it into a module:
import cv2
import mediapipe as mp
import time

class poseDetector():

    def __init__(self, mode=False, upBody=False, smooth=True, detectionCon=0.5, trackingCon=0.5):
        self.mode = mode
        self.upBody = upBody
        self.smooth = smooth
        self.detectionCon = detectionCon
        self.trackingCon = trackingCon
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(self.mode, self.upBody, self.smooth, self.detectionCon, self.trackingCon)

    def findPose(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if self.results.pose_landmarks:
            if draw:
                self.mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img, draw=True):
        lmList = []
        if self.results.pose_landmarks:
            for id, lm in enumerate(self.results.pose_landmarks.landmark):
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        return lmList

def main():
    cap = cv2.VideoCapture(1)
    pTime = 0
    cTime = 0
    while True:
        success, img1 = cap.read()
        img = cv2.flip(img1, 1)
        detector = poseDetector()
        img = detector.findPose(img)
        lmList = detector.findPosition(img)

        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)

        cv2.imshow("Live Feed", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == '__main__':
    main()
As far as I can tell, both versions should behave in the same way, but they do not. Can anyone tell me where I am making a mistake?
You need to place detector = poseDetector() before the while True: loop:
detector = poseDetector()

while True:
    success, img1 = cap.read()
    ...
Your "module" implementation creates a new poseDetector object on every iteration of the main loop.
Each execution of detector = poseDetector() calls poseDetector.__init__, which in turn executes self.pose = self.mpPose.Pose(...).
There is a lot of overhead:
while True:
    success, img1 = cap.read()
    img = cv2.flip(img1, 1)
    detector = poseDetector()
    ...
In your original ("non-module") implementation, you are executing pose = mpPose.Pose() only once (before the loop).
pose = mpPose.Pose()

while True:
    success, img1 = cap.read()
    ...
I have tested your code before and after moving detector = poseDetector() outside the loop.
After moving the line above the loop, the frame rate is the same as the "non-module" implementation.
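For reference, a sketch of the corrected main() from the question, with the detector (and therefore the Pose object) built once before the loop:

def main():
    cap = cv2.VideoCapture(1)
    pTime = 0
    detector = poseDetector()      # created once, outside the loop

    while True:
        success, img1 = cap.read()
        img = cv2.flip(img1, 1)
        img = detector.findPose(img)
        lmList = detector.findPosition(img)

        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, "FPS : " + str(int(fps)), (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 8), 2)

        cv2.imshow("Live Feed", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break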
I have code for face recognition with OpenCV in Python:
import face_recognition as fr
import os
import cv2
import face_recognition
import numpy as np
from time import sleep

def get_encoded_faces():
    """
    looks through the faces folder and encodes all
    the faces
    :return: dict of (name, image encoded)
    """
    encoded = {}
    for dirpath, dnames, fnames in os.walk("./faces"):
        for f in fnames:
            if f.endswith(".jpg") or f.endswith(".png"):
                face = fr.load_image_file("faces/" + f)
                encoding = fr.face_encodings(face)[0]
                encoded[f.split(".")[0]] = encoding
    return encoded

def unknown_image_encoded(img):
    """
    encode a face given the file name
    """
    face = fr.load_image_file("faces/" + img)
    encoding = fr.face_encodings(face)[0]
    return encoding

def classify_face(im):
    """
    will find all of the faces in a given image and label
    them if it knows what they are
    :param im: str of file path
    :return: list of face names
    """
    faces = get_encoded_faces()
    faces_encoded = list(faces.values())
    known_face_names = list(faces.keys())

    img = cv2.imread(im, 1)
    #img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
    #img = img[:,:,::-1]

    face_locations = face_recognition.face_locations(img)
    unknown_face_encodings = face_recognition.face_encodings(img, face_locations)

    face_names = []
    for face_encoding in unknown_face_encodings:
        # See if the face is a match for the known face(s)
        matches = face_recognition.compare_faces(faces_encoded, face_encoding)
        name = "Unknown"

        # use the known face with the smallest distance to the new face
        face_distances = face_recognition.face_distance(faces_encoded, face_encoding)
        best_match_index = np.argmin(face_distances)
        if matches[best_match_index]:
            name = known_face_names[best_match_index]
        face_names.append(name)

    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Draw a box around the face
        cv2.rectangle(img, (left-20, top-20), (right+20, bottom+20), (255, 0, 0), 2)
        # Draw a label with a name below the face
        cv2.rectangle(img, (left-20, bottom -15), (right+20, bottom+20), (255, 0, 0), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(img, name, (left -20, bottom + 15), font, 1.0, (255, 255, 255), 2)

    # Display the resulting image
    while True:
        cv2.imshow('Video', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            return face_names
print(classify_face("test-image"))
It takes an image that we saved for testing, but I want it to take the image from the camera and then recognise it. Can anyone here please tell me how I can change it so the test image is taken from the camera instead of what we saved in the database?
If you want to check a single frame from the camera, then simply use
cap = cv2.VideoCapture(0)
status, img = cap.read()
instead of
img = cv2.imread(im, 1)
If you want to check faces in a stream, then you need to put it all in a loop:
def classify_face(im):
    faces = get_encoded_faces()
    faces_encoded = list(faces.values())
    known_face_names = list(faces.keys())

    cap = cv2.VideoCapture(0)

    while True:
        status, img = cap.read()

        face_locations = face_recognition.face_locations(img)
        unknown_face_encodings = face_recognition.face_encodings(img, face_locations)

        face_names = []
        for face_encoding in unknown_face_encodings:
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(faces_encoded, face_encoding)
            name = "Unknown"

            # use the known face with the smallest distance to the new face
            face_distances = face_recognition.face_distance(faces_encoded, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

        for (top, right, bottom, left), name in zip(face_locations, face_names):
            # Draw a box around the face
            cv2.rectangle(img, (left-20, top-20), (right+20, bottom+20), (255, 0, 0), 2)
            # Draw a label with a name below the face
            cv2.rectangle(img, (left-20, bottom -15), (right+20, bottom+20), (255, 0, 0), cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(img, name, (left -20, bottom + 15), font, 1.0, (255, 255, 255), 2)

        cv2.imshow('Video', img)
        print(face_names)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            return
EDIT: Full code (with small changes) for working with a video stream. It works for me. But if the previous function didn't work for you, then this version may not help you either - the function classify_face is almost the same as in the previous example.
import os
import cv2
import face_recognition as fr
import numpy as np

def get_encoded_faces(folder="./faces"):
    """
    looks through the faces folder and encodes all
    the faces
    :return: dict of (name, image encoded)
    """
    encoded = {}

    for dirpath, dnames, fnames in os.walk(folder):
        for f in fnames:
            if f.lower().endswith(".jpg") or f.lower().endswith(".png"):
                fullpath = os.path.join(dirpath, f)
                face = fr.load_image_file(fullpath)
                # normally face_encodings check if face is on image - and it can get empty result
                height, width = face.shape[:2]
                encoding = fr.face_encodings(face, known_face_locations=[(0, width, height, 0)])
                if len(encoding) > 0:
                    encoding = encoding[0]
                    encoded[f.split(".")[0]] = encoding

    return encoded

def classify_face(im):
    """
    will find all of the faces in a given image and label
    them if it knows what they are
    :param im: str of file path
    :return: list of face names
    """
    faces = get_encoded_faces()
    faces_encoded = list(faces.values())
    known_face_names = list(faces.keys())

    cap = cv2.VideoCapture(0)

    while True:
        status, img = cap.read()
        #print('status:', status)

        face_locations = fr.face_locations(img)
        unknown_face_encodings = fr.face_encodings(img, face_locations)

        face_names = []
        for location, face_encoding in zip(face_locations, unknown_face_encodings):  # I moved `zip()` in this place
            # See if the face is a match for the known face(s)
            matches = fr.compare_faces(faces_encoded, face_encoding)
            name = "Unknown"

            # use the known face with the smallest distance to the new face
            face_distances = fr.face_distance(faces_encoded, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

            top, right, bottom, left = location
            # Draw a box around the face
            cv2.rectangle(img, (left-20, top-20), (right+20, bottom+20), (255, 0, 0), 2)
            # Draw a label with a name below the face
            cv2.rectangle(img, (left-20, bottom -15), (right+20, bottom+20), (255, 0, 0), cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(img, name, (left -20, bottom + 15), font, 1.0, (255, 255, 255), 2)

        print('face_names:', face_names)

        cv2.imshow('Video', img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            return face_names
# --- main ---
print(classify_face("test-image"))
cv2.destroyAllWindows()
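One more hint, not part of the original answer: face_locations and face_encodings on a full-resolution frame are slow, so if the stream feels sluggish you can run detection on a downscaled copy and scale the boxes back up before encoding and drawing. face_recognition also expects RGB, so converting the BGR frame first may improve matching. A sketch of the lines you would swap in inside the loop:

rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
small = cv2.resize(rgb, (0, 0), fx=0.25, fy=0.25)        # quarter-size copy just for detection

face_locations = fr.face_locations(small)
# scale the boxes back up to the full-size frame before encoding and drawing
face_locations = [(top * 4, right * 4, bottom * 4, left * 4)
                  for (top, right, bottom, left) in face_locations]

unknown_face_encodings = fr.face_encodings(rgb, face_locations)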
I am working on some code that recognises a face and speaks the person's name. I am running this code on a Raspberry Pi 3 with OpenCV 3.1 and Python 2.7. The code works fine on Windows, but when I try it on the Raspberry Pi it gives an error:
TypeError: 'int' object has no attribute '__getitem__'
for the line if prediction[1] < 100.
Full code:
import io
import cv2, sys, numpy, os, pyttsx, time, picamera

haar_file = 'haarcascade_frontalface_default.xml'
datasets = 'datasets'

engine = pyttsx.init()
rate = engine.getProperty('rate')
engine.setProperty('rate', rate - 40)

print('Training...')

# Create a list of images and a list of corresponding names
(images, labels, names, id) = ([], [], {}, 0)
for (subdirs, dirs, files) in os.walk(datasets):
    for subdir in dirs:
        names[id] = subdir
        subjectpath = os.path.join(datasets, subdir)
        for filename in os.listdir(subjectpath):
            path = subjectpath + '/' + filename
            label = id
            images.append(cv2.imread(path, 0))
            labels.append(int(label))
        id += 1
(width, height) = (130, 100)

# Create a Numpy array from the two lists above
(images, labels) = [numpy.array(lis) for lis in [images, labels]]

model = cv2.createLBPHFaceRecognizer()
model.train(images, labels)

# use LBPH face recognizer on camera frames
face_cascade = cv2.CascadeClassifier(haar_file)
camera = picamera.PiCamera()
camera.resolution = (320, 240)

def getFrame():
    jpegBuffer = io.BytesIO()
    camera.capture(jpegBuffer, format='jpeg')
    buff = numpy.fromstring(jpegBuffer.getvalue(), dtype=numpy.uint8)
    return cv2.imdecode(buff, 1)

while True:
    im = getFrame()
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(im, (x, y), (x + w, y + h), (255, 0, 0), 2)
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (width, height))
        # Try to recognize the face
        prediction = model.predict(face_resize)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)
        if prediction[1] < 100:
            cv2.putText(im, '%s - %.0f' % (names[prediction[0]], prediction[1]), (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
            engine.say('Hello')
            engine.say(names[prediction[0]], prediction[1])
            time.sleep(3)
        else:
            cv2.putText(im, 'not recognized', (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
            engine.say('Hello, Newface')
            time.sleep(3)
        engine.runAndWait()

    cv2.imshow('OpenCV', im)
    key = cv2.waitKey(10)
    if key == 27:
        break