Face detection with less CPU load in cv2 [closed] - python

For a university project I am programming a face mask recognition system. To detect faces, I use cv2.CascadeClassifier('face_detector.xml'). As I noticed, this program takes up far too much CPU, resulting in a heavily stuttering video stream.
I am running the code on a MacBook Air with a 1.6 GHz dual-core Intel Core i5.
Can someone explain what I can change to make it run more smoothly? Or maybe recommend another face detection method?
Here is my code:
import numpy as np
import os
import tensorflow as tf
import cv2
from matplotlib.pyplot import gray

# Disable tensorflow compilation warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Load the cascade
face_cascade = cv2.CascadeClassifier('face_detector.xml')

# To capture video from webcam.
cap = cv2.VideoCapture(0)
# To use a video file as input
# cap = cv2.VideoCapture('filename.mp4')

model = tf.keras.models.load_model('checkpoint19.ckpt')

i = 0
while True:
    # Read the frame
    _, img = cap.read()

    # Detect the faces
    faces = face_cascade.detectMultiScale(img, 1.3, 4)

    # save each frame as image with PNG format
    image = cv2.imwrite('database/{index}.png'.format(index=i), img)
    i += 1

    # cut out the fragment in the box of the image and classify it
    for (x, y, w, h) in faces:
        crop_img = img[y:y + h, x:x + w]
        resizedImg = cv2.resize(crop_img, (224, 224))
        gray = cv2.cvtColor(resizedImg, cv2.COLOR_BGR2GRAY)
        imgArrNew = gray.reshape(1, 224, 224, 1)
        prediction = model.predict(imgArrNew)
        print(prediction)
        label = np.argmax(prediction)
        print(label)

    # font
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Draw the rectangle around each face and output the predicted label on the live-stream frame
    for (x, y, w, h) in faces:
        # org
        org = (x, y + h + 30)
        # fontScale
        fontScale = 1
        # Blue color in BGR
        color = (255, 0, 0)
        # Line thickness of 2 px
        thickness = 2

        if label == 0:
            color = (0, 0, 225)
            label_out = "Mask off"
        if label == 1:
            color = (50, 205, 50)
            label_out = "Mask on"
        if label == 2:
            color = (0, 255, 225)
            label_out = "incorrect Mask"

        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        image1 = cv2.putText(img, label_out, org, font,
                             fontScale, color, thickness, cv2.LINE_AA)

    # Display
    cv2.imshow('Face_Regonition', img)

    # Stop if escape key is pressed
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

# Release the VideoCapture object
cap.release()
Thanks for your help :)

The Haar cascade classifier is slow, and running detection on every single frame is hard for low-end computing devices.
The easiest fix is to use a lower-resolution image or a lower FPS, but the result will look cheap.
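For example, a minimal sketch of the downscaling idea, reusing the cascade file from the question (convert to grayscale, detect on a half-size copy, then scale the boxes back to full resolution):

import cv2

face_cascade = cv2.CascadeClassifier('face_detector.xml')
img = cv2.imread('frame.png')  # one captured frame, just for illustration

SCALE = 0.5
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
small = cv2.resize(gray, None, fx=SCALE, fy=SCALE)  # detect on a half-resolution copy

# scale the detections back to full-resolution coordinates
faces = [(int(x / SCALE), int(y / SCALE), int(w / SCALE), int(h / SCALE))
         for (x, y, w, h) in face_cascade.detectMultiScale(small, 1.3, 4)]
print(faces)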
The better way is to use a detection-and-tracking framework, where detection runs at roughly a 1 Hz interval in a separate thread and tracking runs at 30 Hz; the human eye can't tell the difference.
For the face detection you can choose any method (Haar, HOG, a CNN, ...) and put it in a new thread. In the main tracking thread, which can run in real time, update the tracker, predict the bounding box and display it.
You can look into tracking starting from here; I suggest a KCF-based tracker because it is fast and reliable:
https://www.pyimagesearch.com/2018/07/30/opencv-object-tracking/
Just pass the detection rectangle as the initial bounding box for the tracker, then it should work directly.
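As a rough single-threaded sketch of that detect-then-track idea (assuming opencv-contrib-python is installed; depending on the OpenCV version the constructor is cv2.TrackerKCF_create or cv2.legacy.TrackerKCF_create):

import cv2

face_cascade = cv2.CascadeClassifier('face_detector.xml')
cap = cv2.VideoCapture(0)

tracker = None
REDETECT_EVERY = 30  # re-run the (slow) detector roughly once per second
frame_idx = 0

while True:
    ok, img = cap.read()
    if not ok:
        break

    box = None
    if tracker is not None:
        ok_track, box = tracker.update(img)  # cheap, runs every frame
        if not ok_track:
            box, tracker = None, None

    if tracker is None or frame_idx % REDETECT_EVERY == 0:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 4)
        if len(faces) > 0:
            box = tuple(int(v) for v in faces[0])  # track the first detected face
            tracker = cv2.TrackerKCF_create()      # or cv2.legacy.TrackerKCF_create()
            tracker.init(img, box)

    if box is not None:
        x, y, w, h = [int(v) for v in box]
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imshow('detect + track', img)
    frame_idx += 1
    if cv2.waitKey(1) & 0xff == 27:
        break

cap.release()

Moving the detectMultiScale call into a separate thread, as described above, reduces the per-frame cost even further; this sketch only shows the periodic-redetection part.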

Related

Webcam-based object scanner in C# 9/10 .NET 6 [closed]

I'm trying to port this Python code from theDataFox's card_scanner_app to a C# WPF application.
The Python program uses a webcam to recognize an object (a trading card) and takes a snapshot of it when the space bar is pressed.
As far as I know, the code hashes the snapshot and compares it to a database of 64,000 images which are already hashed.
The code then uses the image ID, given in the image filename, to generate the output from the database with all information about this specific object.
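For reference, the hash-and-compare step itself is conceptually simple. A minimal Python sketch of the idea using the imagehash package (the file name, database entry and threshold logic here are illustrative, not taken from the original app):

import imagehash
from PIL import Image

# Hash the snapshot taken from the webcam
snapshot_hash = imagehash.phash(Image.open('snapshot.png'))

# Compare it against the pre-hashed database (here a dict of card_id -> stored hash string)
database = {'12345': 'c3d2c2e1b0f0a1a1'}  # illustrative entry
best_id, best_distance = None, None
for card_id, stored in database.items():
    distance = snapshot_hash - imagehash.hex_to_hash(stored)  # Hamming distance
    if best_distance is None or distance < best_distance:
        best_id, best_distance = card_id, distance

print('best match:', best_id, 'distance:', best_distance)

The full Python capture/scan code being ported is below.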
import cv2
import pygame
import imutils
import numpy as np
import pygame.camera
from pygame.locals import KEYDOWN, K_q, K_s, K_SPACE
from . import util
import argparse
import glob

DEBUG = False

# calibrate this for camera position
MIN_CARD_AREA = 250000.0 / 3

# calibrate these
THRESHOLD = (100, 255)
FILTER = (11, 17, 17)
ROTATION = 0


# noinspection PyUnresolvedReferences
def scan(img):
    # preprocess image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, FILTER[0], FILTER[1], FILTER[2])
    ret, gray = cv2.threshold(gray, THRESHOLD[0], THRESHOLD[1], cv2.THRESH_BINARY)
    edges = imutils.auto_canny(gray)

    # extract contours
    cnts, _ = cv2.findContours(edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = [c for c in cnts if cv2.contourArea(c) >= MIN_CARD_AREA]

    card, c = None, None
    if cnts:
        # get largest contour
        c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]

        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.05 * peri, True)
        pts = np.float32(approx)
        x, y, w, h = cv2.boundingRect(c)

        # Find center point of card by taking x and y average of the four corners.
        # average = np.sum(pts, axis=0)/len(pts)
        # cent_x = int(average[0][0])
        # cent_y = int(average[0][1])
        # center = [cent_x, cent_y]

        # Warp card into 200x300 flattened image using perspective transform
        card = util.flattener(img, pts, w, h)
        card = util.cv2_to_pil(card).rotate(ROTATION)

    return card, c, gray, edges


# noinspection PyUnresolvedReferences
def detect(on_detect):
    dim = (800, 600)

    pygame.init()
    pygame.camera.init()
    cams = pygame.camera.list_cameras()
    # print(cams)
    display = pygame.display.set_mode(dim, 0)
    cam = pygame.camera.Camera(cams[-1], dim)
    cam.start()

    capture = True
    while capture:
        img = cam.get_image()
        img = pygame.transform.scale(img, dim)
        img = util.pygame_to_cv2(img)

        card, c, gray, edges = scan(img)

        # q to quit
        for event in pygame.event.get():
            if event.type == KEYDOWN:
                if event.key == K_q:
                    capture = False
                elif event.key == K_s or event.key == K_SPACE:
                    if card is None:
                        print('nothing found')
                    else:
                        on_detect(card)

        # display
        if c is not None:
            cv2.drawContours(img, [c], -1, (0, 0, 255), 3)
        img = util.cv2_to_pygame(img)
        display.blit(img, (0, 0))
        pygame.display.update()

        if card is not None:
            card_img = util.pil_to_pygame(card)
            display.blit(card_img, (0, 0))

        if DEBUG:
            for layer in [gray, edges]:
                layer = cv2.cvtColor(layer, cv2.COLOR_GRAY2RGB)
                layer = util.cv2_to_pygame(layer)
                layer.set_alpha(100)
                display.blit(layer, (0, 0))

        pygame.display.flip()

    cam.stop()
    pygame.quit()
Now I'm trying to find a way to get similar webcam access in C# (Visual Studio).
As I'm still a beginner, I have no idea how to do this.
I tried several NuGet packages and got nothing working; I guess my skills aren't good enough.
So I have to ask you: do you have any ideas or solutions that could handle the given task?
I have already ported most of the program and it's working fine.
This is my full code.
There is no built-in webcam API directly in C#; the closest you can get is the UWP video capture API. When I tried it, it did not work very well with my webcam, but that might just be me.
There are lots of image processing libraries that have webcam support built in. This includes OpenCV/EmguCV, but also, for example, AForge.
I have used a "versatile webcam library" that is a wrapper around the native webcam APIs, and it seems to work well.
More or less all of these libraries have good documentation with nice examples of how to grab images, and many also ship sample applications that demonstrate how to use the library; that should be plenty to get you started. So "I tried and it didn't work" is not a good explanation: what have you tried, and why did it not work?

Why is my Haar cascade model not accurate? Python + OpenCV

I am using Cascade Trainer GUI to get an XML file. I have 100 positive images and 400 negative images. The training process only took about 5 minutes, and the results are not accurate. The object I trained the model for is a small screwdriver. The resulting .xml file was only 31.5 KB; please see the image.
Also, the rectangle in the photo is quite small, let alone inaccurate.
Besides adding more positive and negative images, what should I do to create a more accurate model? I eventually need to do image tracking as well. Thanks.
# import numpy as np
import cv2
import time

"""
This program uses OpenCV to detect faces, smiles, and eyes. It uses Haar cascades, which are public domain.
Haar cascades rely on xml files which contain model training data. An xml file can be generated by training
on many positive and negative images.
Try your built-in camera with 'cap = cv2.VideoCapture(0)' or use any video: cap = cv2.VideoCapture("videoNameHere.mp4")
"""

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
smile = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')
screw = cv2.CascadeClassifier('cascade.xml')

cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX
prev_frame_time, new_frame_time = 0, 0

while 1:
    ret, img = cap.read()
    img = cv2.resize(img, (1920, 1080))

    # faces = face_cascade.detectMultiScale(img, 1.5, 5)
    # eyes = eye_cascade.detectMultiScale(img, 1.5, 6)
    # smiles = smile.detectMultiScale(img, 1.1, 400)
    screws = screw.detectMultiScale(img, 1.2, 3)

    new_frame_time = time.time()
    try:
        fps = 1 / (new_frame_time - prev_frame_time)
    except ZeroDivisionError:
        fps = 0
    fps = int(fps)
    cv2.putText(img, "FPS: " + str(fps), (10, 450), font, 3, (0, 0, 0), 5, cv2.LINE_AA)

    # for (x, y, w, h) in smiles:
    #     cv2.rectangle(img, (x, y), (x + w, y + h), (0, 69, 255), 2)
    #     cv2.putText(img, "smile", (int(x - .1 * x), int(y - .1 * y)), font, 1, (255, 255, 255), 2)

    for (x, y, w, h) in screws:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        cv2.putText(img, "screwdriver", (int(x - .1 * x), int(y - .1 * y)), font, 1, (255, 0, 255), 2)

    # for (x, y, w, h) in faces:
    #     cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    #     cv2.putText(img, "FACE", (int(x - .1 * x), int(y - .1 * y)), font, 1, (255, 255, 255), 2)
    #     roi_color = img[y:y + h, x:x + w]
    #     eyes = eye_cascade.detectMultiScale(roi_color)
    #     for (ex, ey, ew, eh) in eyes:
    #         cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)

    cv2.imshow('img', img)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

    prev_frame_time = new_frame_time

cap.release()
cv2.destroyAllWindows()
Most resources on the topic recommend 3000-5000 images each for positives and negatives. That might very well be the reason for the low accuracy.
Some resources:
Link 1 - sonots
Link 2 - opencv-user-blog
Link 3 - computer vision software
Link 4 - pythonprogramming.net
If your image above is a "typical" one, then this can never work using cascades: cascades need reliable texture and pose, and your scene lacks both.
(I also guess that you do not really have 100 positive images, but tried to "synthesize" them from a few, or even a single, image; this is proven not to work in real life.)
Don't waste more time on this.
Get more (real!) images, and read up on object-detection CNNs like SSD or YOLO, which are far more robust in your situation.
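If you go the CNN route, a rough sketch of running a YOLO-style detector through OpenCV's dnn module looks like this (the .cfg/.weights file names are placeholders; for a custom object such as a screwdriver you would first have to train or fine-tune your own weights):

import cv2
import numpy as np

# Placeholder files -- you must supply your own trained config and weights
net = cv2.dnn.readNetFromDarknet('yolov4-custom.cfg', 'yolov4-custom.weights')
layer_names = net.getUnconnectedOutLayersNames()

img = cv2.imread('test.jpg')
h, w = img.shape[:2]

blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
outputs = net.forward(layer_names)

boxes, confidences = [], []
for output in outputs:
    for detection in output:
        scores = detection[5:]
        confidence = float(scores[np.argmax(scores)])
        if confidence > 0.5:
            # detections are (center_x, center_y, width, height), normalized to [0, 1]
            cx, cy, bw, bh = detection[0:4] * np.array([w, h, w, h])
            boxes.append([int(cx - bw / 2), int(cy - bh / 2), int(bw), int(bh)])
            confidences.append(confidence)

# Non-maximum suppression drops overlapping boxes
for i in np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)).flatten():
    x, y, bw, bh = boxes[i]
    cv2.rectangle(img, (x, y), (x + bw, y + bh), (255, 0, 255), 2)

cv2.imshow('detections', img)
cv2.waitKey(0)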

OpenCV car detection with conditions

I'm attempting to create a program in which my code analyses a video from my security camera and locates the cars it spots. I've figured out how to find the cars and draw red rectangles around them, but I'd like to add a condition that only draws the boxes if more than 5 cars are detected. However, I am unable to do so because the detections come back as an array. How would I go about fixing this code?
import cv2

classifier_file = 'cars.xml'
car_tracker = cv2.CascadeClassifier(classifier_file)
video = cv2.VideoCapture("footage.mp4")

while True:
    (read_successful, frame) = video.read()
    if read_successful:
        grayscaled_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        break

    cars = car_tracker.detectMultiScale(grayscaled_frame)

    if cars > 4:
        for (x, y, w, h) in cars:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)

    cv2.imshow('Car Detector', frame)
    cv2.waitKey(0)
By the way, I am using an Anaconda environment with Python 3.8.8.
You have an array of detections.
All you need is to call len() on the array to get the number of elements. This is a built-in function of Python.
cars = car_tracker.detectMultiScale(grayscaled_frame)

if len(cars) >= 5:
    for (x, y, w, h) in cars:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
No need for any other libraries.
Here is an example using a different technique: cvlib, which has an object_detection module that can detect multiple classes. It uses the YOLOv4 weights, so you will be able to detect any car on the road, or in this case from a webcam; just make sure the cars are ahead of the camera rather than seen flat side-on.
import cv2
import matplotlib.pyplot as plt
# pip install cvlib
import cvlib as cv
from cvlib.object_detection import draw_bbox

im = cv2.imread('cars3.jpg')
# cv2.imshow("cars", im)
# cv2.waitKey(0)

bbox, label, conf = cv.detect_common_objects(im)
# The yolov4 weights will be downloaded to C:\Users\USER_NAME\.cvlib\object_detection\yolo
# If the download was not successful, delete the yolo folder and try again; it will be downloaded again.
# If the script keeps reporting 0 cars after the download, close and reopen Anaconda Spyder and try again.

# get bbox
output_image_with_bbox = draw_bbox(im, bbox, label, conf)

number_cars = label.count('car')
if number_cars > 5:
    print('Number of cars in the image is: ' + str(number_cars))
    plt.imshow(output_image_with_bbox)
    plt.show()
else:
    print('Number of cars in the image is: ' + str(number_cars))
    plt.imshow(im)
    plt.show()
output:
Greetings.

Is there a simple way to map a texture to a different "UV" system in python?

I really don't know if "UVs" is the right word, as I come from the world of Unity and am trying to write some stuff in Python. What I'm trying to do is take a picture of a human (from a webcam), take the placement of their landmarks/key features, and alter a second image (of a different person) so that their key features end up in the same place, morphing/warping the parts of the skin within the face to fit the positions of the first (webcam) image's landmarks. After I do that, I need to put the face back on the non-webcam input. (I'm sorry for how much that made me sound like a serial killer, stretching and cutting faces.) I know that probably didn't make much sense, but I want it to look like this.
I have the face landmark detection and cutting done with dlib and OpenCV, but I need a way to take these "cut" face chunks and stretch them "dynamically". By dynamically I mean that you don't just resize a mask linearly along one or two axes; you can select a point of the mask and move it. I want to do that, but my mask is my cut chunk, and the point is a section of that chunk that needs to move for the chunk to match the positions of the generated landmarks. I know this is a very hard topic to think about, so if you need any clarification just ask. My code:
import cv2
import numpy as np
import dlib

cap = cv2.VideoCapture(0)

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

while True:
    _, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    faces = detector(gray)
    for face in faces:
        x1 = face.left()
        y1 = face.top()
        x2 = face.right()
        y2 = face.bottom()
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

        landmarks = predictor(gray, face)
        for n in range(0, 68):
            x = landmarks.part(n).x
            y = landmarks.part(n).y
            cv2.circle(frame, (x, y), 4, (255, 0, 0), -1)

    cv2.imshow("Frame", frame)

    key = cv2.waitKey(1)
    if key == 27:
        break
EDIT: No, I'm not a serial killer.
If you need to deform the source image like a rubber sheet using two sets of keypoints, you need a thin plate spline (TPS) or, better, a piecewise affine transformation like here. The latter is closer to texture rasterization methods (triangle-to-triangle texture transforms).
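A rough sketch of the piecewise-affine route with scikit-image, assuming you already have the 68 (x, y) landmark arrays for both faces from dlib (the function and argument names below are illustrative):

import numpy as np
from skimage.transform import PiecewiseAffineTransform, warp

def warp_face(other_img, landmarks_other, landmarks_webcam, output_shape):
    """Deform other_img so its landmarks end up where the webcam landmarks are."""
    tform = PiecewiseAffineTransform()
    # estimate(src, dst): src are coordinates in the output frame (the webcam layout),
    # dst are the matching coordinates in the input image (other_img).
    # dlib landmarks are (x, y), which matches skimage's (col, row) transform convention.
    tform.estimate(np.asarray(landmarks_webcam, dtype=float),
                   np.asarray(landmarks_other, dtype=float))
    # warp fills each output pixel by looking up tform(output_coord) in other_img
    return warp(other_img, tform, output_shape=output_shape)

Two caveats: warp returns a float image in [0, 1], so convert with (warped * 255).astype(np.uint8) before handing it back to OpenCV, and the result is only defined inside the convex hull of the landmarks, so mask that region and paste it back onto the webcam frame.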

How to add a custom Keras model to OpenCV in Python

I have created a model for classification of two types of shoes.
Now, how do I deploy it in OpenCV (video object detection)?
Thanks in advance.
You can do that with the help of the OpenCV DNN module:
import cv2

# Load a model imported from Tensorflow
tensorflowNet = cv2.dnn.readNetFromTensorflow('card_graph/frozen_inference_graph.pb', 'exported_pbtxt/output.pbtxt')

# Input image
img = cv2.imread('image.jpg')
rows, cols, channels = img.shape

# Use the given image as input, which needs to be blob(s).
tensorflowNet.setInput(cv2.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False))

# Runs a forward pass to compute the net output
networkOutput = tensorflowNet.forward()

# Loop on the outputs
for detection in networkOutput[0, 0]:
    score = float(detection[2])
    if score > 0.9:
        left = detection[3] * cols
        top = detection[4] * rows
        right = detection[5] * cols
        bottom = detection[6] * rows

        # draw a red rectangle around detected objects
        cv2.rectangle(img, (int(left), int(top)), (int(right), int(bottom)), (0, 0, 255), thickness=2)

# Show the image with a rectangle surrounding the detected objects
cv2.imshow('Image', img)
cv2.waitKey()
cv2.destroyAllWindows()
You need the frozen inference graph and a .pbtxt file to run your model in OpenCV.
Alternatively, you could save the model to an H5 file with model.save("modelname.h5"), then load it in your OpenCV script with Keras's load_model("modelname.h5") and, in a loop, classify the objects you find via model.predict(ImageROI).
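Putting that together, a minimal sketch of the Keras + OpenCV loop (assuming a classifier trained on 224x224 RGB inputs; adjust the preprocessing to whatever your model was trained with, and note the class names are placeholders):

import cv2
import numpy as np
from tensorflow.keras.models import load_model

model = load_model('modelname.h5')       # your saved Keras model
labels = ['shoe_type_a', 'shoe_type_b']  # placeholder class names

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break

    # The whole frame is used as the ROI here; in practice you would crop the detected object first
    roi = cv2.resize(frame, (224, 224))
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB) / 255.0  # match your training preprocessing
    prediction = model.predict(roi[np.newaxis, ...])
    label = labels[int(np.argmax(prediction))]

    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('classification', frame)
    if cv2.waitKey(1) & 0xff == 27:  # Esc to quit
        break

cap.release()
cv2.destroyAllWindows()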
