I want to read a video and detect the faces in it by cutting it into frames. But it did not work well and I don't understand where the problem is. Could you help me fix it, please?
NB: I work with Google Colaboratory and I detect the faces with the face_recognition library.
import face_recognition
import cv2
from google.colab.patches import cv2_imshow

input_video = cv2.VideoCapture('/content/My Drive/video-3.mp4')

# Metadata from the input video
frames_per_second = int(input_video.get(cv2.CAP_PROP_FPS))
frame_width = int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))

print('Metadata from input video:',
      '\nFrames per second:', frames_per_second,
      '\nFrame width:', frame_width,
      '\nFrame height:', frame_height)

codec = cv2.VideoWriter.fourcc(*'XVID')
video_writer = cv2.VideoWriter('output_video.mp4',
                               codec,
                               frames_per_second,
                               (frame_width, frame_height))

# An array to hold the locations of faces that are detected on individual frames
face_locations = []

# A counter to keep track of the number of frames processed
count = 1

# Loop through all the frames in the video
while (count != no_of_frames):
    # Read the video to retrieve individual frames. 'frame' will reference the individual frames read from the video.
    ret, frame = input_video.read()

    # Check the 'ret' (return value) to see if we have read all the frames in the video, so we can exit the loop
    if not ret:
        print('Processed all frames')
        break

    # Convert the image (frame) to RGB format, as OpenCV uses BGR format by default.
    # This conversion is done because face_recognition and other libraries usually use RGB format.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Get the coordinates in the image where a face is detected. Use the model 'cnn' for greater accuracy.
    face_locations = face_recognition.face_locations(rgb_frame, model='cnn')

    # Loop through the face locations array and draw a rectangle around each face that is detected in the frame
    for top, right, bottom, left in face_locations:
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

    # Write the frame to the output video
    video_writer.write(frame)

    # Print for every 50 frames processed
    if (count % 50 == 0):
        print('Processed', count, 'frames')

    count += 1

# Release to close all the resources that we have opened for reading and writing video
input_video.release()
video_writer.release()
cv2.destroyAllWindows()
The result:
in line 7: name 'no_of_frames' is not defined
You have not declared a 'no_of_frames' variable. Simple as that.
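If you want to keep the counter-based loop, one option (just a sketch; it assumes the rest of your setup stays unchanged) is to read the total frame count from the capture before the loop:

no_of_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

Since you already break out of the loop when ret is False, you could also simply drop the counter condition and loop with while True.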
Here's sample code that accomplishes a similar task (face and eye detection with a Haar cascade on a webcam feed).
import cv2

# Load the Haar cascade classifiers for faces and eyes (the XML files ship with OpenCV)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

capture = cv2.VideoCapture(0)

while 1:
    _, image = capture.read()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.circle(image, (x + int(w/2), y + int(h/2)), int(h), (255, 0, 0), 4)
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = image[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            cv2.circle(roi_color, (ex + int(ew/2), ey + int(eh/2)), int(eh/2), (0, 0, 255), 2)
    cv2.imshow('Image', image)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

# Close the window
capture.release()
# De-allocate any associated memory usage
cv2.destroyAllWindows()
I'm using OpenCV and Mediapipe to create an application in which multiple faces in the live feed are not desired. Hence I need a solution to either destroy the video feed and display an image until there is only one face in the frame (and then display the feed again, of course),
or overlay an image on the entire video feed display window (hence hiding the feed).
Here's the code so far:
import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(0)
face_detection = mp.solutions.face_detection.FaceDetection(0.8)
mul = cv2.imread('images.png')
mul = cv2.resize(mul, (640, 480))

while True:
    ret, frame = cap.read()
    frame = cv2.resize(frame, (640, 480))
    results = face_detection.process(imgRGB)

    for count, detection in enumerate(results.detections):
        continue

    count += 1

    if count > 1:
        cv2.destroyAllWindows()
        cv2.imshow("output", mul)
        time.sleep(10)
        continue

    cv2.imshow("output", frame)

cap.release()
cv2.destroyAllWindows()
I'm trying to destroy the feed and display the image instead. The delay introduced with time.sleep(10) is there because without it the windows switch between the video feed and the image at a very high rate, making it hard to see what's happening.
The problem is that the image is not being displayed; the window appears blank grey, and after 10 seconds the video feed comes back up. It then takes very long to display the image again, even though the multiple faces never leave the frame.
Thank you
You observe grey frames because you are destroying the window every time the loop starts over. You also get stuck at the if count > 1: statement, since count is increased for every frame regardless of any condition and is never reset, so count stays above 1 after two faces have been detected even though those faces were in different frames. Here is my solution to the problem; hope it helps.
import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(0)
face_detection = mp.solutions.face_detection.FaceDetection(0.8)
mul = cv2.imread('image.jpg')
mul = cv2.resize(mul, (640, 480))
count = 0

while True:
    ret, frame = cap.read()
    frame = cv2.resize(frame, (640, 480))
    cv2.imshow("Original", frame)

    imgRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_detection.process(imgRGB)

    if (results.detections):
        for count, detection in enumerate(results.detections):
            count += 1

    print("Number of Faces: ", count)

    if count > 1:
        cv2.imshow('StackOverflow', mul)
    else:
        cv2.imshow('StackOverflow', frame)

    count = 0

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()
Here is the result with one face in frame;
Result with multiple faces;
When I try to detect faces using my laptop or desktop webcam it works fine, but when I try to detect using an IP cam it looks like it takes too much time to detect one frame. Is there any solution for this? I also tried YOLO, and it takes even more time than the OpenCV Haar cascade.
Here I have simple code that detects a face and crops that part of the frame.
cap = cv2.VideoCapture("web_Cam_IP")
cropScal = 25

while(True):
    # Capture frame-by-frame
    for i in range(10):  # this loop skips 10 frames; if I don't skip frames it looks like it gets stuck there
        ret, frame = cap.read()

    frame = cv2.resize(frame, (0, 0), fx=0.70, fy=0.70)

    # Our operations on the frame come here
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    faces = faceCascade.detectMultiScale(gray, scaleFactor=1.02, minNeighbors=5, minSize=(30, 30))

    for (x, y, w, h) in faces:
        if len(faces) > 0:
            try:
                img = gray[y-cropScal:y+h+cropScal, x-cropScal:x+w+cropScal]
                img = cv2.resize(img, (200, 200))
                img = Image.fromarray(img)
                img.save('images/' + datetime.now().strftime("%d_%m_%Y_%I_%M_%S_%p") + '.png')
            except Exception as e:
                pass
            cv2.rectangle(gray, (x-cropScal, y-cropScal), (x+w+cropScal, y+h+cropScal), (0, 255, 0), 2)

    cv2.imshow('frame', gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
You're only scaling the input frames by a factor of 0.70, not to an absolute resolution. It's possible that your IP cam has a higher resolution than your webcam, so the detection needs more time to analyse a larger frame.
Try rescaling the frames to a fixed size (e.g. 800x600) before the face detection.
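For instance, a minimal sketch (reusing cap and faceCascade from the code above; the 800x600 target is just an example value) that resizes each frame to a fixed resolution before detection:

ret, frame = cap.read()
frame = cv2.resize(frame, (800, 600))   # fixed target size instead of fx/fy scaling
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(gray, scaleFactor=1.02, minNeighbors=5, minSize=(30, 30))

This way the detector's workload stays the same no matter what the camera's native resolution is.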
I'm a newbie in programming and I need to write code to detect a balloon on a fixed background using numpy and OpenCV in live video, and to return the centre of the object [balloon].
Sorry about the ignorance of the questions.
Since I'm new, I had trouble thinking through the logic of doing it. I don't have the resources to "teach the machine" and create a cascade XML to detect balloons, so I thought of one possible solution:
use cv2.createBackgroundSubtractorMOG2() to detect motion against the same background, and once there is some object [balloon], count all the white pixels in the live video and return its centre, given the right threshold amount of white pixels.
The problem is, I don't know how to get the value of a pixel (0-255) to tell whether it's white or black while showing the video at the same time. I think there is a much easier way that I couldn't find guides for.
import numpy as np
import cv2

cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2()

while(1):
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    fgmask = fgbg.apply(gray)
    img_arr = np.array(fgmask)
    cv2.imshow('frame', fgmask)

    for i in fgmask:
        for j in i:
            print(fgmask)

    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
I'm getting grey video on the output and lots of values that I don't know how to interpret.
I would use
changes = (fgmask>200).sum()
to select all pixels with an almost-white value (>200) and count them.
Then I can compare the result with some threshold to treat it as movement.
import numpy as np
import cv2

cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2()

while True:
    ret, frame = cap.read()
    if frame is None:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    fgmask = fgbg.apply(gray)

    #changes = sum(sum(fgmask>200))
    changes = (fgmask > 200).sum()
    is_moving = (changes > 10000)
    print(changes, is_moving)

    cv2.imshow('frame', fgmask)

    k = cv2.waitKey(10) & 0xff
    if k == 27:
        break

cv2.destroyAllWindows()
cap.release()
print() needs some time to display text, so printing all the pixels (many times per loop) can slow the program down. So I skip this; I don't need to know the values of all pixels.
EDIT: Using the answer in "how to detect region of large # of white pixels using opencv?", I added code which finds white regions and draws rectangles around them. The program opens two windows - one with the greyscale fgmask and one with the RGB frame - and they can hide one behind the other, so you may have to move one window to see the other.
EDIT: I also added code which uses cv2.contourArea(cnt) and (x,y,w,h) = cv2.boundingRect(cnt) to build a list of items (area, x, y, w, h) for all contours, and then takes max(items) to get the contour with the biggest area. It then uses (x + w//2, y + h//2) as the centre for a red circle.
import numpy as np
import cv2

cap = cv2.VideoCapture(0)
fgbg = cv2.createBackgroundSubtractorMOG2()

while True:
    ret, frame = cap.read()
    if frame is None:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    fgmask = fgbg.apply(gray)

    #changes = sum(sum(fgmask>200))
    changes = (fgmask > 200).sum()
    is_moving = (changes > 10000)
    print(changes, is_moving)

    items = []

    contours, hier = cv2.findContours(fgmask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if 200 < area:
            (x, y, w, h) = cv2.boundingRect(cnt)
            cv2.rectangle(fgmask, (x, y), (x+w, y+h), 255, 2)
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            items.append((area, x, y, w, h))

    if items:
        main_item = max(items)
        area, x, y, w, h = main_item
        if w > h:
            r = w//2
        else:
            r = h//2
        cv2.circle(frame, (x + w//2, y + h//2), r, (0, 0, 255), 2)

    cv2.imshow('fgmask', fgmask)
    cv2.imshow('frame', frame)

    k = cv2.waitKey(10) & 0xff
    if k == 27:
        break

cv2.destroyAllWindows()
cap.release()
I am trying to plot facial key points on the video frame using OpenCV VideoCapture. I am using a trained PyTorch CNN model. Here is the code:
cap = cv.VideoCapture(0)
time.sleep(2.0)

while cap.isOpened():
    ret, frame = cap.read()
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        face_gray = gray[y:y+h, x:x+w]

    sample = cv.resize(face_gray, (96, 96))
    sample = sample.astype('float32')/255
    sample = np.asarray(sample).reshape(1, 96, 96)
    sample = torch.from_numpy(sample).unsqueeze(0).to(device)

    output = saved_model(sample)
    output = output.view(-1, 2).detach()
    output = (output * 48) + 48
    output = output.cpu().numpy()
    print(output)

    for i in range(15):
        cv.circle(frame, (output[i][0], output[i][1]), 1, (0, 0, 255), -1)

    cv.imshow("Frame", frame)

    key = cv.waitKey(1) & 0xFF
    if key == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
Input dimension: torch.Tensor([1, 1, 96, 96]) - one grayscale image
Output dimension: torch.Tensor([15, 2]) - the (x, y) coordinates of 15 facial key points
When a face is detected (using the Haar cascade) in the video capture, the output values stay the same, so the plotted key points do not change.
I don't see anything wrong with your code block. The one likely source of error is a static face in the video frame that is detected as the last face by the Haar cascade detector. From your code block it is apparent that you are predicting key points for only one face per video frame. Try moving the sample = ..., output = ..., and the key-point drawing for ...: block into the for ...: iterator over faces.
The code block after the suggested edits will look like this:
cap = cv.VideoCapture(0)
time.sleep(2.0)

while cap.isOpened():
    ret, frame = cap.read()
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    for (x, y, w, h) in faces:
        cv.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        face_gray = gray[y:y+h, x:x+w]

        # Push this block into the for iterator of faces
        sample = cv.resize(face_gray, (96, 96))
        sample = sample.astype('float32')/255
        sample = np.asarray(sample).reshape(1, 96, 96)
        sample = torch.from_numpy(sample).unsqueeze(0).to(device)

        output = saved_model(sample)
        output = output.view(-1, 2).detach()
        output = (output * 48) + 48
        output = output.cpu().numpy()
        print(output)

        for i in range(15):
            cv.circle(frame, (output[i][0], output[i][1]), 1, (0, 0, 255), -1)
        # End block

    cv.imshow("Frame", frame)

    key = cv.waitKey(1) & 0xFF
    if key == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
That code isn't handling the case of len(faces) > 0 initially for some number of iterations, then len(faces) == 0 subsequently. Should that happen, face_gray will retain its prior value, and you'll be drawing onto a new frame based on a stale face_gray.
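One small defensive sketch of that point (reusing the names from the question, purely illustrative) is to reset face_gray every frame and only run the model when the current frame actually produced a crop:

face_gray = None
for (x, y, w, h) in faces:
    cv.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
    face_gray = gray[y:y+h, x:x+w]

if face_gray is not None:
    # only predict key points when this frame contained a face
    sample = cv.resize(face_gray, (96, 96))
    # ... the rest of the prediction and drawing code stays unchanged ...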
There is code that detects faces in a video file while displaying it frame by frame:
cap = cv2.VideoCapture(videoPath)
faceCascade = cv2.CascadeClassifier(cascPath)

while (cap.isOpened()):
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Our operations on the frame come here
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE
    )

    # Draw a rectangle around the faces
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (233, 153, 22), 2)

    # Display the resulting frame
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
I need code that can give me the periods of time when a face is detected and the periods when no face is detected. Can somebody help me with that? At least some tips on how to approach the problem, where to look, etc.
Keep track of the timestamp of current frame, previous frame, and the starting frame of the current sequence containing faces.
Once you no longer detect a face in a frame, append a pair (starting, previous) to a list.
import time

# ....

def get_timestamp():
    # Make the timestamp whatever you want...
    return time.strftime("%Y%m%d-%H%M%S")

# ....

face_present = []
ts_start = None
ts_prev = None

# ....

while (cap.isOpened()):
    ret, frame = cap.read()
    ts = get_timestamp()

    # ....

    if len(faces) > 0:  # Some faces detected
        if ts_start is None:  # This is the start of current sequence
            ts_start = ts
    elif (ts_start is not None) and (ts_prev is not None):
        # First frame without face following a sequence with face...
        face_present.append((ts_start, ts_prev))
        ts_start = None

    ts_prev = ts
You could make the timestamp whatever you want; it could even be a frame number, if that's what you're after.
The same approach can be used to determine the times when a face is not present; you only need to change the condition of the first if statement.
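As a rough sketch of that variation (hypothetical names, meant to sit inside the same while loop as the code above), the bookkeeping for the face-absent periods simply inverts the check:

face_absent = []
absent_start = None
# ....
# inside the same while loop, after 'faces' and 'ts' have been computed:
if len(faces) == 0:               # no face in this frame
    if absent_start is None:      # start of a face-absent sequence
        absent_start = ts
elif absent_start is not None and ts_prev is not None:
    # first frame with a face following a face-absent sequence
    face_absent.append((absent_start, ts_prev))
    absent_start = None
# ts_prev = ts is already updated once per frame at the end of the loop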