I want to build an application that detects faces in a fisheye camera feed. It already detects faces from a normal webcam, but I have no idea how to adapt it to the fisheye distortion. I tried different approaches, like editing the facial landmark points, but I couldn't even print the points on my face. The source code is below.
import face_recognition
import cv2
import numpy as np
import dlib
# This is a demo of running face recognition on live video from your webcam. It's a little more complicated than the
# other example, but it includes some basic performance tweaks to make things run a lot faster:
# 1. Process each video frame at 1/4 resolution (though still display it at full resolution)
# 2. Only detect faces in every other frame of video.
# PLEASE NOTE: This example requires OpenCV (the `cv2` library) to be installed only to read from your webcam.
# OpenCV is *not* required to use the face_recognition library. It's only required if you want to run this
# specific demo. If you have trouble installing it, try any of the other demos that don't require it instead.
# Get a reference to webcam #0 (the default one)
video_capture = cv2.VideoCapture(0)
# Load a sample picture and learn how to recognize it.
obama_image = face_recognition.load_image_file("obama.jpg")
obama_face_encoding = face_recognition.face_encodings(obama_image)[0]
# Load a second sample picture and learn how to recognize it.
biden_image = face_recognition.load_image_file("biden.jpg")
biden_face_encoding = face_recognition.face_encodings(biden_image)[0]
# Load a Third sample picture and learn how to recognize it.
Logesh_image = face_recognition.load_image_file("Upside Logesh.jpg")
Logesh_face_encoding = face_recognition.face_encodings(Logesh_image)[0]
# Create arrays of known face encodings and their names
known_face_encodings = [
obama_face_encoding,
biden_face_encoding,
Logesh_face_encoding
]
known_face_names = [
"Barack Obama",
"Joe Biden",
"Logesh"
]
# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
face_position = []
process_this_frame = True
face_landmarks = []
while True:
# Grab a single frame of video
ret, frame = video_capture.read()
    # Position frame (unused later; note that giving an explicit dsize like (50, 50) makes OpenCV ignore the fx/fy scale factors)
    Direction_frame = cv2.resize(frame, (50, 50), fx=1.50, fy=1.50)
# Resize frame of video to 1/4 size for faster face recognition processing
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    rgb_small_frame = small_frame[:, :, ::-1]
# Only process every other frame of video to save time
if process_this_frame:
        # Find all the faces first, then compute encodings and landmarks for those
        # locations (face_landmarks() needs face_locations to be populated; calling
        # it earlier computes landmarks from stale or empty locations, so no points
        # ever show up)
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        face_landmarks = face_recognition.face_landmarks(rgb_small_frame, face_locations)
face_names = []
for face_encoding in face_encodings:
# See if the face is a match for the known face(s)
matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
name = "Unknown"
# # If a match was found in known_face_encodings, just use the first one.
# if True in matches:
# first_match_index = matches.index(True)
# name = known_face_names[first_match_index]
# Or instead, use the known face with the smallest distance to the new face
face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
best_match_index = np.argmin(face_distances)
if matches[best_match_index]:
name = known_face_names[best_match_index]
face_names.append(name)
process_this_frame = not process_this_frame
    def rect_to_bb(rect):
        # take a bounding box predicted by dlib and convert it to the
        # format (x, y, w, h) as we would normally do with OpenCV
        # (helper; currently unused)
x = rect.left()
y = rect.top()
w = rect.right() - x
h = rect.bottom() - y
# return a tuple of (x, y, w, h)
return x, y, w, h
# Display the results
for (top, right, bottom, left), name in zip(face_locations, face_names):
# Scale back up face locations since the frame we detected in was scaled to 1/4 size
top *= 4
right *= 4
bottom *= 4
left *= 4
# Draw a box around the face
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
# Draw a label with a name below the face
cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
font = cv2.FONT_HERSHEY_DUPLEX
cv2.putText(frame, name, (left + 10, bottom - 6), font, 1.0, (255, 255, 255), 1)
# Movement of a person
if right < 448:
Right_Command = "You are in the right side"
cv2.putText(frame, Right_Command, (left - 100, bottom - 300), font, 1.0, (255, 255, 255), 1)
if left > 928:
Left_Command = "You are in the left side"
cv2.putText(frame, Left_Command, (left - 100, bottom - 300), font, 1.0, (255, 255, 255), 1)
# Display the resulting image
cv2.imshow('Video', frame)
# Hit 'q' on the keyboard to quit!
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Second pass: draw the 68 dlib landmark points on a fresh capture
# (this loop only starts after the first loop is quit with 'q')
cap = cv2.VideoCapture(0)
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
while True:
_, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = detector(gray)
for face in faces:
x1 = face.left()
y1 = face.top()
x2 = face.right()
y2 = face.bottom()
#cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
landmarks = predictor(gray, face)
for n in range(0, 68):
x = landmarks.part(n).x
y = landmarks.part(n).y
cv2.circle(frame, (x, y), 4, (255, 0, 0), -1)
cv2.imshow("Frame", frame)
key = cv2.waitKey(1)
if key == 27:
break
# Release handles to the webcam
video_capture.release()
cap.release()
cv2.destroyAllWindows()
You could remove the fisheye distortion and run face recognition on the undistorted frames; I don't know how fast that is, though. You can do it in OpenCV: first find the camera's optical parameters with cv2.fisheye.calibrate(), then remove the distortion. This answer gives a brief tutorial.
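A minimal sketch of the undistortion step, assuming the camera matrix K and the four fisheye distortion coefficients D have already been obtained from cv2.fisheye.calibrate() (the numbers below are placeholders, not real calibration values):

import cv2
import numpy as np

# Placeholder intrinsics -- replace with the K and D that cv2.fisheye.calibrate() returns
K = np.array([[300.0, 0.0, 320.0],
              [0.0, 300.0, 240.0],
              [0.0, 0.0, 1.0]])
D = np.array([[0.1], [0.01], [0.0], [0.0]])

def undistort(frame):
    h, w = frame.shape[:2]
    # In a real capture loop you would precompute these maps once, not per frame
    map1, map2 = cv2.fisheye.initUndistortRectifyMap(
        K, D, np.eye(3), K, (w, h), cv2.CV_16SC2)
    return cv2.remap(frame, map1, map2,
                     interpolation=cv2.INTER_LINEAR,
                     borderMode=cv2.BORDER_CONSTANT)

# Feed undistort(frame) to face_recognition instead of the raw fisheye frame.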
I am currently working on a face detection algorithm using MTCNN.
With the below code, I am able to show the frame containing my face:
def run_detection(fast_mtcnn):
frames = []
frames_processed = 0
faces_detected = 0
batch_size = 60
start = time.time()
cap = cv2.VideoCapture(0)
while True:
__, frame = cap.read()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frames.append(frame)
faces = fast_mtcnn(frames)
frames_processed += len(frames)
faces_detected += len(faces)
frames = []
if len(faces) > 0:
for face in faces:
cv2.imshow('frame',face)
print(
f'Frames per second: {frames_processed / (time.time() - start):.3f},',
f'faces detected: {faces_detected}\r',
end=''
)
        if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
    cv2.destroyAllWindows()
FYI, the cv2.imshow('frame', face) call is just there as a placeholder; what I actually want is the face's bounding box.
It looks something like this (please forgive my silly-looking face):
This is just to show that it is detecting the face and extracting the frames containing it.
My challenge is how to take the smaller frame (containing my face) and get its edge coordinates so I can draw a bounding box for each person in the full frame.
What I tried is the following:
cv2.rectangle(frame,
(face[0], face[1]),
(face[0]+face[2], face[1] + face[3]),
(0,155,255),
2)
I assume this is completely wrong.
The solution depends on how the points are returned by the face detection system, which varies with the library you use.
In this case the detected face contains the following information:
x, y, w, h = face, where (x, y) is the top-left point, w is the width of the bounding box, and h is the height.
Hence you can draw a rectangle of the face as follows:
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
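Note that the FastMTCNN recipe from facenet-pytorch (which fast_mtcnn in the question appears to be) returns the cropped face images themselves, not coordinates. If you need the boxes, a plain facenet_pytorch.MTCNN can return corner points directly through its detect() method; a sketch, assuming facenet-pytorch is installed:

import cv2
from facenet_pytorch import MTCNN

mtcnn = MTCNN(keep_all=True)  # keep_all=True returns every detected face

cap = cv2.VideoCapture(0)
_, bgr = cap.read()
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # MTCNN expects RGB input

boxes, probs = mtcnn.detect(rgb)  # boxes are [x1, y1, x2, y2] corners, not (x, y, w, h)
if boxes is not None:
    for (x1, y1, x2, y2) in boxes:
        cv2.rectangle(bgr, (int(x1), int(y1)), (int(x2), int(y2)), (0, 155, 255), 2)
cap.release()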
Objective:
I have a drone and it has to detect and decode all barcodes and QR codes (small or big) from a distance of 3 meters. Even though I have been able to achieve this with a hand-held camera, the same does not work on a drone, as it is very hard to hold the drone steady.
Research Done:
I searched the internet and found a program by pyimagesearch that draws bounding boxes around a detected barcode. The program consists of two files:
1. simple_barcode_detection.py: processes the received frame and returns the bounding box location of the barcode.
2. detect_barcode.py: sends the input frames to simple_barcode_detection.py and displays the returned frames. (Note: this program successfully drew bounding boxes around the barcode.)
I am attaching the current state of the code, which I have built from this.
simple_barcode_detection.py:
# import the necessary packages
import numpy as np
import cv2
import imutils
from pyzbar import pyzbar
def detect(image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the Scharr gradient magnitude representation of the images
# in both the x and y direction using OpenCV 2.4
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
gradX = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)
gradY = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
# subtract the y-gradient from the x-gradient
gradient = cv2.subtract(gradX, gradY)
gradient = cv2.convertScaleAbs(gradient)
# blur and threshold the image
blurred = cv2.blur(gradient, (9, 9))
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
# construct a closing kernel and apply it to the thresholded image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# perform a series of erosions and dilations
closed = cv2.erode(closed, None, iterations=4)
closed = cv2.dilate(closed, None, iterations=4)
# find the contours in the thresholded image
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
# if no contours were found, return None
if len(cnts) == 0:
return None
# otherwise, sort the contours by area and compute the rotated
# bounding box of the largest contour
c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
box = np.int0(box)
return box
# draw a bounding box around the detected barcode and display the frame
def process(image,box):
min_y = int(np.min(box[:,-1]))
max_y = int(np.max(box[:,-1]))
min_x = int(np.min(box[:,0]))
max_x = int(np.max(box[:,0]))
image = image[min_y:max_y, min_x:max_x]
barcodes = pyzbar.decode(image)
# Processing the barcode #
for barcode in barcodes:
(x, y, w, h) = barcode.rect
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
barcodeData = barcode.data.decode("utf-8")
barcodeType = barcode.type
text = "{} ({})".format(barcodeData, barcodeType)
return text
detect_barcode.py
# import the necessary packages
import simple_barcode_detection
from imutils.video import VideoStream
import argparse
import time
import cv2
from pyzbar import pyzbar
import imutils
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
args = vars(ap.parse_args())
# if the video path was not supplied, grab the reference to the
# camera
if not args.get("video", False):
vs = VideoStream(src=0).start()
time.sleep(2.0)
# otherwise, load the video
else:
vs = cv2.VideoCapture(args["video"])
# keep looping over the frames
while True:
frame = vs.read()
frame = frame[1] if args.get("video", False) else frame
if frame is None:
break
box = simple_barcode_detection.detect(frame)
if box is not None:
cv2.putText(frame, text, (x, y - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.imshow("Frame", frame)
# cv2.imshow("Frame", image)
key = cv2.waitKey(1) & 0xFF
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# if we are not using a video file, stop the video file stream
if not args.get("video", False):
vs.stop()
# otherwise, release the camera pointer
else:
vs.release()
# close all windows
cv2.destroyAllWindows()
Error:
line 30, in <module>
    cv2.putText(frame, text, (x, y - 10),
NameError: name 'text' is not defined
Additionally, I found a research paper where YOLO and pyzbar are used to identify and decode the barcode.
Question:
Is there any possibility that we can take the returned bounding box locations and send them to pyzbar so that it scans that region for barcodes?
How does pyzbar work, and can we create a similar library of our own?
Reference images:
Drone not able to scan barcode
Drone not able to scan barcodes of this size
Links:
https://www.pyimagesearch.com/2014/12/15/real-time-barcode-detection-video-python-opencv/
https://d1wqtxts1xzle7.cloudfront.net/63796937/rahman-2019-ijais-451835_IJAIS20200701-113597-137d8us-with-cover-page-v2.pdf?Expires=1637915838&Signature=geCHJHKsaMnDCJMzNAz-OHHjxXSdX~rLtTf-MO0gGuNutOnHl5x33q8-Xiab4haQGnhuE8ektKO4Ah2REj9nebwfwnO3GYxBGRqZgqMsK63375AUQV7YsTjJz9Qwp~OwUa9st2h4a6~ep3eSwvCjCX-Dl-g612osElU6ATgdI4DGqqaaat-QuLAmjQqywXTZrTWs0~nLvVeZBLEJRnNbcphtlJPv1yM35Ix2AhiwKhax4X4qCqLR7Wzt3XR5IaW33X3zSPNoo0QhDLEZrEFG0m-Hi156fsq488mC4n6Udyoi8KNhaUxqQ316b7Zru7XF1z1UaBbGU44O4nuI5AtflA__&Key-Pair-Id=APKAJLOHF5GGSLRBV4ZA
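On the first question: yes, and the process() function above already does exactly that: it crops the frame to the detected box and hands the cropped region to pyzbar.decode(). The NameError happens because detect_barcode.py never calls process(), so text (and x, y) are never defined in that file. A minimal sketch of how the main loop could wire the two files together (untested; note that process() returns None when pyzbar decodes nothing in the crop):

# inside the while True loop of detect_barcode.py:
box = simple_barcode_detection.detect(frame)
if box is not None:
    text = simple_barcode_detection.process(frame, box)
    x, y = int(box[:, 0].min()), int(box[:, 1].min())  # top-left corner of the box
    cv2.drawContours(frame, [box], -1, (0, 255, 0), 2)  # draw the rotated box
    if text:
        cv2.putText(frame, text, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.imshow("Frame", frame)

As for how pyzbar works: it is a thin Python wrapper (via ctypes) around the zbar C library, which scans the image for barcode-like patterns and decodes them; a similar wrapper could in principle be written around any other decoding library.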
I am learning OpenCV. Here is my code:
import cv2
face_patterns = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
sample_image = cv2.imread('1.jpg')
gray = cv2.cvtColor(sample_image, cv2.COLOR_BGR2GRAY)  # cv2.imread returns BGR, so use COLOR_BGR2GRAY
faces = face_patterns.detectMultiScale(gray,1.3,5)
print(len(faces))
for (x, y, w, h) in faces:
cv2.rectangle(sample_image, (x, y), (x+w, y+h), (0, 255, 0), 2)
cv2.imwrite('result.jpg', sample_image)
If I use picture A, I get a lot of faces; if I use picture B, I get none.
I changed the arguments in detectMultiScale(gray, 1.3, 5) many times, and it still doesn't work.
Picture A
Picture A Result
Picture B no face
I see this more as a limitation of the cv2 Haar cascades themselves; there are better models for detecting faces. The face_recognition library is also very useful for detecting and recognizing faces. It uses HOG as its default model. You can also use CNN for better accuracy, but detection will be slower.
Find more here.
import cv2
import face_recognition as fr
sample_image = fr.load_image_file("1.jpg")
unknown_face_loc = fr.face_locations(sample_image, model="hog")
print(len(unknown_face_loc)) #detected face count
for faceloc in unknown_face_loc:
y1, x2, y2, x1 = faceloc
cv2.rectangle(sample_image, (x1, y1), (x2, y2), (0, 0, 255), 2)
sample_image = sample_image[:, :, ::-1]  # convert RGB (face_recognition) back to BGR for cv2.imwrite
cv2.imwrite("result.jpg", sample_image)
Instead of
faces = face_patterns.detectMultiScale(gray, 1.3, 5)
try using
faces = face_patterns.detectMultiScale(blackandwhite, 1.3, 5)
(where blackandwhite is your grayscale image). If the problem occurs even after this, check out my code for face detection below.
cascade_classifier = cv2.CascadeClassifier('haarcascades/haarcascade_eye.xml')  # note: this is the *eye* cascade; use haarcascade_frontalface_default.xml for faces
cap = cv2.VideoCapture(0)
while True:
# Capture frame-by-frame
ret, frame = cap.read()
# Our operations on the frame come here
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale for the cascade
detections = cascade_classifier.detectMultiScale(gray,scaleFactor=1.3,minNeighbors=5)
if(len(detections) > 0):
(x,y,w,h) = detections[0]
frame = cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
# for (x,y,w,h) in detections:
# frame = cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
# Display the resulting frame
cv2.imshow('frame',frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
I am trying to make a filter using OpenCV in which I put glasses over the eyes in a live video capture feed. The problem I am facing is that the video feed starts off with a good-quality overlay of the glasses image, but with each frame the quality of the glasses decreases and their height slowly increases, frame by frame.
Here is my code:
import cv2
face_Cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_Cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "frontalEyes35x16.xml")
nose_Cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "Nose18x15.xml")
glasses = cv2.imread('glasses.png', -1)
mustache = cv2.imread('mustache.png',-1)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if ret == False:
        # check before touching the frame; cap.read() returns None on failure
        continue
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # convert to BGRA so that we can use the glasses' and mustache's alpha values;
    # otherwise we get a white box around them
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
faces = face_Cascade.detectMultiScale(gray_frame, 1.3, 5)
for (x,y,w,h) in faces:
#cv2.rectangle(frame, (x,y), (x+w, y+h), (255,255,255),3)
roi_gray = gray_frame[y:y+h , x:x+w]
roi_color = frame[y:y+h , x:x+w]
eyes = eye_Cascade.detectMultiScale(roi_gray, 1.3, 5)
for (ex,ey,ew,eh) in eyes:
#cv2.rectangle(roi_color, (ex,ey), (ex+ew, ey+eh), (0,255,0),3)
roi_eye_gray = roi_gray[ey:ey+eh, ex:ex+ew]
roi_eye_color = roi_color[ey:ey+eh, ex:ex+ew]
glasses = cv2.resize(glasses, (ew,eh), interpolation = cv2.INTER_AREA)
            gw, gh, gc = glasses.shape  # note: .shape is (rows, cols, channels), so gw is actually the height
            # Iterate over every pixel of the glasses image and copy each
            # non-transparent pixel into roi_color
for i in range (0,gw):
for j in range(0,gh):
                    if glasses[i, j][3] != 0:
                        # index 3 is the alpha channel; a value of 0 means fully
                        # transparent, so we leave that pixel of roi_color unchanged
                        roi_color[ey + i, ex + j] = glasses[i, j]
#nose = nose_Cascade.detectMultiScale(roi_gray, 1.3, 5)
#for (nx,ny,nw,nh) in nose:
#cv2.rectangle(roi_color, (nx,ny), (nx+nw, ny+nh), (255,0,0),3)
#roi_nose_gray = roi_gray[ny:ny+nh , nx:nx+nw]
#roi_nose_color = roi_color[ny:ny+nh , nx:nx+nw]
cv2.imshow("Video Frame",frame)
frame = cv2.cvtColor(frame , cv2.COLOR_BGRA2BGR)
    # Wait for the user to press 's', then stop the loop
    key_pressed = cv2.waitKey(1) & 0xFF  # mask waitKey's return value down to 8 bits
if key_pressed == ord('s'):
break
cap.release()
cv2.destroyAllWindows()
It is happening at this line:
glasses = cv2.resize(glasses, (ew,eh), interpolation = cv2.INTER_AREA)
because you keep resizing the glasses up and down at every iteration, overwriting the original, so the same pair of glasses gets made bigger, then smaller, then bigger, accumulating resampling artifacts each time.
Instead, you should start from the original, high-quality glasses rather than from the resized glasses from the previous frame. So, outside the loop, change this line:
glasses = cv2.imread('glasses.png', -1)
to
origGlasses = cv2.imread('glasses.png', -1)
And inside the loop, change this line:
glasses = cv2.resize(glasses, (ew,eh), interpolation = cv2.INTER_AREA)
to:
glasses = cv2.resize(origGlasses, (ew,eh), interpolation = cv2.INTER_AREA)
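Putting the two changes together, the relevant part of the loop becomes (a sketch of the corrected structure only, not the full script):

origGlasses = cv2.imread('glasses.png', -1)  # load the full-quality original once, outside the loop
...
        for (ex, ey, ew, eh) in eyes:
            # always resize from the pristine original, never from last frame's result
            glasses = cv2.resize(origGlasses, (ew, eh), interpolation=cv2.INTER_AREA)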
I'm trying to modify this code to track multiple objects of the same color and draw the path each object travels. Currently the code tracks only the largest object of the given color, and the travel path disappears as the object moves around the video. Finally, I could use some guidance on how to create a new video file capturing the paths. This is my first post, so I'm not sure if the code was posted correctly, lol. Go easy on me ;)
from collections import deque
import numpy as np
import argparse
import imutils
import cv2
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
help="max buffer size")
args = vars(ap.parse_args())
# define the lower and upper boundaries of the "green"
# object in the HSV color space, then initialize the
# list of tracked points
greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)
pts = deque(maxlen=args["buffer"])
# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
camera = cv2.VideoCapture(args["video"])
# keep looping
while True:
# grab the current frame
(grabbed, frame) = camera.read()
# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
if args.get("video") and not grabbed:
break
# resize the frame, blur it, and convert it to the HSV
# color space
frame = imutils.resize(frame, width=600)
# blurred = cv2.GaussianBlur(frame, (11, 11), 0)
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# construct a mask for the color "green", then perform
# a series of dilations and erosions to remove any small
# blobs left in the mask
mask = cv2.inRange(hsv, greenLower, greenUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
# find contours in the mask and initialize the current
# (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
# then update the list of tracked points
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# update the points queue
pts.appendleft(center)
# loop over the set of tracked points
    for i in range(1, len(pts)):  # range, not Python 2's xrange
# if either of the tracked points are None, ignore
# them
if pts[i - 1] is None or pts[i] is None:
continue
# otherwise, compute the thickness of the line and
# draw the connecting lines
thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
# show the frame to our screen
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
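A possible direction for both asks: keep every sufficiently large contour (with one deque of path points per object) instead of only the largest, and record the annotated frames with cv2.VideoWriter. A minimal sketch (names like writer and all_pts are mine; the naive index-based matching assumes objects keep roughly the same order between frames, so paths can swap when objects cross):

from collections import deque
import cv2
import numpy as np

greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)

camera = cv2.VideoCapture(0)
writer = None                      # created lazily once the frame size is known
all_pts = []                       # one deque of path points per tracked object

while True:
    grabbed, frame = camera.read()
    if not grabbed:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, greenLower, greenUpper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
    # keep every sufficiently large contour instead of only the largest one
    big = [c for c in cnts if cv2.contourArea(c) > 100]
    for idx, c in enumerate(big):
        M = cv2.moments(c)
        if M["m00"] == 0:
            continue
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
        while len(all_pts) <= idx:          # grow the per-object point lists as needed
            all_pts.append(deque(maxlen=64))
        all_pts[idx].appendleft(center)
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255), 2)

    # draw each object's path
    for pts in all_pts:
        for i in range(1, len(pts)):
            if pts[i - 1] is None or pts[i] is None:
                continue
            cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), 2)

    # record the annotated frame
    if writer is None:
        h, w = frame.shape[:2]
        writer = cv2.VideoWriter("paths.avi",
                                 cv2.VideoWriter_fourcc(*"MJPG"), 20.0, (w, h))
    writer.write(frame)

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

camera.release()
if writer is not None:
    writer.release()
cv2.destroyAllWindows()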