I am trying to write a gender recognition program; below is the code.
import caffe
import os
import numpy as np
import sys
import cv2
import time
#Models root folder
models_path = "./models"
#Loading the mean image
mean_filename=os.path.join(models_path,'./mean.binaryproto')
proto_data = open(mean_filename, "rb").read()
a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
mean_image = caffe.io.blobproto_to_array(a)[0]
#Loading the gender network
gender_net_pretrained=os.path.join(models_path,
'./gender_net.caffemodel')
gender_net_model_file=os.path.join(models_path,
'./deploy_gender.prototxt')
gender_net = caffe.Classifier(gender_net_model_file, gender_net_pretrained)
#Reshaping mean input image
mean_image = np.transpose(mean_image,(2,1,0))
#Gender labels
gender_list=['Male','Female']
#cv2 Haar Face detector
face_cascade=cv2.CascadeClassifier(os.path.join
(models_path,'haarcascade_frontalface_default.xml'))
#Getting prediction from live camera
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if ret is True:
        start_time = time.time()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = face_cascade.detectMultiScale(frame_gray, 1.3, 5)
        #Finding the largest face
        if len(rects) >= 1:
            rect_area = [rects[i][2]*rects[i][3] for i in range(len(rects))]
            rect = rects[np.argmax(rect_area)]
            x, y, w, h = rect
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
            roi_color = frame[y:y+h, x:x+w]
            #Resizing the face image
            crop = cv2.resize(roi_color, (256, 256))
            #Subtraction from mean file
            #input_image = crop - mean_image
            input_image = rect
            #Getting the prediction
            start_prediction = time.time()
            prediction = gender_net.predict([input_image])
            gender = gender_list[prediction[0].argmax()]
            print("Time taken by DeepNet model: {}".format(time.time()-start_prediction))
            print(prediction, gender)
            cv2.putText(frame, gender, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        print("Total Time taken to process: {}".format(time.time()-start_time))
        #Showing output
        cv2.imshow("Gender Detection", frame)
        cv2.waitKey(1)
#Delete objects
cap.release()
cv2.destroyAllWindows()
When I run it, I get this error:
a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
AttributeError: 'module' object has no attribute 'io'
How can I solve it? I am using the cnn_gender_age_prediction model, and I want to make a real-time gender recognition program using Python and this model.
io is a module in the caffe package. When you type import caffe, Python does not automatically import every submodule of the caffe package, including io. There are two solutions.
First one: import caffe.io manually
import caffe
import caffe.io
Second one: update to the latest Caffe version, in which you should find this line in __init__.py under the python/caffe directory:
from . import io
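With the explicit import in place, loading the mean file from the question works again. A minimal sketch (assuming the same ./models/mean.binaryproto path as in the question):
import os
import caffe
import caffe.io   # makes caffe.io (and caffe.io.caffe_pb2) available explicitly

models_path = "./models"
mean_filename = os.path.join(models_path, 'mean.binaryproto')
with open(mean_filename, "rb") as f:
    blob = caffe.io.caffe_pb2.BlobProto.FromString(f.read())
# take the first image from the array returned by blobproto_to_array
mean_image = caffe.io.blobproto_to_array(blob)[0]
print(mean_image.shape)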
I want to capture the bounding boxes recognized while using YOLOv4 webcam recognition, so I used this code:
import cv2
import detect as dt
from darknet import Darknet
from PIL import Image
vidcap = cv2.VideoCapture(0)
success, image = vidcap.read()
count = 0
m = Darknet('darknet/data/yolo-obj.cfg')
m.load_weights('darknet/backup/yolo-obj_30000.weights')
use_cuda = 1
m.cuda()
while success:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    im_pil = Image.fromarray(image)
    im_pil = im_pil.resize((m.width, m.height))
    boxes = dt.do_detect(m, im_pil, 0.5, 0.4, use_cuda)
    result = open('Desktop/captureyolobox/capture%04d.jpg'%(count), 'w')
    for i in range(len(boxes)):
        result.write(boxes[i])
    count = count + 1
    success, image = vidcap.read()
    result.close()
I've encountered this problem. I searched the web for a solution but couldn't find one. Can you help me?
Traceback (most recent call last):
File "yoloshort.py", line 2, in <module>
import detect as dt
ImportError: No module named detect
Do you mean detect_image in darknet.py? You can check whether darknet.py has the function you want or not.
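As a sketch of what that change could look like (detect_image and its arguments here are assumptions that mirror your dt.do_detect call; use whatever function your darknet.py actually defines):
import cv2
from PIL import Image
from darknet import Darknet, detect_image  # assumption: detect_image is defined in your darknet.py

m = Darknet('darknet/data/yolo-obj.cfg')
m.load_weights('darknet/backup/yolo-obj_30000.weights')
m.cuda()

vidcap = cv2.VideoCapture(0)
success, image = vidcap.read()
if success:
    im_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).resize((m.width, m.height))
    # hypothetical call, using the same thresholds as your do_detect usage
    boxes = detect_image(m, im_pil, 0.5, 0.4, use_cuda=1)
    print(boxes)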
I'm learning OpenCV via the following links: link, link2. I got an error related to the image training process for face recognition. Please correct me or help with the problem I'm having. Thank you.
System Information :
OpenCV 4.5.2.52
Python 3.9.5
Detailed Description
I need to train images using the code below, and the result was:
- OpenCV/faces-train.py", line 50, in <module>
recognizer.train(x_train, np.array(y_labels))
cv2.error: OpenCV(4.5.2) :-1: error: (-5:Bad argument) in function 'train'
> Overload resolution failed:
> - src is not a numpy array, neither a scalar
> - Expected Ptr<cv::UMat> for argument 'src'
Code
import cv2
import os
import numpy as np
from PIL import Image
import pickle
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
image_dir = os.path.join(BASE_DIR, "images")
face_cascade = cv2.CascadeClassifier('cascade/data/haarcascade_frontalface_alt2.xml')
recognizer = cv2.face.LBPHFaceRecognizer_create()
current_id = 0
label_ids = {}
y_labels = []
x_train = []
for root, dirs, files in os.walk(image_dir):
    for file in files:
        if file.endswith("png") or file.endswith("jpg") or file.endswith("jpeg"):
            path = os.path.join(root, file)
            label = os.path.basename(root).replace(" ", "-").lower()
            # print(label, path)
            if not label in label_ids:
                label_ids[label] = current_id
                current_id += 1
            id_ = label_ids[label]
            # print(label_ids)
            y_labels.append(label)
            x_train.append(path)
            pil_image = Image.open(path).convert("L")
            image_array = np.array(pil_image, "uint8")
            # print(image_array)
            faces = face_cascade.detectMultiScale(image_array, scaleFactor=1.5, minNeighbors=5)
            for (x, y, w, h) in faces:
                roi = image_array[y:y+h, x:x+w]
                x_train.append(roi)
                y_labels.append(id_)
# print(y_labels)
# print(x_train)
with open("labels.pickle", 'wb') as f:
    pickle.dump(label_ids, f)
recognizer.train(x_train, np.array(y_labels))
recognizer.save("trainner.yml")
remove the following lines:
y_labels.append(label)
x_train.append(path)
You don't really need to append the labels or the paths there: the face recognizer is trained on the cropped face arrays, so x_train should contain only NumPy image arrays and y_labels only the numeric ids. Appending the path string to x_train is what makes recognizer.train() fail with "src is not a numpy array", and appending the raw label string is likewise unnecessary, since id_ is already appended inside the face loop. Guessing this is what caused the error!
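For reference, here is a minimal sketch of the corrected inner loop, reusing the variables from the script above (path, id_, face_cascade, x_train, y_labels) and keeping only the face-ROI appends:
pil_image = Image.open(path).convert("L")   # grayscale PIL image
image_array = np.array(pil_image, "uint8")  # NumPy array the recognizer can use
faces = face_cascade.detectMultiScale(image_array, scaleFactor=1.5, minNeighbors=5)
for (x, y, w, h) in faces:
    roi = image_array[y:y+h, x:x+w]   # cropped face only
    x_train.append(roi)               # image data, not the file path
    y_labels.append(id_)              # numeric id, not the label string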
Do let me know if the fix works! Cheers :)
For a current project, I am trying to set up a video recognition program leveraging TensorFlow 2 and OpenCV (macOS Catalina).
When I run the script below with Python 3 through the terminal or via Jupyter, the green webcam light indicates that the camera is switched on, and no error messages appear. However, no video image/window shows up on my screen. I have tried various solutions, including adding camera screen frame data, none of which worked.
Does anyone know a smart tweak to make the camera image/window visible?
import os
import numpy as np
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
# Define the video stream
cap = cv2.VideoCapture(0) # Change only if you have more than one webcams
# What model to download.
# Models can be found here: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
MODEL_NAME = 'ssd_inception_v2_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
# Number of classes to detect
NUM_CLASSES = 90
# Download Model
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Helper code
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# Detection
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            # Read frame from camera
            ret, image_np = cap.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Extract image tensor
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Extract detection boxes
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Extract detection scores
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            # Extract detection classes
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            # Extract number of detections
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            # Display output
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
Have you tried passing -1 or 1 as the device index of the VideoCapture, just in case you haven't yet?
But first of all, you should figure out where it went wrong: verify that the system reads the frames properly.
You can try the following to test whether your camera is running and its frames are being read correctly:
import cv2 as cv

cap = cv.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open camera")
    exit()
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    # Our operations on the frame come here
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Display the resulting frame
    cv.imshow('frame', gray)
    if cv.waitKey(1) == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
cap.read() returns a bool (True/False). If the frame is read correctly, it will be True. So you can check for the end of the video by checking this returned value.
Sometimes, cap may not have initialized the capture. In that case, this code shows an error. You can check whether it is initialized or not by the method cap.isOpened(). If it is True, OK. Otherwise open it using cap.open().
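For example, a minimal re-open attempt could look like this (device index 0 assumed):
import cv2 as cv

cap = cv.VideoCapture(0)
if not cap.isOpened():   # the capture was not initialized
    cap.open(0)          # try to open the device explicitly
    if not cap.isOpened():
        print("Camera 0 could not be opened")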
Running the test above will help you and us determine which part has gone wrong, and further solutions can be suggested from there.
If the test shows no errors, this link is somewhat related; you can check it out.
Provide us the result so we can investigate further.
I'm currently working with Detectron2 for people detection in videos. I've been trying to run the following code to read a video file, make the prediction frame by frame, and record a video with the processed frames, but I am getting an empty video file. The environment I've created for this is in Colaboratory and has the following versions: Python 3.6, OpenCV 4.2.30.
I'm new to this, but I would truly appreciate it if someone could give me an idea.
This is the code:
#!/usr/bin/env python3
# -- coding: utf-8 --
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import cv2
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import time
cap = cv2.VideoCapture('piloto legger 1.mp4')
hasFrame, frame = cap.read()
FPS = cap.get(cv2.CAP_PROP_FPS)
frame_width = frame.shape[1]
frame_height = frame.shape[0]
video_writer = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), FPS, (frame_width, frame_height))
while cv2.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        cv2.waitKey()
        break
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set threshold for this model
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
    predictor = DefaultPredictor(cfg)
    outputs = predictor(frame)
    v = Visualizer(frame[:,:,::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    imagen = (v.get_image()[:, :, ::-1])
    cv2.imwrite('POSE detectron2.png', imagen)
    video_writer.write(imagen)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
I used your code as a starting point, and took some ideas from the Detectron2 examples in order to make it work.
The problem seems to have been something with the fourcc-argument of the VideoWriter, but may also have been related to your code using Visualizer instead of VideoVisualizer (and with a scale of 1.2, which made the image the wrong size for the VideoWriter).
The code below works for me (and is also a lot faster, as the predictor and visualizer are defined outside of the loop):
#!/usr/bin/env python3
# -- coding: utf-8 --
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import tqdm
import cv2
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog
import time
# Extract video properties
video = cv2.VideoCapture('video-input.mp4')
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
# Initialize video writer
video_writer = cv2.VideoWriter('out.mp4', fourcc=cv2.VideoWriter_fourcc(*"mp4v"), fps=float(frames_per_second), frameSize=(width, height), isColor=True)
# Initialize predictor
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set threshold for this model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
# Initialize visualizer
v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), ColorMode.IMAGE)
def runOnVideo(video, maxFrames):
    """ Runs the predictor on every frame in the video (unless maxFrames is given),
    and returns the frame with the predictions drawn.
    """
    readFrames = 0
    while True:
        hasFrame, frame = video.read()
        if not hasFrame:
            break
        # Get prediction results for this frame
        outputs = predictor(frame)
        # Make sure the frame is colored
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # Draw a visualization of the predictions using the video visualizer
        visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))
        # Convert Matplotlib RGB format to OpenCV BGR format
        visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)
        yield visualization
        readFrames += 1
        if readFrames > maxFrames:
            break

# Create a cut-off for debugging
num_frames = 120

# Enumerate the frames of the video
for visualization in tqdm.tqdm(runOnVideo(video, num_frames), total=num_frames):
    # Write test image
    cv2.imwrite('POSE detectron2.png', visualization)
    # Write to video file
    video_writer.write(visualization)

# Release resources
video.release()
video_writer.release()
cv2.destroyAllWindows()
Hello to all senior programmers! I have an error in the Eigenfaces image training part.
The error is: OpenCV Error: Unsupported format or combination of formats (In the Eigenfaces method all input samples (training images) must be of equal size! Expected 27889 pixels, but was 27556 pixels.) in cv::face::Eigenfaces::train, file C:\projects\opencv-python\opencv_contrib\modules\face\src\eigen_faces.cpp, line 68
This means my pictures are not all the same size. I tried cv2.resize() when capturing pictures from the camera, but it still doesn't work.
Here is my capture code:
import cv2
cam = cv2.VideoCapture(0)
detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
Id = input('enter your id: ')
sampleNum = 0
while(True):
    ret, img = cam.read()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = detector.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
        sampleNum = sampleNum + 1
        cv2.imwrite("dataSet/user." + Id + '.' + str(sampleNum) + ".jpg",
                    cv2.resize(gray[y:y+h, x:x+w], (70, 70)))
    cv2.imshow('frame', img)
    if cv2.waitKey(100) & 0xFF == ord('q'):  # waitKey is for delay in video capture
        break
    elif sampleNum >= 50:  # how many pictures to capture
        break
cam.release()
cv2.destroyAllWindows()
And here is the training part:
import cv2, os
import numpy as np
from PIL import Image

recognizer = cv2.face.EigenFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

def getImagesAndLabels(path):
    imagePaths = [os.path.join(path, f) for f in os.listdir(path)]
    faceSamples = []
    Ids = []
    for imagePath in imagePaths:
        pilImage = Image.open(imagePath).convert('L')
        imageNp = np.array(pilImage, 'uint8')
        Id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces = detector.detectMultiScale(imageNp)
        for (x, y, w, h) in faces:
            faceSamples.append(imageNp[y:y+h, x:x+w])
            Ids.append(Id)
    return faceSamples, Ids

faces, Ids = getImagesAndLabels('dataSet')
recognizer.train(faces, np.array(Ids))
recognizer.write('trainner/trainnerEi.yml')
P.S. I adapted this code from an LBPHFaceRecognizer example.
Thank you!
Okay, so Eigenfaces only works if all of the images have the same dimensions in pixel space.
That means if one image used in training is 28x28, then every other image used in training, as well as in testing, has to be 28x28.
If the image sizes are not the same, OpenCV will throw that error.
The error simply says that one of the images had 27889 pixels while another had 27556 pixels.
I would recommend using the cv2.resize() function to make all images the same size.
Use the code below as a reference for your training part:
import cv2, os
import numpy as np
from PIL import Image

recognizer = cv2.face.EigenFaceRecognizer_create()
detector = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

def getImagesAndLabels(path):
    width_d, height_d = 280, 280  # Declare your own width and height
    imagePaths = [os.path.join(path, f) for f in os.listdir(path)]
    faceSamples = []
    Ids = []
    for imagePath in imagePaths:
        pilImage = Image.open(imagePath).convert('L')
        imageNp = np.array(pilImage, 'uint8')
        Id = int(os.path.split(imagePath)[-1].split(".")[1])
        faces = detector.detectMultiScale(imageNp)
        for (x, y, w, h) in faces:
            ########################################
            # The line changed to use cv2.resize()
            ########################################
            faceSamples.append(cv2.resize(imageNp[y:y+h, x:x+w], (width_d, height_d)))
            Ids.append(Id)
    return faceSamples, Ids

faces, Ids = getImagesAndLabels('dataSet')
recognizer.train(faces, np.array(Ids))
recognizer.write('trainner/trainnerEi.yml')
Keep in mind that even the test images have to be the same size.
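For example, a rough sketch of prediction with the same resizing applied to a test image (the file name and the 280x280 size here just mirror the training code above):
import cv2

recognizer = cv2.face.EigenFaceRecognizer_create()
recognizer.read('trainner/trainnerEi.yml')        # model written by the training part
detector = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
width_d, height_d = 280, 280                      # must match the training size

test_img = cv2.imread('test.jpg', cv2.IMREAD_GRAYSCALE)   # example test image
for (x, y, w, h) in detector.detectMultiScale(test_img):
    face = cv2.resize(test_img[y:y+h, x:x+w], (width_d, height_d))
    Id, confidence = recognizer.predict(face)
    print(Id, confidence)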