Python loop not calling function a second time

The code below performs pose estimation for a face in a video. I have modified it to take a folder/directory as input, expecting it to process every video in that directory.
However, the 'for' loop below processes only one video and calls parse_video just once; the remaining videos are skipped.
if args.videoDirPath is not None:
    for videoName in os.listdir(folderName):
        print(videoName)
        video = cv2.VideoCapture(videoName)
        parse_video(video)
The folder (videoFolder) contains the following videos:
amir.mp4
arnab-srk.mp4
kanihya.mp4
simma.mp4
salman.mp4
Output:
opt/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py:207: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
  warnings.warn("The use of the transforms.Scale transform is deprecated, " +
simma.mp4
frameNumber : 1
amir.mp4
creating...output/frame1.jpg
creating...output/frame2.jpg
creating...output/frame3.jpg
creating...output/frame4.jpg
creating...output/frame5.jpg
frameNumber : 6
arnab-srk.mp4
frameNumber : 6
kanihya.mp4
frameNumber : 6
salman.mp4
frameNumber : 6
The output folder contains the following videos and a text file:
output-out-1.avi
output-out-6.avi
output-out.txt # blank
I run the program with the following parameters:
!python code/test_on_video_dlib.py --snapshot hopenet_alpha1.pkl --face_model mmod_human_face_detector.dat --directoryPath videoFolder --output_string out --n_frames 20 --fps 200
Code for 'test_on_video_dlib.py'
import sys, os, argparse
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.backends.cudnn as cudnn
import torchvision
import torch.nn.functional as F
from PIL import Image
import datasets, hopenet, utils
from skimage import io
import dlib
import face_alignment
from mpl_toolkits.mplot3d import Axes3D

def parse_video(video, nr):
    # New cv2
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))    # float
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # float

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output/video/output-{}-{}.avi'.format(args.output_string, nr), fourcc,
                          args.fps, (width, height))

    #frame_num = 1
    frame_num = nr  # add nr here also

    while frame_num <= args.n_frames:
        #print frame_num
        ret, frame = video.read()
        if ret == False:
            break

        # writing frames
        name = 'output/frame' + str(frame_num) + '.jpg'
        print("creating..." + name)
        cv2.imwrite(name, frame)

        cv2_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Dlib detect
        dets = cnn_face_detector(cv2_frame, 1)

        for idx, det in enumerate(dets):
            # Get x_min, y_min, x_max, y_max, conf
            x_min = det.rect.left()
            y_min = det.rect.top()
            x_max = det.rect.right()
            y_max = det.rect.bottom()
            conf = det.confidence

            if conf > 1.0:
                bbox_width = abs(x_max - x_min)
                bbox_height = abs(y_max - y_min)
                x_min -= 2 * bbox_width / 4
                x_max += 2 * bbox_width / 4
                y_min -= 3 * bbox_height / 4
                y_max += bbox_height / 4
                x_min = max(x_min, 0); y_min = max(y_min, 0)
                x_max = min(frame.shape[1], x_max); y_max = min(frame.shape[0], y_max)

                # Crop image
                img = cv2_frame[int(y_min):int(y_max), int(x_min):int(x_max)]
                img = Image.fromarray(img)

                # Transform
                img = transformations(img)
                img_shape = img.size()
                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
                img = Variable(img).cuda(gpu)

                yaw, pitch, roll = model(img)

                yaw_predicted = F.softmax(yaw, dim=1)
                pitch_predicted = F.softmax(pitch, dim=1)
                roll_predicted = F.softmax(roll, dim=1)

                # Get continuous predictions in degrees.
                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

                txt_out.write(('output/frame' + str(frame_num) + '.jpg') + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))

                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy = (y_min + y_max) / 2, size = bbox_height/2)

                # Plot expanded bounding box
                # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)

        out.write(frame)
        frame_num += 1

    out.release()
    video.release()
    return frame_num

def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='Head pose estimation using the Hopenet network.')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--snapshot', dest='snapshot', help='Path of model snapshot.',
                        default='', type=str)
    parser.add_argument('--face_model', dest='face_model', help='Path of DLIB face detection model.',
                        default='', type=str)
    parser.add_argument('--video', dest='video_path', help='Path of video')
    # code to pass video folder name
    parser.add_argument('--directoryPath', dest='videoDirPath', help="directory path containing all videos")
    parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
    parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int)
    parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()

    cudnn.enabled = True

    batch_size = 1
    gpu = args.gpu_id
    snapshot_path = args.snapshot
    out_dir = 'output/video'
    video_path = args.video_path
    # folder path code
    folderName = args.videoDirPath

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # ResNet50 structure
    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    # Dlib face detection model
    cnn_face_detector = dlib.cnn_face_detection_model_v1(args.face_model)

    #print 'Loading snapshot.'
    # Load snapshot
    saved_state_dict = torch.load(snapshot_path)
    model.load_state_dict(saved_state_dict)

    #print 'Loading data.'
    transformations = transforms.Compose([transforms.Scale(224),
                                          transforms.CenterCrop(224), transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

    model.cuda(gpu)

    #print 'Ready to test network.'
    # Test the Model
    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
    total = 0

    idx_tensor = [idx for idx in range(66)]
    idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)

    if args.video_path is not None:
        video = cv2.VideoCapture(video_path)
        parse_video(video)

    # THIS IS THE LOOP I AM REFERRING IN QUESTION
    nr = 1
    if args.videoDirPath is not None:
        for videoName in os.listdir(folderName):
            print(videoName)
            video = cv2.VideoCapture(videoName)
            nr = parse_video(video, nr)
Expected output:
I want each video in videoFolder to be processed, with its frames written to the output folder.

You have to use the correct path to the file, i.e. folderName/videoName:
for videoName in os.listdir(folderName):
    videoName = os.path.join(folderName, videoName)
    print(videoName)
    video = cv2.VideoCapture(videoName)
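For context: os.listdir returns bare filenames, and cv2.VideoCapture does not raise an error for a path it cannot open; the subsequent video.read() simply returns ret == False, so parse_video drops out of its loop without writing any frames. Below is a minimal sketch of the corrected loop, assuming the same parse_video(video, nr) signature as in the question; the extension filter and the isOpened() check are extra safeguards, not part of the original code.

import os
import cv2

nr = 1
if args.videoDirPath is not None:
    for videoName in sorted(os.listdir(folderName)):
        # skip non-video files such as output-out.txt (hypothetical safeguard)
        if not videoName.lower().endswith(('.mp4', '.avi', '.mkv')):
            continue
        videoPath = os.path.join(folderName, videoName)  # full path, not just the bare name
        print(videoPath)
        video = cv2.VideoCapture(videoPath)
        if not video.isOpened():
            print("could not open", videoPath)
            continue
        nr = parse_video(video, nr)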

Related

how to maintain the surround view cameras (4 camera images) position in the point cloud

I am trying to generate a 3D point cloud from 4 RGB-D images. I am able to do that with Open3D, but I am unable to maintain the position of the images. You can find the camera_parameters.json here.
import open3d as o3d
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import json

def load_json(path):
    with open(path) as f:
        return json.load(f)

def parse_camera_params(camera):
    param = {}
    param['camera'] = camera['camera']
    param['depth'] = {}
    param['depth']['fx'] = camera['K_depth'][0][0]
    param['depth']['fy'] = camera['K_depth'][1][1]
    param['depth']['cx'] = camera['K_depth'][0][2]
    param['depth']['cy'] = camera['K_depth'][1][2]
    param['depth']['K'] = camera['K_depth']
    # ignore distCoeffs_depth's 5th (1000) and 6th (0) element
    # since they are strange
    param['depth']['distCoeffs'] = np.array(camera['distCoeffs_depth'][:5])
    param['depth_width'] = camera['depth_width']
    param['depth_height'] = camera['depth_height']
    param['color'] = {}
    param['color']['fx'] = camera['K_color'][0][0]
    param['color']['fy'] = camera['K_color'][1][1]
    param['color']['cx'] = camera['K_color'][0][2]
    param['color']['cy'] = camera['K_color'][1][2]
    param['color']['K'] = camera['K_color']
    # ignore distCoeffs_color's 5th (1000) and 6th (0) element
    # since they are strange
    param['color']['distCoeffs'] = np.array(camera['distCoeffs_color'][:5])
    param['color_width'] = camera['color_width']
    param['color_height'] = camera['color_height']
    # world to depth
    w2d_T = np.array(camera['M_world2sensor'])
    param['w2d_R'] = w2d_T[0:3, 0:3]
    param['w2d_t'] = w2d_T[0:3, 3]
    param['w2d_T'] = camera['M_world2sensor']
    d2w_T = np.linalg.inv(w2d_T)
    param['d2w_R'] = d2w_T[0:3, 0:3]
    param['d2w_t'] = d2w_T[0:3, 3]
    param['d2w_T'] = d2w_T
    return param

if __name__ == '__main__':
    data_dir = "data/"
    camera_params = load_json(os.path.join(data_dir, 'camera_parameters.json'))
    SVC_NUM = 4
    pcd_combined = o3d.geometry.PointCloud()
    for i in range(SVC_NUM):
        param = parse_camera_params(camera_params['sensors'][i])
        color = cv2.imread(os.path.join(data_dir, 'color_{:05d}.png'.format(i)))
        color = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)
        depth = cv2.imread(os.path.join(data_dir, 'depth_{:05d}.png'.format(i)), -1)
        # depth = depth * 0.001 # factor to scale the depth image from carla
        o3d_color = o3d.geometry.Image(color)
        o3d_depth = o3d.geometry.Image(depth)
        rgbd_image = o3d.geometry.RGBDImage.create_from_tum_format(o3d_color, o3d_depth, False)
        h, w = depth.shape
        dfx, dfy, dcx, dcy = param['depth']['fx'], param['depth']['fy'], param['depth']['cx'], param['depth']['cy']
        intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, dfx, dfy, dcx, dcy)
        intrinsic.intrinsic_matrix = param['depth']['K']
        cam = o3d.camera.PinholeCameraParameters()
        cam.intrinsic = intrinsic
        cam.extrinsic = np.array(param['w2d_T'])
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, cam.intrinsic, cam.extrinsic)
        o3d.io.write_point_cloud("svc_{:05d}_v13.pcd".format(i), pcd)
        pcd_combined += pcd
    o3d.io.write_point_cloud("svc_global_v13.pcd", pcd_combined)
With the above code I get an svc_global_v13.pcd like the one shown below.
As you can see, all the images are projected into the center. As indicated in the JSON file, I would like the images to be positioned as left, right, front and rear in the 3D point cloud.
May I know what I am missing here?
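For reference, here is a hedged sketch of one way to make the sensor-to-world placement explicit inside the loop above, assuming M_world2sensor is a 4x4 world-to-sensor matrix as parsed in parse_camera_params; this only illustrates the transform step and is not a confirmed fix for the issue.

# Build each cloud in the depth-camera frame (default identity extrinsic),
# then move it into the world frame with the inverted world-to-sensor matrix.
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, cam.intrinsic)
pcd.transform(np.array(param['d2w_T']))  # sensor -> world
pcd_combined += pcd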

cosine similarity and euclidean distance between features of same sample and features of different sample are almost same in tf.keras

I have extracted features from the face and the left and right iris using MobileNetV2 transfer learning in Keras/TensorFlow, and then concatenated the features. I have computed the cosine similarity, Euclidean distance and squared Euclidean distance between the concatenated features, but for all of these metrics the scores show little difference between pairs from the same sample and pairs from different samples. I would like same-sample pairs to have smaller distances than different-sample pairs. I think I am doing something fundamentally wrong somewhere. I also want to plot probability distributions from the scores, such that the distributions for the two kinds of pairs do not overlap.
Attached is my code. Any help would be much appreciated. Thank you.
import pandas as pd
import datetime
from scipy.spatial import distance
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import backend as K
import h5py
import argparse
import random
import math
import tensorflow as tf
modelPath_face = 'HACK-FACE-SMALL-5-02.hdf5'
modelPath_liris = 'HACK-LIRIS-SMALL-5-02.hdf5'
modelPath_riris = 'HACK-RIRIS-SMALL-5-02.hdf5'
def cosine_distance(vests):
x, y = vests
x = K.l2_normalize(x, axis=-1)
y = K.l2_normalize(y, axis=-1)
return -K.mean(x * y, axis=-1, keepdims=True)
def compute_cosine_distances(a, b):
# x shape is n_a * dim
# y shape is n_b * dim
# results shape is n_a * n_b
normalize_a = tf.nn.l2_normalize(a, 1)
normalize_b = tf.nn.l2_normalize(b, 1)
distance = 1 - tf.matmul(normalize_a, normalize_b, transpose_b=True)
return distance
def cos_dist_output_shape(shapes):
shape1, shape2 = shapes
return (shape1[0], 1)
def eudis(v1, v2):
dist = [(a - b) ** 2 for a, b in zip(v1, v2)]
dist = math.sqrt(sum(dist))
return dist
def Extract(lst):
return [item[0] for item in lst]
if __name__ == '__main__':
import cv2
ap = argparse.ArgumentParser()
ap.add_argument("-df", "--db-face", default=modelPath_face, help="path to face HDF5 file")
ap.add_argument("-dli", "--db-liris", default=modelPath_liris, help="path to left iris HDF5 file")
ap.add_argument("-dri", "--db-riris", default=modelPath_riris, help="path to right iris HDF5 file")
ap.add_argument("-b", "--batch-size", type=int, default=8,
help="batch size of images to be passed through network")
ap.add_argument("-s", "--buffer-size", type=int, default=1000, help="size of feature extraction buffer")
ap.add_argument('--num-classes', default=53, type=int, metavar='NC',
help='number of classes (default: 10000)')
args = vars(ap.parse_args())
le = LabelEncoder()
db_face = h5py.File(args["db_face"], "r")
db_liris = h5py.File(args["db_liris"], "r")
db_riris = h5py.File(args["db_riris"], "r")
face_features = db_face["face_features"][:]
liris_features = db_liris["liris_features"][:]
riris_features = db_riris["riris_features"][:]
face_labels = db_face["labels"][:]
liris_labels = db_liris["labels"][:]
riris_labels = db_riris["labels"][:]
n_classes_to_find = le.fit_transform(face_labels)
n_classes = len(le.classes_)
args['num_classes'] = n_classes
face_labels = tf.keras.utils.to_categorical(face_labels, num_classes=args['num_classes'])
liris_labels = tf.keras.utils.to_categorical(liris_labels, num_classes=args['num_classes'])
riris_labels = tf.keras.utils.to_categorical(riris_labels, num_classes=args['num_classes'])
all_features = np.concatenate([face_features, liris_features, riris_features], axis=1)
all_labels = np.concatenate([face_labels, liris_labels, riris_labels], axis=1)
all_labels = np.argmax(all_labels, axis=-1)
stacked_all_ft = np.column_stack((all_features, all_labels))
columnIndex = stacked_all_ft.shape[1] - 1
sorted_stacked_all_ft = stacked_all_ft[stacked_all_ft[:, columnIndex].argsort()]
mylabels = sorted_stacked_all_ft[:, -1]
myfeatures = sorted_stacked_all_ft[:, :-1]
unique_labels = np.unique(mylabels)
myfeatures_shape = myfeatures.shape
# pca = PCA(0.9)
#
# pca.fit(myfeatures)
#
# PCA(copy=True, iterated_power='auto', n_components=0.9, random_state=None,
# svd_solver='auto', tol=0.0, whiten=False)
#
# print(pca.n_components_)
#
# myfeatures_pca = pca.transform(myfeatures)
df_pos_pair = pd.DataFrame()
df_neg_pair = pd.DataFrame()
positive_counter = 0
general_counter = 0
start_time = datetime.datetime.now().replace(microsecond=0)
print("COMPARISON OF ALL FILES: Started")
myfeatures_pca = myfeatures
for i in range(myfeatures_pca.shape[0]):
for j in range(myfeatures_pca.shape[0]):
if j > i:
general_counter += 1
pair_pos = mylabels[j] == mylabels[i]
if pair_pos:
positive_counter += 1
try:
f1 = myfeatures_pca[i]
f2 = myfeatures_pca[j]
# score = tf.keras.losses.cosine_similarity(f1, f2, axis=-1)
# score = score.numpy()
# aa = f1.reshape(1, -1)
# ba = f2.reshape(1, -1)
# score = cosine_similarity(aa, ba)
# score = score[0][0]
# score = eudis(f1, f2)
score = distance.sqeuclidean(f1, f2)
# if i in data1.index and j in data1.columns:
df_pos_pair = df_pos_pair.append({'score': score}, ignore_index=True)
general_counter += 1
if general_counter % 25 == 0:
end_time = datetime.datetime.now().replace(microsecond=0)
print("COMPLETED OF ALL FILES: {}, TOTAL TIME: {}".format(general_counter,
(end_time - start_time)))
except Exception as e:
print(e)
else:
for k2 in range(positive_counter):
r = random.randint(j + 1, myfeatures_pca.shape[0] - 2)
if mylabels[r] != mylabels[i]:
try:
f3 = myfeatures_pca[i]
f4 = myfeatures_pca[r]
# scoren = tf.keras.losses.cosine_similarity(f3, f4, axis=-1)
# scoren = score.numpy()
scoren = distance.sqeuclidean(f3, f4)
# aa2 = f3.reshape(1, -1)
# ba2 = f4.reshape(1, -1)
# scoren = cosine_similarity(aa2, ba2)
# scoren = scoren[0][0]
# scoren = eudis(f3, f4)
df_neg_pair = df_neg_pair.append({'score': scoren}, ignore_index=True)
general_counter += 1
end_time = datetime.datetime.now().replace(microsecond=0)
print("COMPLETED OF ALL FILES: {}, TOTAL TIME: {}".format(general_counter,
(end_time - start_time)))
except Exception as e:
print(e)
positive_counter = 0
break
end_time = datetime.datetime.now().replace(microsecond=0)
print("COMPARISON OF ALL FILES: Ended in {} ".format((end_time - start_time)))
df_neg_pair.to_csv("sqed_face_iris_neg_scores_pca_feb_01.csv", index=False)
df_pos_pair.to_csv("sqed_face_iris_pos_scores_pca_feb_01.csv", index=False)
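As a side note, here is a hedged sketch of a vectorized way to collect same-label and different-label squared Euclidean scores with SciPy's cdist instead of nested Python loops; myfeatures and mylabels are assumed to be the arrays built in the script above, and for very large feature matrices the full distance matrix may not fit in memory.

import numpy as np
from scipy.spatial.distance import cdist

# All pairwise squared Euclidean distances between feature rows at once.
d = cdist(myfeatures, myfeatures, metric='sqeuclidean')

# Masks for same-label and different-label pairs; the upper triangle keeps
# each unordered pair once and skips the diagonal (self-comparisons).
same = mylabels[:, None] == mylabels[None, :]
upper = np.triu(np.ones_like(same, dtype=bool), k=1)

pos_scores = d[same & upper]   # same-sample pairs
neg_scores = d[~same & upper]  # different-sample pairs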

Realtime yolov5 detection with Desktop screen as input

I have a script that grabs an application's screenshot and displays it. It works quite nicely on my machine, like a video at around 60 FPS.
import os
os.getcwd()
from PIL import ImageGrab
import numpy as np
import cv2
import pyautogui
import win32gui
import time
from mss import mss
from PIL import Image
import tempfile

os.system('calc')

sct = mss()
xx = 1
tstart = time.time()
while xx < 10000:
    hwnd = win32gui.FindWindow(None, 'Calculator')
    left_x, top_y, right_x, bottom_y = win32gui.GetWindowRect(hwnd)
    #screen = np.array(ImageGrab.grab( bbox = (left_x, top_y, right_x, bottom_y ) ) )
    bbox = {'top': top_y, 'left': left_x, 'width': right_x - left_x, 'height': bottom_y - top_y}
    screen = sct.grab(bbox)
    scr = np.array(screen)
    cv2.imshow('window', scr)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    xx += 1
cv2.destroyAllWindows()
tend = time.time()
print(xx / (tend - tstart))
print((tend - tstart))
os.system('taskkill /f /im calculator.exe')
I would like to run yolov5's detect.py on this scr image without having to save it to disk every time. I'd also like to show the images with bounding boxes and have their coordinates saved somewhere.
My Python level is not good enough: I tried importing detect and adding arguments, but it doesn't seem to accept function parameters, only command-line arguments.
Perhaps I should adapt this line, or use OpenCV?
parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam
Any ideas? Thanks. (This is the detect.py file for yolov5.)
import argparse
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://'))
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
else:
p, s, im0 = Path(path), '', im0s
save_path = str(save_dir / p.name)
txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or view_img: # Add bbox to image
label = '%s %.2f' % (names[int(cls)], conf)
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Print time (inference + NMS)
print('%sDone. (%.3fs)' % (s, t2 - t1))
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'images':
cv2.imwrite(save_path, im0)
else:
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {save_dir}{s}")
print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()
print(opt)
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
EDIT: I already have weights saved and am able to run detect on images that are saved to disk; I would just like to skip that step to keep those FPS.
The Yolov5 repo is here
For standalone inference in third-party projects or repos, importing your model into the Python workspace with PyTorch Hub is the recommended method. See the YOLOv5 PyTorch Hub tutorial here, specifically the section on loading custom models.
https://github.com/ultralytics/yolov5#tutorials
Custom Models
This example loads a custom 20-class VOC-trained YOLOv5s model 'yolov5s_voc_best.pt' with PyTorch Hub.
import torch
model = torch.hub.load('ultralytics/yolov5', 'custom', path_or_model='yolov5s_voc_best.pt')
model = model.autoshape() # for PIL/cv2/np inputs and NMS
Then once the model is loaded:
from PIL import Image
# Images
img1 = Image.open('zidane.jpg')
img2 = Image.open('bus.jpg')
imgs = [img1, img2] # batched list of images
# Inference
result = model(imgs, size=640) # includes NMS
result.print()
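Since the question also asks for the box coordinates to be saved somewhere, here is a small hedged sketch of pulling them out of the Results object returned above (the CSV filename is arbitrary):

# Detections for the first image as a pandas DataFrame with
# xmin, ymin, xmax, ymax, confidence, class and name columns.
df = result.pandas().xyxy[0]
print(df)
df.to_csv('detections.csv', index=False)  # persist the coordinates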
import cv2
import torch
from mss import mss
import numpy as np
from PIL import Image  # needed for Image.frombytes below (missing in the original snippet)

model = torch.hub.load("/yolov5", 'custom', path="yolov5/best.pt", source='local')
sct = mss()

while 1:
    w, h = 1920, 1080
    monitor = {'top': 0, 'left': 0, 'width': w, 'height': h}
    img = Image.frombytes('RGB', (w, h), sct.grab(monitor).rgb)
    screen = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # set the model to use the screen
    result = model(screen, size=640)
    cv2.imshow('Screen', result.render()[0])
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
I'm a noob in programming.
Using the desktop screen to run inference is also covered on yolov5's GitHub page:
https://github.com/ultralytics/yolov5/issues/36
import cv2
import numpy
import torch
from mss import mss
from PIL import ImageGrab
im = numpy.array(ImageGrab.grab(bbox=(0,0,1920,1080)))
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model.conf = 0.6
image = r'D:\i\test\yolov5-master(original)\yolov5-master\data\images\zidane.jpg'
results = model(im)
results.print()
results.show()
print(results.pandas().xyxy[0])
I have found that mss().grab() has an RGB channel-order issue, so I use PIL instead.
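Alternatively, here is a hedged sketch that keeps mss and simply reorders the channels, since np.array() on an mss grab yields BGRA pixels:

import cv2
import numpy as np
from mss import mss

sct = mss()
monitor = {'top': 0, 'left': 0, 'width': 1920, 'height': 1080}
frame = np.array(sct.grab(monitor))            # BGRA as delivered by mss
rgb = cv2.cvtColor(frame, cv2.COLOR_BGRA2RGB)  # reorder channels before passing to the model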

Array creation too slow

I am trying to create an image array from scratch.
I got the code running, but it takes around 30 seconds to run.
I feel it could be faster by using native NumPy functions.
How can I do this?
import cv2
import numpy as np
import time

volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)

image_heigh = 128
image_width = 256
image_channel = 3

show_img = False

def nomralized(data, data_min, data_max, maximum_value):
    nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
    return nomamized_data

start_time = time.time()
for ii in range(len(volumes) - image_width):
    # ===================== part to optimize start
    final_image = np.zeros((image_heigh, image_width, image_channel))
    start = ii
    end = ii + image_width
    current_vols = volumes[start:end]
    # nomalize data
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(data=current_vols,
                          data_min=vol_min,
                          data_max=vol_max,
                          maximum_value=image_heigh)
    for xxx in range(image_width):
        final_image[:int(vol_norm[xxx]), xxx, :] = 1
    # ===================== part to optimize end
    if show_img:
        image = np.float32(final_image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.imshow("ok", image)
        cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
How can I make this image array creation faster?
I need to create the image at every timestep because I want to simulate a real live data stream that delivers new data at every timestep.
This is why I would like to optimize only this part of the code:
for xxx in range(image_width):
    final_image[:int(vol_norm[xxx]), xxx, :] = 1
How can I do this?
The first and simplest optimizations are these:
1. Compare the values against np.arange(...) instead of using the inner loop.
2. Use a grayscale image instead of 3-channel RGB: three times less data to process.
3. Use the np.uint8 type instead of np.float32; it is faster to process and doesn't need conversion to float32 for CV2 visualization.
All of the above optimizations give a huge speedup (10x): my running time is 2.6 sec instead of the 27 sec before.
Another very useful optimization that I didn't do here: you don't need to recompute the previous image pixels when the max/min of the data within the current window hasn't changed; you only need to recompute them when the max/min changes. I expect your real-life data to change gradually, like Forex or Bitcoin prices, so a max/min change within a window should be rare.
Optimizations 1)-3) above are implemented in the following code:
import cv2
import numpy as np
import time

volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)

image_heigh = 128
image_width = 256
image_channel = 3

show_img = False

def nomralized(data, data_min, data_max, maximum_value):
    nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
    return nomamized_data

start_time = time.time()
aranges = np.arange(image_heigh, dtype = np.int32)[:, None]
for ii in range(len(volumes) - image_width):
    # ===================== part to optimize start
    #final_image = np.zeros((image_heigh, image_width, image_channel), dtype = np.float32)
    start = ii
    end = ii + image_width
    current_vols = volumes[start:end]
    # nomalize data
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(data=current_vols,
                          data_min=vol_min,
                          data_max=vol_max,
                          maximum_value=image_heigh)
    final_image = (aranges < vol_norm[None, :].astype(np.int32)).astype(np.uint8) * 255
    # ===================== part to optimize end
    if show_img:
        cv2.imshow('ok', final_image)
        cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
For the code above I did one more optimization of the inner loop, which speeds it up another 2x, down to 1.3 sec. But I also put the 3 channels plus float32 back, which reduced speed to a final 2.8 sec; here is the code.
Another optimization is possible if recomputing old image data is not needed.
The main thing to optimize was that you were recomputing almost the same whole image on every step, shifted by just 1 pixel along the width. Instead, compute the whole image once and then shift right by the whole image width rather than by 1 pixel.
After this optimization the running time is 0.08 sec.
Do the 1-pixel stepping only for showing the animation, not for computing the image data; the image data should be computed just once if you need speed.
import cv2
import numpy as np
import time

volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)

image_heigh = 128
image_width = volumes.size #256
image_channel = 3
screen_width = 256

show_img = False

def nomralized(data, data_min, data_max, maximum_value):
    nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
    return nomamized_data

start_time = time.time()
for ii in range(0, len(volumes), image_width):
    # ===================== part to optimize start
    final_image = np.zeros((image_heigh, image_width, image_channel))
    start = ii
    end = ii + image_width
    current_vols = volumes[start:end]
    # nomalize data
    vol_min = 0
    vol_max = np.max(current_vols)
    vol_norm = nomralized(data=current_vols,
                          data_min=vol_min,
                          data_max=vol_max,
                          maximum_value=image_heigh)
    for xxx in range(image_width):
        final_image[:int(vol_norm[xxx]), xxx, :] = 1
    # ===================== part to optimize end
    if show_img:
        for start in range(0, final_image.shape[1] - screen_width):
            image = np.float32(final_image[:, start : start + screen_width])
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            cv2.imshow("ok", image)
            cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
I also created an animated image out of your data.
If you want to create the same animation, just append the next piece of code to the end of the script above:
# Needs: python -m pip install pillow
import PIL.Image
imgs = [PIL.Image.fromarray(final_image[:, start : start + screen_width].astype(np.uint8) * 255) for start in range(0, final_image.shape[1] - screen_width, 6)]
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
I've also implemented a simulation of rendering/visualizing a real-time live data stream.
live_stream() is a generator that spits out a random amount of data at random points in time, to simulate the data-generation process.
stream_fetcher() listens to the live stream and records all data received into a Python queue q0; this fetcher runs in its own thread.
renderer() takes the data recorded by the fetcher and renders it into an image through your mathematical formulas and normalization process. It renders as much data as is available, producing images of varying widths, and pushes the rendered images to another queue q1.
visualizer() visualizes the rendered data by fetching as many rendered images as are available.
All functions run in separate threads so that they don't block the whole process. Also, if any thread works too slowly, it skips some of the data to catch up with the current real-time data, so no queue overflows.
You may also notice that the visualized process is jumpy; that is not because the functions are slow, but because the live stream spits out a different amount of data at each time step, which is how real-time data usually behaves.
In the next code I also applied the extra optimization mentioned before, i.e. not recomputing the image if the min/max didn't change.
import cv2, numpy as np
import time, random, threading, queue
image_height = 256
image_width = 512
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)
def live_stream():
last = 0.
while True:
a = np.random.uniform(low = -1., high = 1., size = random.randint(1, 20)).astype(np.float64).cumsum() + last
yield a
last = a[-1]
time.sleep(random.random() * 0.1)
q0 = queue.Queue()
def stream_fetcher():
for e in live_stream():
q0.put(e)
threading.Thread(target = stream_fetcher, daemon = True).start()
aranges = np.arange(image_height, dtype = np.int32)[:, None]
q1 = queue.Queue()
def renderer():
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
vols = np.concatenate(data)[-image_width:]
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
q1.put(whole_image)
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
imgs = []
while True:
data = []
data.append(q1.get())
try:
while True:
data.append(q1.get(block = False))
except queue.Empty:
pass
image = np.concatenate(data, axis = 1)[:, -image_width:]
cv2.imshow('ok', image)
cv2.waitKey(1)
if imgs is not None:
try:
# Needs: python -m pip install pillow
import PIL.Image
has_pil = True
except:
has_pil = False
imgs = None
if has_pil:
imgs.append(PIL.Image.fromarray(np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)))
if len(imgs) >= 1000:
print('saving...', flush = True)
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
imgs = None
print('saved!', flush = True)
threading.Thread(target = visualizer, daemon = True).start()
while True:
time.sleep(0.1)
The live-process simulation above is rendered into result.png, shown below.
I've also decided to improve the visualization by using the more advanced matplotlib instead of cv2, in order to show axes and do real-time plot drawing. The visualization image is below.
Next is the matplotlib-based code corresponding to the last image above:
import cv2, numpy as np
import time, random, threading, queue
image_height = 256
image_width = 512
save_nsec = 20
dpi, fps = 100, 15
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)
def live_stream():
last = 0.
pos = 0
while True:
a = np.random.uniform(low = -1., high = 1., size = random.randint(1, 30)).astype(np.float64).cumsum() + last
yield a, pos, pos + a.size - 1
pos += a.size
last = a[-1]
time.sleep(random.random() * 2.2 / fps)
q0 = queue.Queue()
def stream_fetcher():
for e in live_stream():
q0.put(e)
threading.Thread(target = stream_fetcher, daemon = True).start()
aranges = np.arange(image_height, dtype = np.int32)[:, None]
q1 = queue.Queue()
def renderer():
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
data_vols = [e[0] for e in data]
data_minx, data_maxx = data[0][1], data[-1][2]
vols = np.concatenate(data_vols)[-image_width:]
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
q1.put((whole_image, data_maxx - whole_image.shape[1] + 1, data_maxx, vols_min, vols_max))
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
import matplotlib.pyplot as plt, matplotlib.animation
def images():
while True:
data = []
data.append(q1.get())
try:
while True:
data.append(q1.get(block = False))
except queue.Empty:
pass
minx = min([e[1] for e in data])
maxx = min([e[2] for e in data])
miny = min([e[3] for e in data])
maxy = min([e[4] for e in data])
image = np.concatenate([e[0] for e in data], axis = 1)[:, -image_width:]
image = np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)
image = np.repeat(image[:, :, None], 3, axis = -1)
yield image, minx, maxx, miny, maxy
it = images()
im = None
fig = plt.figure(figsize = (image_width / dpi, image_height / dpi), dpi = dpi)
def animate_func(i):
nonlocal it, im, fig
image, minx, maxx, miny, maxy = next(it)
print(f'.', end = '', flush = True)
if im is None:
im = plt.imshow(image, interpolation = 'none', aspect = 'auto')
else:
im.set_array(image)
im.set_extent((minx, maxx, miny, maxy))
return [im]
anim = matplotlib.animation.FuncAnimation(fig, animate_func, frames = round(save_nsec * fps), interval = 1000 / fps)
print('saving...', end = '', flush = True)
#anim.save('result.mp4', fps = fps, dpi = dpi, extra_args = ['-vcodec', 'libx264'])
anim.save('result.gif', fps = fps, dpi = dpi, writer = 'imagemagick')
print('saved!', end = '', flush = True)
plt.show()
threading.Thread(target = visualizer, daemon = True).start()
while True:
time.sleep(0.1)
Then I decided to play a bit and colored the last image with an RGB palette: the higher the peak, the more red-ish it is; closer to the middle it is more green-ish; low enough and it is more blue-ish. The resulting image below was produced by this coloring code.
And here is another colored animation, line-style instead of bar-style, produced with the help of this code:

Why am I getting this error in facenet?

I am trying to run facematch (facenet) on my virtual machine (Google Cloud Platform). At first things were running smoothly and it was embedding the points of the faces, but then, out of the blue, my code stopped working.
In the first code listing you can see the imports are there.
In the second code listing you can see the imports are there as well.
These are the ls commands, so you can see that all the directories/modules are present, along with the errors I'm getting.
Can anyone share some insight into what I'm doing wrong?
Face_match_demo code:
import tensorflow as tf
import numpy as np
import facenet
from align import detect_face
import cv2
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img1", type = str, required=True)
parser.add_argument("--img2", type = str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

def compare2face(img1, img2):
    face1 = getFace(img1)
    face2 = getFace(img2)
    if face1 and face2:
        # calculate Euclidean distance
        dist = np.sqrt(np.sum(np.square(np.subtract(face1[0]['embedding'], face2[0]['embedding']))))
        return dist
    return -1

img1 = cv2.imread(args.img1)
img2 = cv2.imread(args.img2)
distance = compare2face(img1, img2)
threshold = 1.10  # set yourself to meet your requirement
print("distance = " + str(distance))
face_embeddings_demo code:
import tensorflow as tf
from align import detect_face
import facenet
import cv2
import imutils
import numpy as np
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img", type = str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    # print(feed_dict)
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

img = cv2.imread(args.img)
img = imutils.resize(img, width=1000)
faces = getFace(img)
for face in faces:
    print("Embeddings = " + str(face['embedding']))
cv2.waitKey(0)
cv2.destroyAllWindows()
You have to have an __init__.py file in the package directory for it to be recognized as a package. It can be an empty file, but it has to be present. You don't have this in the align directory.
From the documentation:
The __init__.py files are required to make Python treat the directories as containing packages
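For example, a minimal way to create that marker file from Python, assuming the align directory sits next to the demo scripts (an empty file created from a shell or editor works just as well):

from pathlib import Path

# An empty __init__.py is enough for Python to treat 'align' as a package.
Path("align/__init__.py").touch()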
From your comment, the error
usage: face_match_demo.py [-h] --img1 IMG1 --img2 IMG2
face_match_demo.py: error: ambiguous option: --img=images/faces.jpg could match --img2, --img1
means that face_match_demo.py is actually a utility to match two images, to say whether they contain the same face or not. So you have to provide two images to it, and it will tell if the face is the same. And you need to use the --img1 and --img2 options to do that like this:
python face_match_demo.py --img1 images/faces.jpg --img2 [[another face image]]
