Combine TensorFlow Object Detection API with Keras Model - python

TensorFlow version: 1.14
Python version: 3.6.9
My purpose is to build an object detection system with classification. I used Object Detection API and I want to feed its output bounding boxes to another neural networks (there are 6 different objects to detect and then I want to classify these object with Keras neural networks by object's features).
When I use Object Detection API only its OK, but if I want to use model.predict() script crashes. As I've read there's a problem with graph and sessions.
I'm pretty fresh to all these stuff, so I want to ask: is this possible to use multiple models simultaneously?
I've read about creating two sessions and graphs but the input of Object Detection model is a live video from the webcam and I don't want to lose performance of a script. I tried to start session with each frame, but it's very slow.
Also maybe upgrading script to Tensorflow 2.0 will be helpful?
EDIT:
I want to detect fruits and pass them to another Keras models which will predict their state. Detecting fruits works good, but I cannot use additional Keras model, because of the following error:
Tensor Tensor("dense_3/Sigmoid:0", shape=(?, 1), dtype=float32) is not an element of this graph.
Code provided:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from keras import models
from keras.preprocessing import image
import cv2
if 'cap' in globals():
cap.release()
cap = cv2.VideoCapture(0)
sys.path.append("..")
graph = tf.get_default_graph()
from utils import label_map_util
from utils import visualization_utils as vis_util
def limit(value, max_val, min_val):
if(value > max_val):
value = max_val
elif(value < min_val):
value = min_val
return value
# What model to download.
MODEL_NAME = 'inference_graph'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 6
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array_updated(image):
return np.array(image).astype(np.uint8)
# PATH_TO_TEST_IMAGES_DIR = 'test_images'
# TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
# Loading a keras model
model = models.load_model('new_banana.h5')
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
while True:
ret, image_np = cap.read()
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
image_np_copy = image_np.copy()
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8,
min_score_thresh=0.7)
# Code what are used to get thresholded bounding boxes from image
# enlarge them about compenser value, limitates them
# print them and send them to another script
# 0 - apple, 2 - banana, 3 - orange, 4 - pear, 5 - pepper, 6 - tomato
min_score_thresh = 0.7
bboxes = boxes[scores > min_score_thresh]
bclasses = classes[scores > min_score_thresh]
image_np_new = cv2.resize(image_np_copy, (800,600))
im_width, im_height = (800, 600)
if bclasses.size > 0:
final_box = []
cropped_images = []
compenser = 30
if(bclasses[0] == 2): #if any of detected classes stands for 'banana'
for box in bboxes:
ymin, xmin, ymax, xmax = box
ymin0 = int(im_height * ymin) - compenser
ymax0 = int(im_height * ymax) + compenser
xmin0 = int(im_width * xmin) - compenser
xmax0 = int(im_width * xmax) + compenser
ymin1 = limit(ymin0, im_height, 0)
ymax1 = limit(ymax0, im_height, 0)
xmax1 = limit(xmax0, im_width, 0)
xmin1 = limit(xmin0, im_width, 0)
image_cropped = image_np_new[ymin1:ymax1, xmin1:xmax1]
height, width, _ = image_cropped.shape
if width > height:
image_cropped = cv2.resize(image_cropped, (200, 150))
image_cropped = cv2.rotate(image_cropped, cv2.ROTATE_90_CLOCKWISE)
else:
image_cropped = cv2.resize(image_cropped, (150, 200))
image_cropped = load_image_into_numpy_array_updated(image_cropped)
image_cropped = image_cropped.reshape((1,) + image_cropped.shape)
image_cropped = image_cropped/255
cropped_images.append(image_cropped)
if (len(cropped_images) > 0):
for image in cropped_images:
print(image.shape)
# input tensor 200, 150, 3
classes = model.predict_classes(image, batch_size=10)
print(classes)
cv2.imshow('object detection', image_np)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
cap.release()
break

Related

How to get the classes name on object detection tensorflow

How to get the classes name of prediction?
the code (from https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/auto_examples/object_detection_camera.html#sphx-glr-auto-examples-object-detection-camera-py)
import os
DATA_DIR = os.path.join(os.getcwd(), 'data')
MODELS_DIR = os.path.join(DATA_DIR, 'models')
for dir in [DATA_DIR, MODELS_DIR]:
if not os.path.exists(dir):
os.mkdir(dir)
import tarfile
import urllib.request
# Download and extract model
MODEL_DATE = '20200711'
MODEL_NAME = 'ssd_resnet101_v1_fpn_640x640_coco17_tpu-8'
MODEL_TAR_FILENAME = MODEL_NAME + '.tar.gz'
MODELS_DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/tf2/'
MODEL_DOWNLOAD_LINK = MODELS_DOWNLOAD_BASE + MODEL_DATE + '/' + MODEL_TAR_FILENAME
PATH_TO_MODEL_TAR = os.path.join(MODELS_DIR, MODEL_TAR_FILENAME)
PATH_TO_CKPT = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'checkpoint/'))
PATH_TO_CFG = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'pipeline.config'))
if not os.path.exists(PATH_TO_CKPT):
print('Downloading model. This may take a while... ', end='')
urllib.request.urlretrieve(MODEL_DOWNLOAD_LINK, PATH_TO_MODEL_TAR)
tar_file = tarfile.open(PATH_TO_MODEL_TAR)
tar_file.extractall(MODELS_DIR)
tar_file.close()
os.remove(PATH_TO_MODEL_TAR)
print('Done')
# Download labels file
LABEL_FILENAME = 'mscoco_label_map.pbtxt'
LABELS_DOWNLOAD_BASE = \
'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/'
PATH_TO_LABELS = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, LABEL_FILENAME))
if not os.path.exists(PATH_TO_LABELS):
print('Downloading label file... ', end='')
urllib.request.urlretrieve(LABELS_DOWNLOAD_BASE + LABEL_FILENAME, PATH_TO_LABELS)
print('Done')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow logging
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
tf.get_logger().setLevel('ERROR') # Suppress TensorFlow logging (2)
# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)
# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(PATH_TO_CKPT, 'ckpt-0')).expect_partial()
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)
# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(PATH_TO_CKPT, 'ckpt-0')).expect_partial()
def get_model_detection_function(model):
#tf.function
def detect_fn(image):
"""Detect objects in image."""
image, shapes = model.preprocess(image)
prediction_dict = model.predict(image, shapes)
detections = model.postprocess(prediction_dict, shapes)
return detections, prediction_dict, tf.reshape(shapes, [-1])
return detect_fn
detect_fn = get_model_detection_function(detection_model)
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS,
use_display_name=True)
import cv2
cap = cv2.VideoCapture(0)
import numpy as np
while True:
# Read frame from camera
ret, image_np = cap.read()
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Things to try:
# Flip horizontally
# image_np = np.fliplr(image_np).copy()
# Convert image to grayscale
# image_np = np.tile(
# np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections, predictions_dict, shapes = detect_fn(input_tensor)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'][0].numpy(),
(detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
detections['detection_scores'][0].numpy(),
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=200,
min_score_thresh=.30,
agnostic_mode=False)
# Display output
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(25) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
actually the code is working fine but i want to get the class name to make some action..
for example :
if variable_name_class == 'cat':
{action 1}
elif variable_name_class == 'dog':
{action 2}
maybe to be like this
while True:
# Read frame from camera
ret, image_np = cap.read()
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Things to try:
# Flip horizontally
# image_np = np.fliplr(image_np).copy()
# Convert image to grayscale
# image_np = np.tile(
# np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections, predictions_dict, shapes = detect_fn(input_tensor)
label_id_offset = 1
image_np_with_detections = image_np.copy()
viz_utils.visualize_boxes_and_labels_on_image_array(
image_np_with_detections,
detections['detection_boxes'][0].numpy(),
(detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
detections['detection_scores'][0].numpy(),
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=200,
min_score_thresh=.30,
agnostic_mode=False)
# Display output
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if variable_name_class == 'cat':
{action 1}
elif variable_name_class == 'dog':
{action 2}
if cv2.waitKey(25) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
I don't know the 'variable_name_class' ..
anyone know how to get the classes name? or the 'variable_name_class' please help me..
Copy and paste the whole code from this link to your jupyter notebook cell:
https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
You should copy all those 1500 lines of code.
After copying all the code to jupyter notebook cell, you need to change some part of the code. Our aim is to return the detected label along with image array inside the visualize_boxes_and_labels_on_image_array() function
So, part of the code for visualize_boxes_and_labels_on_image_array() function should look like below code:
Update the code for visualize_boxes_and_labels_on_image_array() function which should look like below.
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
instance_boundaries=None,
keypoints=None,
keypoint_scores=None,
keypoint_edges=None,
track_ids=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
mask_alpha=.4,
groundtruth_box_visualization_color='black',
skip_boxes=False,
skip_scores=False,
skip_labels=False,
skip_track_ids=False):
"""Overlay labeled boxes on an image with formatted scores and label names.
This function groups boxes that correspond to the same location
and creates a display string for each detection and overlays these
on the image. Note that this function modifies the image in place, and returns
that same image.
Args:
image: uint8 numpy array with shape (img_height, img_width, 3)
boxes: a numpy array of shape [N, 4]
classes: a numpy array of shape [N]. Note that class indices are 1-based,
and match the keys in the label map.
scores: a numpy array of shape [N] or None. If scores=None, then
this function assumes that the boxes to be plotted are groundtruth
boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding
category index `id` and category name `name`) keyed by category indices.
instance_masks: a uint8 numpy array of shape [N, image_height, image_width],
can be None.
instance_boundaries: a numpy array of shape [N, image_height, image_width]
with values ranging between 0 and 1, can be None.
keypoints: a numpy array of shape [N, num_keypoints, 2], can
be None.
keypoint_scores: a numpy array of shape [N, num_keypoints], can be None.
keypoint_edges: A list of tuples with keypoint indices that specify which
keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
edges from keypoint 0 to 1 and from keypoint 2 to 4.
track_ids: a numpy array of shape [N] with unique track ids. If provided,
color-coding of boxes will be determined by these ids, and not the class
indices.
use_normalized_coordinates: whether boxes is to be interpreted as
normalized coordinates or not.
max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
all boxes.
min_score_thresh: minimum score threshold for a box or keypoint to be
visualized.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not. This mode will display scores but ignore
classes.
line_thickness: integer (default: 4) controlling line width of the boxes.
mask_alpha: transparency value between 0 and 1 (default: 0.4).
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
skip_boxes: whether to skip the drawing of bounding boxes.
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
skip_track_ids: whether to skip track id when drawing a single detection
Returns:
uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
"""
# Create a display string (and color) for every box location, group any boxes
# that correspond to the same location.
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
box_to_keypoint_scores_map = collections.defaultdict(list)
box_to_track_ids_map = {}
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
for i in range(boxes.shape[0]):
if max_boxes_to_draw == len(box_to_color_map):
break
if scores is None or scores[i] > min_score_thresh:
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if keypoint_scores is not None:
box_to_keypoint_scores_map[box].extend(keypoint_scores[i])
if track_ids is not None:
box_to_track_ids_map[box] = track_ids[i]
if scores is None:
box_to_color_map[box] = groundtruth_box_visualization_color
else:
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in six.viewkeys(category_index):
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
final_label = display_str
if not skip_scores:
if not display_str:
display_str = '{}%'.format(round(100*scores[i]))
final_label = display_str
else:
display_str = '{}: {}%'.format(display_str, round(100*scores[i]))
if not skip_track_ids and track_ids is not None:
if not display_str:
display_str = 'ID {}'.format(track_ids[i])
final_label = track_ids[i]
else:
display_str = '{}: ID {}'.format(display_str, track_ids[i])
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
elif track_ids is not None:
prime_multipler = _get_multiplier_for_color_randomness()
box_to_color_map[box] = STANDARD_COLORS[
(prime_multipler * track_ids[i]) % len(STANDARD_COLORS)]
else:
box_to_color_map[box] = STANDARD_COLORS[
classes[i] % len(STANDARD_COLORS)]
# Draw all boxes onto image.
for box, color in box_to_color_map.items():
ymin, xmin, ymax, xmax = box
if instance_masks is not None:
draw_mask_on_image_array(
image,
box_to_instance_masks_map[box],
color=color,
alpha=mask_alpha
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
xmin,
ymax,
xmax,
color=color,
thickness=0 if skip_boxes else line_thickness,
display_str_list=box_to_display_str_map[box],
use_normalized_coordinates=use_normalized_coordinates)
if keypoints is not None:
keypoint_scores_for_box = None
if box_to_keypoint_scores_map:
keypoint_scores_for_box = box_to_keypoint_scores_map[box]
draw_keypoints_on_image_array(
image,
box_to_keypoints_map[box],
keypoint_scores_for_box,
min_score_thresh=min_score_thresh,
color=color,
radius=line_thickness / 2,
use_normalized_coordinates=use_normalized_coordinates,
keypoint_edges=keypoint_edges,
keypoint_edge_color=color,
keypoint_edge_width=line_thickness // 2)
return final_label, image
Here, I have just created a variable called final_label which is the class name of the detected bounding box.
While calling this function, you have to use this code:
label, _ = visualize_boxes_and_labels_on_image_array(
image_np_with_detections, #change this according to your image array name
detections['detection_boxes'],
detections['detection_classes']+label_id_offset,
detections['detection_scores'],
category_index,
use_normalized_coordinates=True,
max_boxes_to_draw=8,
min_score_thresh=.50,
agnostic_mode=False)
print(label)
It will give the exact label detected.
Though, it's been some time, since this question was posted, if someone still want's to get the class-names without changing the code (as suggested above), one can post these two lines after getting visualization on the image.
classes = [cls for cls in detections['detection_classes'][detections['detection_scores'] > threshold]]
classes = [category_index.get(cls)['name'] for cls in classes]

Realtime yolov5 detection with Desktop screen as input

I have a script that grabs an application's screenshot and displays it. it works quite nicely on my machine like a video with around 60FPS.
import os
os.getcwd()
from PIL import ImageGrab
import numpy as np
import cv2
import pyautogui
import win32gui
import time
from mss import mss
from PIL import Image
import tempfile
os.system('calc')
sct = mss()
xx=1
tstart = time.time()
while xx<10000:
hwnd = win32gui.FindWindow(None, 'Calculator')
left_x, top_y, right_x, bottom_y = win32gui.GetWindowRect(hwnd)
#screen = np.array(ImageGrab.grab( bbox = (left_x, top_y, right_x, bottom_y ) ) )
bbox = {'top': top_y, 'left': left_x, 'width': right_x-left_x, 'height':bottom_y-top_y }
screen = sct.grab(bbox)
scr = np.array(screen)
cv2.imshow('window', scr)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
xx+=1
cv2.destroyAllWindows()
tend = time.time()
print(xx/(tend-tstart))
print((tend-tstart))
os.system('taskkill /f /im calculator.exe')
I would like to run yolov5's detect.py on this scr image without having to save to disk all the time. I'd also like to show the images with bounding boxes and have their coordinates saved somewhere.
My python level is not good enough, I tried importing detect and adding arguments, but it doesn't seem like it accepts any function parameter, only command line arguments.
Perhaps I should adapt this line, or use opencv?
parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam
Any idea? thanks (this is the detect.py file for yolov5)
import argparse
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://'))
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
else:
p, s, im0 = Path(path), '', im0s
save_path = str(save_dir / p.name)
txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or view_img: # Add bbox to image
label = '%s %.2f' % (names[int(cls)], conf)
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Print time (inference + NMS)
print('%sDone. (%.3fs)' % (s, t2 - t1))
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
raise StopIteration
# Save results (image with detections)
if save_img:
if dataset.mode == 'images':
cv2.imwrite(save_path, im0)
else:
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {save_dir}{s}")
print('Done. (%.3fs)' % (time.time() - t0))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()
print(opt)
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
EDIT I already have weights saved somewhere and am able to run detect on images that are saved on disc, just would like to skip this step to keep those FPS.
The Yolov5 repo is here
For standalone inference in 3rd party projects or repos importing your model into the python workspace with PyTorch Hub is the recommended method. See YOLOv5 PyTorch Hub tutorial here, specifically the section on loading custom models.
https://github.com/ultralytics/yolov5#tutorials
Custom Models
This example loads a custom 20-class VOC-trained YOLOv5s model 'yolov5s_voc_best.pt' with PyTorch Hub.
import torch
model = torch.hub.load('ultralytics/yolov5', 'custom', path_or_model='yolov5s_voc_best.pt')
model = model.autoshape() # for PIL/cv2/np inputs and NMS
Then once the model is loaded:
from PIL import Image
# Images
img1 = Image.open('zidane.jpg')
img2 = Image.open('bus.jpg')
imgs = [img1, img2] # batched list of images
# Inference
result = model(imgs, size=640) # includes NMS
result.print()
import cv2
import torch
from mss import mss
import numpy as np
model = torch.hub.load("/yolov5", 'custom', path="yolov5/best.pt", source='local')
sct = mss()
while 1:
w, h = 1920, 1080
monitor = {'top': 0, 'left': 0, 'width': w, 'height': h}
img = Image.frombytes('RGB', (w, h), sct.grab(monitor).rgb)
screen = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
# set the model use the screen
result = model(screen, size=640)
cv2.imshow('Screen', result.render()[0])
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
im a noob in programming
and using desktop screen to run inference can be found in yolov5's github page
https://github.com/ultralytics/yolov5/issues/36
import cv2
import numpy
import torch
from mss import mss
from PIL import ImageGrab
im = numpy.array(ImageGrab.grab(bbox=(0,0,1920,1080)))
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model.conf = 0.6
image = r'D:\i\test\yolov5-master(original)\yolov5-master\data\images\zidane.jpg'
results = model(im)
results.print()
results.show()
print(results.pandas().xyxy[0])
i have found mss().grab() have an rgb order issue, so use PIL instead

Extract Image Segmentation Map from Tensorflow DeepLab v3 Demo

I have set up the Google's DeepLab V3 Demo on my local system and it runs successfully after making some minor changes. It's as:
# -*- coding: utf-8 -*-
# DeepLab Demo
# This demo will demostrate the steps to run deeplab semantic segmentation model on sample input images.
import os
from io import BytesIO
import tarfile
import tempfile
from six.moves import urllib
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf
class DeepLabModel(object):
"""Class to load deeplab model and run inference."""
INPUT_TENSOR_NAME = 'ImageTensor:0'
OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
INPUT_SIZE = 513
FROZEN_GRAPH_NAME = 'frozen_inference_graph'
def __init__(self, tarball_path):
"""Creates and loads pretrained deeplab model."""
self.graph = tf.Graph()
graph_def = None
# Extract frozen graph from tar archive.
tar_file = tarfile.open(tarball_path)
for tar_info in tar_file.getmembers():
if self.FROZEN_GRAPH_NAME in os.path.basename(tar_info.name):
file_handle = tar_file.extractfile(tar_info)
graph_def = tf.GraphDef.FromString(file_handle.read())
break
tar_file.close()
if graph_def is None:
raise RuntimeError('Cannot find inference graph in tar archive.')
with self.graph.as_default():
tf.import_graph_def(graph_def, name='')
self.sess = tf.Session(graph=self.graph)
def run(self, image):
"""Runs inference on a single image.
Args:
image: A PIL.Image object, raw input image.
Returns:
resized_image: RGB image resized from original input image.
seg_map: Segmentation map of `resized_image`.
"""
width, height = image.size
resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
target_size = (int(resize_ratio * width), int(resize_ratio * height))
resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)
batch_seg_map = self.sess.run(
self.OUTPUT_TENSOR_NAME,
feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
seg_map = batch_seg_map[0]
return resized_image, seg_map
def create_pascal_label_colormap():
"""Creates a label colormap used in PASCAL VOC segmentation benchmark.
Returns:
A Colormap for visualizing segmentation results.
"""
colormap = np.zeros((256, 3), dtype=int)
ind = np.arange(256, dtype=int)
for shift in reversed(range(8)):
for channel in range(3):
colormap[:, channel] |= ((ind >> channel) & 1) << shift
ind >>= 3
return colormap
def label_to_color_image(label):
"""Adds color defined by the dataset colormap to the label.
Args:
label: A 2D array with integer type, storing the segmentation label.
Returns:
result: A 2D array with floating type. The element of the array
is the color indexed by the corresponding element in the input label
to the PASCAL color map.
Raises:
ValueError: If label is not of rank 2 or its value is larger than color
map maximum entry.
"""
if label.ndim != 2:
raise ValueError('Expect 2-D input label')
colormap = create_pascal_label_colormap()
if np.max(label) >= len(colormap):
raise ValueError('label value too large.')
return colormap[label]
def vis_segmentation(image, seg_map):
"""Visualizes input image, segmentation map and overlay view."""
plt.figure(figsize=(15, 5))
grid_spec = gridspec.GridSpec(1, 4, width_ratios=[6, 6, 6, 1])
plt.subplot(grid_spec[0])
plt.imshow(image)
plt.axis('off')
plt.title('input image')
plt.subplot(grid_spec[1])
seg_image = label_to_color_image(seg_map).astype(np.uint8)
plt.imshow(seg_image)
plt.axis('off')
plt.title('segmentation map')
plt.subplot(grid_spec[2])
plt.imshow(image)
plt.imshow(seg_image, alpha=0.7)
plt.axis('off')
plt.title('segmentation overlay')
unique_labels = np.unique(seg_map)
ax = plt.subplot(grid_spec[3])
plt.imshow(
FULL_COLOR_MAP[unique_labels].astype(np.uint8), interpolation='nearest')
ax.yaxis.tick_right()
plt.yticks(range(len(unique_labels)), LABEL_NAMES[unique_labels])
plt.xticks([], [])
ax.tick_params(width=0.0)
plt.grid('off')
plt.show()
LABEL_NAMES = np.asarray([
'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
])
FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)
# #title Select and download models {display-mode: "form"}
MODEL_NAME = 'mobilenetv2_coco_voctrainaug' # #param ['mobilenetv2_coco_voctrainaug', 'mobilenetv2_coco_voctrainval', 'xception_coco_voctrainaug', 'xception_coco_voctrainval']
_DOWNLOAD_URL_PREFIX = 'http://download.tensorflow.org/models/'
_MODEL_URLS = {
'mobilenetv2_coco_voctrainaug':
'deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz',
'mobilenetv2_coco_voctrainval':
'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz',
'xception_coco_voctrainaug':
'deeplabv3_pascal_train_aug_2018_01_04.tar.gz',
'xception_coco_voctrainval':
'deeplabv3_pascal_trainval_2018_01_04.tar.gz',
}
_TARBALL_NAME = 'deeplab_model.tar.gz'
model_dir = tempfile.mkdtemp()
tf.gfile.MakeDirs(model_dir)
download_path = os.path.join(model_dir, _TARBALL_NAME)
print('downloading model, this might take a while...')
urllib.request.urlretrieve(_DOWNLOAD_URL_PREFIX + _MODEL_URLS[MODEL_NAME],
download_path)
print('download completed! loading DeepLab model...')
MODEL = DeepLabModel(download_path)
print('model loaded successfully!')
# """## Run on sample images
#
# Select one of sample images (leave `IMAGE_URL` empty) or feed any internet image
# url for inference.
#
# Note that we are using single scale inference in the demo for fast computation,
# so the results may slightly differ from the visualizations in
# [README](https://github.com/tensorflow/models/blob/master/research/deeplab/README.md),
# which uses multi-scale and left-right flipped inputs.
# """
# #title Run on sample images {display-mode: "form"}
SAMPLE_IMAGE = 'image1.jpg' # #param ['image1', 'image2', 'image3']
IMAGE_URL = 'https://raw.githubusercontent.com/tensorflow/models/master/research/deeplab/g3doc/img/image1.jpg' ##param {type:"string"}
_SAMPLE_URL = ('https://github.com/tensorflow/models/blob/master/research/'
'deeplab/g3doc/img/%s.jpg?raw=true')
def run_visualization(url):
"""Inferences DeepLab model and visualizes result."""
try:
# f = urllib.request.urlopen(url)
# jpeg_str = f.read()
# original_im = Image.open(BytesIO(jpeg_str))
original_im = Image.open("human.jpg")
except IOError:
print('Cannot retrieve image. Please check url: ' + url)
return
print('running deeplab on image %s...' % url)
resized_im, seg_map = MODEL.run(original_im)
vis_segmentation(resized_im, seg_map)
image_url = SAMPLE_IMAGE
run_visualization(SAMPLE_IMAGE)
I have used various images with this model and it's working. Here's an example output:
Now I need to extract the mask as a separate image, how can I achieve that?
Thanks in advance!
The seg_map hold the segmented image.
resized_im, seg_map = MODEL.run(original_im)
Its a matplot Image array. You can convert it into numpy array using
np.array(seg_map) or use it whatever way you like.

Why am I getting this error in facenet?

I am trying to run facematch(facenet) on my Virtual Machine (Google Cloud Platform). At first, things were running smoothly and it was embedding the points of the faces, but then out of the blue, my code stopped working.
The first code, you can see the imports are there
For the second code, you can see the imports are there.
This is the ls commands, so you can see that all the directories/modules are there and see the errors I'm getting
Anyone can share some insight on what I'm doing wrong?
Face_match_demo code:
import tensorflow as tf
import numpy as np
import facenet
from align import detect_face
import cv2
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--img1", type = str, required=True)
parser.add_argument("--img2", type = str, required=True)
args = parser.parse_args()
# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160
sess = tf.Session()
# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')
# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")
# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
def getFace(img):
faces = []
img_size = np.asarray(img.shape)[0:2]
bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
if not len(bounding_boxes) == 0:
for face in bounding_boxes:
if face[4] > 0.50:
det = np.squeeze(face[0:4])
bb = np.zeros(4, dtype=np.int32)
bb[0] = np.maximum(det[0] - margin / 2, 0)
bb[1] = np.maximum(det[1] - margin / 2, 0)
bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
resized = cv2.resize(cropped, (input_image_size,input_image_size),interpolation=cv2.INTER_CUBIC)
prewhitened = facenet.prewhiten(resized)
faces.append({'face':resized,'rect':[bb[0],bb[1],bb[2],bb[3]],'embedding':getEmbedding(prewhitened)})
return faces
def getEmbedding(resized):
reshaped = resized.reshape(-1,input_image_size,input_image_size,3)
feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
embedding = sess.run(embeddings, feed_dict=feed_dict)
return embedding
def compare2face(img1,img2):
face1 = getFace(img1)
face2 = getFace(img2)
if face1 and face2:
# calculate Euclidean distance
dist = np.sqrt(np.sum(np.square(np.subtract(face1[0]['embedding'], face2[0]['embedding']))))
return dist
return -1
img1 = cv2.imread(args.img1)
img2 = cv2.imread(args.img2)
distance = compare2face(img1, img2)
threshold = 1.10 # set yourself to meet your requirement
print("distance = "+str(distance))
face_embeddings_demo code:
import tensorflow as tf
from align import detect_face
import facenet
import cv2
import imutils
import numpy as np
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--img", type = str, required=True)
args = parser.parse_args()
# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160
sess = tf.Session()
# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')
# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")
# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]
def getFace(img):
faces = []
img_size = np.asarray(img.shape)[0:2]
bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
if not len(bounding_boxes) == 0:
for face in bounding_boxes:
if face[4] > 0.50:
det = np.squeeze(face[0:4])
bb = np.zeros(4, dtype=np.int32)
bb[0] = np.maximum(det[0] - margin / 2, 0)
bb[1] = np.maximum(det[1] - margin / 2, 0)
bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
resized = cv2.resize(cropped, (input_image_size,input_image_size),interpolation=cv2.INTER_CUBIC)
prewhitened = facenet.prewhiten(resized)
faces.append({'face':resized,'rect':[bb[0],bb[1],bb[2],bb[3]],'embedding':getEmbedding(prewhitened)})
return faces
def getEmbedding(resized):
reshaped = resized.reshape(-1,input_image_size,input_image_size,3)
feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
# print(feed_dict)
embedding = sess.run(embeddings, feed_dict=feed_dict)
return embedding
img = cv2.imread(args.img)
img = imutils.resize(img,width=1000)
faces = getFace(img)
for face in faces:
print("Embeddings = "+str(face['embedding']))
cv2.waitKey(0)
cv2.destroyAllWindows()
You have to have the __init__.py in the package directory to be recognized as a package. It can be an empty file, but it has to be present. You don't have this in the align directory.
From the documentation:
The __init__.py files are required to make Python treat the directories as containing packages
From your comment, the error
usage: face_match_demo.py [-h] --img1 IMG1 --img2 IMG2 face_match_demo.py: error: ambiguous option: --img=images/faces.jpg could match --img2, --img1
means that face_match_demo.py is actually a utility to match two images, to say whether they contain the same face or not. So you have to provide two images to it, and it will tell if the face is the same. And you need to use the --img1 and --img2 options to do that like this:
python face_match_demo.py --img1 images/faces.jpg --img2 [[another face image]]

How to use two models in Tensorflow object Detection API

In tensorflow Object Detection API we are using ssd_mobilenet_v1_coco_2017_11_17 model to detect 90 general objects. I want to use this model for detection.
Next, I have trained faster_rcnn_inception_v2_coco_2018_01_28 model to detect a custom object. I wish to use this in the same code where I will be able to detect those 90 objects as well as my new trained custom object. How to achieve this with single code?
I have achieved this by doing the following code in detect_object.py
import numpy as np
import tensorflow as tf
import sys
from PIL import Image
import cv2
from utils import label_map_util
from utils import visualization_utils as vis_util
# ------------------ Knife Model Initialization ------------------------------ #
knife_label_map = label_map_util.load_labelmap('training/labelmap.pbtxt')
knife_categories = label_map_util.convert_label_map_to_categories(
knife_label_map, max_num_classes=1, use_display_name=True)
knife_category_index = label_map_util.create_category_index(knife_categories)
knife_detection_graph = tf.Graph()
with knife_detection_graph.as_default():
knife_od_graph_def = tf.GraphDef()
with tf.gfile.GFile('inference_graph_3/frozen_inference_graph.pb', 'rb') as fid:
knife_serialized_graph = fid.read()
knife_od_graph_def.ParseFromString(knife_serialized_graph)
tf.import_graph_def(knife_od_graph_def, name='')
knife_session = tf.Session(graph=knife_detection_graph)
knife_image_tensor = knife_detection_graph.get_tensor_by_name('image_tensor:0')
knife_detection_boxes = knife_detection_graph.get_tensor_by_name(
'detection_boxes:0')
knife_detection_scores = knife_detection_graph.get_tensor_by_name(
'detection_scores:0')
knife_detection_classes = knife_detection_graph.get_tensor_by_name(
'detection_classes:0')
knife_num_detections = knife_detection_graph.get_tensor_by_name(
'num_detections:0')
# ---------------------------------------------------------------------------- #
# ------------------ General Model Initialization ---------------------------- #
general_label_map = label_map_util.load_labelmap('data/mscoco_label_map.pbtxt')
general_categories = label_map_util.convert_label_map_to_categories(
general_label_map, max_num_classes=90, use_display_name=True)
general_category_index = label_map_util.create_category_index(
general_categories)
general_detection_graph = tf.Graph()
with general_detection_graph.as_default():
general_od_graph_def = tf.GraphDef()
with tf.gfile.GFile('ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb', 'rb') as fid:
general_serialized_graph = fid.read()
general_od_graph_def.ParseFromString(general_serialized_graph)
tf.import_graph_def(general_od_graph_def, name='')
general_session = tf.Session(graph=general_detection_graph)
general_image_tensor = general_detection_graph.get_tensor_by_name(
'image_tensor:0')
general_detection_boxes = general_detection_graph.get_tensor_by_name(
'detection_boxes:0')
general_detection_scores = general_detection_graph.get_tensor_by_name(
'detection_scores:0')
general_detection_classes = general_detection_graph.get_tensor_by_name(
'detection_classes:0')
general_num_detections = general_detection_graph.get_tensor_by_name(
'num_detections:0')
# ---------------------------------------------------------------------------- #
def knife(image_path):
try:
image = cv2.imread(image_path)
image_expanded = np.expand_dims(image, axis=0)
(boxes, scores, classes, num) = knife_session.run(
[knife_detection_boxes, knife_detection_scores,
knife_detection_classes, knife_num_detections],
feed_dict={knife_image_tensor: image_expanded})
classes = np.squeeze(classes).astype(np.int32)
scores = np.squeeze(scores)
boxes = np.squeeze(boxes)
for c in range(0, len(classes)):
class_name = knife_category_index[classes[c]]['name']
if class_name == 'knife' and scores[c] > .80:
confidence = scores[c] * 100
break
else:
confidence = 0.00
except:
print("Error occurred in knife detection")
confidence = 0.0 # Some error has occurred
return confidence
def general(image_path):
try:
image = cv2.imread(image_path)
image_expanded = np.expand_dims(image, axis=0)
(boxes, scores, classes, num) = general_session.run(
[general_detection_boxes, general_detection_scores,
general_detection_classes, general_num_detections],
feed_dict={general_image_tensor: image_expanded})
classes = np.squeeze(classes).astype(np.int32)
scores = np.squeeze(scores)
boxes = np.squeeze(boxes)
object_name = []
object_score = []
for c in range(0, len(classes)):
class_name = general_category_index[classes[c]]['name']
if scores[c] > .30: # If confidence level is good enough
object_name.append(class_name)
object_score.append(str(scores[c] * 100)[:5])
except:
print("Error occurred in general detection")
object_name = ['']
object_score = ['']
return object_name, object_score
if __name__ == '__main__':
print(' in main')
I can do
import detect_object
detect_object.knife("image.jpg") # to detect whether knife is present in image(this is custom trained model)
detect_object.general("image.jpg") # to detect those 90 objects from TF API
I know there is knife model in TF API but it is not that much accurate so I retrained it for only knife. Finally I have two models
1. First model is to detect only knife,
2. Second model is to detect general object as usual
You cant combine both the models. Have two sections of code which will load one model at a time and identify whatever it can see in the image.
Other option is to re-train a single model that can identify all objects you are interested in

Categories