Feeding detection coordinates to an object tracker? - python

I'm working on multiple-object tracking and using the TensorFlow Object Detection API to generate detections. I have managed to modify it a bit so that it returns the coordinates of the detected objects; now I want to feed those coordinates (bounding boxes) to an object tracker (CSRT or KCF).
However, running both detection and tracking simultaneously would be too computationally expensive.
Are there any other methods to pass the coordinates, or to pause the detection?
Below is the detection code.
And in this link is the tracking code https://github.com/spmallick/learnopencv/blob/master/MultiObjectTracker/multiTracker.py
import numpy as np
import os
import six.moves.urllib as urllib
import sys
sys.path.insert(0,r'C:\Users\Ahmed.DESKTOP-KJ6U1BJ\.spyder-py3\TensorFlow\models\research\object_detection')
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
import imutils
from protos import string_int_label_map_pb2
from utils import visualization_utils2 as vis_util
def scale(bbox, width, height):
    """Convert a normalized detection box into a pixel ``(x, y, w, h)`` tuple.

    The TensorFlow Object Detection API reports each box as
    ``[ymin, xmin, ymax, xmax]`` with every value normalized to ``[0, 1]``.
    OpenCV trackers expect ``(x, y, w, h)`` in pixels, where ``(x, y)`` is the
    top-left corner, so the axes have to be swapped and the far corner turned
    into a size.

    Args:
        bbox: sequence of four numbers ``[ymin, xmin, ymax, xmax]``.
        width: width in pixels of the frame the box is mapped onto.
        height: height in pixels of the frame the box is mapped onto.

    Returns:
        Tuple of four ints ``(x, y, w, h)``.
    """
    ymin, xmin, ymax, xmax = bbox[0], bbox[1], bbox[2], bbox[3]
    x = int(xmin * width)
    y = int(ymin * height)
    w = int((xmax - xmin) * width)
    h = int((ymax - ymin) * height)
    return (x, y, w, h)
W = 800
H = 600
videopath = "file:///C:/Users/Ahmed.DESKTOP-KJ6U1BJ/.spyder-py3/soccer4.mp4"
cap = cv2.VideoCapture(videopath)
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# # Model preparation
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = r'C:\Users\Ahmed.DESKTOP-KJ6U1BJ\.spyder-py3\TensorFlow\models\research\object_detection\data\mscoco_label_map.pbtxt'
NUM_CLASSES = 90
# ## Download Model ( uncomment if the model isn't downloaded / comment if you alredy have the model)
"""
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
file_name = os.path.basename(file.name)
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
"""
# ## Load a (frozen) Tensorflow model into memory.
# Build a dedicated graph and import the frozen model's GraphDef into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized GraphDef from PATH_TO_CKPT in binary mode.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix; the detection code
        # later fetches tensors by names such as 'image_tensor:0'.
        tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
import label_map_util
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# # Detection
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # The graph's input/output tensors never change, so look them up once
        # instead of on every frame.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for one detected object.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        # Detections scoring at or below this value are ignored.
        min_score_thresh = 0.7
        while True:
            ret, image_np = cap.read()
            if not ret:
                # End of the video (or a read error): stop instead of feeding
                # an empty frame to the model.
                break
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Drop the batch dimension before walking the results.
            boxes2 = np.squeeze(boxes)
            scores2 = np.squeeze(scores)
            classes2 = np.squeeze(classes).astype(np.int32)
            for i in range(boxes2.shape[0]):
                if scores2[i] > min_score_thresh:
                    class_name = category_index[classes2[i]]['name']
                    print("This box is gonna get used", scale(boxes2[i], W, H), class_name)
            cv2.imshow('Object Detection', cv2.resize(image_np, (800, 600)))
            k = cv2.waitKey(1) & 0xff
            if k == 27:
                # Esc pressed: leave the loop; clean-up happens once below.
                break
# Release the window and the capture exactly once, after the loop has ended.
cv2.destroyAllWindows()
cap.release()

You could count frames with a simple counter in the `while True` loop and "pause" the detection with an `if` statement before `sess.run`, like this:
# Run the (expensive) detector only on every DETECT_EVERY-th frame.
DETECT_EVERY = 10
frame_count = 0
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            ret, image_np = cap.read()
            if not ret:
                # No more frames to read: stop.
                break
            # The first frame and every DETECT_EVERY-th frame after it do the detection.
            if frame_count % DETECT_EVERY == 0:
                # Placeholder: run sess.run(...) here and (re)initialise the
                # trackers from the detected boxes.
                pass
            # Placeholder: update the trackers here (runs on every frame).
            frame_count += 1
This way, the actual detection runs on the first frame and then on every 10th frame after it; on the 9 frames in between you can run only the tracker (or do whatever else you want).

Related

ValueError: saved_model_path must be the valid directory of a saved model to load

This is a sentiment analysis project, and I am getting this error:
Nudity-Detection-Model.h5
Traceback (most recent call last):
File "c:\Users\kvidushi\Desktop\Mini_project\script\vapp.py", line 214, in <module>
main()
File "c:\Users\kvidushi\Desktop\Mini_project\script\vapp.py", line 208, in main
model = load_model('Nudity-Detection-Model.h5')
File "c:\Users\kvidushi\Desktop\Mini_project\script\vapp.py", line 59, in load_model
raise ValueError("saved_model_path must be the valid directory of a saved model to load.")
ValueError: saved_model_path must be the valid directory of a saved model to load.
My script file is:
import json
import cv2
import os
import time
from os import listdir
from os.path import isfile, join, exists, isdir, abspath
from keras.models import load_model
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import matplotlib.pyplot as plt
IMAGE_DIM = 224 # required/default image dimensionality
def load_images(image_paths, image_size, verbose=True):
    """Load images into a numpy array for passing to ``model.predict``.

    Args:
        image_paths: a directory path, a single image file path, or a list of
            image file paths. A directory is expanded to the files directly
            inside it.
        image_size: size passed to keras ``load_img`` as ``target_size``.
        verbose: when true, print every path and the target size before loading.

    Returns:
        Tuple ``(loaded_images, loaded_image_paths)``: a numpy array of the
        images that could be loaded, with pixel values divided by 255, and the
        list of paths those images came from.
    """
    loaded_images = []
    loaded_image_paths = []

    # Only a single path can be a directory or a file. A list of paths must not
    # reach isdir()/isfile(), which raise TypeError for it.
    if isinstance(image_paths, (str, bytes, os.PathLike)):
        if isdir(image_paths):
            parent = abspath(image_paths)
            # Sorted so the load order does not depend on the file system.
            image_paths = [join(parent, f) for f in sorted(listdir(image_paths))
                           if isfile(join(parent, f))]
        else:
            # A file, or a path that does not exist: treat it as one path so it
            # is never iterated character by character.
            image_paths = [image_paths]

    for img_path in image_paths:
        try:
            if verbose:
                print(img_path, "size:", image_size)
            image = keras.preprocessing.image.load_img(img_path, target_size=image_size)
            image = keras.preprocessing.image.img_to_array(image)
            image /= 255
            loaded_images.append(image)
            loaded_image_paths.append(img_path)
        except Exception as ex:
            # Best effort by design: report the failure and carry on with the
            # remaining images.
            print("Image Load Failure: ", img_path, ex)

    return np.asarray(loaded_images), loaded_image_paths
def load_model(model_path):
    """Load a Keras model from ``model_path`` and print its summary.

    Args:
        model_path: path handed to ``tf.keras.models.load_model``.

    Returns:
        The loaded model.

    Raises:
        ValueError: if ``model_path`` is None or does not exist. The message
            includes the absolute path that was checked and the current
            working directory, because a relative path is resolved against
            the working directory, not against this script's folder.
    """
    print(model_path)
    if model_path is None or not exists(model_path):
        resolved = None if model_path is None else abspath(model_path)
        raise ValueError(
            "saved_model_path must be the valid directory of a saved model to load. "
            "Got {!r} (resolved to {!r}); relative paths are resolved against the "
            "current working directory {!r}.".format(model_path, resolved, os.getcwd()))
    model = tf.keras.models.load_model(model_path)
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    return model
def classify(model, input_paths, image_dim=IMAGE_DIM):
    """Load the images at ``input_paths`` as squares and classify them.

    ``input_paths`` is passed straight to ``load_images``; the images are
    resized to ``image_dim`` x ``image_dim`` and the result of ``classify_nd``
    is returned.
    """
    target_size = (image_dim, image_dim)
    loaded = load_images(input_paths, target_size)
    # Only the image array is needed; the list of loaded paths is not used.
    return classify_nd(model, loaded[0])
def classify_nd(model, nd_images):
    """Predict on an image array and average the scores per category.

    Returns a one-element list holding a dict that maps each category name to
    the mean of its predicted score over all images.
    """
    categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy']
    predictions = model.predict(nd_images)

    totals = {}
    n_images = 0
    for row in predictions:
        n_images += 1
        for idx, score in enumerate(row):
            label = categories[idx]
            if label in totals:
                totals[label] = totals[label] + float(score)
            else:
                totals[label] = float(score)
    print(n_images)

    # Turn the per-category sums into means.
    for label in totals:
        totals[label] = totals[label] / n_images
    return [totals]
def predict(model, img_paths):
    """Classify the images at ``img_paths``, print the scores as JSON and plot them.

    The first entry returned by ``classify`` is drawn as a bar chart with one
    bar per category.
    """
    image_preds = classify(model, img_paths, IMAGE_DIM)
    averaged = image_preds[0]
    labels = list(averaged)
    scores = list(averaged.values())

    # One bar per category.
    plt.figure(figsize=(10, 5))
    plt.bar(labels, scores, color='maroon', width=0.4)
    plt.xlabel("Categories")
    plt.ylabel("values")
    plt.title("Nudity Detection Model")

    print(json.dumps(image_preds, indent=2), '\n')
    plt.show()
def get_frames(inputFile, outputFolder, step, count):
    '''
    Save screenshots taken from a video at a fixed time interval.

    Input:
    inputFile - name of the input file with directory
    outputFolder - name and path of the folder to save the results
    step - time lapse between each screenshot (in seconds)
    count - number of screenshots
    Output:
    up to 'count' screenshots that are 'step' seconds apart, created from video
    'inputFile' and stored in folder 'outputFolder' as frame0.jpg, frame1.jpg, ...
    Fewer are written if the video ends first.
    Function Call:
    get_frames("test.mp4", 'data', 10, 10)
    '''
    frames_captured = 0
    currentframe = 0

    # Create the output folder; an existing folder is fine.
    try:
        os.makedirs(outputFolder, exist_ok=True)
    except OSError:
        print('Error! Could not create a directory')

    cam = cv2.VideoCapture(inputFile)
    try:
        # Frames per second, used to convert 'step' seconds into a frame count.
        frame_per_second = cam.get(cv2.CAP_PROP_FPS)
        print(frame_per_second)
        while frames_captured < count:
            ret, frame = cam.read()
            if not ret:
                # End of the video, or the file could not be read.
                break
            if currentframe > (step * frame_per_second):
                currentframe = 0
                # Write into outputFolder rather than a hard-coded './data'.
                name = os.path.join(outputFolder, 'frame' + str(frames_captured) + '.jpg')
                print('Creating...' + name)
                cv2.imwrite(name, frame)
                frames_captured += 1
            currentframe += 1
    finally:
        # Release the capture and any windows even if reading or writing fails.
        cam.release()
        cv2.destroyAllWindows()
def main():
    """Extract frames from the sample video, load the model and plot its predictions."""
    # An earlier version listed "1.jpg" .. "15.jpg" by hand here; the frames
    # extracted below are used instead.
    frames_dir = "data"
    get_frames("1.mp4", frames_dir, 5, 20)
    model = load_model('Nudity-Detection-Model.h5')
    predict(model, frames_dir)
if __name__ == "__main__":
main()
It is asking for this file: Nudity_detection_model.h5
I have put this file in the same folder. Still, it is not able to recognize it. I tried adding double quotes and single quotes, and importing load_model, but the error is still the same.
Can anyone help me?

Tensorflow: Mac OS camera switched on but video not visible on screen

For a current project, I am trying to set up a video recognition program leveraging TensorFlow 2 and OpenCV (Mac OS Catalina).
When running the script below with Python 3, through the terminal or via Jupyter, the green "webcam light" indicates that the camera is switched on, and no error messages appear. However, no video image/window shows on my screen. I have tried various solutions, including adding camera screen frame data, none of which worked.
Does anyone know a smart tweak to make the camera image/window visible?
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util
# Define the video stream
cap = cv2.VideoCapture(0) # Change only if you have more than one webcams
# What model to download.
# Models can bee found here: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
MODEL_NAME = 'ssd_inception_v2_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
# Number of classes to detect
NUM_CLASSES = 90
# Download Model
# Download and unpack the model only when the frozen graph is not on disk yet.
if not os.path.exists(PATH_TO_CKPT):
    # urlretrieve replaces the deprecated URLopener class.
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager closes the archive once extraction is done.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            file_name = os.path.basename(member.name)
            # Only the frozen inference graph is needed from the archive.
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(member, os.getcwd())
# Load a (frozen) Tensorflow model into memory.
# Build a dedicated graph and import the frozen model's GraphDef into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized GraphDef from PATH_TO_CKPT in binary mode.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix; the detection loop
        # later fetches tensors by names such as 'image_tensor:0'.
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Helper code
def load_image_into_numpy_array(image):
    """Return the pixel data of ``image`` as a ``(height, width, 3)`` uint8 array.

    ``image`` must provide ``size`` as ``(width, height)`` and ``getdata()``.
    """
    im_width, im_height = image.size
    pixels = np.array(image.getdata())
    shaped = pixels.reshape((im_height, im_width, 3))
    return shaped.astype(np.uint8)
# Detection
# numpy is used below (np.expand_dims, np.squeeze) but is missing from this
# script's import list.
import numpy as np

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Look the input/output tensors up once; they do not change per frame.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        while True:
            # Read frame from camera
            ret, image_np = cap.read()
            if not ret:
                # No frame was delivered: stop instead of feeding an empty
                # frame to the model.
                print("Can't receive frame from the camera. Exiting ...")
                break
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            # Display output
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
# Free the camera and close the window once the loop has ended.
cap.release()
cv2.destroyAllWindows()
Have you tried passing -1 or 1 as the device index of the VideoCapture? Just in case you haven't tried it yet.
But first of all, you should find out where it goes wrong: we should verify whether the system reads the frames properly.
You can try the following to test whether your camera is running and being read properly:
import sys

import cv2 as cv  # the snippet refers to OpenCV as `cv`

cap = cv.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open camera")
    # sys.exit works in any script; the bare exit() helper is meant for the
    # interactive interpreter. A non-zero status signals the failure.
    sys.exit(1)
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    # Our operations on the frame come here
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Display the resulting frame
    cv.imshow('frame', gray)
    if cv.waitKey(1) == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
cap.read() returns a tuple whose first element is a bool (True/False). If the frame is read correctly, it will be True. So you can check for the end of the video by checking this returned value.
Sometimes, cap may not have initialized the capture. In that case, this code shows an error. You can check whether it is initialized or not by the method cap.isOpened(). If it is True, OK. Otherwise open it using cap.open().
This will help you and us determine which part has gone wrong, so that further solutions can be suggested.
After this, if the test shows no error, this link will be a little bit related.
You can check it out.
Provide us the result from this so we can inspect furthermore.

TFLite Inference on video input

I have an SSD tflite detection model that I am running with Python on a desktop computer. As for now, my script below takes a single image as an input for inference and it works fine:
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
# NOTE(review): file_name, input_mean and input_std are not defined in this
# snippet; presumably they are set earlier in the full script.
img_resized = Image.open(file_name)
# Add a leading batch dimension, then normalize to float32.
input_data = np.expand_dims(img_resized, axis=0)
input_data = (np.float32(input_data) - input_mean) / input_std
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Feed the first input tensor, run inference and read the first output tensor.
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
How to run inference on a .mp4 video as an input?
Is it also possible to draw bounding boxes from detected objects on that video?
To answer your first question, about running inference on a video: here is code that you can use. I wrote it for inference with a classification model, so in your case the `output_data` variable will contain bounding boxes. You have to map them onto the frames using OpenCV, which answers your second question as well (drawing bounding boxes on the video).
import cv2
from PIL import Image
import numpy as np
import tensorflow as tf
def read_tensor_from_readed_frame(frame, input_height=224, input_width=224,
                                  input_mean=0, input_std=255):
    """Resize and normalize one frame into a batched float32 array.

    The frame is cast to float32, given a leading batch dimension, resized
    bilinearly to ``input_height`` x ``input_width`` and normalized as
    ``(value - input_mean) / input_std``.

    Args:
        frame: image data accepted by ``tf.cast``.
        input_height: target height in pixels.
        input_width: target width in pixels.
        input_mean: value subtracted from every pixel.
        input_std: value every pixel is divided by.

    Returns:
        The evaluated result of the normalization op.
    """
    # This function is called once per video frame. Building the ops in a
    # throwaway graph keeps the default graph from growing on every call, and
    # the context manager closes the session instead of leaking it.
    with tf.Graph().as_default():
        float_caster = tf.cast(frame, tf.float32)
        dims_expander = tf.expand_dims(float_caster, 0)
        resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
        normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
        with tf.Session() as sess:
            return sess.run(normalized)
def load_labels(label_file):
    """Return the lines of ``label_file`` with trailing whitespace removed.

    Args:
        label_file: path of a text file with one label per line.

    Returns:
        List of label strings in file order.
    """
    # The context manager closes the file handle, which was previously left open.
    with tf.gfile.GFile(label_file) as label_fh:
        return [line.rstrip() for line in label_fh.readlines()]
def VideoSrcInit(paath):
    """Open a video source and check that a frame can be read from it.

    Args:
        paath: path (or other source) passed to ``cv2.VideoCapture``.

    Returns:
        The opened ``cv2.VideoCapture``.

    Raises:
        ValueError: if no frame can be read from the source.
    """
    cap = cv2.VideoCapture(paath)
    flag, _ = cap.read()
    if not flag:
        # Do not leave a half-opened capture behind.
        cap.release()
        raise ValueError("Video Initialization Failed. Please make sure video path is valid.")
    print("Valid Video Path. Lets move to detection!")
    # The probe read above consumed the first frame; rewind so the caller
    # starts at frame 0. The return value is ignored because the source may
    # not support seeking.
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    return cap
def main():
    """Run the TFLite model on every frame of a video and display the frames.

    Press ``p`` to pause (any key resumes) and ``q`` to quit.
    """
    Labels_Path = "labels.txt"
    Model_Path = "model.tflite"
    input_path = "video.mp4"

    ## Loading labels (not used further in this function yet)
    labels = load_labels(Labels_Path)

    ## Load tflite model and allocate tensors
    interpreter = tf.lite.Interpreter(model_path=Model_Path)
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    input_shape = input_details[0]['shape']
    # Resize frames to the size the model expects instead of a fixed 224x224.
    # NOTE(review): assumes the input shape is [batch, height, width, channels] - confirm.
    input_height, input_width = int(input_shape[1]), int(input_shape[2])

    ## Read video
    cap = VideoSrcInit(input_path)
    # Create the display window once, not on every frame.
    cv2.namedWindow("cv_image", cv2.WINDOW_NORMAL)
    try:
        while True:
            ok, cv_image = cap.read()
            if not ok:
                break

            ## OpenCV delivers frames in BGR order; swap the channels explicitly.
            ## PIL's convert('RGB') does not reorder channels.
            image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

            ## Converting image into tensor
            image_tensor = read_tensor_from_readed_frame(image, input_height, input_width)

            ## Test model
            interpreter.set_tensor(input_details[0]['index'], image_tensor)
            interpreter.invoke()
            output_data = interpreter.get_tensor(output_details[0]['index'])

            ## You need to check the output of the output_data variable and
            ## map it on the frame in order to draw the bounding boxes.

            cv2.imshow("cv_image", cv_image)

            ## Use p to pause the video and use q to terminate the program
            key = cv2.waitKey(10) & 0xFF
            if key == ord("q"):
                break
            if key == ord("p"):
                # Block until any key is pressed, then carry on.
                cv2.waitKey(0)
    finally:
        # Release the video and close the window even if inference fails.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
main()

How to run inference from the SavedModel locally?

I want to run a model locally. I'm trying to train and predict models from web course:
https://github.com/GoogleCloudPlatform/tensorflow-without-a-phd/blob/master/tensorflow-planespotting/trainer_yolo/main.py
A model was trained with above code. This is a YOLO object detection model that detect airplane built with tf.estimator. Training was done successfully with provided codes but I don't know about how to inference the model.
import tensorflow as tf
# DATA
DATA = './samples/airplane_sample.png'
# Model: This directory contains saved_model.pb and variables
SAVED_MODEL_DIR = './1559196417/'
def decode_image():
    """Build ops that read the file at DATA and decode it as a 3-channel uint8 image."""
    raw_bytes = tf.read_file(DATA)
    image = tf.image.decode_image(raw_bytes, channels=3)
    return tf.cast(image, dtype=tf.uint8)
def main1():
    """Load the SavedModel into a fresh graph and try to fetch 'classes'."""
    with tf.Session(graph=tf.Graph()) as sess:
        tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], SAVED_MODEL_DIR)
        # NOTE(review): decode_image() returns a symbolic tensor, not image
        # data; feed_dict values presumably need to be concrete arrays - confirm.
        img = decode_image()
        # NOTE(review): 'classes' and 'input' are assumed tensor names; verify
        # them against the model's signature (e.g. with saved_model_cli).
        result = sess.run(['classes'], feed_dict={'input': img})
        print(result)
def main2():
    """Build a predictor from the SavedModel and call it on the sample image."""
    model = tf.contrib.predictor.from_saved_model(SAVED_MODEL_DIR)
    # NOTE(review): the predictor is handed a symbolic tensor and an unfed
    # placeholder; presumably it expects concrete values (image bytes and an
    # int) - confirm against the exported signature.
    pred = model({'image_bytes': [decode_image()], 'square_size': [tf.placeholder(tf.int32)]})
    print(pred)
if __name__ == "__main__":
main2()
Above is the code I wrote, but it doesn't work, and I don't even know what the problem is. An incorrect input type? The wrong API? Could you give me some advice?
First run saved_model_cli show --all --dir SAVED_MODEL_DIR in the terminal outside of python to inspect the saved model and check that it has the right tags, inputs and outputs. From there it takes a bit of wrangling to get the necessary info out of the API.
def extract_tensors(signature_def, graph):
    """Map each key whose value is a ``tf.TensorInfo`` to the graph tensor of that name.

    Entries whose value is not a ``tf.TensorInfo`` are left out.
    """
    return {
        key: graph.get_tensor_by_name(signature_def[key].name)
        for key in signature_def
        if isinstance(signature_def[key], tf.TensorInfo)
    }
def extract_tags(signature_def, graph):
    """Resolve the input and output tensors of every signature.

    Returns a dict that maps each signature key to
    ``{'inputs': {...}, 'outputs': {...}}``, both built by ``extract_tensors``.
    """
    resolved = {}
    for key in signature_def:
        entry = signature_def[key]
        resolved[key] = {
            'inputs': extract_tensors(entry.inputs, graph),
            'outputs': extract_tensors(entry.outputs, graph),
        }
    return resolved
# Load the SavedModel into a fresh graph and collect the tensors of its
# 'serving_default' signature.
with tf.Session(graph=tf.Graph()) as session:
    serve = tf.saved_model.load(
        session, tags=['serve'], export_dir=SAVED_MODEL_DIR)
    # Resolve every signature's input/output tensors in the session's graph.
    tags = extract_tags(serve.signature_def, session.graph)
    model = tags['serving_default']
From there you can try print(model['inputs'], model['outputs']) to see which inputs and outputs were exported and if they agree with saved_model_cli, if you need another tag then just replace serving_default with that.
Maybe this will work:
import tensorflow as tf
import cv2
# NOTE(review): detection_graph and np are not defined or imported in this
# snippet; presumably detection_graph = tf.Graph() and import numpy as np are
# needed first.
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # NOTE(review): this parses saved_model.pb as a GraphDef; a SavedModel
    # file may not be a plain GraphDef, so a frozen graph may be needed - confirm.
    with tf.gfile.GFile('./1559196417/saved_model.pb', 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Read the sample image, convert BGR to RGB and add a batch dimension.
        image = cv2.imread('./samples/airplane_sample.png')
        rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        rgb_img_expanded = np.expand_dims(rgb_img, axis=0)
        # NOTE(review): 'image_tensor:0' and 'classes:0' are assumed tensor
        # names; verify them against the exported model.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        classes = detection_graph.get_tensor_by_name('classes:0')
        result = sess.run([classes],feed_dict={image_tensor: rgb_img_expanded})

Object Detection using Tensorflow

I am following tensorflow object detection tutorial for Oxford-IIIT Pets Dataset: https://github.com/tensorflow/models/blob/master/object_detection/g3doc/running_pets.md
I have successfully generated the "frozen_inference_graph.pb" from the latest checkpoint.
How I can test the inference graph - "frozen_inference_graph.pb" and pet labels - "pet_label_map.pbtxt" on an image.
I have tried using a Jupyter notebook, but nothing gets detected in the image. I have also used the following Python code to detect "dog" and "cat", but nothing gets detected. The Python code is given below:
import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from utils import FPS, WebcamVideoStream
from multiprocessing import Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
PATH_TO_CKPT = os.path.join('frozen_inference_graph.pb')
PATH_TO_LABELS = os.path.join('pet_label_map.pbtxt')
NUM_CLASSES = 37
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def detect_objects(image_np, sess, detection_graph):
    """Run detection on one image and draw the results onto it.

    Args:
        image_np: image array; it is drawn on in place.
        sess: session used to run the detection graph.
        detection_graph: graph holding the tensors 'image_tensor:0',
            'detection_boxes:0', 'detection_scores:0', 'detection_classes:0'
            and 'num_detections:0'.

    Returns:
        ``image_np`` after the visualization call.
    """
    # The input array is given a leading batch dimension before it is fed.
    batch = np.expand_dims(image_np, axis=0)
    input_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    fetch_names = ('detection_boxes:0', 'detection_scores:0',
                   'detection_classes:0', 'num_detections:0')
    fetches = [detection_graph.get_tensor_by_name(name) for name in fetch_names]

    # Actual detection.
    boxes, scores, classes, num_detections = sess.run(
        fetches, feed_dict={input_tensor: batch})

    # Draw boxes and labels onto the image.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np
def worker(input_q, output_q):
    """Load the frozen model, run detection on one queued frame and return it.

    Args:
        input_q: queue from which one frame is taken.
        output_q: queue onto which the annotated frame is put.
    """
    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    # The context manager closes the session even when detection raises.
    with tf.Session(graph=detection_graph) as sess:
        frame = input_q.get()
        output_q.put(detect_objects(frame, sess, detection_graph))
if __name__ == '__main__':
    # Command-line options; every option has a default.
    # NOTE(review): video_source, width and height are parsed but not used in
    # the code visible here.
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=int,
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=20, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=20, help='Height of the frames in the video stream.')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=2, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()
    # Send multiprocessing's own log messages to stderr at SUBDEBUG level.
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)
    input_q = Queue(maxsize=args.queue_size)
    output_q = Queue(maxsize=args.queue_size)
    # worker is passed as the pool initializer, so it runs in each pool
    # process when that process starts.
    pool = Pool(args.num_workers, worker, (input_q, output_q))
    # NOTE(review): cv2.imread presumably yields None when image2.jpg cannot
    # be read - confirm and guard before queueing.
    frame = cv2.imread("image2.jpg");
    input_q.put(frame)
    # Blocks until a worker has put the annotated image on the output queue.
    cv2.imshow('Video', output_q.get())
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Any help will be greatly appreciated related to running the inference graph on actual image or debugging if nothing gets detected.
If you are using the TensorFlow API, go to the folder models/research and open a console there.
In the research folder, run the command protoc object_detection/protos/*.proto --python_out=. and then export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim.
Then run cd object_detection to change folder in the console and open jupyter notebook in current folder.
In jupyter notebook's home find the file object_detection_tutorial.ipynb, modify it so that it suits your purposes.
What are the outputs of boxes, scores and classes? Can you print them? If you get numbers from them, maybe you just need to change a few lines in your code to properly visualize the results.
For test, you can use:
# Write the annotated image to disk, then read it back and display it.
# NOTE(review): mpimg and plt are not imported in this snippet; presumably
# matplotlib.image and matplotlib.pyplot - confirm.
vis_util.save_image_array_as_png(image,'./outputImg.png')
#print(image.shape)
print('image saved')
img=mpimg.imread('./outputImg.png')
imgplot = plt.imshow(img)
plt.show()

Categories