How to perform iterative inference using the TensorFlow Slim library - python

There are many examples out there that show how to use the tf.contrib.slim library to classify a single image downloaded from the web; the TensorFlow GitHub repo itself provides one. However, I am struggling to understand the best way to do this in a loop. Any application that uses TensorFlow for classification will have to classify more than one batch of images. The inference process involves building a graph and loading the weights from a checkpoint. When running iteratively, it seems wasteful to repeat those steps again and again, and indeed, when I try that rudimentary method, the memory allocated to Python grows with every iteration. Can someone suggest how to modify the basic examples to achieve repetitive/iterative inference? Here is my current method, which works but is clearly wasteful with memory resources (this code crashes a machine with limited memory; new images are periodically dumped into the global frame variable):
def classification():
    global frame
    global count
    slim = tf.contrib.slim
    image_size = inception_v4.inception_v4.default_image_size
    names = imagenet.create_readable_names_for_imagenet_labels()
    checkpoints_dir = '../../checkpoints'
    # Don't classify the first few frames
    while count < 5:
        pass
    while True:
        start = count
        with tf.Graph().as_default():
            image = tf.convert_to_tensor(frame, dtype=tf.float32)
            processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
            processed_images = tf.expand_dims(processed_image, 0)
            # processed_images will be a 1x299x299x3 tensor of float32
            # Create the model, use the default arg scope to configure the batch norm parameters.
            with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
                logits, _ = inception_v4.inception_v4(processed_images, num_classes=1001, is_training=False)
                probabilities = tf.nn.softmax(logits)
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
                slim.get_model_variables('InceptionV4'))
            with tf.Session() as sess:
                init_fn(sess)
                np_image, probabilities = sess.run([image, probabilities])
                probabilities = probabilities[0, 0:]
                sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
                for i in range(5):
                    index = sorted_inds[i]
                    print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
        end = count
        print "Classification latency = %d frames" % (end - start)

I got this to work, but would still appreciate some wisdom from others.
My solution was to build the graph with a placeholder as the input. The video frame can then be fed into the session's run method using feed_dict. This allows me to put the while loop around the call to session.run. The latency with this method is a tenth of the original I shared, and the memory footprint is stable. Here is the full code I use to classify video frames from a webcam. Note that there is an issue with it: I have no mechanism to exit the threads cleanly, so Ctrl+C will not kill the script. Also note that to run this, you would need to clone the TensorFlow models repo from GitHub, and download and untar the pretrained weights into ../../checkpoints.
import cv2
import os
import time
import numpy as np
from threading import Thread
import tensorflow as tf
from datasets import imagenet
from nets import inception_v4
from preprocessing import inception_preprocessing

######################################################
# Global Variables Shared by threads
frame = None
count = 0
######################################################

def capture():
######################################################
    global frame
    global count
    video_capture = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        ret, frame_bgr = video_capture.read()
        # Display the resulting frame
        cv2.imshow('Video', frame_bgr)
        # Convert to RGB format (Inception expects RGB not BGR color channels)
        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        # Increment frame counter (Used only to calculate latency)
        count += 1
        # Kill loop when user hits q
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()
######################################################

######################################################
def classification():
######################################################
    global frame
    global count
    slim = tf.contrib.slim
    image_size = inception_v4.inception_v4.default_image_size
    names = imagenet.create_readable_names_for_imagenet_labels()
    checkpoints_dir = '../../checkpoints'
    # Don't classify the None Object
    time.sleep(5)
    with tf.Graph().as_default():
        image = tf.placeholder(tf.uint8, [480, 640, 3])
        processed_image = inception_preprocessing.preprocess_image(image,
                                                                   image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)
        # processed_images will be a 1x299x299x3 tensor of float32
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, _ = inception_v4.inception_v4(processed_images, num_classes=1001, is_training=False)
            probs = tf.nn.softmax(logits)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
        with tf.Session() as sess:
            init_fn(sess)
            while True:
                start = count
                probabilities = sess.run(probs, feed_dict={image: frame})
                probabilities = probabilities[0, 0:]
                sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
                for i in range(5):
                    index = sorted_inds[i]
                    print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
                end = count
                print "Classification latency = %d frames" % (end - start)
                # How to end this thread cleanly?
######################################################

# Start the threads
capture_thread = Thread(target=capture)
classify_thread = Thread(target=classification)
capture_thread.start()
classify_thread.start()
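A note on the thread-exit issue mentioned above: one approach (a sketch only, built around a shared threading.Event that I am introducing here; it is not in the original script) is to have both loops poll a stop flag instead of running while True, and to mark the threads as daemons so they die with the main process:

from threading import Thread, Event

stop_event = Event()  # hypothetical shared stop flag (not in the original code)

# In capture(), change "while True:" to "while not stop_event.is_set():" and
# call stop_event.set() just before "break" when the user presses q.
# In classification(), change "while True:" to "while not stop_event.is_set():".

capture_thread = Thread(target=capture)
classify_thread = Thread(target=classification)
capture_thread.daemon = True   # daemon threads die with the main process
classify_thread.daemon = True
capture_thread.start()
classify_thread.start()
capture_thread.join()          # returns once the capture loop exits
stop_event.set()               # ask the classification loop to finish
classify_thread.join()

With this arrangement, pressing q ends the capture loop, which in turn stops the classifier, and Ctrl+C can at least take the daemonized threads down with the main process.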

One option is to define a class that loads the model in its __init__ method and exposes a classify method. You instantiate the class once, then call classify for every frame.
Below is how I modified your code:
import os
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from datasets import imagenet
from nets import inception_v4
from preprocessing import inception_preprocessing

def show_image(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_plot = plt.imshow(img)
    # Set up the plot and hide axes
    plt.title('test')
    img_plot.axes.get_yaxis().set_ticks([])
    img_plot.axes.get_xaxis().set_ticks([])
    plt.show()

def load_image(img_path):
    img = cv2.imread(img_path)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

class ImageClassifier():
    def __init__(self):
        self.slim = tf.contrib.slim
        self.image_size = inception_v4.inception_v4.default_image_size
        self.checkpoints_dir = 'checkpoints'
        self.names = imagenet.create_readable_names_for_imagenet_labels()
        self.arg_scope = inception_v4.inception_v4_arg_scope()
        self.image = tf.placeholder(tf.uint8, [480, 640, 3])
        self.processed_image = inception_preprocessing.preprocess_image(self.image,
                                                                        self.image_size, self.image_size,
                                                                        is_training=False)
        self.processed_images = tf.expand_dims(self.processed_image, 0)
        # processed_images will be a 1x299x299x3 tensor of float32
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with self.slim.arg_scope(self.arg_scope):
            self.logits, self.end_points = inception_v4.inception_v4(self.processed_images, num_classes=1001,
                                                                     is_training=False)
            self.probs = tf.nn.softmax(self.logits)
        self.init_fn = self.slim.assign_from_checkpoint_fn(
            os.path.join(self.checkpoints_dir, 'inception_v4.ckpt'),
            self.slim.get_model_variables('InceptionV4'))
        self.session = tf.Session()
        self.init_fn(self.session)

    def classify(self, img):
        height, width = img.shape[:2]
        feed_dict = {self.image: img}
        probabilities = self.session.run(self.probs, feed_dict=feed_dict)
        probabilities = probabilities[0, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
        for i in range(5):
            index = sorted_inds[i]
            print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, self.names[index]))

def main():
    imgs_dir = "./imgs/wep"
    image_classifier = ImageClassifier()
    for img_name in os.listdir(imgs_dir):
        img = load_image(os.path.join(imgs_dir, img_name))
        img = cv2.resize(img, (640, 480))
        print(img_name)
        image_classifier.classify(img)

if __name__ == '__main__':
    main()
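The main() above classifies still images from a directory. For the video use case in the question, a minimal sketch (my assumption, not part of the answer above) that reuses one ImageClassifier per frame and closes the session on exit might look like this:

# Hypothetical webcam loop reusing a single ImageClassifier instance.
import cv2

classifier = ImageClassifier()               # graph built and weights loaded once
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame_bgr = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)  # model expects RGB
        frame = cv2.resize(frame, (640, 480))               # placeholder is [480, 640, 3]
        classifier.classify(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    classifier.session.close()               # release the TF session explicitly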

Related

Combine TensorFlow Object Detection API with Keras Model

TensorFlow version: 1.14
Python version: 3.6.9
My goal is to build an object detection system with classification. I used the Object Detection API, and I want to feed its output bounding boxes to other neural networks (there are 6 different objects to detect, and I then want to classify those objects by their features using Keras neural networks).
When I use the Object Detection API alone, everything is OK, but as soon as I call model.predict() the script crashes. From what I've read, there is a problem with graphs and sessions.
I'm pretty new to all of this, so I want to ask: is it possible to use multiple models simultaneously?
I've read about creating two sessions and graphs, but the input of the Object Detection model is live video from the webcam and I don't want to lose performance. I tried starting a session for each frame, but it's very slow.
Also, would upgrading the script to TensorFlow 2.0 be helpful?
EDIT:
I want to detect fruits and pass them to another Keras model which predicts their state. Detecting fruits works well, but I cannot use the additional Keras model, because of the following error:
Tensor Tensor("dense_3/Sigmoid:0", shape=(?, 1), dtype=float32) is not an element of this graph.
Code provided:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from keras import models
from keras.preprocessing import image
import cv2

if 'cap' in globals():
    cap.release()
cap = cv2.VideoCapture(0)
sys.path.append("..")
graph = tf.get_default_graph()

from utils import label_map_util
from utils import visualization_utils as vis_util

def limit(value, max_val, min_val):
    if (value > max_val):
        value = max_val
    elif (value < min_val):
        value = min_val
    return value

# What model to download.
MODEL_NAME = 'inference_graph'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 6

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

def load_image_into_numpy_array_updated(image):
    return np.array(image).astype(np.uint8)

# PATH_TO_TEST_IMAGES_DIR = 'test_images'
# TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# Loading a keras model
model = models.load_model('new_banana.h5')

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            ret, image_np = cap.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            image_np_copy = image_np.copy()
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.7)
            # Get the bounding boxes above the threshold, enlarge them by the
            # compenser value, clamp them to the image, and send the crops to
            # the Keras model.
            # 0 - apple, 2 - banana, 3 - orange, 4 - pear, 5 - pepper, 6 - tomato
            min_score_thresh = 0.7
            bboxes = boxes[scores > min_score_thresh]
            bclasses = classes[scores > min_score_thresh]
            image_np_new = cv2.resize(image_np_copy, (800, 600))
            im_width, im_height = (800, 600)
            if bclasses.size > 0:
                final_box = []
                cropped_images = []
                compenser = 30
                if (bclasses[0] == 2):  # if the first detected class is 'banana'
                    for box in bboxes:
                        ymin, xmin, ymax, xmax = box
                        ymin0 = int(im_height * ymin) - compenser
                        ymax0 = int(im_height * ymax) + compenser
                        xmin0 = int(im_width * xmin) - compenser
                        xmax0 = int(im_width * xmax) + compenser
                        ymin1 = limit(ymin0, im_height, 0)
                        ymax1 = limit(ymax0, im_height, 0)
                        xmax1 = limit(xmax0, im_width, 0)
                        xmin1 = limit(xmin0, im_width, 0)
                        image_cropped = image_np_new[ymin1:ymax1, xmin1:xmax1]
                        height, width, _ = image_cropped.shape
                        if width > height:
                            image_cropped = cv2.resize(image_cropped, (200, 150))
                            image_cropped = cv2.rotate(image_cropped, cv2.ROTATE_90_CLOCKWISE)
                        else:
                            image_cropped = cv2.resize(image_cropped, (150, 200))
                        image_cropped = load_image_into_numpy_array_updated(image_cropped)
                        image_cropped = image_cropped.reshape((1,) + image_cropped.shape)
                        image_cropped = image_cropped / 255
                        cropped_images.append(image_cropped)
                if (len(cropped_images) > 0):
                    for image in cropped_images:
                        print(image.shape)
                        # input tensor 200, 150, 3
                        classes = model.predict_classes(image, batch_size=10)
                        print(classes)
            cv2.imshow('object detection', image_np)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                cap.release()
                break
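The "is not an element of this graph" error occurs because keras.models.load_model registers the model's tensors in whichever graph is the default at load time, while the detection loop runs with detection_graph as the default. One common workaround (a sketch, not verified against this exact script) is to give the Keras model its own graph and session, and make them current around each predict call:

# Sketch: isolate the Keras model in its own graph/session (assumption: the
# rest of the script stays as above).
import tensorflow as tf
from keras import backend as K
from keras import models

keras_graph = tf.Graph()
with keras_graph.as_default():
    keras_session = tf.Session(graph=keras_graph)
    K.set_session(keras_session)
    model = models.load_model('new_banana.h5')

def classify_crop(image_cropped):
    # Make the Keras graph/session active before predicting.
    with keras_graph.as_default():
        K.set_session(keras_session)
        return model.predict_classes(image_cropped, batch_size=10)

# Inside the detection loop, call classify_crop(image) instead of
# model.predict_classes(image, batch_size=10).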

When I try to reshape my training data I get this error: ValueError: cannot reshape array of size 568 into shape (28,28,3)

This is where I read in the images:
train = []
imgsize = 28
for image_name in image_name_list:
    im = cv2.imread(path_string + image_name + '.jpg')
    new = cv2.resize(im, (imgsize, imgsize))
    train.append(new)
In the tutorial I was following, I'm not sure why we were meant to loop the resized images into separate X and Y variables; I assumed it was to split the data into training and testing sets:
X = []
Y = []
for features, labels in enumerate(train):
    X.append(features)
    Y.append(labels)
X = np.array(X).reshape(-1, imgsize, imgsize, 3)
I know that the last number indicates greyscale vs. RGB, and I need 3 channels because my images are in colour.
ValueError: cannot reshape array of size 568 into shape (28,28,3)
Reshaping an array of size 568 into shape (28, 28, 3) isn't possible: that shape needs 28 x 28 x 3 = 2352 elements per image, but your array only has 568 in total.
The 568 most likely comes from the loop itself: enumerate(train) yields (index, image) pairs, so features is the integer index and X ends up as a flat array of 568 indices rather than 568 images. Iterate over train directly (and take the labels from wherever they actually live) so that X contains the images.
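To make the mismatch concrete, and to show the shape you would expect once X holds the images, a minimal sketch (the labels list is left out because the original loop never builds one correctly):

import numpy as np

print(28 * 28 * 3)           # 2352 elements needed per image; 568 will not fit

# Hypothetical fix: build X from the images themselves, not enumerate() indices.
X = np.array(train)          # shape: (num_images, 28, 28, 3) for colour images
X = X.reshape(-1, imgsize, imgsize, 3)
print(X.shape)               # e.g. (568, 28, 28, 3) if there are 568 images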
You need to set a fixed input tensor size when freezing your model.
import tensorflow as tf
import os
from tensorflow.python.tools.freeze_graph import freeze_graph
import models
import utils
import image_utils as im
import numpy as np

FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string('checkpoint_dir', './checkpoints/photo2cartoon', 'checkpoints directory path')
tf.flags.DEFINE_integer('crop_size', '256', 'crop_size, default: 256')

def export_graph(model_name):
    graph = tf.Graph()
    with graph.as_default():
        a_real = tf.placeholder(tf.float32, shape=([1, FLAGS.crop_size, FLAGS.crop_size, 3]), name='input_image')  # <<<< YOU NEED TO DEFINE THIS
        # a_real = tf.reshape(a_real, tf.stack([1, FLAGS.crop_size, FLAGS.crop_size, 3]))
        a2b = models.generator(a_real, 'a2b', reuse=False, train=False)
        saver = tf.train.Saver()
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        # ------------------------------
        # Save graph nodes to text file
        # ------------------------------
        graph_def = graph.as_graph_def()
        # Remove Const nodes.
        for i in reversed(range(len(graph_def.node))):
            if graph_def.node[i].op == 'Const':
                del graph_def.node[i]
            for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
                         'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',
                         'Tpaddings']:
                if attr in graph_def.node[i].attr:
                    del graph_def.node[i].attr[attr]
        # Save as text.
        tf.train.write_graph(graph_def, "", "text_graph.pbtxt", as_text=True)
        # ------------------------------
        # Load variables data
        # ------------------------------
        latest_ckpt = utils.load_checkpoint(FLAGS.checkpoint_dir, sess, saver)
        if latest_ckpt is None:
            raise Exception('No checkpoint!')
        else:
            print('Copy variables from % s' % latest_ckpt)
        # -----------------------------------------
        # Write data for tensorboard for show graph
        # -----------------------------------------
        a_real_ipt = np.zeros(shape=[1, FLAGS.crop_size, FLAGS.crop_size, 3])
        writer = tf.summary.FileWriter('logs', sess.graph)
        writer.close()
        # -----------------------------------------
        # Write graph output
        # -----------------------------------------
        # get graph definition
        gd = sess.graph.as_graph_def()
        # fix batch norm nodes
        for node in gd.node:
            if node.op == 'RefSwitch':
                node.op = 'Switch'
                for index in xrange(len(node.input)):
                    if 'moving_' in node.input[index]:
                        node.input[index] = node.input[index] + '/read'
            elif node.op == 'AssignSub':
                node.op = 'Sub'
                if 'use_locking' in node.attr: del node.attr['use_locking']
        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, gd, ["a2b_generator/Tanh"])
        tf.train.write_graph(output_graph_def, 'pretrained', model_name, as_text=False)

def main(unused_argv):
    print('photo2cartoon.pb')
    export_graph('photo2cartoon.pb')

if __name__ == '__main__':
    tf.app.run()
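Once the graph has been frozen with the fixed [1, crop_size, crop_size, 3] input, it can be loaded and run like this (a sketch, assuming the node names input_image and a2b_generator/Tanh from the export code above):

import numpy as np
import tensorflow as tf

# Load the frozen graph written by export_graph().
graph_def = tf.GraphDef()
with tf.gfile.GFile('pretrained/photo2cartoon.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name='')

with tf.Session(graph=graph) as sess:
    img = np.zeros((1, 256, 256, 3), dtype=np.float32)   # dummy input of the fixed shape
    out = sess.run('a2b_generator/Tanh:0', feed_dict={'input_image:0': img})
    print(out.shape)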

Tensorflow dataset generator inverted colors

I have a problem with the TF dataset generator. I do not know why, but when I get a picture from the dataset by running it through a session, the returned tensors have their colors seemingly inverted. I tried changing BGR to RGB, but that is not the problem.
It is partially solved by inverting the image array (img = 1 - img), but I would prefer that the problem not occur in the first place. Does somebody know what the cause could be?
import os
import glob
import random
import tensorflow as tf
from tensorflow import Tensor

class PairGenerator(object):
    person1 = 'img'
    person2 = 'person2'
    label = 'same_person'

    # def __init__(self, lfw_path='./tf_dataset/resources' + os.path.sep + 'lfw'):
    def __init__(self, lfw_path='/home/tom/Devel/ai-dev/tensorflow-triplet-loss/data/augmentor'):
        self.all_people = self.generate_all_people_dict(lfw_path)
        print(self.all_people.keys())

    def generate_all_people_dict(self, lfw_path):
        # generates a dictionary between a person and all the photos of that person
        all_people = {}
        for person_folder in os.listdir(lfw_path):
            person_photos = glob.glob(lfw_path + os.path.sep + person_folder + os.path.sep + '*.jpg')
            all_people[person_folder] = person_photos
        return all_people

    def get_next_pair(self):
        all_people_names = list(self.all_people.keys())
        while True:
            # draw a person at random
            person1 = random.choice(all_people_names)
            # flip a coin to decide whether we fetch a photo of the same person vs different person
            same_person = random.random() > 0.5
            if same_person:
                person2 = person1
            else:
                # repeatedly pick random names until we find a different name
                person2 = person1
                while person2 == person1:
                    person2 = random.choice(all_people_names)
            person1_photo = random.choice(self.all_people[person1])
            yield ({self.person1: person1_photo,
                    self.label: same_person})

class Inputs(object):
    def __init__(self, img: Tensor, label: Tensor):
        self.img = img
        self.label = label

    def feed_input(self, input_img, input_label=None):
        # feed the input images that are necessary to make a prediction
        feed_dict = {self.img: input_img}
        # optionally also include the label:
        # if we're just making a prediction without calculating loss, that won't be necessary
        if input_label is not None:
            feed_dict[self.label] = input_label
        return feed_dict

class Dataset(object):
    img_resized = 'img_resized'
    label = 'same_person'

    def __init__(self, generator=PairGenerator()):
        self.next_element = self.build_iterator(generator)

    def build_iterator(self, pair_gen: PairGenerator):
        batch_size = 10
        prefetch_batch_buffer = 5
        dataset = tf.data.Dataset.from_generator(pair_gen.get_next_pair,
                                                 output_types={PairGenerator.person1: tf.string,
                                                               PairGenerator.label: tf.bool})
        dataset = dataset.map(self._read_image_and_resize)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(prefetch_batch_buffer)
        iter = dataset.make_one_shot_iterator()
        element = iter.get_next()
        return Inputs(element[self.img_resized],
                      element[PairGenerator.label])

    def _read_image_and_resize(self, pair_element):
        target_size = [224, 224]
        # read images from disk
        img_file = tf.read_file(pair_element[PairGenerator.person1])
        print("////")
        print(PairGenerator.person1)
        img = tf.image.decode_image(img_file, channels=3)
        # let tensorflow know that the loaded images have unknown dimensions, and 3 color channels (rgb)
        img.set_shape([None, None, 3])
        # resize to model input size
        img_resized = tf.image.resize_images(img, target_size)
        # img_resized = tf.image.flip_up_down(img_resized)
        # img_resized = tf.image.rot90(img_resized)
        pair_element[self.img_resized] = img_resized
        pair_element[self.label] = tf.cast(pair_element[PairGenerator.label], tf.float32)
        return pair_element

generator = PairGenerator()
iter = generator.get_next_pair()
for i in range(10):
    print(next(iter))
ds = Dataset(generator)

import matplotlib.pyplot as plt
imgplot = plt.imshow(out)
imgplot = plt.imshow(1 - out)
OK, so the solution was:
imgplot = plt.imshow(out / 255)
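The underlying cause: tf.image.resize_images returns float32 values that are still in the 0-255 range, while matplotlib's imshow expects float RGB arrays to lie in [0, 1] and clips everything above 1, which produces the washed-out, seemingly inverted display. Scaling into [0, 1], or casting back to uint8, fixes the display, as in this sketch:

import numpy as np
import matplotlib.pyplot as plt

# out: float32 image with values in [0, 255], as produced by the pipeline above.
plt.imshow(out / 255.0)           # float RGB data must be in [0, 1] for imshow
# or equivalently:
plt.imshow(out.astype(np.uint8))  # uint8 data may span [0, 255]
plt.show()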

Tensorflow uses all memory for a single example

Here is my code for reading a TFRecord batch and passing it through a network.
import os, sys
import tensorflow as tf

def read_and_decode_single_example(filename_queue):
    # Unlike the TFRecordWriter, the TFRecordReader is symbolic
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename
    # serialized_example is a Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # One needs to describe the format of the objects to be returned
    features = tf.parse_single_example(
        serialized_example,
        features={
            # We know the length of both fields. If not the
            # tf.VarLenFeature could be used
            'click': tf.FixedLenFeature([], tf.int64),
            'title': tf.FixedLenFeature([25], tf.int64)
            # maybe others eg data1:tf.FixLenFeature([],tf.float64)
        })
    # now return the converted data
    lbl = features['click']
    ttl = features['title']
    return lbl, ttl

def read_batch_data(files, b_s):
    min_after_dequeue = 8
    num_threads = 2
    batch_size = b_s
    capacity = min_after_dequeue + (num_threads + 2) * batch_size
    filename_queue = tf.train.string_input_producer(files, num_epochs=1)
    c_n_c, tit = read_and_decode_single_example(filename_queue)
    label_batch, title_batch = tf.train.shuffle_batch([c_n_c, tit], batch_size=batch_size, capacity=capacity,
                                                      num_threads=num_threads, min_after_dequeue=min_after_dequeue)
    return label_batch, title_batch
And the network code:
import math
import os, sys
import subprocess
import pickle
import load_data_labels
import numpy as np
import tensorflow as tf
import shutil

LOG_DIR = './log_dir'

def init_weights(shape, name):
    return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float64), name=name)

def init_biases(shape, name):
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64), name=name)

def model(titles, w_h, w_h2, w_o, vocab_size, embd_layer):
    # Add layer name scopes for better graph visualization
    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W_em = tf.Variable(embd_layer, name="word_embeddings")
        embed_l = tf.nn.embedding_lookup(W_em, titles)
        # can be reduce sum
        embedding = tf.reduce_mean(embed_l, [1])
    with tf.name_scope("layer1"):
        h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
    with tf.name_scope("layer2"):
        h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
    with tf.name_scope("layer3"):
        return tf.add(tf.matmul(h2, w_o), b_o)

def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
    initW = np.random.uniform(-1.0, 1.0, (voc_len + 1, emb_dim))
    for word in word_map:
        vec = w2v_dict.get(word)
        idx = word_map[word]
        if vec is not None:
            initW[idx, :] = vec
    return initW

with open('./data/word_map.pickle', 'rb') as word_map_file:
    word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
    w2vec = pickle.load(w2vec_file)

dataset_file = "./data/file000000000000_1000lines.tfrecords"
batch_size = 4
trY, trX = load_data_labels.read_batch_data([dataset_file], batch_size)
trY = tf.one_hot(trY, depth=2, axis=-1)
trY = tf.reshape(trY, [4, 2])
print trY.get_shape()
print trX.get_shape()
w_h = init_weights([300, 625], "w_h")
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length = len(w2vec)
any_vector_in_dict = w2vec.itervalues().next()
emb_dim = len(any_vector_in_dict)
embd_layer = init_word_embedding_with_w2v(w2vec, word_map, emb_dim, vocabulary_length)
b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2], "b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length, embd_layer)
with tf.name_scope("cost"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))
    tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    merged = tf.summary.merge_all()
    tf.global_variables_initializer().run()
    for i in range(10):
        sess.run(train_op)
        summary, acc = sess.run([merged, acc_op])
        writer.add_summary(summary, i)  # Write summary
The problem is that the program crashes because it fills all of the RAM. The trouble starts at the merge_all statement of the network: it hangs inside global_variables_initializer, which never returns while memory fills up gradually. Maybe a queue is left open? I can't find anything relevant and specific, and TensorFlow's docs are bad at best. I've been searching for more than a week and I'm starting to get extremely tired. Could anyone help?
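One thing stands out in the code above (an observation, not a verified fix for this exact script): read_batch_data uses tf.train.string_input_producer(..., num_epochs=1) and tf.train.shuffle_batch, which require local-variable initialization and running queue runners. Without tf.train.start_queue_runners, sess.run(train_op) blocks forever waiting on an empty queue, which can look like a hang during startup. The usual TF 1.x boilerplate for the session block is sketched below:

with tf.Session() as sess:
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    merged = tf.summary.merge_all()
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])   # num_epochs uses a local variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start filling the queues
    try:
        for i in range(10):
            sess.run(train_op)
            summary, acc = sess.run([merged, acc_op])
            writer.add_summary(summary, i)
    finally:
        coord.request_stop()
        coord.join(threads)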

Keras/Tensorflow: Training successful on MBP 13" with Theano, but throws ResourceExhaustedError on a powerful computer with TensorFlow

I have successfully trained a Keras model on a 13" Macbook Pro with Theano, albeit at a slow speed, but when I train the exact same model with the same data on a more powerful computer (32 GB RAM, 8 GB Nvidia Quadro GPU, 8 CPU cores) with TensorFlow on Ubuntu, the following error occurs:
Here is the script that I use:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import Nadam
from keras.preprocessing.image import random_rotation, random_shift, random_shear, random_zoom
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from random import shuffle
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")

import cv2
import h5py
import json
import os
import sys
import numpy as np

class OpticalSpeechRecognizer(object):
    def __init__(self, rows, columns, frames_per_sequence, samples_generated_per_sample, config_file, training_save_fn, osr_save_fn):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.samples_generated_per_sample = samples_generated_per_sample
        self.config_file = config_file
        self.training_save_fn = training_save_fn
        self.osr_save_fn = osr_save_fn
        self.osr = None

    def save_osr_model(self):
        """ Save the OSR model to an HDF5 file
        """
        # delete file if it already exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.osr_save_fn)
            os.remove(self.osr_save_fn)
        except OSError:
            pass
        print "Saving OSR model to \"{0}\"".format(self.osr_save_fn)
        self.osr.save(self.osr_save_fn)

    def load_osr_model(self):
        """ Load the OSR model from an HDF5 file
        """
        print "Loading OSR model from \"{0}\"".format(self.osr_save_fn)
        self.osr = load_model(self.osr_save_fn)

    def train_osr_model(self):
        """ Train the optical speech recognizer
        """
        print "\nTraining OSR"
        validation_ratio = 0.3
        batch_size = 25
        training_sequence_generator = self.generate_training_sequences(batch_size=batch_size)
        validation_sequence_generator = self.generate_training_sequences(batch_size=batch_size, validation_ratio=validation_ratio)
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            sample_count = training_save_file.attrs["sample_count"]
            pbi = ProgressDisplay()
            self.osr.fit_generator(generator=training_sequence_generator,
                                   validation_data=validation_sequence_generator,
                                   samples_per_epoch=sample_count,
                                   nb_val_samples=int(round(validation_ratio*sample_count)),
                                   nb_epoch=10,
                                   max_q_size=1,
                                   verbose=2,
                                   callbacks=[pbi],
                                   class_weight=None,
                                   nb_worker=1)

    def generate_training_sequences(self, batch_size, validation_ratio=0):
        """ Generates training sequences from HDF5 file on demand
        """
        while True:
            with h5py.File(self.training_save_fn, "r") as training_save_file:
                sample_count = int(training_save_file.attrs["sample_count"])
                sample_idxs = range(0, sample_count)
                shuffle(sample_idxs)
                training_sample_idxs = sample_idxs[0:int((1-validation_ratio)*sample_count)]
                validation_sample_idxs = sample_idxs[int((1-validation_ratio)*sample_count):]
                # generate sequences for validation
                if validation_ratio:
                    validation_sample_count = len(validation_sample_idxs)
                    batches = int(validation_sample_count/batch_size)
                    remainder_samples = validation_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send remainder samples as one batch, if there are any
                    if remainder_samples:
                        X = training_save_file["X"][validation_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][validation_sample_idxs[-remainder_samples:]]
                        yield (X, Y)
                # generate sequences for training
                else:
                    training_sample_count = len(training_sample_idxs)
                    batches = int(training_sample_count/batch_size)
                    remainder_samples = training_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send remainder samples as one batch, if there are any
                    if remainder_samples:
                        X = training_save_file["X"][training_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][training_sample_idxs[-remainder_samples:]]
                        yield (X, Y)

    def print_osr_summary(self):
        """ Prints a summary representation of the OSR model
        """
        print "\n*** MODEL SUMMARY ***"
        self.osr.summary()

    def generate_osr_model(self):
        """ Builds the optical speech recognizer model
        """
        print "".join(["\nGenerating OSR model\n",
                       "-"*40])
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            class_count = len(training_save_file.attrs["training_classes"].split(","))
        osr = Sequential()
        print " - Adding convolution layers"
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Dropout(0.2))
        osr.add(Flatten())
        print " - Adding fully connected layers"
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=class_count,
                      init="normal",
                      activation="softmax"))
        print " - Compiling model"
        optimizer = Nadam(lr=0.002,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          schedule_decay=0.004)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=optimizer,
                    metrics=["categorical_accuracy"])
        self.osr = osr
        print " * OSR MODEL GENERATED * "

    def process_training_data(self):
        """ Preprocesses training data and saves them into an HDF5 file
        """
        # load training metadata from config file
        training_metadata = {}
        training_classes = []
        with open(self.config_file) as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))
        print "".join(["\n",
                       "Found {0} training classes!\n".format(len(training_classes)),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
        print ""
        # count number of samples
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # update sample count
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)
        print "".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
        print ""
        # initialize HDF5 save file, but clear older duplicate first if it exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.training_save_fn)
            os.remove(self.training_save_fn)
        except OSError:
            pass
        # process and save training data into HDF5 file
        print "Generating {0} samples from {1} samples via data augmentation\n".format(sample_count*self.samples_generated_per_sample,
                                                                                       sample_count)
        sample_count = sample_count*self.samples_generated_per_sample
        with h5py.File(self.training_save_fn, "w") as training_save_file:
            training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
            training_save_file.attrs["sample_count"] = sample_count
            x_training_dataset = training_save_file.create_dataset("X",
                                                                   shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                                   dtype="f")
            y_training_dataset = training_save_file.create_dataset("Y",
                                                                   shape=(sample_count, len(training_classes)),
                                                                   dtype="i")
            # iterate through each class data
            sample_idx = 0
            for class_label, training_class in enumerate(training_classes):
                # get training class sequence paths
                training_class_data_path = training_metadata[training_class]
                training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                                 for file_name in os.listdir(training_class_data_path)
                                                 if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                     and ".mov" in file_name)]
                # iterate through each sequence
                for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                    sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                     .format(training_class, idx+1, len(training_class_sequence_paths)))
                    sys.stdout.flush()
                    # accumulate samples and labels
                    samples_batch = self.process_frames(training_class_sequence_path)
                    label = [0]*len(training_classes)
                    label[class_label] = 1
                    for sample in samples_batch:
                        x_training_dataset[sample_idx] = sample
                        y_training_dataset[sample_idx] = label
                        # update sample index
                        sample_idx += 1
                print "\n"
            training_save_file.close()
        print "Training data processed and saved to {0}".format(self.training_save_fn)

    def process_frames(self, video_file_path):
        """ Preprocesses sequence frames
        """
        # haar cascades for localizing oral region
        face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        mouth_cascade = cv2.CascadeClassifier('haarcascade_mcs_mouth.xml')
        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()
        frames = []
        success = True
        # convert to grayscale, localize oral region, equalize frame dimensions, and accumulate valid frames
        while success:
            success, frame = video.read()
            if success:
                # convert to grayscale
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # localize single facial region
                faces_coords = face_cascade.detectMultiScale(frame, 1.3, 5)
                if len(faces_coords) == 1:
                    face_x, face_y, face_w, face_h = faces_coords[0]
                    frame = frame[face_y:face_y + face_h, face_x:face_x + face_w]
                    # localize oral region
                    mouth_coords = mouth_cascade.detectMultiScale(frame, 1.3, 5)
                    threshold = 0
                    for (mouth_x, mouth_y, mouth_w, mouth_h) in mouth_coords:
                        if (mouth_y > threshold):
                            threshold = mouth_y
                            valid_mouth_coords = (mouth_x, mouth_y, mouth_w, mouth_h)
                        else:
                            pass
                    mouth_x, mouth_y, mouth_w, mouth_h = valid_mouth_coords
                    frame = frame[mouth_y:mouth_y + mouth_h, mouth_x:mouth_x + mouth_w]
                    # equalize frame dimensions
                    frame = cv2.resize(frame, (self.columns, self.rows)).astype('float32')
                    # accumulate frames
                    frames.append(frame)
                # ignore multiple facial region detections
                else:
                    pass
        # equalize sequence lengths
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = np.asarray(frames[0:self.frames_per_sequence])
        # pixel normalizer
        pix_norm = lambda frame: frame / 255.0
        samples_batch = [[map(pix_norm, frames)]]
        # random transformations for data augmentation
        for _ in xrange(0, self.samples_generated_per_sample-1):
            rotated_frames = random_rotation(frames, rg=45)
            shifted_frames = random_shift(rotated_frames, wrg=0.25, hrg=0.25)
            sheared_frames = random_shear(shifted_frames, intensity=0.79)
            zoomed_frames = random_zoom(sheared_frames, zoom_range=(1.25, 1.25))
            samples_batch.append([map(pix_norm, zoomed_frames)])
        return samples_batch

class ProgressDisplay(Callback):
    """ Progress display callback
    """
    def on_batch_end(self, epoch, logs={}):
        print " Batch {0:<4d} => Accuracy: {1:>8.4f} | Loss: {2:>8.4f} | Size: {3:>4d}".format(int(logs["batch"])+1,
                                                                                               float(logs["categorical_accuracy"]),
                                                                                               float(logs["loss"]),
                                                                                               int(logs["size"]))

if __name__ == "__main__":
    # Example usage
    osr = OpticalSpeechRecognizer(rows=100,
                                  columns=150,
                                  frames_per_sequence=45,
                                  samples_generated_per_sample=10,
                                  config_file="training_config.json",
                                  training_save_fn="training_data.h5",
                                  osr_save_fn="osr_model.h5")
    osr.process_training_data()
    osr.generate_osr_model()
    osr.print_osr_summary()
    osr.train_osr_model()
    osr.save_osr_model()
    osr.load_osr_model()
As of this writing, the problem seems to be a TensorFlow issue. For me, the solution was to switch the backend to Theano. To switch the Keras backend, perform the following steps:
Find the Keras configuration file
~/.keras/keras.json
Change the value of the backend field and, optionally, the ordering field
{
    "image_dim_ordering": "th",
    "epsilon": 1e-07,
    "floatx": "float32",
    "backend": "theano"
}
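Alternatively, if editing keras.json is inconvenient, the backend can be overridden per process through the KERAS_BACKEND environment variable, set before Keras is first imported; a minimal sketch:

import os
os.environ["KERAS_BACKEND"] = "theano"  # must be set before the first keras import

from keras import backend as K
print(K.backend())  # should report 'theano'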
