as I have mentioned in the topic I would like to save my tf.graph into a frozen_graph.pb file. This should save space later I will try to run it an a jetson tx2. I have made a short MNIST example describing my problem. I run tf 1.7 on python 3.5.
Question1: As far as I understood my freeze_graph method takes a checkpoint file transfers all variables to constants except the ones i define with the second parameter. When I try to get the correct tensorname I wrote loggits.name but I get an error no Tensor with that name found in graph.
Question2: After that I would be able to extract a frozen graph, how can i load it back and run an classification on that.
My Code is attached and should work in a single py file.
Thank you very much in advance
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
import os
import time
import tensorflow as tf
import os
import argparse
#METHODS I WANT TO TEST
#TAKE THE CHECKPOINT FILE AND DELETE ALL NOTES THAT ARE NOT USEFUL
def freeze_graph(checkpoint_directory,output_node_names):
#checkpoint = tf.train.get_checkpoint_state(checkpoint_directory)
print(checkpoint_directory)
checkpoint = tf.train.get_checkpoint_state(checkpoint_directory)
input_checkpoint = checkpoint.model_checkpoint_path
absolute_model_dir = str(os.sep).join(input_checkpoint.split(os.sep)[:-1])
output_graph = absolute_model_dir + "/frozen_model.pb"
clear_devices = True
with tf.Session(graph = tf.Graph()) as sess:
#import the metagraph in default graph
saver = tf.train.import_meta_graph(input_checkpoint + '.meta',clear_devices=clear_devices)
#restore the weights
saver.restore(sess,input_checkpoint)
#wrap variables to constants
[print(n.name) for n in tf.get_default_graph().as_graph_def().node]
output_graph_def = tf.graph_util.convert_variables_to_constants(sess, tf.get_default_graph().as_graph_def(),output_node_names.split(","))
with tf.gfile.GFile(output_graph, "wb") as f:
f.write(output_graph_def.SerializeToString())
print("%d ops in the final graph." %len(output_graph_def.node))
return output_graph_def
#HERE IS THE METHOD THAT ALLOWS ME TO LOAD MY FROZEN GRAPH AS GRAPH
def load_graph(frozen_graph_filename):
with tf.gfile.GFile(frozen_graph_filename,"rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name = "prefix")
return graph
#get the data
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)
#NETWORK PARAMETERS
learning_rate = 0.01
dropout = 0.75
display_step = 1
filter_height = 5
filter_width = 5
depth_in = 1
depth_out1 = 64
depth_out2 = 128
#PARAMETERS OF THE DATASET
input_height = 28
input_width = 28
n_classes = 10
#TRAINING PARAMETERS
epochs = 1
batch_size = 256
num_batches = int(mnist.train.num_examples/batch_size)
x = tf.placeholder(tf.float32,[None,28*28],name = "input")
y = tf.placeholder(tf.float32,[None,n_classes])
keep_prob = tf.placeholder(tf.float32)
weights = {'wc1': tf.Variable(tf.random_normal([filter_height,filter_width,depth_in,depth_out1])),
'wc2': tf.Variable(tf.random_normal([filter_height, filter_width, depth_out1, depth_out2])),
'wd1': tf.Variable(tf.random_normal([int(input_height/4)*int(input_height/4)*depth_out2,1024])),
'out': tf.Variable(tf.random_normal([1024,n_classes]))}
biases = {'bc1': tf.Variable(tf.random_normal([depth_out1])),
'bc2': tf.Variable(tf.random_normal([depth_out2])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))}
#DEFINE YOUR NEURAL NETWORKS LAYER OPERATIONS
def ops_conv2d(x,W,b,strides = 1, add_bias = True, activation = tf.nn.relu, use_activation = True):
x = tf.nn.conv2d(x,W,strides = [1,strides,strides,1],padding = 'SAME')
x = tf.nn.bias_add(x,b)
if use_activation:
return activation(x)
else:
return x
def ops_maxpool2d(x,stride=2):
return tf.nn.max_pool(x,ksize=[1,stride,stride,1],strides = [1,stride,stride,1], padding = 'SAME' )
def ops_dropout(input_fully_connected,dropout):
return tf.nn.dropout(input_fully_connected,dropout)
def ops_fullyconnected(input, activation = tf.nn.relu, use_activation = True):
fc = tf.reshape(input,[-1,weights['wd1'].get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc,weights['wd1']),biases['bd1'])
if use_activation:
return activation(fc)
else:
return fc
#DEFINE NETWORK ARCHTEKTURE (FORWARDPASS)
def build_network(x,weights,biases,dropout):
x = tf.reshape(x,shape=(-1,28,28,1))
conv_layer_1 = ops_conv2d(x,weights['wc1'],biases['bc1'],activation=tf.nn.relu, use_activation=True)
conv_layer_1 = ops_maxpool2d(conv_layer_1,2)
conv_layer_2 = ops_conv2d(conv_layer_1,weights['wc2'],biases['bc2'],activation=tf.nn.relu, use_activation=True)
conv_layer_2 = ops_maxpool2d(conv_layer_2,2)
fc1 = ops_fullyconnected(conv_layer_2, activation=tf.nn.relu, use_activation=True)
fc1 = ops_dropout(fc1,dropout)
logits = tf.add(tf.matmul(fc1,weights['out']),biases['out'],name = "logits")
return logits
#DEFINE TENSORFLOW BACKPROPAGATION OBJECTS (BACKWARDPASS)
logits = build_network(x,weights,biases,keep_prob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,labels = y))
#CHOSE AN OPTIMIZER
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(loss=loss)
predicted_labels = tf.equal(tf.argmax(logits,1),tf.argmax(y,1))
#EVALUATION PARAMETERS
acc = tf.reduce_mean(tf.cast(predicted_labels,tf.float32))
#NOW INITIALIZE ALL TF VARIABLES
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=10)
#NOW START THE SESSION AND EXECUTE THE GRAPH
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
save_path = saver.save(sess, os.curdir + "checkpoints/MNIST_TEST.ckpt")
for j in range(num_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x:batch_x,y:batch_y,keep_prob:dropout})
losses,accs = sess.run([loss,acc],feed_dict={x:batch_x,y:batch_y,keep_prob:1.})
if epochs % display_step == 0:
print("EPOCH:",'%04d' % (i+1),
"loss =", "{:.9f}".format(losses),
"acc =", "{:.5f}".format(accs))
print("TRAINING COMPLETED")
#START PREDICTIONS
predicted_label = sess.run(logits,feed_dict={x:mnist.test.images[:256],keep_prob:1.})
test_classes = np.argmax(predicted_label,1)
print("TEST ACCURACY:",sess.run(acc,feed_dict={x:mnist.test.images[:256], y:mnist.test.labels[:256],keep_prob:1.}))
f,a = plt.subplots(1,10,figsize = (10,2))
for i in range(10):
a[i].imshow(np.reshape(mnist.test.images[i],(28,28)))
print( test_classes[i])
print("TOTAL EXAMPLE FINNISHED")
freeze_graph(os.curdir + "checkpoints" + os.sep, logits.name)
graph = load_graph(os.curdir + os.sep + "checkpoints" + os.sep + "frozen_model.pb")
with tf.Session(graph) as sess:
sess.run(init)
predicted_label = sess.run(logits, feed_dict={x: mnist.test.images[:256], keep_prob: 1.})
print(predicted_label)
if anybody has the same problem here is a description how i solved it.
Saving and Loading the data:
First of all note that I have now a different pipeline. First of all I save the session in a saver (ckpt files). Afterwards I construct a metagaph (graph.pb). This graph is then transfered into a frozen graph (frozen.pb). To load the frozen graph I use the load_frozen_graph_from_session method. Inside that method I also test a forward pass through my network.
Running an inference on the loaded graph:
First I name my tensors x (name = "input") this will result in a tensorname ("input:0")
so when you try to fill this placeholder in the new session you need predicted_label = sess.run("output:0", feed_dict={"input:0":mnist.test.images[:256], "keep_prob:0": 1.})
The output is the logit and not the prediction inside my network. This is because if you run the session it will run until it hits the variable you want to fetch. would I take the prediction I need also the placeholder for my y (name=label).
Here is the full code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
import os
import time
import tensorflow as tf
import os
import argparse
from tensorflow.python.platform import gfile
from tensorflow.python.framework.graph_util import convert_variables_to_constants
#METHODS I WANT TO TEST
def freeze_graph_from_Session(sess,saver):
# convert_variables_to_constants(sess, input_graph_def, output_node_names, variable_names_whitelist=None)
save_graph(sess,saver)
with gfile.FastGFile("./tmp/" + "graph.pb", 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
frozen_graph_def = convert_variables_to_constants(sess, graph_def, ["output"])
with tf.gfile.GFile("./tmp/" + "frozen.pb", "wb") as f:
f.write(frozen_graph_def.SerializeToString())
def save_graph(sess, saver):
saver.save(sess, "./tmp/model", write_meta_graph=True, global_step=1)
with open("./tmp/" + "graph.pb", 'wb') as f:
f.write(sess.graph_def.SerializeToString())
#sess.close()
def load_frozen_graph_from_session():
filename = "./tmp/" + "frozen.pb"
print("LOADING GRAPH")
with tf.gfile.GFile(filename, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
print("OPEN GRAPH")
with tf.Graph().as_default() as graph:
print("DEFINE INPUT")
new_input = tf.placeholder(tf.float32, [None, 28 * 28], name="new_input")
print("DEFINE INPUT MAP")
tf.import_graph_def(
graph_def,
# usually, during training you use queues, but at inference time use placeholders
# this turns into "input
input_map={"input:0": new_input},
return_elements=None,
# if input_map is not None, needs a name
name="bla",
op_dict=None,
producer_op_list=None
)
checkpoint_path = tf.train.latest_checkpoint("./tmp/")
with tf.Session(graph=graph) as sess:
saver = tf.train.import_meta_graph(checkpoint_path + ".meta", import_scope=None)
saver.restore(sess, checkpoint_path)
print("TRY FORWARD RUN THROUGH LOADED GRAPH")
predicted_label = sess.run("output:0", feed_dict={"input:0":mnist.test.images[:256], "keep_prob:0": 1.})
print("output", predicted_label)
f, a = plt.subplots(1, 10, figsize=(10, 2))
test_classes = np.argmax(predicted_label, 1)
for i in range(10):
a[i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
print(test_classes[i])
print ("output:", test_classes)
#TAKE THE CHECKPOINT FILE AND DELETE ALL NOTES THAT ARE NOT USEFUL
def freeze_graph(checkpoint_directory,output_node_names):
#checkpoint = tf.train.get_checkpoint_state(checkpoint_directory)
print(checkpoint_directory)
checkpoint = tf.train.get_checkpoint_state(checkpoint_directory)
input_checkpoint = checkpoint.model_checkpoint_path
absolute_model_dir = str(os.sep).join(input_checkpoint.split(os.sep)[:-1])
output_graph = absolute_model_dir + "/frozen_model.pb"
clear_devices = True
with tf.Session(graph = tf.Graph()) as sess:
#import the metagraph in default graph
saver = tf.train.import_meta_graph(input_checkpoint + '.meta',clear_devices=clear_devices)
#restore the weights
saver.restore(sess,input_checkpoint)
#wrap variables to constants
[print(n.name) for n in tf.get_default_graph().as_graph_def().node]
output_graph_def = tf.graph_util.convert_variables_to_constants(sess, tf.get_default_graph().as_graph_def(),output_node_names.split(","))
with tf.gfile.GFile(output_graph, "wb") as f:
f.write(output_graph_def.SerializeToString())
print("%d ops in the final graph." %len(output_graph_def.node))
return output_graph_def
#HERE IS THE METHOD THAT ALLOWS ME TO LOAD MY FROZEN GRAPH AS GRAPH
def load_graph(frozen_graph_filename):
with tf.gfile.GFile(frozen_graph_filename,"rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name = "prefix")
return graph
#get the data
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)
print(mnist.test.labels[:256])
print("load_freeze_graph_from_session: STARTED")
load_frozen_graph_from_session()
print("load_freeze_graph_from_session: ENDED")
exit()
#NETWORK PARAMETERS
learning_rate = 0.01
dropout = 0.75
display_step = 1
filter_height = 5
filter_width = 5
depth_in = 1
depth_out1 = 64
depth_out2 = 128
#PARAMETERS OF THE DATASET
input_height = 28
input_width = 28
n_classes = 10
#TRAINING PARAMETERS
epochs = 1
batch_size = 256
num_batches = int(mnist.train.num_examples/batch_size)
x = tf.placeholder(tf.float32,[None,28*28],name="input")
y = tf.placeholder(tf.float32,[None,n_classes],name = "label")
keep_prob = tf.placeholder(tf.float32,name = "keep_prob")
weights = {'wc1': tf.Variable(tf.random_normal([filter_height,filter_width,depth_in,depth_out1])),
'wc2': tf.Variable(tf.random_normal([filter_height, filter_width, depth_out1, depth_out2])),
'wd1': tf.Variable(tf.random_normal([int(input_height/4)*int(input_height/4)*depth_out2,1024])),
'out': tf.Variable(tf.random_normal([1024,n_classes]))}
biases = {'bc1': tf.Variable(tf.random_normal([depth_out1])),
'bc2': tf.Variable(tf.random_normal([depth_out2])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))}
#DEFINE YOUR NEURAL NETWORKS LAYER OPERATIONS
def ops_conv2d(x,W,b,strides = 1, add_bias = True, activation = tf.nn.relu, use_activation = True):
x = tf.nn.conv2d(x,W,strides = [1,strides,strides,1],padding = 'SAME')
x = tf.nn.bias_add(x,b)
if use_activation:
return activation(x)
else:
return x
def ops_maxpool2d(x,stride=2):
return tf.nn.max_pool(x,ksize=[1,stride,stride,1],strides = [1,stride,stride,1], padding = 'SAME' )
def ops_dropout(input_fully_connected,dropout):
return tf.nn.dropout(input_fully_connected,dropout)
def ops_fullyconnected(input, activation = tf.nn.relu, use_activation = True):
fc = tf.reshape(input,[-1,weights['wd1'].get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc,weights['wd1']),biases['bd1'])
if use_activation:
return activation(fc)
else:
return fc
#DEFINE NETWORK ARCHTEKTURE (FORWARDPASS)
def build_network(x,weights,biases,dropout):
x = tf.reshape(x,shape=(-1,28,28,1))
conv_layer_1 = ops_conv2d(x,weights['wc1'],biases['bc1'],activation=tf.nn.relu, use_activation=True)
conv_layer_1 = ops_maxpool2d(conv_layer_1,2)
conv_layer_2 = ops_conv2d(conv_layer_1,weights['wc2'],biases['bc2'],activation=tf.nn.relu, use_activation=True)
conv_layer_2 = ops_maxpool2d(conv_layer_2,2)
fc1 = ops_fullyconnected(conv_layer_2, activation=tf.nn.relu, use_activation=True)
fc1 = ops_dropout(fc1,dropout)
logits = tf.add(tf.matmul(fc1,weights['out']),biases['out'],name = "output")
return logits
#DEFINE TENSORFLOW BACKPROPAGATION OBJECTS (BACKWARDPASS)
logits = build_network(x,weights,biases,keep_prob)
#freeze_graph(os.curdir + "checkpoints" + os.sep, logits.name)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,labels = y))
#CHOSE AN OPTIMIZER
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(loss=loss)
predicted_labels = tf.equal(tf.argmax(logits,1),tf.argmax(y,1))
#EVALUATION PARAMETERS
acc = tf.reduce_mean(tf.cast(predicted_labels,tf.float32))
#NOW INITIALIZE ALL TF VARIABLES
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=10)
#NOW START THE SESSION AND EXECUTE THE GRAPH
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
save_path = saver.save(sess, os.curdir + "checkpoints/MNIST_TEST.ckpt")
for j in range(num_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x:batch_x,y:batch_y,keep_prob:dropout})
losses,accs = sess.run([loss,acc],feed_dict={x:batch_x,y:batch_y,keep_prob:1.})
if epochs % display_step == 0:
print("EPOCH:",'%04d' % (i+1),
"loss =", "{:.9f}".format(losses),
"acc =", "{:.5f}".format(accs))
print("TRAINING COMPLETED")
#START PREDICTIONS
predicted_label = sess.run(logits,feed_dict={x:mnist.test.images[:256],keep_prob:1.})
test_classes = np.argmax(predicted_label,1)
print("TEST ACCURACY:",sess.run(acc,feed_dict={x:mnist.test.images[:256], y:mnist.test.labels[:256],keep_prob:1.}))
f,a = plt.subplots(1,10,figsize = (10,2))
for i in range(10):
a[i].imshow(np.reshape(mnist.test.images[i],(28,28)))
print( test_classes[i])
print("TOTAL EXAMPLE FINNISHED")
#freeze_graph(os.curdir + "checkpoints"+os.sep,logits)
print("freeze_graph_from_session: STARTED")
freeze_graph_from_Session(sess,saver)
print("freeze_graph_from_session: ENDED")
print("load_freeze_graph_from_session: STARTED")
load_frozen_graph_from_session()
print("load_freeze_graph_from_session: ENDED")
#with tf.Session() as sess:
#
# sess.run(init)
# graph = load_graph(os.curdir + os.sep + "checkpoints" + os.sep + "frozen_model.pb")
# predicted_label = sess.run(logits, feed_dict={x: mnist.test.images[:256], keep_prob: 1.})
# print(predicted_label)
Thanks goes out to my self. :)
I'm pretty sure I'm missing something about how tensorflow works because my solution doesn't make any sense.
I'm trying to train a neural network (from scratch, without using Estimators or other abstractions), save it, and load a simplified version of it for inference.
The following code trains but gives me the error: FailedPreconditionError (see above for traceback): Attempting to use uninitialized value hidden0/biases/Variable
[[Node: hidden0/biases/Variable/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](hidden0/biases/Variable)]]. If I add the commented line - if I recreate the saver obect that I'm not going to use nor return - the code works just fine.
Why do I need to create a (useless) saver object in order to restore the saved weights?
import tensorflow as tf
import numpy as np
def add_fc_layer(input_tensor, input_dimensions, output_dimensions, layer_name, activation=None):
with tf.variable_scope(layer_name):
with tf.variable_scope('weights'):
weights = tf.Variable(tf.truncated_normal([input_dimensions, output_dimensions]))
with tf.variable_scope('biases'):
biases = tf.Variable(tf.zeros([output_dimensions]))
with tf.variable_scope('Wx_plus_b'):
preactivate = tf.matmul(input_tensor, weights) + biases
if activation is None:
return preactivate
with tf.variable_scope('activation'):
activations = activation(preactivate)
return activations
def make_network(model_phase):
if model_phase not in {"train", "test"}:
raise ValueError("invalid type")
hidden0_units = 25
hidden1_units = 15
hidden2_units = 10
input_size = 10
output_size = 4
with tf.variable_scope('InputVector'):
inputs = tf.placeholder(shape=[1, input_size], dtype=tf.float32)
hidden0_out = add_fc_layer(inputs, input_size, hidden0_units, "hidden0", activation=tf.nn.sigmoid)
hidden1_out = add_fc_layer(hidden0_out, hidden0_units, hidden1_units, "hidden1", activation=tf.nn.sigmoid)
hidden2_out = add_fc_layer(hidden1_out, hidden1_units, hidden2_units, "hidden2", activation=tf.nn.sigmoid)
out = add_fc_layer(hidden2_out, hidden2_units, output_size, "regression")
if model_phase == "test":
# UNCOMMENTIN THIS LINE MAKES THE SCRIPT WORK
# saver = tf.train.Saver(var_list=tf.trainable_variables())
return inputs, out
saver = tf.train.Saver(var_list=tf.trainable_variables())
with tf.variable_scope('training'):
with tf.variable_scope('groundTruth'):
ground_truth = tf.placeholder(shape=[1, output_size], dtype=tf.float32)
with tf.variable_scope('loss'):
loss = tf.reduce_sum(tf.square(ground_truth - out))
tf.summary.scalar('loss', loss)
with tf.variable_scope('optimizer'):
trainer = tf.train.AdamOptimizer(learning_rate=0.001)
with tf.variable_scope('gradient'):
updateModel = trainer.minimize(loss)
with tf.variable_scope('predict'):
predict = tf.random_shuffle(tf.boolean_mask(out, tf.equal(out, tf.reduce_max(out, axis=None))))[0]
writer = tf.summary.FileWriter('/tmp/test', tf.get_default_graph())
return inputs, out, ground_truth, updateModel, writer, saver
train_graph = tf.Graph()
with tf.Session(graph=train_graph) as sess:
tf.set_random_seed(42)
inputs, out, ground_truth, updateModel, writer, saver = make_network(model_phase='train')
init = tf.initialize_all_variables()
sess.run(init)
print('\nLearning...')
for _ in range(10):
sess.run([updateModel], feed_dict={inputs:np.arange(10)+np.random.random((1,10)), ground_truth:np.arange(4).reshape(1, 4)})
saver.save(sess,'./tensorflowModel.ckpt')
new_graph = tf.Graph()
with tf.Session(graph=new_graph) as sess:
inputs, out = make_network(model_phase='test')
saver = tf.train.import_meta_graph('./tensorflowModel.ckpt.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
# evaluation
print('\nEvaluation...')
for _ in range(10):
_ = sess.run(out, feed_dict={inputs:np.arange(10).reshape(1,10)})
I don't know why creating an unused Saver makes the problem go away, but the code betrays a misunderstanding.
When you are restoring, you are creating the model graph twice. First, you call make_network() which creates the computation graph and variables. You then also call import_meta_graph which also creates a graph and variables. You should create a saver with simple saver = tf.train.Saver() instead of saver = tf.train.import_meta_graph('./tensorflowModel.ckpt.meta')
I'm trying to extend the example https://www.tensorflow.org/deploy/distributed outlined here but I'm having trouble saving the model. I'm running this in docker container available at gcr.io/tensorflow/tensorflow:1.5.0-gpu-py3. I started two processes one for 'ps' and one for 'worker' and the ps process is simply this code:
import tensorflow as tf
def main(_):
cluster = tf.train.ClusterSpec({"ps":["localhost:2222"],"worker":["localhost:2223"]})
server = tf.train.Server(cluster, job_name="ps", task_index=0)
server.join()
if __name__ == "__main__":
tf.app.run()
The worker code is the following and is based on the mnist examples and the distributed article above:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
data_dir = "/data"
checkpoint_dir = "/tmp/train_logs"
def main(_):
cluster = tf.train.ClusterSpec({"ps":["localhost:2222"],"worker":["localhost:2223"]})
server = tf.train.Server(cluster, job_name="worker", task_index=0)
mnist = input_data.read_data_sets(data_dir, one_hot=True)
with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:0", cluster=cluster)):
x = tf.placeholder(tf.float32, [None,784], name="x_input")
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.placeholder(tf.float32, [None,10])
model = tf.matmul(x, W) + b
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=model))
global_step = tf.train.get_or_create_global_step()
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(cost, global_step=global_step)
prediction = tf.equal(tf.argmax(model,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
hooks = [tf.train.StopAtStepHook(last_step=101)]
with tf.train.MonitoredTrainingSession(master=server.target, is_chief=True, checkpoint_dir=checkpoint_dir, hooks=hooks) as sess:
while not sess.should_stop():
batch_xs, batch_ys = mnist.train.next_batch(1000)
sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys})
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
#saver = tf.train.Saver()
saver = tf.train.import_meta_graph(latest_checkpoint+".meta", clear_devices=True)
with tf.Session() as sess:
saver.restore(sess,latest_checkpoint) # "/tmp/train_logs/model.ckpt"
acc = sess.run(accuracy, feed_dict={x: mnist.test.images,y: mnist.test.labels});
print("Test accuracy = "+"{:5f}".format(acc))
if __name__ == "__main__":
tf.app.run()
The examples I've found all seem to end without showing how to use the model. The above code fails on the saver.restore() line with the following error:
InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'save/RestoreV2_2':
Operation was explicitly assigned to /job:ps/task:0/device:CPU:0
but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:GPU:0 ].
Make sure the device specification refers to a valid device.
Also, as shown above I tried both saver = tf.train.Saver() and saver = tf.train.import_meta_graph(latest_checkpoint+".meta", clear_devices=True) with no success. Same error is shown in either case.
I don't really understand the with tf.device(...): statement. In one iteration I commented out this line (and unindented the statements below it) and the code ran without errors. But I think this is not correct and would like to understand the correct way for this to work.
I have the following code which is working (no errors). My question is just am I restoring the model correctly? Especially that I cannot see any output for the statement print(v_).
So, I'm trying to know if I'm doing the following correct:
Restoring the model
Using that restored model
import tensorflow as tf
data, labels = cifar_tools.read_data('C:\\Users\\abc\\Desktop\\Testing')
x = tf.placeholder(tf.float32, [None, 150 * 150])
y = tf.placeholder(tf.float32, [None, 2])
w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
w2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
w3 = tf.Variable(tf.random_normal([38*38*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
w_out = tf.Variable(tf.random_normal([1024, 2]))
b_out = tf.Variable(tf.random_normal([2]))
def conv_layer(x,w,b):
conv = tf.nn.conv2d(x,w,strides=[1,1,1,1], padding = 'SAME')
conv_with_b = tf.nn.bias_add(conv,b)
conv_out = tf.nn.relu(conv_with_b)
return conv_out
def maxpool_layer(conv,k=2):
return tf.nn.max_pool(conv, ksize=[1,k,k,1], strides=[1,k,k,1], padding='SAME')
def model():
x_reshaped = tf.reshape(x, shape=[-1, 150, 150, 1])
conv_out1 = conv_layer(x_reshaped, w1, b1)
maxpool_out1 = maxpool_layer(conv_out1)
norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
conv_out2 = conv_layer(norm1, w2, b2)
norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
maxpool_out2 = maxpool_layer(norm2)
maxpool_reshaped = tf.reshape(maxpool_out2, [-1, w3.get_shape().as_list()[0]])
local = tf.add(tf.matmul(maxpool_reshaped, w3), b3)
local_out = tf.nn.relu(local)
out = tf.add(tf.matmul(local_out, w_out), b_out)
return out
model_op = model()
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
onehot_labels = tf.one_hot(labels, 2, on_value=1.,off_value=0.,axis=-1)
onehot_vals = sess.run(onehot_labels)
batch_size = len(data)
# Restore model
saver = tf.train.import_meta_graph('C:\\Users\\abc\\Desktop\\\Testing\\mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
all_vars = tf.get_collection('vars')
for v in all_vars:
v_ = sess.run(v)
print(v_)
for j in range(0, 5):
print('EPOCH', j)
for i in range(0, len(data), batch_size):
batch_data = data[i:i+batch_size, :]
batch_onehot_vals = onehot_vals[i:i+batch_size, :]
_, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
print(i, accuracy_val)
print('DONE WITH EPOCH')
EDIT 1
Would restoring this way work?
saver = tf.train.Saver()
saver = tf.train.import_meta_graph('C:\\Users\\Abder-Rahman\\Desktop\\\Testing\\mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
print('model restored'
EDIT 2
This is how I save my model:
#Save model
saver = tf.train.Saver()
saved_path = saver.save(sess, 'C:\\Users\\abc\\Desktop\\\Testing\\mymodel')
print("The model is in this file: ", saved_path)
Thanks.
Your saver code was correct.
While variables must add to collections before retrieving the collection.
tf.add_to_collection("vars", w1)
tf.add_to_collection("vars", b1)
...
Then
all_vars = tf.get_collection('vars')
Usually I restore a TensorFlow model like this:
with tf.Session(graph=graph) as session:
if os.path.exists(save_path):
# Restore variables from disk.
saver.restore(session, save_path)
else:
tf.initialize_all_variables().run()
print('Initialized')
# do the work
# ...
saver.save(session, save_path) # save the model
example code can be fetch here.
I need to know more about how you save your model, it seems that your model was restored before save, and your model didn't turn to a tf.graph and connect with the session.
I assume you have read my blog here , the mechanism for model saving is quite straightforward, when you load a model, the parameter values and relations (which are probably all you care about) are matched by variable name.
For example
#simplesave.py
import tensorflow as tf
with tf.Graph().as_default() as g:#yes you have to have a graph first
with tf.Session() as sess:
b = tf.Variable(1.0, name="bias")
saver = tf.train.Saver()
saver.save(sess,'model') #b should be saved in the model file
#simpleload.py
import tensorflow as tf
with tf.Graph().as_default() as g:
with tf.Session() as sess:
#still need the definition, again
b = tf.Variable(0.0, name="bias")
saver = tf.train.Saver() #now it is satisfied...
saver.restore(sess,model)
What confused me here is, you used a function all_vars = tf.get_collection('vars'), but you have never defined a scope called "vars". You probably should test using tf.all_variables()first.