file doesn't exist after saving with tensorflow - python

I saved my model using tf.train.Saver('./model.ckpt'), when I went to the local directory, I have found files named model.ckpt.index, model.ckpt.meta and model.ckpt.data-00000-of-00001, but not model.ckpt. As a consequence, I wasn't able to restore the model. Anyone know if i did anything wrong? Here's my code
class autoencoder(object):
def __init__(self, network_architecture, learning_rate=0.001, regularization_constant=1):
self.network_arch = network_architecture
self.X = tf.placeholder(tf.float32, [None, network_architecture['n_input']])
self.c = tf.Variable(regularization_constant, dtype=tf.float32)
self._initialize_weights()
self._build_graph()
self.cost, self.optimizer = self._cost_optimizer(learning_rate)
init = tf.global_variables_initializer()
self.saver = tf.train.Saver()
self.sess = tf.Session()
self.sess.run(init)
...
def save(self, path):
self.saver.save(self.sess, path)
def load(self, path):
self.saver.restore(self.sess, path)

The *.meta file contains your MetaGraph, and you can import it with:
saver = tf.train.import_meta_graph("model.ckpt.meta")
Then you can restore the graph's variables.
saver.restore(sess, "model.ckpt")
You can save additional model data to the metagraph by calling tf.train.export_meta_graph.
Alternatively, you can store your application's model using a SavedModel, which can contain multiple MetaGraphs. The documentation is here.

Related

Can't export to SavedModel in TensorFlow

I need to load a trained model in Tensorflow and save it to SavedModel format for use in Tensorflow Serving. I am following this tutorial. I am loading the model like this:
self.graph = tf.Graph()
def _graph_hourglass(self, inputs):
### All layers are here ###
...
def generate_model(self):
with tf.device(self.cpu):
with tf.name_scope('inputs'):
self.img = tf.compat.v1.placeholder(dtype=tf.float32,
shape=(None, 256, 256, 3), name='input_img')
self.output = self._graph_hourglass(self.img)
def model_init(self):
with self.graph.as_default():
self.generate_model()
def restore(self, load='path'):
with tf.name_scope('Session'):
with tf.device(self.cpu):
self.Session = tf.compat.v1.Session()
saver = tf.compat.v1.train.Saver()
saver.restore(self.Session, load)
def load_model(self, load = 'path'):
with self.graph.as_default():
self.restore(load)
...
self.model_init()
self.load_model(load='path/to/model')
Then I use SavedModelBuilder class from the tutorial:
export_path = 'new/path'
print('Exporting trained model to', export_path)
builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
tensor_info_x = tf.compat.v1.saved_model.utils.build_tensor_info(self.img)
tensor_info_y = tf.compat.v1.saved_model.utils.build_tensor_info(self.output)
prediction_signature = (
tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
inputs={'image': tensor_info_x},
outputs={'joints': tensor_info_y},
method_name=tf.compat.v1.saved_model.signature_constants.PREDICT_METHOD_NAME))
builder.add_meta_graph_and_variables(
self.Session, [tf.compat.v1.saved_model.tag_constants.SERVING],
signature_def_map={
tf.compat.v1.saved_model.signature_constants
.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
prediction_signature,
},
strip_default_attrs=True)
builder.save()
print('Done exporting!')
Model is saved without errors, I see "Done exporting!" in console but model .pb file is empty! It is only 252 Bytes. What can be the problem here? Will appreciate any help.

tensorflow: save model and load model

Currently trying to make this repo works.
I'm trying to save the trained model in the local machine so can be applied later. I read in tensorflow's doc, seems pretty intuitive to save the model, by calling tf.save_model.save(object). But I'm not sure how to apply.
Original code is here: model.py
Following is my changes:
import tensorflow as tf
class ICON(tf.Module): # make it a tensorflow modul
def __init__(self, config, embeddingMatrix, session=None):
def _build_inputs(self):
def _build_vars(self):
def _convolution(self, input_to_conv):
def _inference(self):
def batch_fit(self, queries, ownHistory, otherHistory, labels):
feed_dict = {self._input_queries: queries, self._own_histories: ownHistory, self._other_histories: otherHistory,
self._labels: labels}
loss, _ = self._sess.run([self.loss_op, self.train_op], feed_dict=feed_dict)
return loss
def predict(self, queries, ownHistory, otherHistory, ):
feed_dict = {self._input_queries: queries, self._own_histories: ownHistory, self._other_histories: otherHistory}
return self._sess.run(self.predict_op, feed_dict=feed_dict)
def save(self): # attempt to save the model
tf.saved_model.save(
self, './output/model')
The code above produces ValueError as following:
ValueError: Tensor("ICON/CNN/embedding_matrix:0", shape=(16832, 300), dtype=float32_ref) must be from the same graph as Tensor("saver_filename:0", shape=(), dtype=string).
I believe you can use the tf.train.Saver class for this
def save(self): # attempt to save the model
saver = tf.train.Saver()
saver.save(self._sess, './output/model')
You can then restore the model this way
saver = tf.train.import_meta_graph('./output/model.meta')
with tf.Session() as sess:
saver.restore(sess, tf.train.latest_checkpoint('./output'))
You might also find this tutorial helpful in understanding this more.
Edit: if you want to use SavedModel
def save(self):
inputs = {'input_queries': self._input_queries, 'own_histories': self._own_histories, 'other_histories': self._other_histories}
outputs = {'output': self.predict_op}
tf.saved_model.simple_save(self._sess, './output/model', inputs, outputs)
You can then use tf.contrib.predictor.from_saved_model to load and serve using the SavedModel
from tensorflow.contrib.predictor import from_saved_model
predictor = from_saved_model('./output/model')
predictions = predictor({'input_queries': input_queries, 'own_histories': own_histories, 'other_histories': other_histories})

Tensorflow - can't initialize saved variables unless I recreate the "saver" object. Why?

I'm pretty sure I'm missing something about how tensorflow works because my solution doesn't make any sense.
I'm trying to train a neural network (from scratch, without using Estimators or other abstractions), save it, and load a simplified version of it for inference.
The following code trains but gives me the error: FailedPreconditionError (see above for traceback): Attempting to use uninitialized value hidden0/biases/Variable
[[Node: hidden0/biases/Variable/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](hidden0/biases/Variable)]]. If I add the commented line - if I recreate the saver obect that I'm not going to use nor return - the code works just fine.
Why do I need to create a (useless) saver object in order to restore the saved weights?
import tensorflow as tf
import numpy as np
def add_fc_layer(input_tensor, input_dimensions, output_dimensions, layer_name, activation=None):
with tf.variable_scope(layer_name):
with tf.variable_scope('weights'):
weights = tf.Variable(tf.truncated_normal([input_dimensions, output_dimensions]))
with tf.variable_scope('biases'):
biases = tf.Variable(tf.zeros([output_dimensions]))
with tf.variable_scope('Wx_plus_b'):
preactivate = tf.matmul(input_tensor, weights) + biases
if activation is None:
return preactivate
with tf.variable_scope('activation'):
activations = activation(preactivate)
return activations
def make_network(model_phase):
if model_phase not in {"train", "test"}:
raise ValueError("invalid type")
hidden0_units = 25
hidden1_units = 15
hidden2_units = 10
input_size = 10
output_size = 4
with tf.variable_scope('InputVector'):
inputs = tf.placeholder(shape=[1, input_size], dtype=tf.float32)
hidden0_out = add_fc_layer(inputs, input_size, hidden0_units, "hidden0", activation=tf.nn.sigmoid)
hidden1_out = add_fc_layer(hidden0_out, hidden0_units, hidden1_units, "hidden1", activation=tf.nn.sigmoid)
hidden2_out = add_fc_layer(hidden1_out, hidden1_units, hidden2_units, "hidden2", activation=tf.nn.sigmoid)
out = add_fc_layer(hidden2_out, hidden2_units, output_size, "regression")
if model_phase == "test":
# UNCOMMENTIN THIS LINE MAKES THE SCRIPT WORK
# saver = tf.train.Saver(var_list=tf.trainable_variables())
return inputs, out
saver = tf.train.Saver(var_list=tf.trainable_variables())
with tf.variable_scope('training'):
with tf.variable_scope('groundTruth'):
ground_truth = tf.placeholder(shape=[1, output_size], dtype=tf.float32)
with tf.variable_scope('loss'):
loss = tf.reduce_sum(tf.square(ground_truth - out))
tf.summary.scalar('loss', loss)
with tf.variable_scope('optimizer'):
trainer = tf.train.AdamOptimizer(learning_rate=0.001)
with tf.variable_scope('gradient'):
updateModel = trainer.minimize(loss)
with tf.variable_scope('predict'):
predict = tf.random_shuffle(tf.boolean_mask(out, tf.equal(out, tf.reduce_max(out, axis=None))))[0]
writer = tf.summary.FileWriter('/tmp/test', tf.get_default_graph())
return inputs, out, ground_truth, updateModel, writer, saver
train_graph = tf.Graph()
with tf.Session(graph=train_graph) as sess:
tf.set_random_seed(42)
inputs, out, ground_truth, updateModel, writer, saver = make_network(model_phase='train')
init = tf.initialize_all_variables()
sess.run(init)
print('\nLearning...')
for _ in range(10):
sess.run([updateModel], feed_dict={inputs:np.arange(10)+np.random.random((1,10)), ground_truth:np.arange(4).reshape(1, 4)})
saver.save(sess,'./tensorflowModel.ckpt')
new_graph = tf.Graph()
with tf.Session(graph=new_graph) as sess:
inputs, out = make_network(model_phase='test')
saver = tf.train.import_meta_graph('./tensorflowModel.ckpt.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
# evaluation
print('\nEvaluation...')
for _ in range(10):
_ = sess.run(out, feed_dict={inputs:np.arange(10).reshape(1,10)})
I don't know why creating an unused Saver makes the problem go away, but the code betrays a misunderstanding.
When you are restoring, you are creating the model graph twice. First, you call make_network() which creates the computation graph and variables. You then also call import_meta_graph which also creates a graph and variables. You should create a saver with simple saver = tf.train.Saver() instead of saver = tf.train.import_meta_graph('./tensorflowModel.ckpt.meta')

How does tf.train.Saver work exactly?

I am a bit confused by how tf.train.Saver() works. I have the following code to save only trainable variables:
import tensorflow as tf
# Create some variables.
v1 = tf.get_variable("v1", shape=[3], initializer = tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer = tf.zeros_initializer)
inc_v1 = v1.assign(v1+1)
dec_v2 = v2.assign(v2-1)
# Add an op to initialize the variables.
init_op = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver(tf.trainable_variables())
print([x.name for x in tf.trainable_variables()])
# Later, launch the model, initialize the variables, do some work, and save the
# variables to disk.
with tf.Session() as sess:
sess.run(init_op)
# Do some work with the model.
inc_v1.op.run()
dec_v2.op.run()
# Save the variables to disk.
save_path = saver.save(sess, "./model.ckpt")
print("Model saved in file: %s" % save_path)
And the following code just to see them:
import tensorflow as tf
sess = tf.Session()
saver = tf.train.import_meta_graph('model.ckpt.meta')
saver.restore(sess,'model.ckpt')
print([v.name for v in tf.get_default_graph().as_graph_def().node])
The first code outputs ['v1:0', 'v2:0'], as expected. I am expecting the second code to produce the same result, but i see this:
['v1/Initializer/zeros', 'v1', 'v1/Assign', 'v1/read', 'v2/Initializer/zeros', 'v2', 'v2/Assign', 'v2/read', 'add/y', 'add', 'Assign', 'sub/y', 'sub', 'Assign_1', 'init', 'save/Const', 'save/SaveV2/tensor_names', 'save/SaveV2/shape_and_slices', 'save/SaveV2', 'save/control_dependency', 'save/RestoreV2/tensor_names', 'save/RestoreV2/shape_and_slices', 'save/RestoreV2', 'save/Assign', 'save/RestoreV2_1/tensor_names', 'save/RestoreV2_1/shape_and_slices', 'save/RestoreV2_1', 'save/Assign_1', 'save/restore_all']
I am not sure why tf saves all variables instead of the specifically mentioned two. How can I do that?
Try the following code from the tensorflow wiki
tf.reset_default_graph()
# Create some variables.
v1 = tf.get_variable("v1", shape=[3], name="v1")
v2 = tf.get_variable("v2", shape=[5], name="v2")
saver = tf.train.Saver(var_list=[v1, v2]) # list of TF variables that are to be restored
with tf.Session() as sess:
# Restore variables from disk.
saver.restore(sess, "./model.ckpt")
print("Model restored.")
# Check the values of the variables
print("v1 : %s" % v1.eval())
print("v2 : %s" % v2.eval())
I hope this helps!

std::system_error after restoring model in Tensorflow

I'm trying to implement a simple saver/restorer like so: (copied from Tensorflow website)
Saver:
import tensorflow as tf
# Create some variables.
v1 = tf.get_variable("v1", shape=[3], initializer = tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer = tf.zeros_initializer)
inc_v1 = v1.assign(v1+1)
dec_v2 = v2.assign(v2-1)
# Add an op to initialize the variables.
init_op = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Later, launch the model, initialize the variables, do some work, and save the
# variables to disk.
with tf.Session() as sess:
sess.run(init_op)
# Do some work with the model.
inc_v1.op.run()
dec_v2.op.run()
# Save the variables to disk.
save_path = saver.save(sess, "/tmp/model.ckpt")
print("Model saved in file: %s" % save_path)
Restorer:
import tensorflow as tf
tf.reset_default_graph()
# Create some variables.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Later, launch the model, use the saver to restore variables from disk, and
# do some work with the model.
with tf.Session() as sess:
# Restore variables from disk.
saver.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Check the values of the variables
print("v1 : %s" % v1.eval())
print("v2 : %s" % v2.eval())
It seems to save the model fine, but when restoring it gets stuck on the line saver.restore(sess, "/tmp/model.ckpt") and I end up with the error message:
terminate called after throwing an instance of 'std::system_error'
what(): Resource temporarily unavailable
I don't see how it can be a memory error as I am running on my work server which has 100s of GB of memory.
Python Version 3.5, Tensorflow version 1.2.1

Categories