I am working on a simple Tensorflow mnist recognition programme. The goal is to upload a jpg/png image that contains a hand-written number to it and get an anwser what number that was. My friend wrote some model and exports it with python script:
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import time
iters_num = 1000
display_step = 10
batch = 100
tf.set_random_seed(0)
mnist = read_data_sets("MNISTdata", one_hot=True, reshape=False, validation_size=0)
# placeholders definition
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y_ = tf.placeholder(tf.float32, [None, 10])
# probability of keeping a node during dropout = 1.0 at test time (no dropout) and 0.75 at training time(defined later on)
pkeep = tf.placeholder(tf.float32)
# layers with size (depth) definition
layer1 = 16
layer2 = 32
layer3 = 64
# fullyconnected layer (number of neurons)
full_layer4 = 512
# layers definitions
W1 = tf.Variable(tf.truncated_normal([10, 10, 1, layer1], stddev=0.1))
b1 = tf.Variable(tf.truncated_normal([layer1], stddev=0.1))
W2 = tf.Variable(tf.truncated_normal([6, 6, layer1, layer2], stddev=0.1))
b2 = tf.Variable(tf.truncated_normal([layer2], stddev=0.1))
W3 = tf.Variable(tf.truncated_normal([6, 6, layer2, layer3], stddev=0.1))
b3 = tf.Variable(tf.truncated_normal([layer3], stddev=0.1))
W4 = tf.Variable(tf.truncated_normal([7 * 7 * layer3, full_layer4], stddev=0.1))
b4 = tf.Variable(tf.truncated_normal([full_layer4], stddev=0.1))
# output softmax layer (10 labels (for 10 digits))
W5 = tf.Variable(tf.truncated_normal([full_layer4, 10], stddev=0.1))
b5 = tf.Variable(tf.truncated_normal([10], stddev=0.1))
XX = tf.reshape(X, [-1, 784])
# model definition
stride = 1 # output is 28x28 (no size changes)
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + b1)
k = 2 # max pool filter size
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME') + b2)
Y2 = tf.nn.max_pool(Y2, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME') + b3)
Y3 = tf.nn.max_pool(Y3, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * layer3])
Y4 = tf.nn.relu(tf.matmul(YY, W4) + b4)
Ylogits = tf.matmul(Y4, W5) + b5
Y = tf.nn.softmax(Ylogits)
# loss function -> cross entropy
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100
# accuracy of the trained model <0,1>
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# training step definition with Adam optimalization algorithm
learning_rate = 0.003
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
# matplotlib visualization
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
allbiases = tf.concat([tf.reshape(b1, [-1]), tf.reshape(b2, [-1]), tf.reshape(b3, [-1]), tf.reshape(b4, [-1]), tf.reshape(b5, [-1])], 0)
# initializing all variables(!!)
init = tf.global_variables_initializer()
# lists for training values
train_losses = list()
train_acc = list()
test_losses = list()
test_acc = list()
saver = tf.train.Saver()
time_start = time.clock()
export_dir = "D:/Optinav.Testowy/trunk/HandwriteRecognition/DigitsRecognitionCNN/23"
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
# launching computational graph
with tf.Session() as sess:
sess.run(init)
for i in range(iters_num + 1):
# training incoming on batches
batch_X, batch_Y = mnist.train.next_batch(batch)
if i % display_step == 0:
# compute training values for visualization of model steps
acc_trn, loss_trn, w, b = sess.run([accuracy, cross_entropy, allweights, allbiases], feed_dict={X: batch_X, Y_: batch_Y, pkeep: 1.0})
acc_tst, loss_tst = sess.run([accuracy, cross_entropy], feed_dict={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0})
print("Step#{} Train accuracy={} , Train loss={} Test accuracy={} , Test loss={}".format(i,acc_trn,loss_trn,acc_tst,loss_tst))
train_losses.append(loss_trn)
train_acc.append(acc_trn)
test_losses.append(loss_tst)
test_acc.append(acc_tst)
# the back-propagation training step (probability = 0.75)
sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y, pkeep: 0.75})
# save model
saver.save(sess, "D:/Optinav.Testowy/trunk/HandwriteRecognition/DigitsRecognitionCNN/model2.ckpt")
classification_inputs = tf.saved_model.utils.build_tensor_info(
X)
classification_outputs_classes = tf.saved_model.utils.build_tensor_info(
Y_)
classification_outputs_scores = tf.saved_model.utils.build_tensor_info(Y)
classification_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs={
tf.saved_model.signature_constants.CLASSIFY_INPUTS:
classification_inputs
},
outputs={
tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
classification_outputs_classes,
tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
classification_outputs_scores
},
method_name=tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME))
tensor_info_x = tf.saved_model.utils.build_tensor_info(X)
tensor_info_y = tf.saved_model.utils.build_tensor_info(Y)
prediction_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs={'images': tensor_info_x},
outputs={'scores': tensor_info_y},
method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
builder.add_meta_graph_and_variables(
sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
'predict_images':
prediction_signature,
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
classification_signature,
},
main_op=tf.tables_initializer(),
strip_default_attrs=True)
builder.save()
print('Done exporting!')
# calculates learning time [s]
time_stop = time.clock()
time_run = time_stop - time_start
print("Learning time: %s" % time_run)
Exported model is placed in Docker container that runs on Azure's Ubuntu.
With the help of TensorFlow's example (GitHub) I have written a client in python:
from __future__ import print_function
import grpc
import tensorflow as tf
import scipy.ndimage
import numpy
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
tf.app.flags.DEFINE_string('server', '', 'PredictionService host:port')
tf.app.flags.DEFINE_string('image_filename', '', 'Name of image to test')
tf.app.flags.DEFINE_string('work_dir', '/tmp', 'Working directory. ')
FLAGS = tf.app.flags.FLAGS
class _ResultObj(object):
def __init__(self):
self._number = 99
def get_number(self):
return self._number
def set_number(self, val):
self._number = val
def myfunc(a):
if a > 125:
return 255
elif a > 70:
return a
else:
return 0
def _create_rpc_callback(resultobj):
def _callback(result_future):
exception = result_future.exception()
if exception:
print(exception)
else:
response = numpy.array(
result_future.result().outputs['scores'].float_val)
prediction = numpy.argmax(response)
resultobj.set_number(prediction)
return _callback
def do_inference(hostport, image_filename):
vfunc = numpy.vectorize(myfunc)
test_data_set = vfunc(numpy.ndarray.flatten(scipy.ndimage.imread(image_filename, flatten=True)).astype(int))
test_data_set = numpy.reshape(test_data_set, [28, 28, 1])
channel = grpc.insecure_channel(hostport)
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
resultObj = _ResultObj()
request = predict_pb2.PredictRequest()
request.model_spec.name = 'ver2'
request.model_spec.signature_name = 'predict_images'
request.inputs['images'].CopyFrom(
tf.contrib.util.make_tensor_proto(test_data_set, shape=[1, test_data_set.size]))
result_future = stub.Predict.future(request, 5.0) # 5 seconds
result_future.add_done_callback(
_create_rpc_callback(resultObj))
return resultObj.get_number()
def main(_):
if not FLAGS.server:
print('please specify server host:port')
return
print(do_inference(FLAGS.server, FLAGS.image_filename))
if __name__ == '__main__':
tf.app.run()
The Problem
I am certain that my friend's model can recognize images. She told me to vectorize an image before adding it to a request, but it seems that I can't connect with the model properly. I get no error while using my script. The TensorFlow serving doesn't even mind when I write wrong model_spec.name. _callback is never used and I only get the initial value of my ResultObj object.
Any help with this problem would be appreciated.
Thank you.
Related
I want to classification the dataset of mnist in my way with TensorFlow.
First, converting the dataset into a TFRecords file.
Then, reading this file with tf.TFRecodsReader.
Finally,training the dataset.
Existing problems:The code doesn't have any syntax errors. But,I only get 10% accuracy on the test data.
train.py:
import tensorflow as tf
import Net
import os
import numpy as np
import datetime
import time
import tfrecords as rd
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 10000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "Model/"
MODEL_NAME = "model"
def train():
x = tf.placeholder(tf.float32, [
BATCH_SIZE,
Net.IMAGE_SIZE,
Net.IMAGE_SIZE,
Net.NUM_CHANNELS],
name='x-input')
y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
y = Net.inference(x, True, regularizer)
global_step = tf.Variable(0, trainable=False)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
learning_rate = tf.train.exponential_decay(
LEARNING_RATE_BASE,
global_step,
55000 / BATCH_SIZE, LEARNING_RATE_DECAY,
staircase=True)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
with tf.control_dependencies([train_step, variables_averages_op]):
train_op = tf.no_op(name='train')
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
img, one_hot = rd.read_and_decode("./tfrecords/train.tfrecords")
img_batch, label_batch = tf.train.shuffle_batch(tensors = [img, one_hot], batch_size = BATCH_SIZE, capacity = 10000 + 3 * 100, min_after_dequeue = 10000)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord = coord, sess = sess)
try:
steps = 1
while not coord.should_stop():
if steps > TRAINING_STEPS:
break
xs, ys = sess.run([img_batch, label_batch])
_, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
if steps % 1000 == 0:
print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
steps += 1
except tf.errors.OutOfRangeError:
print("Done training after reading all data")
finally:
coord.request_stop()
coord.join(threads)
sess.close()
def main(argv=None):
start_time = datetime.datetime.now()
print("start_time = "),
print(start_time)
rd.create_record("train")
train_start_time = datetime.datetime.now()
print("train_start_time = " ),
print( train_start_time)
train()
end_time = datetime.datetime.now()
print("end_time = " ),
print(end_time)
if __name__ == '__main__':
main()
Net.py:
import tensorflow as tf
INPUT_NODE = 784
OUTPUT_NODE = 10
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10
CONV1_DEEP = 32
CONV1_SIZE = 5
CONV2_DEEP = 64
CONV2_SIZE = 5
FC_SIZE = 512
def inference(input_tensor, train, regularizer):
with tf.variable_scope('layer1-conv1'):
conv1_weights = tf.get_variable(
"weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
with tf.name_scope("layer2-pool1"):
pool1 = tf.nn.max_pool(relu1, ksize = [1,2,2,1],strides=[1,2,2,1],padding="SAME")
with tf.variable_scope("layer3-conv2"):
conv2_weights = tf.get_variable(
"weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
with tf.name_scope("layer4-pool2"):
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
pool_shape = pool2.get_shape().as_list()
nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
reshaped = tf.reshape(pool2, [pool_shape[0], nodes])
with tf.variable_scope('layer5-fc1'):
fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
if train: fc1 = tf.nn.dropout(fc1, 0.5)
with tf.variable_scope('layer6-fc2'):
fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
logit = tf.matmul(fc1, fc2_weights) + fc2_biases
return logit
tfrecords.py:
import os
import tensorflow as tf
from PIL import Image
import Net
def create_record(op_type):
writer = tf.python_io.TFRecordWriter("./tfrecords/" + op_type + ".tfrecords")
f = open("./" + op_type + ".txt", 'r')
img_num = 0
for line in f.readlines():
img_num += 1
if img_num % 2000 == 0:
print("already read in %d images." % (img_num))
str_split = line.split()
img_path = "./" + str_split[0]
index = int(str_split[1])
img = Image.open(img_path)
img = img.resize((Net.IMAGE_SIZE, Net.IMAGE_SIZE))
img_raw = img.tobytes()
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
writer.write(example.SerializeToString())
writer.close()
def read_and_decode(filename):
filename_queue = tf.train.string_input_producer([filename], shuffle = True)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw' : tf.FixedLenFeature([], tf.string),
})
img = tf.decode_raw(features['img_raw'], tf.uint8)
img = tf.reshape(img, [Net.IMAGE_SIZE, Net.IMAGE_SIZE, Net.NUM_CHANNELS])
img = 1 - tf.cast(img, tf.float32) * (1. / 255)
label = tf.cast(features['label'], tf.int32)
one_hot = tf.one_hot(label, 10, dtype = tf.float32)
one_hot = tf.reshape(one_hot, [-1])
return img, one_hot
Eval.py:
import time
import math
import tensorflow as tf
import numpy as np
import Net
import Train
import tfrecords as rd
def evaluate():
with tf.Graph().as_default() as g:
x = tf.placeholder(tf.float32, [
10000,
Net.IMAGE_SIZE,
Net.IMAGE_SIZE,
Net.NUM_CHANNELS],
name='x-input')
y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')
#validate_feed = {x: mnist.test.images, y_: mnist.test.labels}
global_step = tf.Variable(0, trainable=False)
regularizer = tf.contrib.layers.l2_regularizer(Train.REGULARIZATION_RATE)
y = Net.inference(x, False, regularizer)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
variable_averages = tf.train.ExponentialMovingAverage(Train.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
for i in range(1):
img, one_hot = rd.read_and_decode("./tfrecords/test.tfrecords")
img_batch, label_batch = tf.train.shuffle_batch(tensors = [img, one_hot], batch_size = 10000, capacity = 10000 + 3 * 100, min_after_dequeue = 10000)
with tf.Session() as sess:
ckpt = tf.train.get_checkpoint_state(Train.MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
#the reason of this error!!!
#tf.global_variables_initializer().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord = coord, sess = sess)
try:
xs, ys = sess.run([img_batch, label_batch])
accuracy_score = sess.run(accuracy, feed_dict={x:xs, y_:ys})
print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
except tf.errors.OutOfRangeError:
print("Done testting after reading all data")
finally:
coord.request_stop()
coord.join(threads)
sess.close()
else:
print('No checkpoint file found')
return
def main(argv=None):
rd.create_record("test")
evaluate()
if __name__ == '__main__':
main()
Now, I have solved this problem.
My code and other flie: https://github.com/xmy7216/MNIST_classification.git
Linux: Red Hat Enterprise Linux Server release 7.2 (Maipo)
GPU: Tesla P4
TensorFlow: 1.3
Python:2.7.5
I find the reason. Because after I load the model using saver.restore, I initialize the variables again by tf.global_variables_initializer().run().
How stupid I am!
I have the following code, where I'm trying to restore the model at some point in the code, but seems that I'm getting some infinite loop (not sure), as the program would not return any output although seems to be running:
import tensorflow as tf
data, labels = cifar_tools.read_data('C:\\Users\\abc\\Desktop\\Testing')
x = tf.placeholder(tf.float32, [None, 150 * 150])
y = tf.placeholder(tf.float32, [None, 2])
w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
w2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
w3 = tf.Variable(tf.random_normal([38*38*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
w_out = tf.Variable(tf.random_normal([1024, 2]))
b_out = tf.Variable(tf.random_normal([2]))
def conv_layer(x,w,b):
conv = tf.nn.conv2d(x,w,strides=[1,1,1,1], padding = 'SAME')
conv_with_b = tf.nn.bias_add(conv,b)
conv_out = tf.nn.relu(conv_with_b)
return conv_out
def maxpool_layer(conv,k=2):
return tf.nn.max_pool(conv, ksize=[1,k,k,1], strides=[1,k,k,1], padding='SAME')
def model():
x_reshaped = tf.reshape(x, shape=[-1, 150, 150, 1])
conv_out1 = conv_layer(x_reshaped, w1, b1)
maxpool_out1 = maxpool_layer(conv_out1)
norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
conv_out2 = conv_layer(norm1, w2, b2)
norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
maxpool_out2 = maxpool_layer(norm2)
maxpool_reshaped = tf.reshape(maxpool_out2, [-1, w3.get_shape().as_list()[0]])
local = tf.add(tf.matmul(maxpool_reshaped, w3), b3)
local_out = tf.nn.relu(local)
out = tf.add(tf.matmul(local_out, w_out), b_out)
return out
model_op = model()
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
onehot_labels = tf.one_hot(labels, 2, on_value=1.,off_value=0.,axis=-1)
onehot_vals = sess.run(onehot_labels)
batch_size = len(data)
# Restore model
saver = tf.train.import_meta_graph('mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
all_vars = tf.get_collection('vars')
for v in all_vars:
v_ = sess.run(v)
print(v_)
for j in range(0, 5):
print('EPOCH', j)
for i in range(0, len(data), batch_size):
batch_data = data[i:i+batch_size, :]
batch_onehot_vals = onehot_vals[i:i+batch_size, :]
_, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
print(i, accuracy_val)
print('DONE WITH EPOCH')
What could be the issue? Am I restoring the model correct here?
Thanks.
It seems I had to list the whole path to the model as follows:
saver = tf.train.import_meta_graph('C:\\Users\\abc\\Desktop\\\Testing\\mymodel.meta')
The same mistake I made when saving the model, as shown here :-)
I started learning tensorflow one week ago and i am struggling a bit with the possibility of restoring a model.
I am constructing a class for a CNN (from stanford course code) where i want to have a run and a test function (first one to train the model, second one to make a prediction on another set of datas).
The function run is working correctly, mais i have a hard time restoring the model to make the prediction in function test (here i have a problem shape and i guess it comes from the way I get the operation).
import os
import tensorflow as tf
from time import time
class Cnn:
def __init__(self, batch_size=128, skip_step=10, epochs=1, dropout_ratio=0.75):
self.batch_size = batch_size
self.skip_step = skip_step
self.epochs = epochs
self.dropout_ratio = dropout_ratio
self.x = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
self.images = tf.reshape(self.x, shape=[-1, 28, 28, 1])
self.y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")
self.dropout = tf.placeholder(tf.float32, name="dropout")
self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
self.layers = [self.images]
self.n_layers = 0
def convolution(self, kernel_size, n_output, strides, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name) as scope:
kernel_shape = [kernel_size[0], kernel_size[1], previous_layer.get_shape()[3], n_output]
kernel = tf.get_variable("kernels", kernel_shape,
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
convolution_2d = tf.nn.conv2d(previous_layer, kernel, strides=strides,
padding="SAME")
convolution = tf.nn.relu(convolution_2d + biases, name=scope.name)
self.layers.append(convolution)
self.n_layers += 1
return convolution
def pool(self, size, strides, scope_name, padding="SAME"):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
pool = tf.nn.max_pool(previous_layer, ksize=size, strides=strides,
padding=padding)
self.layers.append(pool)
self.n_layers += 1
return pool
def fully_connected(self, n_input, n_output, scope_name, relu=False, dropout=True):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
weights = tf.get_variable("weights", [n_input, n_output],
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
pool_reshaped = tf.reshape(previous_layer, [-1, n_input])
logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
fc = logits
if relu:
fc = tf.nn.relu(fc, name="relu")
if dropout:
fc = tf.nn.dropout(fc, self.dropout, name="dropout")
self.layers.append(fc)
self.n_layers += 1
if relu is False and dropout is False:
self.logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
self.predictions = tf.nn.softmax(logits, name="predictions")
return fc
def set_loss(self, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.name_scope(scope_name):
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=previous_layer, labels=self.y)
loss = tf.reduce_mean(entropy, name='loss')
self.layers.append(loss)
self.n_layers += 1
self.loss = loss
def set_optimizer(self):
previous_layer = self.layers[self.n_layers]
optimizer = tf.train.AdamOptimizer(0.001).minimize(previous_layer, global_step=self.global_step)
self.layers.append(optimizer)
self.n_layers += 1
self.optimizer = optimizer
def run(self, train_x, train_y):
sess = tf.Session()
with sess.as_default():
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
writer = tf.summary.FileWriter('./my_graph/mnist', sess.graph)
ckpt = tf.train.get_checkpoint_state(os.path.dirname('results/checkpoint'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
initial_step = self.global_step.eval()
start_time = time()
n_batches = int(train_x.shape[0] / self.batch_size)
total_correct_preds = 0
total_loss = 0.0
current_position = 0
for index in range(initial_step, int(n_batches * self.epochs)): # train the model n_epochs times
x_batch = train_x[current_position: current_position + self.batch_size, :]
y_batch = train_y[current_position: current_position + self.batch_size, :]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
_, loss_batch, logits_batch = sess.run([self.optimizer, self.loss, self.logits],
feed_dict=feed_dict)
print(logits_batch.shape)
total_loss += loss_batch
preds = tf.nn.softmax(logits_batch)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
if (index + 1) % self.skip_step == 0:
print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / self.skip_step))
total_loss = 0.0
saver.save(sess, 'results/mnist-convnet', index)
current_position += self.batch_size
print("Optimization Finished!") # should be around 0.35 after 25 epochs
print("Total time: {0} seconds".format(time() - start_time))
print("Accuracy {0}".format(total_correct_preds / train_x.shape[0]))
def test(self, val_x, val_y):
checkpoint_file = tf.train.latest_checkpoint("results/")
graph = tf.Graph()
with graph.as_default():
sess = tf.Session()
with sess.as_default():
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
self.x = graph.get_operation_by_name("X_placeholder").outputs[0]
self.y = graph.get_operation_by_name("Y_placeholder").outputs[0]
self.dropout = graph.get_operation_by_name("dropout").outputs[0]
self.global_step = graph.get_operation_by_name("global_step").outputs[0]
self.logits = graph.get_operation_by_name("logits").outputs[0]
current_position = 0
n_batches = int(val_x.shape[0] / self.batch_size)
total_correct_preds = 0
for i in range(n_batches):
x_batch = val_x[current_position: current_position + self.batch_size]
y_batch = val_y[current_position: current_position + self.batch_size]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
logits_batch = sess.run([self.logits], feed_dict=feed_dict)
preds = tf.nn.softmax(logits_batch)
# correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
"""
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
current_position += self.batch_size
print("Accuracy {0}".format(total_correct_preds / val_x.shape[0]))
"""
and testing it like this (sorry if there is a lot of code) :
from cnn import Cnn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)
N_CLASSES = 10
# load datas
mnist = input_data.read_data_sets("mnist", one_hot=True)
train_x, train_y = mnist.train.images, mnist.train.labels
val_x, val_y = mnist.test.images, mnist.test.labels
model = Cnn(batch_size=128, skip_step=10, epochs=0.25, dropout_ratio=0.75)
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=32, scope_name="conv1")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool1")
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=64, scope_name="conv2")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool2")
input_features = 7 * 7 * 64
model.fully_connected(n_input=input_features, n_output=1024, scope_name="fc", relu=True, dropout=True)
model.fully_connected(n_input=1024, n_output=N_CLASSES, scope_name="softmax_linear", dropout=False)
model.set_loss(scope_name="loss")
model.set_optimizer()
model.run(train_x=train_x, train_y=train_y)
model.test(val_x, val_y)
I would also appreciate any comments regarding the better way to create a class like this (in particular the handling of the placeholders and the sessions)
Thanks !
Nicolas
I have the following code which is working (no errors). My question is just am I restoring the model correctly? Especially that I cannot see any output for the statement print(v_).
So, I'm trying to know if I'm doing the following correct:
Restoring the model
Using that restored model
import tensorflow as tf
data, labels = cifar_tools.read_data('C:\\Users\\abc\\Desktop\\Testing')
x = tf.placeholder(tf.float32, [None, 150 * 150])
y = tf.placeholder(tf.float32, [None, 2])
w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
w2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
w3 = tf.Variable(tf.random_normal([38*38*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
w_out = tf.Variable(tf.random_normal([1024, 2]))
b_out = tf.Variable(tf.random_normal([2]))
def conv_layer(x,w,b):
conv = tf.nn.conv2d(x,w,strides=[1,1,1,1], padding = 'SAME')
conv_with_b = tf.nn.bias_add(conv,b)
conv_out = tf.nn.relu(conv_with_b)
return conv_out
def maxpool_layer(conv,k=2):
return tf.nn.max_pool(conv, ksize=[1,k,k,1], strides=[1,k,k,1], padding='SAME')
def model():
x_reshaped = tf.reshape(x, shape=[-1, 150, 150, 1])
conv_out1 = conv_layer(x_reshaped, w1, b1)
maxpool_out1 = maxpool_layer(conv_out1)
norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
conv_out2 = conv_layer(norm1, w2, b2)
norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
maxpool_out2 = maxpool_layer(norm2)
maxpool_reshaped = tf.reshape(maxpool_out2, [-1, w3.get_shape().as_list()[0]])
local = tf.add(tf.matmul(maxpool_reshaped, w3), b3)
local_out = tf.nn.relu(local)
out = tf.add(tf.matmul(local_out, w_out), b_out)
return out
model_op = model()
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
onehot_labels = tf.one_hot(labels, 2, on_value=1.,off_value=0.,axis=-1)
onehot_vals = sess.run(onehot_labels)
batch_size = len(data)
# Restore model
saver = tf.train.import_meta_graph('C:\\Users\\abc\\Desktop\\\Testing\\mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
all_vars = tf.get_collection('vars')
for v in all_vars:
v_ = sess.run(v)
print(v_)
for j in range(0, 5):
print('EPOCH', j)
for i in range(0, len(data), batch_size):
batch_data = data[i:i+batch_size, :]
batch_onehot_vals = onehot_vals[i:i+batch_size, :]
_, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
print(i, accuracy_val)
print('DONE WITH EPOCH')
EDIT 1
Would restoring this way work?
saver = tf.train.Saver()
saver = tf.train.import_meta_graph('C:\\Users\\Abder-Rahman\\Desktop\\\Testing\\mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
print('model restored'
EDIT 2
This is how I save my model:
#Save model
saver = tf.train.Saver()
saved_path = saver.save(sess, 'C:\\Users\\abc\\Desktop\\\Testing\\mymodel')
print("The model is in this file: ", saved_path)
Thanks.
Your saver code was correct.
While variables must add to collections before retrieving the collection.
tf.add_to_collection("vars", w1)
tf.add_to_collection("vars", b1)
...
Then
all_vars = tf.get_collection('vars')
Usually I restore a TensorFlow model like this:
with tf.Session(graph=graph) as session:
if os.path.exists(save_path):
# Restore variables from disk.
saver.restore(session, save_path)
else:
tf.initialize_all_variables().run()
print('Initialized')
# do the work
# ...
saver.save(session, save_path) # save the model
example code can be fetch here.
I need to know more about how you save your model, it seems that your model was restored before save, and your model didn't turn to a tf.graph and connect with the session.
I assume you have read my blog here , the mechanism for model saving is quite straightforward, when you load a model, the parameter values and relations (which are probably all you care about) are matched by variable name.
For example
#simplesave.py
import tensorflow as tf
with tf.Graph().as_default() as g:#yes you have to have a graph first
with tf.Session() as sess:
b = tf.Variable(1.0, name="bias")
saver = tf.train.Saver()
saver.save(sess,'model') #b should be saved in the model file
#simpleload.py
import tensorflow as tf
with tf.Graph().as_default() as g:
with tf.Session() as sess:
#still need the definition, again
b = tf.Variable(0.0, name="bias")
saver = tf.train.Saver() #now it is satisfied...
saver.restore(sess,model)
What confused me here is, you used a function all_vars = tf.get_collection('vars'), but you have never defined a scope called "vars". You probably should test using tf.all_variables()first.
I have the following code where I restore a previously saved model. Is it correct this way? I save a model at some point, and when I want to restore it I don't need to to save the model, since I already have a saved model. Is it right how I understand it?
import tensorflow as tf
data, labels = cifar_tools.read_data('C:\\Users\\abc\\Desktop\\Testing')
x = tf.placeholder(tf.float32, [None, 150 * 150])
y = tf.placeholder(tf.float32, [None, 2])
w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
w2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
w3 = tf.Variable(tf.random_normal([38*38*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
w_out = tf.Variable(tf.random_normal([1024, 2]))
b_out = tf.Variable(tf.random_normal([2]))
def conv_layer(x,w,b):
conv = tf.nn.conv2d(x,w,strides=[1,1,1,1], padding = 'SAME')
conv_with_b = tf.nn.bias_add(conv,b)
conv_out = tf.nn.relu(conv_with_b)
return conv_out
def maxpool_layer(conv,k=2):
return tf.nn.max_pool(conv, ksize=[1,k,k,1], strides=[1,k,k,1], padding='SAME')
def model():
x_reshaped = tf.reshape(x, shape=[-1, 150, 150, 1])
conv_out1 = conv_layer(x_reshaped, w1, b1)
maxpool_out1 = maxpool_layer(conv_out1)
norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
conv_out2 = conv_layer(norm1, w2, b2)
norm2 = tf.nn.lrn(conv_out2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
maxpool_out2 = maxpool_layer(norm2)
maxpool_reshaped = tf.reshape(maxpool_out2, [-1, w3.get_shape().as_list()[0]])
local = tf.add(tf.matmul(maxpool_reshaped, w3), b3)
local_out = tf.nn.relu(local)
out = tf.add(tf.matmul(local_out, w_out), b_out)
return out
model_op = model()
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
onehot_labels = tf.one_hot(labels, 2, on_value=1.,off_value=0.,axis=-1)
onehot_vals = sess.run(onehot_labels)
batch_size = len(data)
# Restore model
saver = tf.train.import_meta_graph('C:\\Users\\abc\\Desktop\\\Testing\\mymodel.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
tf.add_to_collection("vars", w1)
tf.add_to_collection("vars", b1)
all_vars = tf.get_collection('vars')
for v in all_vars:
v_ = sess.run(v)
print(v_)
for j in range(0, 5):
print('EPOCH', j)
for i in range(0, len(data), batch_size):
batch_data = data[i:i+batch_size, :]
batch_onehot_vals = onehot_vals[i:i+batch_size, :]
_, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
print(i, accuracy_val)
print('DONE WITH EPOCH')