Related
I'm learning the basic CNN model by using tensorflow. After training my model, I want to load it and use the model to predict the hand-written digital img (CSV file).
Here is my CNN model:
import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)
class CNNLogisticClassification:
def __init__(self, shape_picture, n_labels,
learning_rate=0.5, dropout_ratio=0.5, alpha=0.0):
self.shape_picture = shape_picture
self.n_labels = n_labels
self.weights = None
self.biases = None
self.graph = tf.Graph() # initialize new grap
self.build(learning_rate, dropout_ratio, alpha) # building graph
self.sess = tf.Session(graph=self.graph) # create session by the graph
def build(self, learning_rate, dropout_ratio, alpha):
with self.graph.as_default():
### Input
self.train_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.train_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
### Optimalization
# build neurel network structure and get their predictions and loss
self.y_, self.original_loss = self.structure(pictures=self.train_pictures,
labels=self.train_labels,
dropout_ratio=dropout_ratio,
train=True, )
# regularization loss
self.regularization = \
tf.reduce_sum([tf.nn.l2_loss(w) for w in self.weights.values()]) \
/ tf.reduce_sum([tf.size(w, out_type=tf.float32) for w in self.weights.values()])
# total loss
self.loss = self.original_loss + alpha * self.regularization
# define training operation
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self.train_op = optimizer.minimize(self.loss)
### Prediction
self.new_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.new_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
self.new_y_, self.new_original_loss = self.structure(pictures=self.new_pictures,
labels=self.new_labels)
self.new_loss = self.new_original_loss + alpha * self.regularization
### Initialization
self.init_op = tf.global_variables_initializer()
### save model
self.saver=tf.train.Saver()
def structure(self, pictures, labels, dropout_ratio=None, train=False):
### Variable
## LeNet5 Architecture(http://yann.lecun.com/exdb/lenet/)
# input:(batch,28,28,1) => conv1[5x5,6] => (batch,24,24,6)
# pool2 => (batch,12,12,6) => conv2[5x5,16] => (batch,8,8,16)
# pool4 => fatten5 => (batch,4x4x16) => fc6 => (batch,120)
# (batch,120) => fc7 => (batch,84)
# (batch,84) => fc8 => (batch,10) => softmax
if (not self.weights) and (not self.biases):
self.weights = {
'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6),
stddev=0.1)),
'conv3': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16),
stddev=0.1)),
'fc6': tf.Variable(tf.truncated_normal(shape=(4*4*16, 120),
stddev=0.1)),
'fc7': tf.Variable(tf.truncated_normal(shape=(120, 84),
stddev=0.1)),
'fc8': tf.Variable(tf.truncated_normal(shape=(84, self.n_labels),
stddev=0.1)),
}
self.biases = {
'conv1': tf.Variable(tf.zeros(shape=(6))),
'conv3': tf.Variable(tf.zeros(shape=(16))),
'fc6': tf.Variable(tf.zeros(shape=(120))),
'fc7': tf.Variable(tf.zeros(shape=(84))),
'fc8': tf.Variable(tf.zeros(shape=(self.n_labels))),
}
### Structure
conv1 = self.get_conv_2d_layer(pictures,
self.weights['conv1'], self.biases['conv1'],
activation=tf.nn.relu)
pool2 = tf.nn.max_pool(conv1,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
conv3 = self.get_conv_2d_layer(pool2,
self.weights['conv3'], self.biases['conv3'],
activation=tf.nn.relu)
pool4 = tf.nn.max_pool(conv3,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
fatten5 = self.get_flatten_layer(pool4)
if train:
fatten5 = tf.nn.dropout(fatten5, keep_prob=1-dropout_ratio[0])
fc6 = self.get_dense_layer(fatten5,
self.weights['fc6'], self.biases['fc6'],
activation=tf.nn.relu)
if train:
fc6 = tf.nn.dropout(fc6, keep_prob=1-dropout_ratio[1])
fc7 = self.get_dense_layer(fc6,
self.weights['fc7'], self.biases['fc7'],
activation=tf.nn.relu)
logits = self.get_dense_layer(fc7, self.weights['fc8'], self.biases['fc8'])
y_ = tf.nn.softmax(logits)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=labels,
logits=logits))
return (y_, loss)
def get_dense_layer(self, input_layer, weight, bias, activation=None):
x = tf.add(tf.matmul(input_layer, weight), bias)
if activation:
x = activation(x)
return x
def get_conv_2d_layer(self, input_layer,
weight, bias,
strides=(1, 1), padding='VALID', activation=None):
x = tf.add(
tf.nn.conv2d(input_layer,
weight,
[1, strides[0], strides[1], 1],
padding=padding), bias)
if activation:
x = activation(x)
return x
def get_flatten_layer(self, input_layer):
shape = input_layer.get_shape().as_list()
n = 1
for s in shape[1:]:
n *= s
x = tf.reshape(input_layer, [-1, n])
return x
def fit(self, X, y, epochs=10,
validation_data=None, test_data=None, batch_size=None):
X = self._check_array(X)
y = self._check_array(y)
N = X.shape[0]
random.seed(9000)
if not batch_size:
batch_size = N
self.sess.run(self.init_op)
for epoch in range(epochs):
print('Epoch %2d/%2d: ' % (epoch+1, epochs))
# mini-batch gradient descent
index = [i for i in range(N)]
random.shuffle(index)
while len(index) > 0:
index_size = len(index)
batch_index = [index.pop() for _ in range(min(batch_size, index_size))]
feed_dict = {
self.train_pictures: X[batch_index, :],
self.train_labels: y[batch_index],
}
_, loss = self.sess.run([self.train_op, self.loss],
feed_dict=feed_dict)
print('[%d/%d] loss = %.4f ' % (N-len(index), N, loss), end='\r')
# evaluate at the end of this epoch
y_ = self.predict(X)
train_loss = self.evaluate(X, y)
train_acc = self.accuracy(y_, y)
msg = '[%d/%d] loss = %8.4f, acc = %3.2f%%' % (N, N, train_loss, train_acc*100)
if validation_data:
val_loss = self.evaluate(validation_data[0], validation_data[1])
val_acc = self.accuracy(self.predict(validation_data[0]), validation_data[1])
msg += ', val_loss = %8.4f, val_acc = %3.2f%%' % (val_loss, val_acc*100)
print(msg)
if test_data:
test_acc = self.accuracy(self.predict(test_data[0]), test_data[1])
print('test_acc = %3.2f%%' % (test_acc*100))
def accuracy(self, predictions, labels):
return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/predictions.shape[0])
def predict(self, X):
X = self._check_array(X)
return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})
def evaluate(self, X, y):
X = self._check_array(X)
y = self._check_array(y)
return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X,
self.new_labels: y})
def _check_array(self, ndarray):
ndarray = np.array(ndarray)
if len(ndarray.shape) == 1:
ndarray = np.reshape(ndarray, (1, ndarray.shape[0]))
return ndarray
if __name__ == '__main__':
print('Extract MNIST Dataset ...')
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
train_data = mnist.train
valid_data = mnist.validation
test_data = mnist.test
train_img = np.reshape(train_data.images, [-1, 28, 28, 1])
valid_img = np.reshape(valid_data.images, [-1, 28, 28, 1])
test_img = np.reshape(test_data.images, [-1, 28, 28, 1])
model = CNNLogisticClassification(
shape_picture=[28, 28, 1],
n_labels=10,
learning_rate=0.07,
dropout_ratio=[0.2, 0.1],
alpha=0.1,
)
model.fit(
X=train_img,
y=train_data.labels,
epochs=10,
validation_data=(valid_img, valid_data.labels),
test_data=(test_img, test_data.labels),
batch_size=32,
)
saver = model.saver.save(model.sess, "test_model")
print("Model saved in path: %s" % saver)
And I create another py file to load my model:
import tensorflow as tf
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
sess.run(tf.global_variables_initializer())
saver.predict('D:\python\number_data\3.csv')
This is the error I'm getting:
AttributeError: 'Saver' object has no attribute 'predict'
How do I fix it and let the trained model predict my CSV file?
Thanks in advance for your help!
Edit:
I change my second py file as below:
import numpy as np
import tensorflow as tf
import pandas as pd
X=pd.read_csv('D:/PYTHON/cnn_data/7.csv', index_col=None, header=None).values
X1=X/255
X3=tf.convert_to_tensor(
X1,
dtype=None,
dtype_hint=None,
name=None
)
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
graph=tf.get_default_graph()
xs0=graph.get_tensor_by_name("Input:0")
prediction=graph.get_tensor_by_name("Output:0")
sess.run(prediction,feed_dict={xs0:X3})
print(prediction)
I only try to predict one digital img data(CSV file with one row), I transfer it into tensor type and name my two placeholder "Input" , "Output", but get another error:
TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor("Const:0", shape=(1, 784), dtype=float64) which was passed to
the feed with key Tensor("Input:0", shape=(?, 28, 28, 1), dtype=float32).
>
First of all, the obvious error here is that you are trying to call a function that doesn't exist. Evidently, the saver object does not have a predict function.
Second, if you want Tensorflow to make predictions, you need to provide it with "Tensorflow" input, and sadly, CSVs are not one of them.
All you need to do is transform your CSV inputs into tensors, with a function like this for instance:
filename = 'D:\python\number_data\3.csv'
def csv_to_tensor(filename):
...
return tensors
I cannot tell you how to implement the function exactly since I don't know the exact format of your data, but I am assuming that each row in your file is an input. So you most likely just need to loop through the lines in your file and convert each line to a tensor, which can then be used by a Tensorflow model.
Even if I reduce the network structure to a layer of convolution, RecTensor still takes 99% of the time. How can I adjust it? Because my computer's graphics card is AMD, and I can't use TensorFlow-GPU.So here I am using TensorFlow-CPU.
My all code's summary is(VGG):
7*conv2d(32、64、128)
3*fc(100、100、2)
image_size:64*64*3
batch_size:16
data_format:TFRecords
two ps, two worker
My code is following:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from tensorflow.python.client import timeline
import tensorflow as tf
flags = tf.app.flags
flags.DEFINE_integer("num_gpus", 0,
"Total number of gpus for each machine. If you don't use GPU, please set it to '0'")
flags.DEFINE_integer("replicas_to_aggregate", None,
"Number of replicas to aggregate before parameter update "
"is applied (For sync_replicas mode only; default: "
"num_workers)")
flags.DEFINE_integer("train_steps", 20, "Number of (global) training steps to perform")
flags.DEFINE_integer("batch_size", 16, "Training batch size")
flags.DEFINE_float("learning_rate", 0.001, "Learning rate")
flags.DEFINE_boolean("sync_replicas", False, "Use the sync_replicas (synchronized replicas) mode")
ps_ip = '***.***.**.16:22227,***.***.**.17:22231'
ip = '***.***.**.18:22229,***.***.**.19:22225'
flags.DEFINE_string('ps_hosts', ps_ip, 'Comma-separated list of hostname:port pairs')
flags.DEFINE_string('worker_hosts', ip, 'Comma-separated list of hostname:port pairs')
flags.DEFINE_string("job_name", None, "job name: worker or ps")
flags.DEFINE_integer("task_index", None, "Worker task index, should be >= 0")
FLAGS = flags.FLAGS
batch_size = FLAGS.batch_size
def main(unused_argv):
if FLAGS.job_name is None or FLAGS.job_name == "":
raise ValueError("Must specify an explicit `job_name`")
if FLAGS.task_index is None or FLAGS.task_index == "":
raise ValueError("Must specify an explicit `task_index`")
print("job name = %s" % FLAGS.job_name)
print("task index = %d" % FLAGS.task_index)
ps_spec = FLAGS.ps_hosts.split(",")
worker_spec = FLAGS.worker_hosts.split(",")
num_workers = len(worker_spec)
cluster = tf.train.ClusterSpec({"ps": ps_spec, "worker": worker_spec})
server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
if FLAGS.job_name == "ps":
server.join()
elif FLAGS.job_name == "worker":
is_chief = (FLAGS.task_index == 0)
with tf.device(tf.train.replica_device_setter(cluster=cluster,
worker_device="/job:worker/task:%d/cpu:%d" % (FLAGS.task_index, 0))):
image_height = 64
image_width = 64
num_channels = 3
num_targets = 2
IMG_PIXELS = image_height * image_width * num_channels
TRAIN_FILE = './train.tfrecords'
TEST_FILE = './test.tfrecords'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example, features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw': tf.FixedLenFeature([], tf.string)
})
image = tf.decode_raw(features['img_raw'], tf.uint8)
label = features['label']
image.set_shape([IMG_PIXELS])
image = tf.reshape(image, [image_height, image_width, num_channels])
image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
return image, label
def inputs(data_set, batch_size, num_epochs):
if not num_epochs:
num_epochs = None
if data_set == 'train':
file = TRAIN_FILE
else:
file = TEST_FILE
with tf.name_scope('input') as scope:
filename_queue = tf.train.string_input_producer([file], num_epochs=num_epochs)
image, label = read_and_decode(filename_queue)
images, labels = tf.train.shuffle_batch([image, label],
batch_size=batch_size,
num_threads=64,
capacity=320 + 3 * batch_size,
min_after_dequeue=320
)
return images, labels
global_step = tf.Variable(0, name="global_step", trainable=False)
images, targets = inputs(data_set='train', batch_size=batch_size, num_epochs=None)
test_images, test_targets = inputs(data_set='test', batch_size=batch_size, num_epochs=None)
def cnn_model(input_images, batch_size):
def truncated_nomal_var(name, shape, dtype):
return (tf.get_variable(name=name, shape=shape, dtype=dtype,
initializer=tf.truncated_normal_initializer(stddev=0.05)))
def zero_var(name, shape, dtype):
return (tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0)))
with tf.variable_scope('conv1') as scope:
conv1_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 3, 32], dtype=tf.float32)
conv1 = tf.nn.conv2d(input_images, conv1_kernel, strides=[1, 1, 1, 1], padding='SAME', name='conv2d')
conv1_bias = zero_var(name='conv_bias', shape=[32], dtype=tf.float32)
conv1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
relu_conv1 = tf.nn.relu(conv1_add_bias, name='relu')
norm1 = tf.nn.dropout(relu_conv1, keep_prob=0.8, name='dropout')
with tf.variable_scope('conv2') as scope:
conv2_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 32, 32], dtype=tf.float32)
conv2 = tf.nn.conv2d(norm1, conv2_kernel, strides=[1, 1, 1, 1], padding='SAME', name='conv2d')
conv2_bias = zero_var(name='conv_bias', shape=[32], dtype=tf.float32)
conv2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
relu_conv2 = tf.nn.relu(conv2_add_bias, name='relu')
pool1 = tf.nn.avg_pool(relu_conv2, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME',
name='pool_layer')
norm2 = tf.nn.dropout(pool1, keep_prob=0.8, name='dropout')
with tf.variable_scope('conv3') as scope:
conv3_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 32, 64], dtype=tf.float32)
conv3 = tf.nn.conv2d(norm2, conv3_kernel, [1, 1, 1, 1], padding='SAME', name='conv2d')
conv3_bias = zero_var(name='conv_bias', shape=[64], dtype=tf.float32)
conv3_add_bias = tf.nn.bias_add(conv3, conv3_bias)
relu_conv3 = tf.nn.relu(conv3_add_bias, name='relu')
norm3 = tf.nn.dropout(relu_conv3, keep_prob=0.7, name='dropout')
with tf.variable_scope('conv4') as scope:
conv4_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 64, 64], dtype=tf.float32)
conv4 = tf.nn.conv2d(norm3, conv4_kernel, [1, 1, 1, 1], padding='SAME', name='conv2d')
conv4_bias = zero_var(name='conv_bias', shape=[64], dtype=tf.float32)
conv4_add_bias = tf.nn.bias_add(conv4, conv4_bias, name='relu')
relu_conv4 = tf.nn.relu(conv4_add_bias)
pool2 = tf.nn.avg_pool(relu_conv4, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME',
name='pool_layer')
norm4 = tf.nn.dropout(pool2, keep_prob=0.7, name='dropout4')
with tf.variable_scope('conv5') as scope:
conv5_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 64, 128], dtype=tf.float32)
conv5 = tf.nn.conv2d(norm4, conv5_kernel, [1, 1, 1, 1], padding='SAME', name='conv2d')
conv5_bias = zero_var(name='conv_bias', shape=[128], dtype=tf.float32)
conv5_add_bias = tf.nn.bias_add(conv5, conv5_bias)
relu_conv5 = tf.nn.relu(conv5_add_bias, name='relu')
norm5 = tf.nn.dropout(relu_conv5, keep_prob=0.6, name='dropout')
with tf.variable_scope('conv6') as scope:
conv6_kernel = truncated_nomal_var(name='conv_kernel', shape=[3, 3, 128, 128], dtype=tf.float32)
conv6 = tf.nn.conv2d(norm5, conv6_kernel, [1, 1, 1, 1], padding='SAME', name='conv2d')
conv6_bias = zero_var(name='conv_bias', shape=[128], dtype=tf.float32)
conv6_add_bias = tf.nn.bias_add(conv6, conv6_bias)
relu_conv6 = tf.nn.relu(conv6_add_bias, name='relu')
norm6 = tf.nn.dropout(relu_conv6, keep_prob=0.6, name='dropout')
with tf.variable_scope('conv7') as scope:
conv7_kernel = truncated_nomal_var(name='conv_kernel', shape=[1, 1, 128, 128], dtype=tf.float32)
conv7 = tf.nn.conv2d(norm6, conv7_kernel, [1, 1, 1, 1], padding='SAME', name='conv2d')
conv7_bias = zero_var(name='conv_bias', shape=[128], dtype=tf.float32)
conv7_add_bias = tf.nn.bias_add(conv7, conv7_bias)
relu_conv7 = tf.nn.relu(conv7_add_bias, name='relu')
pool7 = tf.nn.avg_pool(relu_conv7, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME',
name='pool_layer')
norm7 = tf.nn.dropout(pool7, keep_prob=0.6, name='dropout')
reshaped_output = tf.reshape(norm1, [batch_size, -1])
reshaped_dim = reshaped_output.get_shape()[1].value
with tf.variable_scope('full1') as scope:
full_weigth1 = truncated_nomal_var(name='full_mult', shape=[reshaped_dim, 100], dtype=tf.float32)
full_bias1 = zero_var(name='full_bias', shape=[100], dtype=tf.float32)
full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output, full_weigth1), full_bias1), name='relu')
with tf.variable_scope('full2') as scope:
full_weight2 = truncated_nomal_var(name='full_mult', shape=[100, 32], dtype=tf.float32)
full_bias2 = zero_var(name='full_bias', shape=[32], dtype=tf.float32)
full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1, full_weight2), full_bias2), name='relu')
with tf.variable_scope('full3') as scope:
full_weight3 = truncated_nomal_var(name='full_mult', shape=[32, num_targets], dtype=tf.float32)
full_bias3 = zero_var(name='full_bias', shape=[num_targets], dtype=tf.float32)
final_output = tf.add(tf.matmul(full_layer2, full_weight3), full_bias3)
return final_output
def loss(logits, targets):
targets = tf.squeeze(tf.cast(targets, tf.int32))
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
return cross_entropy_mean
def accuracy_of_batch(logits, targets):
targets = tf.squeeze(tf.cast(targets, tf.int32))
batch_predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
predicted_correctly = tf.equal(batch_predictions, targets)
accuracy = tf.reduce_mean(tf.cast(predicted_correctly, tf.float32))
return accuracy
with tf.variable_scope('model_definition') as scope:
model_output = cnn_model(images, batch_size)
scope.reuse_variables()
test_output = cnn_model(test_images, batch_size)
prediction = tf.nn.softmax(model_output)
test_prediction = tf.nn.softmax(test_output)
loss = loss(model_output, targets)
if FLAGS.sync_replicas:
# sync
if FLAGS.replicas_to_aggregate is None:
replicas_to_aggregate = num_workers
else:
replicas_to_aggregate = FLAGS.replicas_to_aggregate
my_optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
grads_and_vars = my_optimizer.compute_gradients(loss)
test_acc = accuracy_of_batch(test_prediction, test_targets)
train_acc = accuracy_of_batch(prediction, targets)
opt = tf.train.SyncReplicasOptimizer(my_optimizer,
replicas_to_aggregate=replicas_to_aggregate,
total_num_replicas=num_workers,
name="A01_sync_replicas")
train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
init_token_op = opt.get_init_tokens_op()
chief_queue_runner = opt.get_chief_queue_runner()
else:
# async
my_optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
test_acc = accuracy_of_batch(test_prediction, test_targets)
train_acc = accuracy_of_batch(prediction, targets)
train_op = my_optimizer.minimize(loss, global_step=global_step)
init_op = tf.global_variables_initializer()
sv = tf.train.Supervisor(
is_chief=is_chief,
logdir='./checkout',
init_op=init_op,
recovery_wait_secs=1,
global_step=global_step)
if is_chief:
print("Worker %d: Initializing session..." % FLAGS.task_index)
else:
print("Worker %d: Waiting for session to be initialized..." %
FLAGS.task_index)
sess_config = tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=False,
device_filters=["/job:ps","/job:worker/task:%d" % FLAGS.task_index])
with sv.prepare_or_wait_for_session(server.target, config=sess_config) as sess:
if FLAGS.task_index == 0 and FLAGS.sync_replicas == True:
sv.start_queue_runners(sess, [chief_queue_runner])
sess.run(init_token_op)
print("Worker %d: Session initialization complete." % FLAGS.task_index)
print("-------------Training begins--------")
start_time = time.time()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
run_metadata = tf.RunMetadata()
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
for i in range(FLAGS.train_steps):
_, loss_value, temp_train_acc, temp_test_acc, step = sess.run(
[train_op, loss, train_acc, test_acc, global_step], options=run_options, run_metadata=run_metadata)
output = 'Loss = {:.5f}'.format(loss_value)
test_acc_output = ' --- test_acc = {:.3f}'.format(temp_test_acc)
train_acc_output = ' --- train_acc = {:.3f}'.format(temp_train_acc)
time_output = '%.2fs' % (time.time() - start_time)
STEP = 'train_step: %d | global_step: %d' % (i, step)
print(output, test_acc_output, train_acc_output, time_output, STEP)
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
with open('./timeline.json', 'w') as trace_file:
trace_file.write(trace.generate_chrome_trace_format())
coord.request_stop()
coord.join(threads)
if __name__ == "__main__":
tf.app.run()
The result is following:
P1:timeline image
I used 4 laptops,2 as ps, 2 as woker. It's config is:
8GB RAM
CPU:Intel(R) Core(TM) i5-4210U CPU # 1.70GHz X4
UPDATE:2018/8/15 16:34
I tested the network bandwidth transfer between ps (server) and worker (client) using iperf.
When a PS and a worker were tested, the results were as follows:
P2:1 ps--1 worker image
1 ps and 2 workers Simultaneous test:
P3:1 ps--2 worker image
So, RecvTensor takes up so much time, is it due to the small network bandwidth? As you can see from the P2, 11MB/sec.
The project I run is C3D-tensorflow from Github, when I trained the network with train_c3d_ucf101.py, Error shows that:
ValueError:Cannot feed value of shape (10,0) for Tensor u
'Placeholder:0', which has shape '(10,16,112,112,3)'
Here is my environment: Ubantu16.04, tensorflow1.4, python2.7. I'm a new guy to learn tensorflow so can you tell me how to locate Placeholder:0 and what can I do to solve this problem? Thank you in advance!
The train_c3d_ucf101.py:
"""Trains and Evaluates the MNIST network using a feed dictionary."""
# pylint: disable=missing-docstring
import os
import time
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import input_data
import c3d_model
import math
import numpy as np
# Basic model parameters as external flags.
flags = tf.app.flags
gpu_num = 1
#flags.DEFINE_float('learning_rate', 0.0, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 5000, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size.')
FLAGS = flags.FLAGS
MOVING_AVERAGE_DECAY = 0.9999
model_save_dir = './models'
def placeholder_inputs(batch_size):
"""Generate placeholder variables to represent the input tensors.
These placeholders are used as inputs by the rest of the model building
code and will be fed from the downloaded data in the .run() loop, below.
Args:
batch_size: The batch size will be baked into both placeholders.
Returns:
images_placeholder: Images placeholder.
labels_placeholder: Labels placeholder.
"""
# Note that the shapes of the placeholders match the shapes of the full
# image and label tensors, except the first dimension is now batch_size
# rather than the full size of the train or test data sets.
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
c3d_model.NUM_FRAMES_PER_CLIP,
c3d_model.CROP_SIZE,
c3d_model.CROP_SIZE,
c3d_model.CHANNELS))
labels_placeholder = tf.placeholder(tf.int64, shape=(batch_size))
return images_placeholder, labels_placeholder
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g, _ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def tower_loss(name_scope, logit, labels):
cross_entropy_mean = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logit)
)
tf.summary.scalar(
name_scope + '_cross_entropy',
cross_entropy_mean
)
weight_decay_loss = tf.get_collection('weightdecay_losses')
tf.summary.scalar(name_scope + '_weight_decay_loss', tf.reduce_mean(weight_decay_loss) )
# Calculate the total loss for the current tower.
total_loss = cross_entropy_mean + weight_decay_loss
tf.summary.scalar(name_scope + '_total_loss', tf.reduce_mean(total_loss) )
return total_loss
def tower_acc(logit, labels):
correct_pred = tf.equal(tf.argmax(logit, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return accuracy
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
var = tf.get_variable(name, shape, initializer=initializer)
return var
def _variable_with_weight_decay(name, shape, wd):
var = _variable_on_cpu(name, shape, tf.contrib.layers.xavier_initializer())
if wd is not None:
weight_decay = tf.nn.l2_loss(var)*wd
tf.add_to_collection('weightdecay_losses', weight_decay)
return var
def run_training():
# Get the sets of images and labels for training, validation, and
# Tell TensorFlow that the model will be built into the default Graph.
# Create model directory
if not os.path.exists(model_save_dir):
os.makedirs(model_save_dir)
use_pretrained_model = True
model_filename = "./sports1m_finetuning_ucf101.model"
with tf.Graph().as_default():
global_step = tf.get_variable(
'global_step',
[],
initializer=tf.constant_initializer(0),
trainable=False
)
images_placeholder, labels_placeholder = placeholder_inputs(
FLAGS.batch_size * gpu_num
)
tower_grads1 = []
tower_grads2 = []
logits = []
opt_stable = tf.train.AdamOptimizer(1e-4)
opt_finetuning = tf.train.AdamOptimizer(1e-3)
with tf.variable_scope('var_name') as var_scope:
weights = {
'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.0005),
'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.0005),
'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.0005),
'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.0005),
'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.0005),
'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.0005),
'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.0005),
'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.0005),
'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.0005)
}
biases = {
'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
}
for gpu_index in range(0, gpu_num):
with tf.device('/gpu:%d' % gpu_index):
varlist2 = [ weights['out'],biases['out'] ]
varlist1 = list( set(weights.values() + biases.values()) - set(varlist2) )
logit = c3d_model.inference_c3d(
images_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size,:,:,:,:],
0.5,
FLAGS.batch_size,
weights,
biases
)
loss_name_scope = ('gpud_%d_loss' % gpu_index)
loss = tower_loss(
loss_name_scope,
logit,
labels_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size]
)
grads1 = opt_stable.compute_gradients(loss, varlist1)
grads2 = opt_finetuning.compute_gradients(loss, varlist2)
tower_grads1.append(grads1)
tower_grads2.append(grads2)
logits.append(logit)
logits = tf.concat(logits,0)
accuracy = tower_acc(logits, labels_placeholder)
tf.summary.scalar('accuracy', accuracy)
grads1 = average_gradients(tower_grads1)
grads2 = average_gradients(tower_grads2)
apply_gradient_op1 = opt_stable.apply_gradients(grads1)
apply_gradient_op2 = opt_finetuning.apply_gradients(grads2, global_step=global_step)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
train_op = tf.group(apply_gradient_op1, apply_gradient_op2, variables_averages_op)
null_op = tf.no_op()
# Create a saver for writing training checkpoints.
saver = tf.train.Saver(weights.values() + biases.values())
init = tf.global_variables_initializer()
# Create a session for running Ops on the Graph.
sess = tf.Session(
config=tf.ConfigProto(allow_soft_placement=True)
)
sess.run(init)
if os.path.isfile(model_filename) and use_pretrained_model:
saver.restore(sess, model_filename)
# Create summary writter
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('./visual_logs/train', sess.graph)
test_writer = tf.summary.FileWriter('./visual_logs/test', sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/train.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
sess.run(train_op, feed_dict={
images_placeholder: train_images,
labels_placeholder: train_labels
})
duration = time.time() - start_time
print('Step %d: %.3f sec' % (step, duration))
# Save a checkpoint and evaluate the model periodically.
if (step) % 10 == 0 or (step + 1) == FLAGS.max_steps:
saver.save(sess, os.path.join(model_save_dir, 'c3d_ucf_model'), global_step=step)
print('Training Data Eval:')
summary, acc = sess.run(
[merged, accuracy],
feed_dict={images_placeholder: train_images,
labels_placeholder: train_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
train_writer.add_summary(summary, step)
print('Validation Data Eval:')
val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/test.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
summary, acc = sess.run(
[merged, accuracy],
feed_dict={
images_placeholder: val_images,
labels_placeholder: val_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
test_writer.add_summary(summary, step)
print("done")
def main(_):
run_training()
if __name__ == '__main__':
tf.app.run()
If you want to check other files, please tell me and I will show you.
This is my Traceback:
I replicate "GoogLeNet" using tensorflow, dataset is oxford flower 17.
Here is my code.
# This code is implementation of GoogLeNet, which is proposed in "https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf"
# This code is referred from "https://github.com/tflearn/tflearn/blob/master/examples/images/googlenet.py"
from __future__ import division, print_function, absolute_import
# This code is extracted from "https://github.com/tflearn/tflearn/blob/master/tflearn/datasets/oxflower17.py"
import oxflower17
import tensorflow as tf
import numpy as np
X, Y = oxflower17.load_data(one_hot=True, resize_pics=(227,227))
x = tf.placeholder(tf.float32, [None, 227, 227, 3])
y = tf.placeholder(tf.float32, [None, 17])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
trainX, trainY, testX, testY = X[0:1224], Y[0:1224], X[1224:1360], Y[1224:1360] # Divide training sets and test sets
trainX = trainX.reshape(-1, 227, 227, 3)
testX = testX.reshape(-1, 227, 227, 3)
print (len(trainX))
print (len(testX))
# Parameters
batch_size = 64
test_size = len(testX)
# Create some wrappers
def conv2d(x, W, b, strides): # Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k, strides): # MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, strides, strides, 1], padding='SAME')
def avgpool2d(x, k, strides): # AveragePool2D wrapper
return tf.nn.avg_pool(x, ksize=[1, k, k, 1], strides=[1, strides, strides, 1], padding='SAME')
def local_response_normalization(incoming, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
name="LocalResponseNormalization"):
return tf.nn.lrn(incoming, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta, name=name)
weights = {
...
}
biases = {
...
}
# Create NN
x = tf.reshape(x, shape=[-1, 227, 227, 1])
conv1_7_7 = conv2d(x, weights['w_c1_77'], biases['b_c1_77'], strides=2)
pool1_3_3 = maxpool2d(conv1_7_7, k=3, strides=2)
pool1_3_3 = local_response_normalization(pool1_3_3)
conv2_1_1 = conv2d(pool1_3_3, weights['w_c2_11'], biases['b_c2_11'], strides=1)
conv2_3_3 = conv2d(conv2_1_1, weights['w_c2_33'], biases['b_c2_33'], strides=1)
conv2_3_3_lrn = local_response_normalization(conv2_3_3)
pool2_3_3 = maxpool2d(conv2_3_3_lrn, k=3, strides=2)
# Inception module (3a)
inception_3a_1_1 = conv2d(pool2_3_3, weights['w_inception_3a_11'], biases['b_inception_3a_11'], strides=1)
inception_3a_3_3_reduce = conv2d(pool2_3_3, weights['w_inception_3a_33_reduce'], biases['b_inception_3a_33_reduce'],
strides=1)
inception_3a_3_3 = conv2d(inception_3a_3_3_reduce, weights['w_inception_3a_33'], biases['b_inception_3a_33'], strides=1)
inception_3a_5_5_reduce = conv2d(pool2_3_3, weights['w_inception_3a_55_reduce'], biases['b_inception_3a_55_reduce'],
strides=1)
inception_3a_5_5 = conv2d(inception_3a_5_5_reduce, weights['w_inception_3a_55'], biases['b_inception_3a_55'], strides=1)
inception_3a_maxpool = maxpool2d(pool2_3_3, k=3, strides=1)
inception_3a_maxpool_reduce = conv2d(inception_3a_maxpool, weights['w_inception_3a_mp_reduce'],
biases['b_inception_3a_mp_reduce'], strides=1)
inception_3a_concat = tf.concat(3, [inception_3a_1_1, inception_3a_3_3, inception_3a_5_5, inception_3a_maxpool_reduce])
...
# Inception module (5b)
inception_5b_1_1 = conv2d(inception_5a_concat, weights['w_inception_5b_11'], biases['b_inception_5b_11'], strides=1)
inception_5b_3_3_reduce = conv2d(inception_5a_concat, weights['w_inception_5b_33_reduce'],
biases['b_inception_5b_33_reduce'], strides=1)
inception_5b_3_3 = conv2d(inception_5b_3_3_reduce, weights['w_inception_5b_33'], biases['b_inception_5b_33'], strides=1)
inception_5b_5_5_reduce = conv2d(inception_5a_concat, weights['w_inception_5b_55_reduce'],
biases['b_inception_5b_55_reduce'], strides=1)
inception_5b_5_5 = conv2d(inception_5b_5_5_reduce, weights['w_inception_5b_55'], biases['b_inception_5b_55'], strides=1)
inception_5b_maxpool = maxpool2d(inception_5a_concat, k=3, strides=1)
inception_5b_maxpool_reduce = conv2d(inception_5b_maxpool, weights['w_inception_5a_mp_reduce'],
biases['b_inception_5a_mp_reduce'], strides=1)
inception_5b_concat = tf.concat(3, [inception_5b_1_1, inception_5b_3_3, inception_5b_5_5, inception_5b_maxpool_reduce])
pool5_7_7 = avgpool2d(inception_5b_concat, 7, 1)
pool5_7_7_dropout = tf.nn.dropout(pool5_7_7, 0.4)
fc = tf.reshape(pool5_7_7_dropout, [-1, weights['w_fc'].get_shape().as_list()[0]])
fc = tf.add(tf.matmul(fc, weights['w_fc']), biases['b_fc'])
#### Network design is finished.
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(fc, y))
optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
predict = tf.argmax(fc, 1)
init = tf.initialize_all_variables()
# Launch the graph
# This code is extracted from "http://pythonkim.tistory.com/56"
# Some variables are changed
with tf.Session() as sess:
sess.run(init)
for i in range(1):
training_batch = zip(range(0, len(trainX), batch_size), range(batch_size, len(trainX)+1, batch_size))
tf.reset_default_graph() # added by minho, from "https://github.com/tensorflow/tensorflow/issues/1470"
for start, end in training_batch:
sess.run(optimizer, feed_dict={x: trainX[start:end], y: trainY[start:end], keep_prob: 1.0}) # modified by minho
test_indices = np.arange(len(testX)) # Get A Test Batch
np.random.shuffle(test_indices)
test_indices = test_indices[0:test_size]
print(len(testX[test_indices]))
print(i, np.mean(np.argmax(testY[test_indices], axis=1) ==
sess.run(predict, feed_dict={x: testX[test_indices], y: testY[test_indices], keep_prob: 1.0}))) # modified by minho
Here is an error log.
File "/home/mh0205/GoogLeNet/googlenet.py", line 443, in
sess.run(predict, feed_dict={x: testX[test_indices], y: testY[test_indices], keep_prob: 1.0}))) # modified by minho File
"/home/mh0205/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py",
line 1159, in exit
self._default_graph_context_manager.exit(exec_type, exec_value, exec_tb) File
"/home/mh0205/anaconda2/lib/python2.7/contextlib.py", line 35, in
exit
self.gen.throw(type, value, traceback) File "/home/mh0205/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py",
line 3671, in get_controller
if self.stack[-1] is not default: IndexError: list index out of range
I cannot fix error. Please help me.
Add :
tf.reset_default_graph()
Before your code.
If tensorflow is not already imported, add it:
import tensorflow as tf
Hi I am trying to run a conv. neural network addapted from MINST2 tutorial in tensorflow. I am having the following error, but i am not sure what is going on:
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
Traceback (most recent call last):
File "4_Treino_Rede_Neural.py", line 161, in <module>
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.OutOfRangeError: RandomShuffleQueue '_0_input/shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 100, current size 0)
[[Node: input/shuffle_batch = QueueDequeueMany[_class=["loc:#input/shuffle_batch/random_shuffle_queue"], component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](input/shuffle_batch/random_shuffle_queue, input/shuffle_batch/n)]]
Caused by op u'input/shuffle_batch', defined at:
File "4_Treino_Rede_Neural.py", line 113, in <module>
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
File "4_Treino_Rede_Neural.py", line 93, in inputs
min_after_dequeue=1000)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 779, in shuffle_batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 400, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 465, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
My program is:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import time
import numpy as np
import tensorflow as tf
# Basic model parameters as external flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_string('train_dir', '/root/data', 'Directory with the training data.')
#flags.DEFINE_string('train_dir', '/root/data2', 'Directory with the training data.')
# Constants used for dealing with the files, matches convert_to_records.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
# Set-up dos pacotes
sess = tf.InteractiveSession()
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'image_raw': tf.FixedLenFeature([], tf.string),
'label': tf.FixedLenFeature([], tf.int64),
})
# Convert from a scalar string tensor (whose single string has
# length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
# [mnist.IMAGE_PIXELS].
image = tf.decode_raw(features['image_raw'], tf.uint8)
image.set_shape([784])
# OPTIONAL: Could reshape into a 28x28 image and apply distortions
# here. Since we are not applying any distortions in this
# example, and the next step expects the image to be flattened
# into a vector, we don't bother.
# Convert from [0, 255] -> [-0.5, 0.5] floats.
image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
# Convert label from a scalar uint8 tensor to an int32 scalar.
label = tf.cast(features['label'], tf.int32)
return image, label
def inputs(train, batch_size, num_epochs):
"""Reads input data num_epochs times.
Args:
train: Selects between the training (True) and validation (False) data.
batch_size: Number of examples per returned batch.
num_epochs: Number of times to read the input data, or 0/None to
train forever.
Returns:
A tuple (images, labels), where:
* images is a float tensor with shape [batch_size, 30,26,1]
in the range [-0.5, 0.5].
* labels is an int32 tensor with shape [batch_size] with the true label,
a number in the range [0, char letras).
Note that an tf.train.QueueRunner is added to the graph, which
must be run using e.g. tf.train.start_queue_runners().
"""
if not num_epochs: num_epochs = None
filename = os.path.join(FLAGS.train_dir,
TRAIN_FILE if train else VALIDATION_FILE)
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer(
[filename], num_epochs=num_epochs)
# Even when reading in multiple threads, share the filename
# queue.
image, label = read_and_decode(filename_queue)
# Shuffle the examples and collect them into batch_size batches.
# (Internally uses a RandomShuffleQueue.)
# We run this in two threads to avoid being a bottleneck.
images, sparse_labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=2,
capacity=1000 + 3 * batch_size,
# Ensures a minimum amount of shuffling of examples.
min_after_dequeue=1000)
return images, sparse_labels
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
#Variaveis
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
#onehot_y_ = tf.one_hot(y_, 36, dtype=tf.float32)
#y_ = tf.string_to_number(y_, out_type=tf.int32)
#Layer 1
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#Layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#Densely Connected Layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
#Dropout - reduz overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#Readout layer
W_fc2 = weight_variable([1024, 36])
b_fc2 = bias_variable([36])
#y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Train and evaluate
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(onehot_y_ * tf.log(y_conv), reduction_indices=[1]))
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(20000):
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={keep_prob: 0.5})
x, y_ = inputs(train=True, batch_size=2000)
#y_ = tf.string_to_number(y_, out_type=tf.int32)
print("test accuracy %g"%accuracy.eval(feed_dict={keep_prob: 1.0}))
coord.join(threads)
sess.close()
I have tried changing the num_epochs to 10000 and to None but the same error message appears. I am wondering if anyone knows how to solve this.
Thanks
Marcelo
This looks like an issue with your image.set_shape([784]). The error is saying that it was expecting something of size [784] but it got [6272]. I'm semi-familiar with this tutorial and the images should be 28x28 which would give you a size of 784 but maybe there are 6272 images and your dimensions are confused because the first dimension should be the amount of observations and not the size of a single observation? Sorry this isn't a concrete answer but I would start there.