Tensorflow, binary classification - python

Is it possible to do binary classification in TensorFlow? Specifically for pedestrian detection: deciding whether an image contains a pedestrian or not. I couldn't find anything in the API or any good tutorials for this.
I tried to adapt the code from the Deep MNIST tutorial, which was used for multi-class classification: I labeled the images containing pedestrians with 1 and the negatives with 0, and used 3 channels (for colour; that shouldn't be a problem, right?), but the accuracy just jumps all over the place.
The code
import dataset as input_data
import tensorflow as tf
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
data = input_data.read_data_sets()
sess = tf.InteractiveSession()
x = tf.placeholder("float", shape=[None, input_data.HEIGHT * input_data.WIDTH * 3])
y_ = tf.placeholder("float", shape=[None, 2])
W_conv1 = weight_variable([5, 5, 3, 64])
b_conv1 = bias_variable([64])
x_image = tf.reshape(x, [-1, input_data.WIDTH, input_data.HEIGHT, 3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_norm1, W_conv2) + b_conv2)
h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
h_pool2 = max_pool_2x2(h_norm2)
W_fc1 = weight_variable([input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 2])
b_fc2 = bias_variable([2])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = data.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print "step %d, training accuracy %g" % (i, train_accuracy)
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print "test accuracy %g" % accuracy.eval(feed_dict={
    x: data.test.images, y_: data.test.labels, keep_prob: 1.0})
The output
step 0, training accuracy 0.14
step 100, training accuracy 0.54
step 200, training accuracy 0.28
step 300, training accuracy 0.46
step 400, training accuracy 0.32
step 500, training accuracy 0.52
step 600, training accuracy 0.56
step 700, training accuracy 0.76
step 800, training accuracy 0.66
Help would be appreciated, thanks.

You should definitely use TensorBoard to visualize the cross entropy, bias, and weight summaries. I think it will give you a much better view of what is going on.
Try this code, then run tensorboard --logdir=/tmp/tf:
import dataset as input_data
import tensorflow as tf
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
data = input_data.read_data_sets()
sess = tf.InteractiveSession()
x = tf.placeholder("float", shape=[None, input_data.HEIGHT * input_data.WIDTH * 3])
y_ = tf.placeholder("float", shape=[None, 2])
W_conv1 = weight_variable([5, 5, 3, 64])
b_conv1 = bias_variable([64])
x_image = tf.reshape(x, [-1, input_data.WIDTH, input_data.HEIGHT, 3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_norm1, W_conv2) + b_conv2)
h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
h_pool2 = max_pool_2x2(h_norm2)
W_fc1 = weight_variable([input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, input_data.HEIGHT / 4 * input_data.WIDTH / 4 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 2])
b_fc2 = bias_variable([2])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# Add summary ops to collect data
w_fc2_hist = tf.histogram_summary("weights_fc2", W_fc2)
b_fc2_hist = tf.histogram_summary("bias_fc2", b_fc2)
w_conv1_hist = tf.histogram_summary("weights_conv1", W_conv1)
b_conv1_hist = tf.histogram_summary("bias_conv1", b_conv1)
y_hist = tf.histogram_summary("y_conv", y_conv)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
ce_summ = tf.scalar_summary("cross entropy", cross_entropy)
train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
accuracy_summary = tf.scalar_summary("train accuracy", accuracy)
# Merge all the summaries and write them out to /tmp/tf
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter("/tmp/tf", sess.graph_def)
sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = data.train.next_batch(50)
    feed = {x: batch[0], y_: batch[1], keep_prob: 0.5}
    result = sess.run([merged, accuracy, train_step], feed_dict=feed)
    if i % 100 == 0:  # Record summary data and the accuracy
        summary_str = result[0]
        acc = result[1]
        writer.add_summary(summary_str, i)
        print("Accuracy at step {0}/{1}: {2}%".format(i, 20000, int(acc * 100)))
print("test accuracy %g" % accuracy.eval(feed_dict={
    x: data.test.images, y_: data.test.labels, keep_prob: 1.0}))
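One more thing worth checking: computing the cross entropy by hand as -tf.reduce_sum(y_ * tf.log(y_conv)) turns into NaN as soon as y_conv contains an exact zero, which by itself can make the training accuracy bounce around. A minimal sketch of the numerically stable built-in op, keeping the variable names from the code above:
# Keep the unscaled logits and let TensorFlow apply softmax and log
# internally; this avoids log(0) when a softmax output underflows to 0.
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)  # still available for predictions/summaries
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))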

Related

TensorFlow: Unable to evaluate / obtain tensor's value in CNN

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
The above is the code for the multilayer convolutional neural network straight from https://www.tensorflow.org/versions/r1.3/get_started/mnist/pros
I've been trying to obtain the values in h_conv1 and h_conv2, and I've tried using
get_value = h_conv1.eval() or h_conv1.eval(session=sess)
but neither is successful. I even tried setting a name on h_conv1 and getting it by using
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1, name='example')
test = tf.get_default_graph().get_tensor_by_name("example:0")
and still it wasn't successful.
However, it's easy to extract the values of W_conv1 by using
weights = W_conv1.eval()
and it shows up in Spyder's variable explorer as a NumPy array that I can do whatever I want with.
I was wondering whether there is another way to get the h_conv1 values, so that I can do some processing on them before feeding them to the next operation.
If you're running into the problem I expect you are, it's that the weights you are printing out successfully are TensorFlow variables (meaning their values are stored as part of the session), but h_conv1 is an operation, meaning its output is defined as a function of its inputs. Since those inputs route back to placeholders, and you're not feeding any placeholder values when you eval the operation, it fails with an InvalidArgumentError.
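If all you actually need is the activation for a batch you choose, you can evaluate the op directly by feeding its placeholder inputs. A minimal sketch, assuming the MNIST graph from the question (h_conv1 depends only on the x placeholder, so that is the only value you have to feed):
batch = mnist.train.next_batch(50)
# Supplying x satisfies every placeholder h_conv1 depends on, so the run
# succeeds and returns a NumPy array of shape (50, 28, 28, 32).
h_conv1_values = sess.run(h_conv1, feed_dict={x: batch[0]})
print(h_conv1_values.shape)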
What I'm guessing you're looking for, though, is the value of the output the last time you ran the training operation. To get this, the approach I got working is simply to attach a tf.Print operation to that node, and then evaluate it at the same time as the training operation. So in your graph definition you have something like this:
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_conv1_printop = tf.Print(h_conv1, [h_conv1])
Then replace this line:
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
with this line:
_, hconv1_out = sess.run([train_step, h_conv1_printop], feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print(hconv1_out)
Basically, attach a print Op to the relu Op you want to see the output of, and then evaluate that Op at the same time you are running the rest of the session so that it gets filled in with the value.
Hopefully that makes sense and solves your issue. It's always helpful to review the docs on graphs and sessions, since this is a common point of confusion.

Tensorflow LeNet Model MNIST

I can't find the mistake in my following TensorFlow LeNet model. I get the following error: ValueError: Tried to convert 'input' to a tensor and failed. Error: Shapes must be equal rank, but are 2 and 1
From merging shape 22 with other shapes. for 'Print_4/packed' (op: 'Pack') with input shapes: [5,5,1,20], [20], [5,5,20,50], [50], [2450,200], [200], [200,10], [10], [5,5,1,20], [20], [5,5,20,50], [50], [2450,200], [200], [200,10], [10], [5,5,1,20], [20], [5,5,20,50], [50], [2450,200], [200], [200,10], [10].
It seems my architecture is not correct in terms of dimensions, but I can't figure out where the problem is. Here is my code:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
# Input layer
x = tf.placeholder(tf.float32, [None, 784], name='x')
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Convolutional layer 1
W_conv1 = weight_variable([5, 5, 1, 20])
b_conv1 = bias_variable([20])
h_conv1 = conv2d(x_image, W_conv1) + b_conv1
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 20, 50])
b_conv2 = bias_variable([50])
h_conv2 = conv2d(h_pool1, W_conv2) + b_conv2
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2, [-1, 8*8*50])
W_fc1 = weight_variable([8 * 8* 50, 500])
b_fc1 = bias_variable([500])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
W_fc2 = weight_variable([500, 10])
b_fc2 = bias_variable([10])
y = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2, name='y')
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y),
                                              reduction_indices=[1]))
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                          name='accuracy')
# Training algorithm
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    max_steps = 20000
    for step in range(max_steps):
        a = tf.Print(v, [v], message="This is a: ")
        #print(a.eval())
        batch_xs, batch_ys = mnist.train.next_batch(50)
        sess.run([train_step], feed_dict={x: batch_xs, y_: batch_ys,
                                          keep_prob: 0.5})
    print(max_steps, sess.run(accuracy, feed_dict={x: mnist.test.images,
                                                   y_: mnist.test.labels, keep_prob: 1.0}))
The shape of h_pool2 is (?, 4, 4, 50), so these two lines of your code are wrong:
h_pool2_flat = tf.reshape(h_pool2, [-1, 8*8*50])
W_fc1 = weight_variable([8 * 8 * 50, 500])
Changing them to 4*4*50 should work.
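As a quick sanity check (a sketch against the graph in the question), you can print the static shape before writing the reshape; with 5x5 VALID convolutions and 2x2 pooling, the spatial size goes 28 -> 24 -> 12 -> 8 -> 4:
# conv1 (VALID): 28x28 -> 24x24, pool1: -> 12x12
# conv2 (VALID): 12x12 -> 8x8,   pool2: -> 4x4
print(h_pool2.get_shape())  # (?, 4, 4, 50)
h_pool2_flat = tf.reshape(h_pool2, [-1, 4 * 4 * 50])
W_fc1 = weight_variable([4 * 4 * 50, 500])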

Error when trying to split neural network testing

I have a TensorFlow neural network that I ran on the test data like this:
result = sess.run(y_conv, feed_dict={x: test_inputs})
However, this ran into memory issues, so I tried to break up the computation like this:
result = []
for i in range(0, len(test_inputs), 100):
    end = min(i + 100 - 1, len(test_inputs) - 1)
    r = sess.run(y_conv, feed_dict={x: test_inputs.loc[i:end, :]})
    result.append(r)
However, now I get this error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_2' with dtype float
[[Node: Placeholder_2 = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
So what is the cause of this problem? I would have thought that the network would work just as well on smaller batches of examples.
If this bears any relevance, the neural network is created like this:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
You have fed x as your input, but not keep_prob. Your network looks similar to the one in Deep MNIST for Experts; an example training snippet from there:
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
For inference, you should feed keep_prob as 1.0.
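Applied to the batched loop from the question, that looks like this (a sketch; feeding keep_prob: 1.0 disables dropout at test time):
result = []
for i in range(0, len(test_inputs), 100):
    end = min(i + 100 - 1, len(test_inputs) - 1)
    # Feed every placeholder the graph depends on: the inputs and keep_prob.
    r = sess.run(y_conv, feed_dict={x: test_inputs.loc[i:end, :],
                                    keep_prob: 1.0})
    result.append(r)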

Dillema with prediction with TensorFlow on MNIST set

Currently I am a newbie at TensorFlow. I have trained a model on the MNIST set, and now I have made some pictures with numbers and want to test the precision. I think I have a syntax problem, or a misunderstanding of how things in TensorFlow work.
This is my model:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
sess = tf.InteractiveSession()
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# stride 1 and 0 padding
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# pooling over 2x2
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Second Layer
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#Fully connected layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#Readout layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())
for i in range(200):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
# Here is my custom dataset
custom_data=GetDataset()
print sess.run(y_conv,feed_dict={x: custom_data})
Is this not the right syntax for making a prediction with my custom data? Am I missing something here? My data are in the same format as the MNIST set, but I can't find the correct syntax for making a prediction:
print sess.run(y_conv, feed_dict={x: custom_data})
Thanks a lot for any help!
y_conv will provide what you need to make predictions. You're probably just not understanding the form the data takes in that tensor.
In your code you have a loss function and an optimizer:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
Note that you pass y_conv to the softmax_cross_entropy_with_logits method. At this point y_conv holds unscaled scores (logits), one per class; larger values represent stronger evidence for that class.
softmax will convert these to a probability distribution over all outputs, which notably maps every output into the [0, 1] range. Cross entropy then computes the error (cross entropy assumes values in the [0, 1] range).
It's common to simply create another tensor that actually computes the prediction; if you're using softmax, then:
prediction = tf.nn.softmax(y_conv)
That would give you the predicted probability distribution over the labels. Just request that tensor in your sess.run step.
If you just care about the most probable class, you can take the argmax of the y_conv values. Note also that if that is the case, you might want to experiment with tf.nn.sigmoid_cross_entropy_with_logits, which is slightly more tuned to producing results that are not a probability distribution (slightly better for independent per-class predictions, and mandatory for multi-label prediction).
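Putting that together, a minimal sketch for the custom data (assuming custom_data is shaped [batch, 784] like the MNIST images; note that keep_prob must be fed too, with 1.0 to disable dropout at inference):
prediction = tf.nn.softmax(y_conv)
probs = sess.run(prediction, feed_dict={x: custom_data, keep_prob: 1.0})
# Most probable class per image; argmax over the softmax output is the
# same as argmax over the raw logits.
predicted_classes = probs.argmax(axis=1)
print(predicted_classes)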

Uninitialized value error while using Adadelta optimizer in Tensorflow

I am trying to build a CNN using the Adadelta optimizer, but I am getting the following error.
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value Variable_7/Adadelta
[[Node: Adadelta/update_Variable_7/ApplyAdadelta = ApplyAdadelta[T=DT_FLOAT, _class=["loc:#Variable_7"], use_locking=false, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_7, Variable_7/Adadelta, Variable_7/Adadelta_1, Adadelta/lr, Adadelta/rho, Adadelta/epsilon, gradients/add_3_grad/tuple/control_dependency_1)]]
Caused by op u'Adadelta/update_Variable_7/ApplyAdadelta',
optimizer = tf.train.AdadeltaOptimizer(learning_rate).minimize(cross_entropy)
I tried reinitializing the session variables after the Adadelta statement, as mentioned in this post, but that didn't help either.
How can I avoid this error? Thanks.
Tensorflow: Using Adam optimizer
import tensorflow as tf
import numpy
from tensorflow.examples.tutorials.mnist import input_data
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.01
training_epochs = 100
batch_size = 1000
display_step = 1
# Set model weights
W = tf.Variable(tf.zeros([784, 10]), name="weights")
b = tf.Variable(tf.zeros([10]), name="bias")
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Initializing the variables
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            x_image = tf.reshape(batch_xs, [-1, 28, 28, 1])
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
            h_pool1 = max_pool_2x2(h_conv1)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
            h_pool2 = max_pool_2x2(h_conv2)
            h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
            y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
            cross_entropy = tf.reduce_mean(-tf.reduce_sum(batch_ys * tf.log(y_conv), reduction_indices=[1]))
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
            optimizer = tf.train.AdadeltaOptimizer(learning_rate).minimize(cross_entropy)
            sess.run(init)
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(batch_ys, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            sess.run([cross_entropy, y_conv, optimizer])
            print cross_entropy.eval()
The problem here is that tf.initialize_all_variables() is a misleading name. It really means "return an operation that initializes all variables that have already been created (in the default graph)". When you call tf.train.AdadeltaOptimizer(...).minimize(), TensorFlow creates additional variables, which are not covered by the init op that you created earlier.
Moving the line:
init = tf.initialize_all_variables()
...after the construction of the tf.train.AdadeltaOptimizer should make your program work.
N.B. Your program rebuilds the entire network, apart from the variables, on each training step. This is likely to be very inefficient, and the Adadelta algorithm will not adapt as expected because its state is recreated on each step. I would strongly recommend moving the code from the definition of batch_xs to the creation of the optimizer outside of the two nested for loops. You should define tf.placeholder() ops for the batch_xs and batch_ys inputs, and use the feed_dict argument to sess.run() to pass in the values returned by mnist.train.next_batch().
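A sketch of that restructuring, keeping the names from the question (the graph, optimizer, and init op are built once; only data flows in through placeholders):
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
# ... build the network from x exactly as before, producing y_conv ...
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
optimizer = tf.train.AdadeltaOptimizer(learning_rate).minimize(cross_entropy)
# Created after the optimizer, so it also initializes the Adadelta slots.
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for i in range(int(mnist.train.num_examples / batch_size)):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={x: batch_xs, y_: batch_ys})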
