Trouble predicting with tensorflow model

Trouble predicting with tensorflow model - python

I've trained a Deep Neural Network on the MNIST dataset. Here is the code for training.
n_classes = 10
batch_size = 100
x = tf.placeholder(tf.float32, [None, 784],name='Xx')
y = tf.placeholder(tf.float32,[None,10],name='Yy')
input = 784
n_nodes_1 = 300
n_nodes_2 = 300
def neural_network_model(data):
variables = {'w1':tf.Variable(tf.random_normal([input,n_nodes_1])),
'w2':tf.Variable(tf.random_normal([n_nodes_1,n_nodes_2])),
'w3':tf.Variable(tf.random_normal([n_nodes_2,n_classes])),
'b1':tf.Variable(tf.random_normal([n_nodes_1])),
'b2':tf.Variable(tf.random_normal([n_nodes_2])),
'b3':tf.Variable(tf.random_normal([n_classes]))}
output1 = tf.add(tf.matmul(data,variables['w1']),variables['b1'])
output2 = tf.nn.relu(output1)
output3 = tf.add(tf.matmul(output2, variables['w2']), variables['b2'])
output4 = tf.nn.relu(output3)
output5 = tf.add(tf.matmul(output4, variables['w3']), variables['b3'],name='last')
return output5
def train_neural_network(x):
prediction = neural_network_model(x)
name_of_final_layer = 'fin'
final = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
labels=y,name=name_of_final_layer)
cost = tf.reduce_mean(final)
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 3
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_,c=sess.run([optimizer,cost],feed_dict={x:epoch_x,y:epoch_y})
epoch_loss += c
print("Epoch",epoch+1,"Completed Total Loss:",epoch_loss)
correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,'float'))
print('Accuracy on val_set:',accuracy.eval({x:mnist.test.images,y:mnist.test.labels}))
path = saver.save(sess,"net/network")
print("Saved to",path)
Here is my code for evaluating a single datapoint
def eval_neural_network():
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('net/network.meta')
new_saver.restore(sess, "net/network")
sing = np.reshape(mnist.test.images[0],(-1,784))
output = sess.run([y],feed_dict={x:sing})
print(output)
eval_neural_network()
The error that popped up is :
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Yy' with dtype float and shape [?,10]
[[Node: Yy = Placeholder[dtype=DT_FLOAT, shape=[?,10], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I've researched this online for multiple days now and still could not get it to work. Any advice?

The losses are oscillating like this but the predictions don't seem to be bad. It works.
It also extracts the mnist archive repeatedly. Accuracy also can reach 0.98 with a simpler network.
Epoch 1 Completed Total Loss: 47.47844
Accuracy on val_set: 0.8685
Epoch 2 Completed Total Loss: 10.217445
Accuracy on val_set: 0.9
Epoch 3 Completed Total Loss: 14.013474
Accuracy on val_set: 0.9104
[2]
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import matplotlib.pyplot as plt
n_classes = 10
batch_size = 100
x = tf.placeholder(tf.float32, [None, 784],name='Xx')
y = tf.placeholder(tf.float32,[None,10],name='Yy')
input = 784
n_nodes_1 = 300
n_nodes_2 = 300
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
def neural_network_model(data):
variables = {'w1':tf.Variable(tf.random_normal([input,n_nodes_1])),
'w2':tf.Variable(tf.random_normal([n_nodes_1,n_nodes_2])),
'w3':tf.Variable(tf.random_normal([n_nodes_2,n_classes])),
'b1':tf.Variable(tf.random_normal([n_nodes_1])),
'b2':tf.Variable(tf.random_normal([n_nodes_2])),
'b3':tf.Variable(tf.random_normal([n_classes]))}
output1 = tf.add(tf.matmul(data,variables['w1']),variables['b1'])
output2 = tf.nn.relu(output1)
output3 = tf.add(tf.matmul(output2, variables['w2']), variables['b2'])
output4 = tf.nn.relu(output3)
output5 = tf.add(tf.matmul(output4, variables['w3']), variables['b3'],name='last')
return output5
def train_neural_network(x):
prediction = neural_network_model(x)
name_of_final_layer = 'fin'
final = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
labels=y,name=name_of_final_layer)
cost = tf.reduce_mean(final)
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 3
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_,c=sess.run([optimizer,cost],feed_dict={x:epoch_x,y:epoch_y})
print("Epoch",epoch+1,"Completed Total Loss:",c)
correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,'float'))
print('Accuracy on val_set:',accuracy.eval({x:mnist.test.images,y:mnist.test.labels}))
#path = saver.save(sess,"net/network")
#print("Saved to",path)
return prediction
def eval_neural_network(prediction):
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('net/network.meta')
new_saver.restore(sess, "net/network")
singleprediction = tf.argmax(prediction, 1)
sing = np.reshape(mnist.test.images[1], (-1, 784))
output = singleprediction.eval(feed_dict={x:sing},session=sess)
digit = mnist.test.images[1].reshape((28, 28))
plt.imshow(digit, cmap='gray')
plt.show()
print(output)
prediction = train_neural_network(x)
eval_neural_network(prediction)

This complete example based on tensorflow github worked for me:
(I modified few lines of code by removing name scope for x, keep_prob and changing to tf.placeholder_with_default. There's probably a better way to do this somewhere.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import argparse
import sys
import tempfile

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None


def deepnn(x):
"""deepnn builds the graph for a deep net for classifying digits.

Args:
x: an input tensor with the dimensions (N_examples, 784), where 784 is the
number of pixels in a standard MNIST image.

Returns:
A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
equal to the logits of classifying the digit into one of 10 classes (the
digits 0-9). keep_prob is a scalar placeholder for the probability of
dropout.
"""
# Reshape to use within a convolutional neural net.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
with tf.name_scope('reshape'):
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer - maps one grayscale image to 32 feature maps.
with tf.name_scope('conv1'):
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# Pooling layer - downsamples by 2X.
with tf.name_scope('pool1'):
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer -- maps 32 feature maps to 64.
with tf.name_scope('conv2'):
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# Second pooling layer.
with tf.name_scope('pool2'):
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
# is down to 7x7x64 feature maps -- maps this to 1024 features.
with tf.name_scope('fc1'):
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout - controls the complexity of the model, prevents co-adaptation of
# features.

keep_prob = tf.placeholder_with_default(1.0,())
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Map the 1024 features to 10 classes, one for each digit
with tf.name_scope('fc2'):
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
return y_conv, keep_prob


def conv2d(x, W):
"""conv2d returns a 2d convolution layer with full stride."""
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
"""max_pool_2x2 downsamples a feature map by 2X."""
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')


def weight_variable(shape):
"""weight_variable generates a weight variable of a given shape."""
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)


def bias_variable(shape):
"""bias_variable generates a bias variable of a given shape."""
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)


# Import data
mnist = input_data.read_data_sets("/tmp")
# Create the model
x = tf.placeholder(tf.float32, [None, 784],name='x')
# Define loss and optimizer
y_ = tf.placeholder(tf.int64, [None])
# Build the graph for the deep net
y_conv, keep_prob = deepnn(x)
with tf.name_scope('loss'):
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
labels=y_, logits=y_conv)
cross_entropy = tf.reduce_mean(cross_entropy)

with tf.name_scope('adam_optimizer'):
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

with tf.name_scope('accuracy'):
correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
correct_prediction = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_prediction)

graph_location = tempfile.mkdtemp()
print('Saving graph to: %s' % graph_location)
train_writer = tf.summary.FileWriter(graph_location)
train_writer.add_graph(tf.get_default_graph())

with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(1000):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0})
print('step %d, training accuracy %g' % (i, train_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

print('test accuracy %g' % accuracy.eval(feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
sing = np.reshape(mnist.test.images[0],(-1,784))
output = sess.run(y_conv,feed_dict={x:sing,keep_prob:1.0})
print(tf.argmax(output,1).eval())
saver = tf.train.Saver()
saver.save(sess,"/tmp/network")
Extracting /tmp/train-images-idx3-ubyte.gz
Extracting /tmp/train-labels-idx1-ubyte.gz
Extracting /tmp/t10k-images-idx3-ubyte.gz
Extracting /tmp/t10k-labels-idx1-ubyte.gz
Saving graph to: /tmp/tmp17hf_6c7
step 0, training accuracy 0.2
step 100, training accuracy 0.86
step 200, training accuracy 0.8
step 300, training accuracy 0.94
step 400, training accuracy 0.94
step 500, training accuracy 0.96
step 600, training accuracy 0.88
step 700, training accuracy 0.98
step 800, training accuracy 0.98
step 900, training accuracy 0.98
test accuracy 0.9663
[7]
If you want to restore from a new python run:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import argparse
import sys
import tempfile
from tensorflow.examples.tutorials.mnist import input_data
sess = tf.Session()
saver = tf.train.import_meta_graph('/tmp/network.meta')
saver.restore(sess,tf.train.latest_checkpoint('/tmp'))
graph = tf.get_default_graph()
mnist = input_data.read_data_sets("/tmp")
simg = np.reshape(mnist.test.images[0],(-1,784))
op_to_restore = graph.get_tensor_by_name("fc2/MatMul:0")
x = graph.get_tensor_by_name("x:0")
output = sess.run(op_to_restore,feed_dict= {x:simg})
print("Result = ", np.argmax(output))

Related

ValueError: Cannot feed value of shape

I'm new to Tensorflow and trying to programm a CNN with it. Im watching a tutorial and do exactly, 100% same code, and still get an error message at the end, that says:
ValueError: Cannot feed value of shape (100, 10) for Tensor 'y_2:0', which has shape '(?, 1)'
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import numpy as np
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
training_digits, training_labels = mnist.train.next_batch(1000)
test_digits, test_labels = mnist.test.next_batch(200)
height = 28
width = 28
channels = 1
n_inputs = height * width
conv1_feature_maps = 32
conv1_kernel_size = 3
conv1_stride = 1
conv1_pad = "SAME"
conv2_feature_maps = 64
conv2_kernel_size = 3
conv2_stride = 2
conv2_pad = "SAME"
pool3_feature_maps = conv2_feature_maps
n_fullyconn1 = 64
n_outputs = 10
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
y = tf.placeholder(tf.int32, shape=[None], name="y")
conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_feature_maps,
kernel_size=conv1_kernel_size,
strides=conv1_stride, padding=conv1_pad,
activation = tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_feature_maps,
kernel_size=conv2_kernel_size,
strides=conv2_stride, padding=conv2_pad,
activation = tf.nn.relu, name="conv2")
pool3 = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding="VALID")
pool3_flat = tf.reshape(pool3, shape=[-1,pool3_feature_maps * 7 *7])
fullyconn1 = tf.layers.dense(pool3_flat, n_fullyconn1, activation = tf.nn.relu, name="fc1")
logits = tf.layers.dense(fullyconn1, n_outputs, name="output")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits,
labels = y)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 5
batch_size = 100
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(mnist.train.num_examples // batch_size):
X_batch, y_batch = mnist.train.next_batch(batch_size)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
print(epoch, "Train accuracy: ", acc_train, "Test accuracy: ", acc_test)
save_path = saver.save(sess, "./my_mnist_model")
What am I doing wrong and what should I change to solve this problem?
I tried to read all the other answers on stackoverflow, but i cannot connect them to my code.
Thanks!

You define y of size [?, 1] here:
y = tf.placeholder(tf.int32, shape=[None,1], name="y").
Change it to:
y = tf.placeholder(tf.int32, shape=[None,10], name="y")
Note that the shape is changed to [None,10].
Edit:
Set one_hot to False in mnist = input_data.read_data_sets("MNIST_data/", one_hot=True).

Tensor Flow Tutorial: Why isn't my version of 'Deep MNIST for Experts' working correctly?

I tried to follow the tensorflow tutorial 'Deep MNIST for Experts' [1]. My program (source below) is working, but it converges very slowly and achieves a quite bad accuracy of ~90%. It should achieve about 99,2% accuracy. I compared my solution to the 'mnist_deep.py' available for download [2], which looks quite similar ... but achieves those 99,2% accuracy on the same machine (so it's not a bug in tensorflow nor anything wrong with my installation). Surprisingly, it needs much more time for the training on the same machine telling me the trained model must be different / more complex. I checked my sources and compared it to mine, reordered stuff and checked the numbers. But, I didn't find any relevant difference except for coding style. I'm new to python - so maybe it's just some simple syntax issue...
Questions:
What is the difference in my version causing this problem?
Additional: How to debug those issues in tensorflow? I saw some generated graphs of the models on the webpage... how to generate them from the source?
My Program:
import os
import tensorflow as tf
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
from tensorflow.examples.tutorials.mnist import input_data
x = tf.placeholder(tf.float32, [None, 784])
# reshape
x_image = tf.reshape(x, [-1, 28, 28, 1])
# conv1
W_conv1 = weight_variable([5,5,1,32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# pool1
h_pool1 = max_pool_2x2(h_conv1)
# conv2
W_conv2 = weight_variable([5,5,32,64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# pool2
h_pool2 = max_pool_2x2(h_conv2)
# fcl
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# map 1024 features to 10 classes
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
y_ = tf.placeholder(tf.float32, [None, 10])
# loss function
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
)
# ADAM Optimizer
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# accuracy
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
correct_prediction = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0
})
print('step %d, training accuracy %g' % (i, train_accuracy))
train_step.run(feed_dict={x: batch[0], y_:batch[1], keep_prob: 0.5})
print('test accuracy %g' % accuracy.eval(feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
OUTPUT:
# python3 mnist_deep.py
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.06
step 100, training accuracy 0.06
step 200, training accuracy 0
step 300, training accuracy 0.12
step 400, training accuracy 0.06
step 500, training accuracy 0.1
step 600, training accuracy 0.18
step 700, training accuracy 0.12
step 800, training accuracy 0.1
step 900, training accuracy 0.22
step 1000, training accuracy 0.2
[...]
Version from the Webpage:
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import tempfile
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
FLAGS = None
def deepnn(x):
"""deepnn builds the graph for a deep net for classifying digits.
Args:
x: an input tensor with the dimensions (N_examples, 784), where 784 is the
number of pixels in a standard MNIST image.
Returns:
A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
equal to the logits of classifying the digit into one of 10 classes (the
digits 0-9). keep_prob is a scalar placeholder for the probability of
dropout.
"""
# Reshape to use within a convolutional neural net.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
with tf.name_scope('reshape'):
x_image = tf.reshape(x, [-1, 28, 28, 1])
# First convolutional layer - maps one grayscale image to 32 feature maps.
with tf.name_scope('conv1'):
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# Pooling layer - downsamples by 2X.
with tf.name_scope('pool1'):
h_pool1 = max_pool_2x2(h_conv1)
# Second convolutional layer -- maps 32 feature maps to 64.
with tf.name_scope('conv2'):
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# Second pooling layer.
with tf.name_scope('pool2'):
h_pool2 = max_pool_2x2(h_conv2)
# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
# is down to 7x7x64 feature maps -- maps this to 1024 features.
with tf.name_scope('fc1'):
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout - controls the complexity of the model, prevents co-adaptation of
# features.
with tf.name_scope('dropout'):
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Map the 1024 features to 10 classes, one for each digit
with tf.name_scope('fc2'):
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
return y_conv, keep_prob
def conv2d(x, W):
"""conv2d returns a 2d convolution layer with full stride."""
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
"""max_pool_2x2 downsamples a feature map by 2X."""
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def weight_variable(shape):
"""weight_variable generates a weight variable of a given shape."""
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
"""bias_variable generates a bias variable of a given shape."""
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def main(_):
# Import data
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
# Build the graph for the deep net
y_conv, keep_prob = deepnn(x)
with tf.name_scope('loss'):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
logits=y_conv)
cross_entropy = tf.reduce_mean(cross_entropy)
with tf.name_scope('adam_optimizer'):
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.name_scope('accuracy'):
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
correct_prediction = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
graph_location = tempfile.mkdtemp()
print('Saving graph to: %s' % graph_location)
train_writer = tf.summary.FileWriter(graph_location)
train_writer.add_graph(tf.get_default_graph())
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(20000):
batch = mnist.train.next_batch(50)
if i % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0})
print('step %d, training accuracy %g' % (i, train_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print('test accuracy %g' % accuracy.eval(feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str,
default='/tmp/tensorflow/mnist/input_data',
help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
OUTPUT:
# python3 mnist_deep.py
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
Saving graph to: /tmp/tmpw8uaz0vs
step 0, training accuracy 0.12
step 100, training accuracy 0.64
step 200, training accuracy 0.86
step 300, training accuracy 0.94
step 400, training accuracy 0.94
[...]
Links
[1] https://www.tensorflow.org/get_started/mnist/pros
[2] https://www.github.com/tensorflow/tensorflow/blob/r1.3/tensorflow/examples/tutorials/mnist/mnist_deep.py

Question 1
Possible reason for poor results: train_step.run(feed_dict={x: batch[0], y_:batch[1], keep_prob: 0.5}) should be outside the if i%100 statement.
Question 2
To answer you second question, the graphs are generated by tensorboard (as mentioned by #JoshVarty). To visualize your graph, you need to write it to a file. Add the following somewhere before running the session and after defining the complete graph:
file_writer = tf.summary.FileWriter(".", tf.get_default_graph())
Then start tensorboard server in the terminal from your current path tensorboard --logdir=".", and you can open it from your browser with default port 6006 and the graph will appear after you run the script.

I have built a CNN for detecting human faces . From the very first epoch I am getting higher accuracy. What might be the reason for it?

The training data set consists of face images taken from youtube faces database labelled as one and non face images are taken from 256Object categories 25k images are chosen for both positive and negative data. so totally 50k for training and another 10k images is taken from yooutube faces and 256 object categories which are not repeated.
The problem is I am getting 99% accuracy after just 12k iterations in first epoch itself and I am printing cost value also it is also starts from very high value like 596014.000 like that. When it is tested against the other face images it performs very badly.
cost vs epoch graph
import tensorflow as tf
import read_data
from sklearn import metrics
import numpy as np
import os
import graph_plotter as gp
# Parameters
learning_rate = 0.001
epochs = 30
batch_size = 100
display_step = 5
# tf Graph input
input_data = tf.placeholder(tf.float32, [None, 27, 31, 3])
output_data = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Getting train and test data
train_data, train_label , test_data, test_label = read_data.getData()
def conv2d(x, w, bias, k=1):
x = tf.nn.conv2d(x, w, strides=[1, k, k, 1], padding='SAME')
x = tf.nn.bias_add(x, bias)
return tf.nn.relu(x)
# Performs max pooling on the convolution layer output
def maxpool2d(x, k=2):
return tf.nn.max_pool(x,
ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Weights generated randomly according to layer
weights = {
# Conv 4*4 , 1 input , 32 outputs
'wc1': tf.Variable(tf.random_normal([4, 4, 3, 32])),
# Conv 3*3 , 32 inputs , 32 outputs
'wc2': tf.Variable(tf.random_normal([3, 3, 32, 64])),
# Conv 5*6 , 64 input , 128 outputs
'wc3': tf.Variable(tf.random_normal([5, 6, 64, 128])),
# Conv 1*1 , 128 inputs , 256 outputs
'wc4': tf.Variable(tf.random_normal([1, 1, 128, 256])),
# Conv 1*1 , 256 inputs , 256 outputs
'wc5': tf.Variable(tf.random_normal([1, 1, 256, 512])),
# Output Layer 7*8*256 inputs and 1 output ( face or non-face )
'out': tf.Variable(tf.random_normal([7*8*512, 1]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bc4': tf.Variable(tf.random_normal([256])),
'bc5': tf.Variable(tf.random_normal([512])),
'out': tf.Variable(tf.random_normal([1]))
}
def model(x, weight, bias, dropout):
# Layer 1
conv1 = conv2d(x, weight['wc1'], bias['bc1'])
conv1 = maxpool2d(conv1, k=2)
# Layer 2
conv2 = conv2d(conv1, weight['wc2'], bias['bc2'])
conv2 = maxpool2d(conv2, k=2)
# Layer 3
conv3 = conv2d(conv2, weight['wc3'], bias['bc3'])
# Layer 4
conv4 = conv2d(conv3, weight['wc4'], bias['bc4'])
# Layer 5
conv5 = conv2d(conv4, weight['wc5'], bias['bc5'])
#conv5 = tf.nn.dropout(conv5, dropout)
# Flattening data
intermediate = tf.reshape(conv5, shape=[-1, 7*8*512])
# Output Layer
output = tf.add(tf.matmul(intermediate, weight['out']), bias['out'])
return output
pred = model(input_data, weights, biases, keep_prob)
l2_loss = 0.001*(
tf.nn.l2_loss(weights.get('wc4')) +
tf.nn.l2_loss(weights.get('wc5')) +
tf.nn.l2_loss(weights.get('out')))
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
pred, output_data)) + l2_loss
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_pred = tf.equal(
tf.greater(sigmoid_output, 0.5), tf.greater(output_data, 0.5))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
y_p = tf.cast(tf.greater(sigmoid_output, 0.5), tf.int32)
saver = tf.train.Saver()
tf.add_to_collection('y_p', y_p)
tf.add_to_collection('pred', pred)
tf.add_to_collection('x', input_data)
tf.add_to_collection('y', output_data)
init = tf.global_variables_initializer()
with tf.device("/gpu:0"):
with tf.Session() as sess:
sess.run(init)
train_data_minibatches = [train_data[k:k + batch_size]
for k in range(0, len(train_data), batch_size)]
train_label_minibatches = [train_label[k:k + batch_size]
for k in range(0, len(train_label), batch_size)]
step = 0
batch_count = 0
avg_cost_list = []
avg_accuracy_list = []
for epoch in range(epochs):
print('Epoch '+epoch.__str__())
cost_list = []
accuracy_list = []
for batch_x, batch_y in zip(
train_data_minibatches, train_label_minibatches):
batch_count += 1
sess.run(optimizer, feed_dict={
input_data: batch_x, output_data: batch_y,
keep_prob: 0.75})
# if epoch % display_step == 0:
loss, acc, output = sess.run([cost, accuracy, sig],
feed_dict={input_data: batch_x, output_data: batch_y, keep_prob: 0.75})
cost_list.append(loss)
accuracy_list.append(acc)
print("Iter " + str(step * batch_size) +" Loss "+ "{:.5f}".format(loss)+ ", Training Accuracy= " +
"{:.5f}".format(acc))
step += 1
average_cost = sum(cost_list) / len(cost_list)
average_acc = sum(accuracy_list) / len(accuracy_list)
avg_cost_list.append(average_cost)
avg_accuracy_list.append(average_acc)
if epoch % display_step == 0:
test_acc, y_pred = sess.run([accuracy, y_p], feed_dict={input_data: test_data,
output_data: test_label,
keep_prob: 0.75})
print(metrics.confusion_matrix(test_label, y_pred))
print("Testing Accuracy : " + "{:.5f}".format(test_acc))
print("Optimization finished !!")
# Saving cost Vs epoch graph, and accuracy Vs epoch graphs.
gp.cost_vs_epoch(avg_cost_list)
gp.accuracy_vs_epoch(avg_accuracy_list)
save_path = saver.save(sess=sess, save_path=save_path, write_meta_graph=True)

The line
correct_pred = tf.equal(tf.greater(sigmoid_output, 0.5),
tf.greater(output_data, 0.5))
is likely to be wrong. Your code seems not to have sigmoid_output, but only pred.
If that is not the problem, I would look into train_data. How many "True" labels > 0.5 do you have? How many labels which evaluate to false do you have?

Tensorflow batch normalization

Below are the code i am using as a learning programming in Tensorflow.
from __future__ import print_function
from datetime import datetime
import time, os
import tensorflow as tf
# Import data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.001
training_epoch = 5
batch_size = 128
display_step = 10
model_path = "./output/model.ckpt"
logs_path = './logs'
directory = os.path.dirname(model_path)
if not os.path.exists(directory):
os.makedirs(directory)
directory = os.path.dirname(logs_path)
if not os.path.exists(directory):
os.makedirs(directory)
# Network Parameters
n_input = 784 # data input
n_classes = 10 # classes
dropout = 0.5 # Dropout, probability to keep units
l2_regularization_strength = 0.0005 #l2 regularization strength
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name='InputData')
y = tf.placeholder(tf.float32, [None, n_classes], name='LabelData')
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
mode = tf.placeholder(tf.int32);
# Create some wrappers for simplicity
def conv2d(x, kernel_shape, strides=1, mode=0):
# Conv2D wrapper, with batch normalization and relu activation
weights = tf.get_variable('weights', kernel_shape, initializer=tf.contrib.layers.xavier_initializer())
x = tf.nn.conv2d(x, weights, strides=[1, strides, strides, 1], padding='SAME')
pop_mean = tf.get_variable('bn_pop_mean', [x.get_shape()[-1]], initializer=tf.constant_initializer(0), trainable=False)
pop_var = tf.get_variable('bn_pop_var', [x.get_shape()[-1]], initializer=tf.constant_initializer(1), trainable=False)
scale = tf.get_variable('bn_scale', [x.get_shape()[-1]], initializer=tf.constant_initializer(1))
beta = tf.get_variable('bn_beta', [x.get_shape()[-1]], initializer=tf.constant_initializer(0))
epsilon = 1e-3
decay = 0.999
if mode == 0:
batch_mean, batch_var = tf.nn.moments(x,[0, 1, 2])
train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
with tf.control_dependencies([train_mean, train_var]):
bn = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='bn')
else:
bn = tf.nn.batch_normalization(x, pop_mean, pop_var, beta, scale, epsilon, name='bn')
return tf.nn.relu(bn, name = 'relu')
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME', name='maxpool')
# Create model
def conv_net(x, dropout, mode):
# Reshape input picture
x = tf.reshape(x, shape=[-1, 28, 28, 1])
with tf.variable_scope("conv1"):
# Convolution Layer
conv1 = conv2d(x, [5, 5, 1, 32], mode=mode)
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
with tf.variable_scope("conv2"):
# Convolution Layer
conv2 = conv2d(conv1, [5, 5, 32, 64], mode=mode)
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
with tf.variable_scope("fc1"):
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
weights = tf.get_variable("weights", [7*7*64, 1024], initializer=tf.contrib.layers.xavier_initializer())
biases = tf.get_variable("biases", [1024], initializer=tf.constant_initializer(0.0))
fc1 = tf.reshape(conv2, [-1, weights.get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights), biases)
fc1 = tf.nn.relu(fc1, name = 'relu')
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout, name='dropout')
with tf.variable_scope("output"):
# Output, class prediction
weights = tf.get_variable("weights", [1024, n_classes], initializer=tf.contrib.layers.xavier_initializer())
biases = tf.get_variable("biases", [n_classes], initializer=tf.constant_initializer(0.0))
out = tf.add(tf.matmul(fc1, weights), biases)
return out
with tf.name_scope('Model'):
# Construct model
pred = conv_net(x, keep_prob, mode)
with tf.name_scope('Loss'):
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
vars = tf.trainable_variables()
l2_regularization = tf.add_n([tf.nn.l2_loss(v) for v in vars if any(x in v.name for x in ['weights', 'biases'])])
for v in vars:
if any(x in v.name for x in ['weights', 'biases']):
print(v.name + '-included!')
else:
print(v.name)
cost += l2_regularization_strength*l2_regularization
with tf.name_scope('Optimizer'):
# Define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Op to calculate every variable gradient
grads = tf.gradients(cost, tf.trainable_variables())
grads = list(zip(grads, tf.trainable_variables()))
# Op to update all variables according to their gradient
apply_grads = optimizer.apply_gradients(grads_and_vars=grads)
with tf.name_scope('Accuracy'):
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Create a summary to monitor cost tensor
tf.scalar_summary('cost', cost)
# Create a summary to monitor l2_regularization tensor
tf.scalar_summary('l2_regularization', l2_regularization)
# Create a summary to monitor accuracy tensor
tf.scalar_summary('accuracy', accuracy)
# Create summaries to visualize weights
for var in tf.trainable_variables():
tf.histogram_summary(var.name, var)
for var in tf.all_variables():
if 'bn_pop' in var.name:
tf.histogram_summary(var.name, var)
# Summarize all gradients
for grad, var in grads:
tf.histogram_summary(var.name + '/gradient', grad)
# Merge all summaries into a single op
merged_summary_op = tf.merge_all_summaries()
# 'Saver' op to save and restore all the variables
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# op to write logs to Tensorboard
summary_writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
# Keep training until reach max epoch
while step * batch_size < training_epoch * mnist.train.num_examples:
start_time = time.time()
# Get barch
batch_x, batch_y = mnist.train.next_batch(batch_size)
# Run optimization op (backprop)
sess.run(apply_grads, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout, mode: 0})
duration = time.time() - start_time
if step % display_step == 0:
# Calculate batch loss and accuracy
loss, acc, summary = sess.run([cost, accuracy, merged_summary_op], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.,
mode: 1})
# Write logs at every iteration
summary_writer.add_summary(summary, step)
# Calculate number sample per sec
samples_per_sec = batch_size / duration
format_str = ('%s: Iter %d, Epoch %d, (%.1f examples/sec; %.3f sec/batch), Minibatch Loss = %.5f , Training Accuracy=%.5f')
print (format_str % (datetime.now(), step*batch_size, int(step*batch_size/mnist.train.num_examples) + 1, samples_per_sec, float(duration), loss, acc))
step += 1
print("Optimization Finished!")
# Calculate accuracy for 256 mnist test images
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: mnist.test.images[:5000],
y: mnist.test.labels[:5000],
keep_prob: 1.,
mode: 2}))
# Save model weights to disk
save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)
When i open the tensorboard and look at the histogram and distribution sesstion, the 'bn_pop_mean' and 'bn_pop_var' in 'conv1' and 'conv2' are not updateing (they are constant at the initialised value).
Although after the training i achieved around 97% accuracy, i don't know if it the batch normalization is in effect.

In your conv_net function, you didn't set the "reuse" parameter for the tf.variable_scope(). The default setting for "reuse" is "None". Every time conv2d function is called, "bn_pop_mean" and "bn_pop_var" are re-initalized.

if mode == 0:
batch_mean, batch_var = tf.nn.moments(x,[0, 1, 2])
train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
with tf.control_dependencies([train_mean, train_var]):
bn = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='bn')
else:
bn = tf.nn.batch_normalization(x, pop_mean, pop_var, beta, scale, epsilon, name='bn')
It seems that the if prediction here always evaluate to be False. I guess what you want to do is using mode via feed_dict to control your batch normalization. So you should use tf.cond in TensorFlow instead of if in Python.

Tensorflow accuracy at .99 but predictions awful

Maybe I'm making predictions wrong?
Here's the project... I have a greyscale input image that I am trying to segment. The segmentation is a simple binary classification (think of foreground vs background). So the ground truth (y) is a matrix of 0's and 1's -- so there's 2 classifications. Oh and the input image is a square, so I just use one variable called n_input
My accuracy essentially converges to 0.99 but when I make a prediction I get all zero's. EDIT --> there is a single 1 in each output matrices, both in the same place...
Here's my session code(everything else is working)...
with tf.Session() as sess:
sess.run(init)
summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
flag = 0
# while flag == 0:
while step * batch_size < training_iters:
batch_y, batch_x = data.train.next_batch(batch_size)
# pdb.set_trace()
# batch_x = batch_x.reshape((batch_size, n_input))
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = convert_to_2_channel(batch_y, batch_size)
# batch_y = batch_y.reshape((batch_size, n_output, n_classes))
batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
keep_prob: dropout})
if step % display_step == 0:
flag = 1
# Calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
save_path = "model.ckpt"
saver.save(sess, save_path)
im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
batch_x = np.array(im)
pdb.set_trace()
batch_x = batch_x.reshape((1, n_input, n_input))
batch_x = batch_x.astype(float)
# pdb.set_trace()
prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
print prediction
arr1 = np.empty((n_input,n_input))
arr2 = np.empty((n_input,n_input))
for i in xrange(n_input):
for j in xrange(n_input):
for k in xrange(2):
if k == 0:
arr1[i][j] = prediction[0][i][j][k]
else:
arr2[i][j] = prediction[0][i][j][k]
# prediction = np.asarray(prediction)
# prediction = np.reshape(prediction, (200,200))
# np.savetxt("prediction.csv", prediction, delimiter=",")
np.savetxt("prediction1.csv", arr1, delimiter=",")
np.savetxt("prediction2.csv", arr2, delimiter=",")
Since there are two classifications, that end part (with the couple of loops) is just to partition the prediction into two 2x2 matrices.
I saved the prediction arrays to a CSV file, and like I said, they were all zeros.
I have also confirmed all data is correct (dimensions and values).
Why would the training converge, but predictions are awful?
If you want to look at all the code, here it is...
import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.001
training_iters = 20000
batch_size = 128
display_step = 1
# Network Parameters
n_input = 200 # MNIST data input (img shape: 28*28)
n_output = 40000 # MNIST total classes (0-9 digits)
n_classes = 2
#n_input = 200
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.float32, [None, n_input, n_input, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, n_input, n_input, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
conv1 = tf.nn.local_response_normalization(conv1)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = tf.nn.local_response_normalization(conv2)
conv2 = maxpool2d(conv2, k=2)
# Convolution Layer
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
# Max Pooling (down-sampling)
conv3 = tf.nn.local_response_normalization(conv3)
conv3 = maxpool2d(conv3, k=2)
# pdb.set_trace()
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
output = []
for i in xrange(2):
output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
return output
# return tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# 5x5 conv, 32 inputs, 64 outputs
'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([25*25*128, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_output]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_output]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# pdb.set_trace()
pred = tf.pack(tf.transpose(pred,[1,2,0]))
pred = tf.reshape(pred, [-1,n_input,n_input,n_classes])
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
saver = tf.train.Saver()
def convert_to_2_channel(x, batch_size):
#assume input has dimension (batch_size,x,y)
#output will have dimension (batch_size,x,y,2)
output = np.empty((batch_size, 200, 200, 2))
temp_arr1 = np.empty((batch_size, 200, 200))
temp_arr2 = np.empty((batch_size, 200, 200))
for i in xrange(batch_size):
for j in xrange(200):
for k in xrange(200):
if x[i][j][k] == 1:
temp_arr1[i][j][k] = 1
temp_arr2[i][j][k] = 0
else:
temp_arr1[i][j][k] = 0
temp_arr2[i][j][k] = 1
for i in xrange(batch_size):
for j in xrange(200):
for k in xrange(200):
for l in xrange(2):
if l == 0:
output[i][j][k][l] = temp_arr1[i][j][k]
else:
output[i][j][k][l] = temp_arr2[i][j][k]
return output
# Launch the graph
with tf.Session() as sess:
sess.run(init)
summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
flag = 0
# while flag == 0:
while step * batch_size < training_iters:
batch_y, batch_x = data.train.next_batch(batch_size)
# pdb.set_trace()
# batch_x = batch_x.reshape((batch_size, n_input))
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = convert_to_2_channel(batch_y, batch_size)
# batch_y = batch_y.reshape((batch_size, n_output, n_classes))
batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
keep_prob: dropout})
if step % display_step == 0:
flag = 1
# Calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
save_path = "model.ckpt"
saver.save(sess, save_path)
im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
batch_x = np.array(im)
pdb.set_trace()
batch_x = batch_x.reshape((1, n_input, n_input))
batch_x = batch_x.astype(float)
# pdb.set_trace()
prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
print prediction
arr1 = np.empty((n_input,n_input))
arr2 = np.empty((n_input,n_input))
for i in xrange(n_input):
for j in xrange(n_input):
for k in xrange(2):
if k == 0:
arr1[i][j] = prediction[0][i][j][k]
else:
arr2[i][j] = prediction[0][i][j][k]
# prediction = np.asarray(prediction)
# prediction = np.reshape(prediction, (200,200))
# np.savetxt("prediction.csv", prediction, delimiter=",")
np.savetxt("prediction1.csv", arr1, delimiter=",")
np.savetxt("prediction2.csv", arr2, delimiter=",")
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: data.test.images[:256],
y: data.test.labels[:256],
keep_prob: 1.})

Errors in the code
There are multiple errors in your code:
you shouldn't call tf.nn.sigmoid_cross_entropy_with_logits with the output of a softmax layer, but with the unscaled logits:
WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results.
in fact since you have 2 classes, you should use a loss with softmax, using tf.nn.softmax_cross_entropy_with_logits
When using tf.argmax(pred, 1), you only apply argmax over axis 1, which is the height of the output image. You should use tf.argmax(pred, 3) on the last axis (of size 2).
This might explain why you get 0.99 accuracy
On the output image, it will take the argmax over the height of the image, which is by default 0 (as all values are equal for each channel)
Wrong model
The biggest drawback is that your model in general will be very hard to optimize.
You have a softmax over 40,000 classes, which is huge.
You do not take advantage at all of the fact that you want to output an image (the prediction foreground / background).
for instance prediction 2,345 is highly correlated with prediction 2,346 and prediction 2,545 but you don't take that into account
I recommend reading a bit about semantic segmentation first:
this paper: Fully Convolutional Networks for Semantic Segmentation
these slides from CS231n (Stanford): especially the part about upsampling and deconvolution
Recommendations
If you want to work with TensorFlow, you will need to start small. First try a very simple network with maybe 1 hidden layer.
You need to plot all the shapes of your tensors to make sure they correspond to what you thought. For instance, if you had plotted tf.argmax(y, 1), you would have realized the shape is [batch_size, 200, 2] instead of the expected [batch_size, 200, 200].
TensorBoard is your friend, you should try to plot the input image here, as well as your predictions to see what they look like.
Try small, with a very small dataset of 10 images and see if you can overfit it and predict almost the exact response.
To conclude, I am not sure of all my suggestions but they are worth trying, and I hope this will help you on the path to success !

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.