I was exploring TensorFlow modules with a convnet on notMNIST (Assignment 4 from the Udacity Deep Learning course).
My initial model uses tf.nn.conv2d and tf.nn.max_pool and reaches a validation accuracy of around 90%.
Initial model, tf.nn.*
batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
# Input data.
tf_train_dataset = tf.placeholder(
tf.float32, shape=(batch_size, image_size, image_size, num_channels))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
weights = tf.get_variable("weights", shape = kernel_shape,
initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
biases = tf.get_variable("biases", shape = bias_shape,
initializer = tf.constant_initializer(bias_init_v))
conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
hidden = tf.nn.relu(conv + biases)
maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')
return maxpool
def dense(inputs, units, activation=None):
return tf.layers.dense(
inputs,
units=units,
activation=activation,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.05, seed=seed),
bias_initializer=tf.constant_initializer(0.1),
name="Dense"
)
# Model.
def model(data, keep_prob=1):
with tf.variable_scope("conv1"):
layer1 = conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
with tf.variable_scope("conv2"):
layer2 = conv_relu_maxpool(layer1, [patch_size, patch_size, depth, depth], [depth], 1.0)
shape = layer2.get_shape().as_list()
reshape = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])
with tf.variable_scope("dense1"):
dense1 = dense( reshape, 64, activation=tf.nn.relu)
dropout1 = tf.nn.dropout( dense1, keep_prob=keep_prob, seed=seed)
with tf.variable_scope("dense2"):
dense2 = dense( dense1, 32, activation=tf.nn.relu)
dropout2 = tf.nn.dropout( dense2, keep_prob=keep_prob, seed=seed)
with tf.variable_scope("dense3"):
dense3 = dense( dropout2, num_labels)
return dense3
with tf.variable_scope("model"):
# Training computation.
logits = model(tf_train_dataset, keep_prob=0.7)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
# Optimizer.
global_step = tf.Variable(0) # count the number of steps taken.
learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
with tf.variable_scope("model", reuse=True):
valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
with tf.variable_scope("model", reuse=True):
test_prediction = tf.nn.softmax(model(tf_test_dataset))
I rewrote it using tf.layers.conv2d and tf.layers.max_pooling2d, followed by tf.layers.dense and tf.layers.dropout. Now the model fails to train: it runs suspiciously fast and gives 10% (random-choice) validation accuracy. I cannot figure out the cause.
The graph:
batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242
graph = tf.Graph()
tf.reset_default_graph()
with graph.as_default():
# Input data.
tf_train_dataset = tf.placeholder(
tf.float32, shape=(batch_size, image_size, image_size, num_channels))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
# Model.
def model(data, keep_prob=0.5, training=False):
conv_1 = tf.layers.conv2d(
data, filters=depth, kernel_size=1, padding='SAME',
activation=tf.nn.relu,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(0.0),
name="Conv_1"
)
pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')
conv_2 = tf.layers.conv2d(
pool_1, filters=depth, kernel_size=1, padding='SAME',
activation=tf.nn.relu,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=2*seed),
bias_initializer=tf.constant_initializer(1.0),
name="Conv_2"
)
pool_2 = tf.layers.max_pooling2d( conv_2, pool_size=patch_size, strides=2, padding='SAME')
shape = pool_2.get_shape().as_list()
reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])
dense_1 = tf.layers.dense(
reshape, units=num_hidden, activation=tf.nn.relu,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(1.0)
)
dropout_1 = tf.layers.dropout( dense_1, rate=(1.0-keep_prob), seed=seed, training=training)
dense_2 = tf.layers.dense(
dropout_1, units=num_labels, activation=None,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(1.0)
)
return dense_2
with tf.variable_scope("model"):
# Training computation.
logits = model(tf_train_dataset, training=True)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
# Optimizer.
global_step = tf.Variable(0, name="globalStep", trainable=False) # count the number of steps taken.
learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
with tf.variable_scope("model", reuse=True):
valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
with tf.variable_scope("model", reuse=True):
test_prediction = tf.nn.softmax(model(tf_test_dataset))
Training and evaluation:
num_steps = 1001
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print('Initialized')
for step in range(num_steps):
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
batch_labels = train_labels[offset:(offset + batch_size), :]
feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 50 == 0):
print('Step {:4d} rate {:.4f} '.format(step, learning_rate.eval()), end='')
accPred = accuracy(predictions, batch_labels)
accValid = accuracy(valid_prediction.eval(), valid_labels)
print('Loss {:2.3f} Batch acc.: {:02.2f}% Validation acc.: {:2.2f}%'.format(l,accPred,accValid))
print('\nTest accuracy: {:.2f}%'.format(accuracy(test_prediction.eval(), test_labels)))
Loss freezes at 2.303 and validation accuracy at 10%:
Initialized
Step 0 rate 0.0500 Loss 4.903 Batch acc.: 18.75% Validation acc.: 9.98%
Step 50 rate 0.0494 Loss 2.301 Batch acc.: 15.62% Validation acc.: 10.00%
Step 100 rate 0.0489 Loss 2.302 Batch acc.: 12.50% Validation acc.: 10.00%
Step 150 rate 0.0483 Loss 2.303 Batch acc.: 6.25% Validation acc.: 10.00%
Step 200 rate 0.0478 Loss 2.306 Batch acc.: 0.00% Validation acc.: 10.00%
Step 250 rate 0.0473 Loss 2.306 Batch acc.: 3.12% Validation acc.: 10.00%
Please advise what could possibly be going wrong.
The full notebook version of the problem is on GitHub; the problem cells are at the bottom.
I see two weird things:
Your second model has a different architecture from the first one (in the first you have three dense layers, in the second just two).
Possibly connected to 1., you're missing the linear layer at the end of the network (your output is dense_2, which has a relu activation).
My guess, then, is that you forgot the last piece of your network:
dense_3 = tf.layers.dense(
dense_2,
units=num_labels,
activation=None, #<<<<<<<<<<<<<<<<<<<<<
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(1.0)
)
return dense_3
Alternatively, remove the activation function from dense_2.
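A hedged sketch of that alternative, reusing the initializers from the code above (this is an illustration, not code from the notebook):
dense_2 = tf.layers.dense(
    dropout_1, units=num_labels, activation=None,  # linear output: unscaled logits
    kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
    bias_initializer=tf.constant_initializer(1.0)
)
return dense_2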
Update:
In the notebook, I see you define the convolutional+pool layers as follows:
conv_1 = tf.layers.conv2d(
data, filters=depth, kernel_size=1, padding='SAME',
activation=tf.nn.relu,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(0.0),
name="Conv_1"
)
pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')
If I compare this to the way you defined the layers before:
def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
weights = tf.get_variable("weights", shape = kernel_shape,
initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
biases = tf.get_variable("biases", shape = bias_shape,
initializer = tf.constant_initializer(bias_init_v))
conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
hidden = tf.nn.relu(conv + biases)
maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')
return maxpool
conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
I see that you have two errors:
You're using 1 x 1 kernels in the convolution instead of patch_size x patch_size (kernel_size=1 in the call to conv2d)
You're doing maxpool on 5 x 5 windows instead of 2 x 2 (pool_size=patch_size in the call to max_pooling2d)
Please note that there may be more differences between the architectures; these are just the ones I saw immediately.
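As a sketch, the conv + pool block with those two fixes applied might look like this (reusing the variables from the question's model function; my illustration, not code from the notebook):
conv_1 = tf.layers.conv2d(
    data, filters=depth, kernel_size=patch_size, padding='SAME',  # patch_size x patch_size kernel
    activation=tf.nn.relu,
    kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
    bias_initializer=tf.constant_initializer(0.01),
    name="Conv_1"
)
pool_1 = tf.layers.max_pooling2d(conv_1, pool_size=2, strides=2, padding='SAME')  # 2 x 2 pooling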
Related
I'm new to TensorFlow and am trying to run a CNN on Twitter embedding matrices (each embedding matrix is 574x300, words x embedding length) in batches of 100 tweets at a time. I keep getting the error ValueError: setting an array element with a sequence. at the following line near the bottom: sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch}).
filter_size = 2
embedding_size = 300
length_embedding = 575
num_filters = 100
filter_shape = [filter_size, embedding_size, 1, num_filters]
batch_size = 100
n_epochs = 10
n_inputs = length_embedding*embedding_size
n_outputs = 2 #classify between 2 categories
num_train_examples = 2000
with tf.name_scope("inputs"):
input_tweets = tf.placeholder(tf.float32, shape = [batch_size, length_embedding], name="input_tweets")
input_tweets_reshaped = tf.expand_dims(input_tweets, -1)
tweet_labels = tf.placeholder(tf.int32, shape = [batch_size], name="tweet_labels")
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
conv = tf.nn.conv2d(input_tweets_reshaped, W,
strides = [1,1,1,1], padding="VALID", name="conv")
conv_bias = tf.nn.bias_add(conv, b)
#pooling
sequence_length=input_tweets_reshaped.shape[1]
with tf.name_scope("pool"):
pool = tf.nn.max_pool(conv, ksize=[1, sequence_length - filter_size + 1, 1, 1],
strides=[1,1,1,1],
padding="VALID",
name="pool")
pool_flat = tf.reshape(pool, shape=[-1, num_filters])
#fully-connected layer
with tf.name_scope("fc_layer"):
fc_layer = tf.layers.dense(pool_flat, num_filters, activation=tf.nn.relu, name="fc_layer")
#output
with tf.name_scope("output_layer"):
logits = tf.layers.dense(fc_layer, n_outputs, name="output_layer")
Y_proba = tf.nn.softmax(logits, name="Y_proba")
#train
with tf.name_scope("train"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tweet_labels)
loss=tf.reduce_mean(xentropy)
optimizer=tf.train.AdamOptimizer()
training_op=optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, tweet_labels, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.name_scope("init_and_save"):
init = tf.global_variables_initializer()
saver = tf.train.Saver()
#--run model
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(num_train_examples // batch_size):
print("iteration: "+str(iteration))
x_batch = x_train[iteration*batch_size : (iteration+1)*batch_size]
y_batch = y_train[iteration*batch_size : (iteration+1)*batch_size]
sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
acc_train = accuracy.eval(feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
acc_test = accuracy.eval(feed_dict={input_tweets: x_test, tweet_labels: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
x_batch is a numpy array of length 100, and each element is a matrix of dimension 575 x 300 (though when I call x_batch.shape, it returns (100, 575)). y_batch is a 1d numpy array of 1's and 0's; y_batch.shape returns (100,). I think the problem is maybe about the dimensions of the inputs - can anyone see clearly what the mismatch is?
Thank you!
The input to conv2d must have rank 4, but yours has rank 3.
embedding_size, which determines the second dimension of your filter, must be less than or equal to the third dimension of your input tensor. Your third dimension is the expanded dimension, equal to 1, so embedding_size cannot be greater than 1!
You could use tf.layers.conv2d(), which will automatically create the variables for the convolution.
Maybe you intended to use tf.layers.conv1d()? It expects a tensor of rank 3 as input.
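For reference, a minimal sketch of what a tf.layers.conv1d variant might look like, assuming each tweet is fed as a [num_words, embedding_size] matrix (this is my illustration, separate from the modified version below):
input_tweets = tf.placeholder(tf.float32, shape=[None, length_embedding, embedding_size], name="input_tweets")
tweet_labels = tf.placeholder(tf.int32, shape=[None], name="tweet_labels")
# conv1d slides a window of filter_size words over the sequence axis.
conv = tf.layers.conv1d(input_tweets, filters=num_filters, kernel_size=filter_size,
                        padding="VALID", activation=tf.nn.relu, name="conv1d")
pool_flat = tf.reduce_max(conv, axis=1)  # global max pooling over the word axis
logits = tf.layers.dense(pool_flat, n_outputs, name="output_layer")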
I'm not sure what you want to achieve with your code, but here's the modified version that works:
import tensorflow as tf
import numpy as np
filter_size = 2
embedding_size = 300
length_embedding = 575
num_filters = 100
filter_shape = [filter_size, 1, 1, num_filters]
batch_size = 100
n_epochs = 10
n_inputs = length_embedding*embedding_size
n_outputs = 2 #classify between 2 categories
num_train_examples = 2000
with tf.name_scope("inputs"):
input_tweets = tf.placeholder(tf.float32, shape = [None, length_embedding], name="input_tweets")
input_tweets_reshaped = input_tweets[..., tf.newaxis, tf.newaxis]
tweet_labels = tf.placeholder(tf.int32, shape = [None], name="tweet_labels")
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(0.1*tf.ones([num_filters]), name="b")
conv = tf.nn.conv2d(input_tweets_reshaped,
W,
strides=[1,1,1,1],
padding="VALID",
name="conv")
conv_bias = tf.nn.bias_add(conv, b)
#pooling
sequence_length=input_tweets_reshaped.shape[1]
with tf.name_scope("pool"):
pool = tf.nn.max_pool(conv, ksize=[1, sequence_length - filter_size + 1, 1, 1],
strides=[1,1,1,1],
padding="VALID",
name="pool")
pool_flat = tf.reshape(pool, shape=[-1, num_filters])
#fully-connected layer
with tf.name_scope("fc_layer"):
fc_layer = tf.layers.dense(pool_flat, num_filters, activation=tf.nn.relu, name="fc_layer")
#output
with tf.name_scope("output_layer"):
logits = tf.layers.dense(fc_layer, n_outputs, name="output_layer")
Y_proba = tf.nn.softmax(logits, name="Y_proba")
#train
with tf.name_scope("train"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tweet_labels)
loss=tf.reduce_mean(xentropy)
optimizer=tf.train.AdamOptimizer()
training_op=optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, tweet_labels, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.name_scope("init_and_save"):
init = tf.global_variables_initializer()
saver = tf.train.Saver()
x_train = np.random.normal(size=(10*batch_size, length_embedding, ))
y_train = np.random.randint(low=0, high=2, size=10*batch_size)
x_test = x_train
y_test = y_train
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(num_train_examples // batch_size):
print("iteration: "+str(iteration))
x_batch = x_train[iteration*batch_size : (iteration+1)*batch_size]
y_batch = y_train[iteration*batch_size : (iteration+1)*batch_size]
sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
acc_train = accuracy.eval(feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
acc_test = accuracy.eval(feed_dict={input_tweets: x_test, tweet_labels: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Context: I'm using a fully convolutional network to perform image segmentation. Typically, the input is an RGB image of shape = [512, 256] and the target is a two-channel binary mask defining the annotated regions (the second channel is the opposite of the first channel).
Question: I have the same CNN implemented in both TensorFlow and Keras, but the TensorFlow model doesn't start learning. Actually, the loss even grows with the number of epochs! What is wrong in this TensorFlow implementation that prevents it from learning?
Setup: The dataset is split into 3 subsets: training (78%), testing (8%) and validation (14%), which are fed to the network in batches of 8 images. The graphs show the evolution of the loss for each subset. The images show the predictions after 10 epochs for two different images.
Tensorflow implementation and results
import tensorflow as tf
tf.reset_default_graph()
x = inputs = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 3])
targets = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 2])
for d in range(4):
x = tf.layers.conv2d(x, filters=np.exp2(d+4), kernel_size=[3,3], strides=[1,1], padding="SAME", activation=tf.nn.relu)
x = tf.layers.max_pooling2d(x, strides=[2,2], pool_size=[2,2], padding="SAME")
x = tf.layers.conv2d(x, filters=2, kernel_size=[1,1])
logits = tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True)
prediction = tf.nn.softmax(logits)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
def run(mode, x_batch, y_batch):
if mode == 'TRAIN':
return sess.run([loss, optimizer], feed_dict={inputs: x_batch, targets: y_batch})
else:
return sess.run([loss, prediction], feed_dict={inputs: x_batch, targets: y_batch})
Keras implementation and results
import keras as ke
ke.backend.clear_session()
x = inputs = ke.layers.Input(shape=[shape[1], shape[0], 3])
for d in range(4):
x = ke.layers.Conv2D(int(np.exp2(d+4)), [3,3], padding="SAME", activation="relu")(x)
x = ke.layers.MaxPool2D(padding="SAME")(x)
x = ke.layers.Conv2D(2, [1,1], padding="SAME")(x)
logits = ke.layers.Lambda(lambda x: ke.backend.tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True))(x)
prediction = ke.layers.Activation('softmax')(logits)
model = ke.models.Model(inputs=inputs, outputs=prediction)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy")
def run(mode, x_batch, y_batch):
if mode == 'TRAIN':
loss = model.train_on_batch(x=x_batch, y=y_batch)
return loss, None
else:
loss = model.evaluate(x=x_batch, y=y_batch, batch_size=None, verbose=0)
prediction = model.predict(x=x_batch, batch_size=None)
return loss, prediction
There must be a difference between the two, but my understanding of the documentation has led me nowhere. I would be really interested to know where the difference lies. Thanks in advance!
The answer was in the Keras implementation of softmax, where they subtract an unexpected max (the standard numerical-stability trick, which leaves the softmax value itself unchanged):
def softmax(x, axis=-1):
# when x is a 2 dimensional tensor
e = K.exp(x - K.max(x, axis=axis, keepdims=True))
s = K.sum(e, axis=axis, keepdims=True)
return e / s
Here is the TensorFlow implementation updated with the max hack, along with the good results it produces:
import tensorflow as tf
tf.reset_default_graph()
x = inputs = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 3])
targets = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 2])
for d in range(4):
x = tf.layers.conv2d(x, filters=np.exp2(d+4), kernel_size=[3,3], strides=[1,1], padding="SAME", activation=tf.nn.relu)
x = tf.layers.max_pooling2d(x, strides=[2,2], pool_size=[2,2], padding="SAME")
x = tf.layers.conv2d(x, filters=2, kernel_size=[1,1])
logits = tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True)
# The mysterious hack taken from Keras
logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=-1), -1)
prediction = tf.nn.softmax(logits)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
def run(mode, x_batch, y_batch):
if mode == 'TRAIN':
return sess.run([loss, optimizer], feed_dict={inputs: x_batch, targets: y_batch})
else:
return sess.run([loss, prediction], feed_dict={inputs: x_batch, targets: y_batch})
Huge thanks to Simon for pointing this out on the Keras implementation :-)
The training data set consists of face images taken from the YouTube Faces database (labelled as one) and non-face images taken from the 256 Object Categories dataset. 25k images are chosen for each of the positive and negative classes, so 50k in total for training, and another 10k non-repeated images are taken from YouTube Faces and 256 Object Categories.
The problem is that I get 99% accuracy after just 12k iterations, within the first epoch itself, and the cost value I print also starts from a very high value, around 596014.000. When the model is tested against other face images it performs very badly.
cost vs epoch graph
import tensorflow as tf
import read_data
from sklearn import metrics
import numpy as np
import os
import graph_plotter as gp
# Parameters
learning_rate = 0.001
epochs = 30
batch_size = 100
display_step = 5
# tf Graph input
input_data = tf.placeholder(tf.float32, [None, 27, 31, 3])
output_data = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Getting train and test data
train_data, train_label , test_data, test_label = read_data.getData()
def conv2d(x, w, bias, k=1):
x = tf.nn.conv2d(x, w, strides=[1, k, k, 1], padding='SAME')
x = tf.nn.bias_add(x, bias)
return tf.nn.relu(x)
# Performs max pooling on the convolution layer output
def maxpool2d(x, k=2):
return tf.nn.max_pool(x,
ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Weights generated randomly according to layer
weights = {
# Conv 4*4 , 1 input , 32 outputs
'wc1': tf.Variable(tf.random_normal([4, 4, 3, 32])),
# Conv 3*3 , 32 inputs , 32 outputs
'wc2': tf.Variable(tf.random_normal([3, 3, 32, 64])),
# Conv 5*6 , 64 input , 128 outputs
'wc3': tf.Variable(tf.random_normal([5, 6, 64, 128])),
# Conv 1*1 , 128 inputs , 256 outputs
'wc4': tf.Variable(tf.random_normal([1, 1, 128, 256])),
# Conv 1*1 , 256 inputs , 256 outputs
'wc5': tf.Variable(tf.random_normal([1, 1, 256, 512])),
# Output Layer 7*8*256 inputs and 1 output ( face or non-face )
'out': tf.Variable(tf.random_normal([7*8*512, 1]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bc4': tf.Variable(tf.random_normal([256])),
'bc5': tf.Variable(tf.random_normal([512])),
'out': tf.Variable(tf.random_normal([1]))
}
def model(x, weight, bias, dropout):
# Layer 1
conv1 = conv2d(x, weight['wc1'], bias['bc1'])
conv1 = maxpool2d(conv1, k=2)
# Layer 2
conv2 = conv2d(conv1, weight['wc2'], bias['bc2'])
conv2 = maxpool2d(conv2, k=2)
# Layer 3
conv3 = conv2d(conv2, weight['wc3'], bias['bc3'])
# Layer 4
conv4 = conv2d(conv3, weight['wc4'], bias['bc4'])
# Layer 5
conv5 = conv2d(conv4, weight['wc5'], bias['bc5'])
#conv5 = tf.nn.dropout(conv5, dropout)
# Flattening data
intermediate = tf.reshape(conv5, shape=[-1, 7*8*512])
# Output Layer
output = tf.add(tf.matmul(intermediate, weight['out']), bias['out'])
return output
pred = model(input_data, weights, biases, keep_prob)
l2_loss = 0.001*(
tf.nn.l2_loss(weights.get('wc4')) +
tf.nn.l2_loss(weights.get('wc5')) +
tf.nn.l2_loss(weights.get('out')))
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
pred, output_data)) + l2_loss
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_pred = tf.equal(
tf.greater(sigmoid_output, 0.5), tf.greater(output_data, 0.5))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
y_p = tf.cast(tf.greater(sigmoid_output, 0.5), tf.int32)
saver = tf.train.Saver()
tf.add_to_collection('y_p', y_p)
tf.add_to_collection('pred', pred)
tf.add_to_collection('x', input_data)
tf.add_to_collection('y', output_data)
init = tf.global_variables_initializer()
with tf.device("/gpu:0"):
with tf.Session() as sess:
sess.run(init)
train_data_minibatches = [train_data[k:k + batch_size]
for k in range(0, len(train_data), batch_size)]
train_label_minibatches = [train_label[k:k + batch_size]
for k in range(0, len(train_label), batch_size)]
step = 0
batch_count = 0
avg_cost_list = []
avg_accuracy_list = []
for epoch in range(epochs):
print('Epoch '+epoch.__str__())
cost_list = []
accuracy_list = []
for batch_x, batch_y in zip(
train_data_minibatches, train_label_minibatches):
batch_count += 1
sess.run(optimizer, feed_dict={
input_data: batch_x, output_data: batch_y,
keep_prob: 0.75})
# if epoch % display_step == 0:
loss, acc, output = sess.run([cost, accuracy, sig],
feed_dict={input_data: batch_x, output_data: batch_y, keep_prob: 0.75})
cost_list.append(loss)
accuracy_list.append(acc)
print("Iter " + str(step * batch_size) +" Loss "+ "{:.5f}".format(loss)+ ", Training Accuracy= " +
"{:.5f}".format(acc))
step += 1
average_cost = sum(cost_list) / len(cost_list)
average_acc = sum(accuracy_list) / len(accuracy_list)
avg_cost_list.append(average_cost)
avg_accuracy_list.append(average_acc)
if epoch % display_step == 0:
test_acc, y_pred = sess.run([accuracy, y_p], feed_dict={input_data: test_data,
output_data: test_label,
keep_prob: 0.75})
print(metrics.confusion_matrix(test_label, y_pred))
print("Testing Accuracy : " + "{:.5f}".format(test_acc))
print("Optimization finished !!")
# Saving cost Vs epoch graph, and accuracy Vs epoch graphs.
gp.cost_vs_epoch(avg_cost_list)
gp.accuracy_vs_epoch(avg_accuracy_list)
save_path = saver.save(sess=sess, save_path=save_path, write_meta_graph=True)
The line
correct_pred = tf.equal(tf.greater(sigmoid_output, 0.5),
tf.greater(output_data, 0.5))
is likely to be wrong: your code does not seem to define sigmoid_output anywhere, only pred.
If that is not the problem, I would look into train_data. How many "true" labels (> 0.5) do you have, and how many labels evaluate to false?
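For example, a quick class-balance check might look like this (a sketch, assuming train_label is a NumPy array of 0/1 face labels as returned by read_data.getData()):
import numpy as np
train_label = np.asarray(train_label)
num_pos = int((train_label > 0.5).sum())   # "face" labels
num_neg = int(train_label.size - num_pos)  # "non-face" labels
print("positive labels: {}, negative labels: {}".format(num_pos, num_neg))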
Maybe I'm making predictions wrong?
Here's the project... I have a greyscale input image that I am trying to segment. The segmentation is a simple binary classification (think of foreground vs background). So the ground truth (y) is a matrix of 0's and 1's -- so there's 2 classifications. Oh and the input image is a square, so I just use one variable called n_input
My accuracy essentially converges to 0.99, but when I make a prediction I get all zeros. EDIT --> there is a single 1 in each output matrix, both in the same place...
Here's my session code (everything else is working)...
with tf.Session() as sess:
sess.run(init)
summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
flag = 0
# while flag == 0:
while step * batch_size < training_iters:
batch_y, batch_x = data.train.next_batch(batch_size)
# pdb.set_trace()
# batch_x = batch_x.reshape((batch_size, n_input))
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = convert_to_2_channel(batch_y, batch_size)
# batch_y = batch_y.reshape((batch_size, n_output, n_classes))
batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
keep_prob: dropout})
if step % display_step == 0:
flag = 1
# Calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
save_path = "model.ckpt"
saver.save(sess, save_path)
im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
batch_x = np.array(im)
pdb.set_trace()
batch_x = batch_x.reshape((1, n_input, n_input))
batch_x = batch_x.astype(float)
# pdb.set_trace()
prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
print prediction
arr1 = np.empty((n_input,n_input))
arr2 = np.empty((n_input,n_input))
for i in xrange(n_input):
for j in xrange(n_input):
for k in xrange(2):
if k == 0:
arr1[i][j] = prediction[0][i][j][k]
else:
arr2[i][j] = prediction[0][i][j][k]
# prediction = np.asarray(prediction)
# prediction = np.reshape(prediction, (200,200))
# np.savetxt("prediction.csv", prediction, delimiter=",")
np.savetxt("prediction1.csv", arr1, delimiter=",")
np.savetxt("prediction2.csv", arr2, delimiter=",")
Since there are two classifications, that end part (with the couple of loops) is just to partition the prediction into two 200x200 matrices.
I saved the prediction arrays to a CSV file, and like I said, they were all zeros.
I have also confirmed all data is correct (dimensions and values).
Why would the training converge, but predictions are awful?
If you want to look at all the code, here it is...
import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.001
training_iters = 20000
batch_size = 128
display_step = 1
# Network Parameters
n_input = 200 # MNIST data input (img shape: 28*28)
n_output = 40000 # MNIST total classes (0-9 digits)
n_classes = 2
#n_input = 200
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.float32, [None, n_input, n_input, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, n_input, n_input, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
conv1 = tf.nn.local_response_normalization(conv1)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = tf.nn.local_response_normalization(conv2)
conv2 = maxpool2d(conv2, k=2)
# Convolution Layer
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
# Max Pooling (down-sampling)
conv3 = tf.nn.local_response_normalization(conv3)
conv3 = maxpool2d(conv3, k=2)
# pdb.set_trace()
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
output = []
for i in xrange(2):
output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
return output
# return tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# 5x5 conv, 32 inputs, 64 outputs
'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([25*25*128, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_output]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_output]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# pdb.set_trace()
pred = tf.pack(tf.transpose(pred,[1,2,0]))
pred = tf.reshape(pred, [-1,n_input,n_input,n_classes])
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
saver = tf.train.Saver()
def convert_to_2_channel(x, batch_size):
#assume input has dimension (batch_size,x,y)
#output will have dimension (batch_size,x,y,2)
output = np.empty((batch_size, 200, 200, 2))
temp_arr1 = np.empty((batch_size, 200, 200))
temp_arr2 = np.empty((batch_size, 200, 200))
for i in xrange(batch_size):
for j in xrange(200):
for k in xrange(200):
if x[i][j][k] == 1:
temp_arr1[i][j][k] = 1
temp_arr2[i][j][k] = 0
else:
temp_arr1[i][j][k] = 0
temp_arr2[i][j][k] = 1
for i in xrange(batch_size):
for j in xrange(200):
for k in xrange(200):
for l in xrange(2):
if l == 0:
output[i][j][k][l] = temp_arr1[i][j][k]
else:
output[i][j][k][l] = temp_arr2[i][j][k]
return output
# Launch the graph
with tf.Session() as sess:
sess.run(init)
summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
flag = 0
# while flag == 0:
while step * batch_size < training_iters:
batch_y, batch_x = data.train.next_batch(batch_size)
# pdb.set_trace()
# batch_x = batch_x.reshape((batch_size, n_input))
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = convert_to_2_channel(batch_y, batch_size)
# batch_y = batch_y.reshape((batch_size, n_output, n_classes))
batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
keep_prob: dropout})
if step % display_step == 0:
flag = 1
# Calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
save_path = "model.ckpt"
saver.save(sess, save_path)
im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
batch_x = np.array(im)
pdb.set_trace()
batch_x = batch_x.reshape((1, n_input, n_input))
batch_x = batch_x.astype(float)
# pdb.set_trace()
prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
print prediction
arr1 = np.empty((n_input,n_input))
arr2 = np.empty((n_input,n_input))
for i in xrange(n_input):
for j in xrange(n_input):
for k in xrange(2):
if k == 0:
arr1[i][j] = prediction[0][i][j][k]
else:
arr2[i][j] = prediction[0][i][j][k]
# prediction = np.asarray(prediction)
# prediction = np.reshape(prediction, (200,200))
# np.savetxt("prediction.csv", prediction, delimiter=",")
np.savetxt("prediction1.csv", arr1, delimiter=",")
np.savetxt("prediction2.csv", arr2, delimiter=",")
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: data.test.images[:256],
y: data.test.labels[:256],
keep_prob: 1.})
Errors in the code
There are multiple errors in your code:
you shouldn't call tf.nn.sigmoid_cross_entropy_with_logits with the output of a softmax layer, but with the unscaled logits:
WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results.
in fact since you have 2 classes, you should use a loss with softmax, using tf.nn.softmax_cross_entropy_with_logits
When using tf.argmax(pred, 1), you only apply argmax over axis 1, which is the height of the output image. You should use tf.argmax(pred, 3) on the last axis (of size 2).
This might explain why you get 0.99 accuracy: on the output image, the argmax is taken over the height of the image, which ends up being 0 by default (as all values are equal for each channel).
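A minimal sketch of those two fixes (assuming pred holds the unscaled logits reshaped to [batch_size, 200, 200, 2] and y the matching one-hot targets; my illustration, not the only way to do it):
# Loss on unscaled logits (no softmax applied inside the model).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# Pixel-wise accuracy: argmax over the class axis (axis 3), not the height axis.
correct_pred = tf.equal(tf.argmax(pred, 3), tf.argmax(y, 3))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))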
Wrong model
The biggest drawback is that your model in general will be very hard to optimize.
You have a softmax over 40,000 classes, which is huge.
You do not take advantage at all of the fact that you want to output an image (the prediction foreground / background).
for instance prediction 2,345 is highly correlated with prediction 2,346 and prediction 2,545 but you don't take that into account
I recommend reading a bit about semantic segmentation first:
this paper: Fully Convolutional Networks for Semantic Segmentation
these slides from CS231n (Stanford): especially the part about upsampling and deconvolution
Recommendations
If you want to work with TensorFlow, you will need to start small. First try a very simple network with maybe 1 hidden layer.
You need to plot all the shapes of your tensors to make sure they correspond to what you thought (see the shape-check sketch after this list). For instance, if you had plotted tf.argmax(y, 1), you would have realized the shape is [batch_size, 200, 2] instead of the expected [batch_size, 200, 200].
TensorBoard is your friend, you should try to plot the input image here, as well as your predictions to see what they look like.
Try small, with a very small dataset of 10 images and see if you can overfit it and predict almost the exact response.
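For example, the shape check could be as simple as this (a sketch using the tensors from the question's code):
print("pred shape: " + str(pred.get_shape().as_list()))                   # e.g. [None, 200, 200, 2]
print("argmax axis 1: " + str(tf.argmax(pred, 1).get_shape().as_list()))  # [None, 200, 2] -- wrong axis
print("argmax axis 3: " + str(tf.argmax(pred, 3).get_shape().as_list()))  # [None, 200, 200]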
To conclude, I am not sure of all my suggestions, but they are worth trying, and I hope this will help you on the path to success!
I'm trying to use tf.nn.sparse_softmax_cross_entropy_with_logits and I have followed the answer by user Olivier Moindrot [here][1], but I'm getting a dimension error.
I'm building a segmentation network, so the input image is 200x200 and the output image is 200x200. The classification is binary, so foreground and background.
After I build the CNN pred = conv_net(x, weights, biases, keep_prob)
pred looks like this <tf.Tensor 'Add_1:0' shape=(?, 40000) dtype=float32>
The CNN has a couple of conv layers followed by a fully connected layer. The fully connected layer is 40000 because it is 200x200 flattened.
According to the above link, I reshape pred like so...
(side note: I also tried packing two preds together with tf.pack(), like above, but I thought that was wrong)
pred = tf.reshape(pred, [-1, 200, 200, 2])
...so that there are 2 classifications. Continuing from the above link...
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
I have the following placeholders and batch data...
x = tf.placeholder(tf.float32, [None, 200, 200])
y = tf.placeholder(tf.int64, [None, 200, 200])
(Pdb) batch_x.shape
(10, 200, 200)
(Pdb) batch_y.shape
(10, 200, 200)
When I run a training session, I get the following dimension error:
tensorflow.python.framework.errors.InvalidArgumentError: logits first
dimension must match labels size. logits shape=[3200000,2] labels
shape=[400000]
My full code looks like this:
import tensorflow as tf
import pdb
import numpy as np
# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 10
display_step = 1
# Network Parameters
n_input = 200 # MNIST data input (img shape: 28*28)
n_classes = 2 # MNIST total classes (0-9 digits)
n_output = 40000
#n_input = 200
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.int64, [None, n_input, n_input])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, 200, 200, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
# conv1 = tf.nn.local_response_normalization(conv1)
# conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
# conv2 = tf.nn.local_response_normalization(conv2)
# conv2 = maxpool2d(conv2, k=2)
# Convolution Layer
conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
# # Max Pooling (down-sampling)
# conv3 = tf.nn.local_response_normalization(conv3)
# conv3 = maxpool2d(conv3, k=2)
# return conv3
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
return tf.add(tf.matmul(fc1, weights['out']), biases['out'])
# Output, class prediction
# output = []
# for i in xrange(2):
# # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
# output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
#
# return output
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# 5x5 conv, 32 inputs, 64 outputs
'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_output]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_output]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
pdb.set_trace()
# pred = tf.pack(tf.transpose(pred,[1,2,0]))
pred = tf.reshape(pred, [-1, n_input, n_input, 2])
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
temp_pred2 = tf.reshape(pred, [-1,n_input,n_input])
correct_pred = tf.equal(tf.cast(y,tf.float32),tf.sub(temp_pred2,tf.cast(y,tf.float32)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y = data.train.next_batch(batch_size)
# Run optimization op (backprop)
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = np.int64(batch_y)
# y = tf.reshape(y, [-1,n_input,n_input])
pdb.set_trace()
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
if step % display_step == 0:
# Calculate batch loss and accuracy
pdb.set_trace()
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: data.test.images[:256],
y: data.test.labels[:256],
keep_prob: 1.})
[1]: http://stackoverflow.com/questions/35317029/how-to-implement-pixel-wise-classification-for-scene-labeling-in-tensorflow/37294185?noredirect=1#comment63253577_37294185
Let's forget about softmax and use a simpler tf.nn.sigmoid_cross_entropy_with_logits here:
with sigmoid, you only need one prediction per pixel
if pred[pixel] > 0.5, you predict 1
if pred[pixel] < 0.5, you predict 0
the shape of prediction and target should then be [batch_size, 40000]
pred = conv_net(x, weights, biases, keep_prob) # shape [batch_size, 40000]
flattened_y = tf.cast(tf.reshape(y, [-1, 40000]), tf.float32)  # shape [batch_size, 40000]
# reduce the element-wise loss to a scalar before minimizing
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=flattened_y))
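At inference time, a hypothetical way to turn these logits into the binary mask (my sketch, not from the original answer):
probs = tf.sigmoid(pred)                                           # per-pixel probability of class 1
mask = tf.reshape(tf.cast(probs > 0.5, tf.int32), [-1, 200, 200])  # binary segmentation mask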
Using sparse softmax is going to be of help only if, after the last layer, you want to resize the image back to the original size (200x200). In that case, using reshape as you have would ensure that the code is error-free.
But in your case you don't have to use sparse softmax. To see why, check the dimensions of pred.
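For instance (a sketch, assuming the conv_net from the question):
pred = conv_net(x, weights, biases, keep_prob)
print(pred.get_shape())  # (?, 40000): one value per pixel, so a per-pixel sigmoid loss
                         # applies directly, with no reshape to [-1, 2] needed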