Related
I am trying to implement the VGG but am getting the above odd error. I am running TFv2 on Ubuntu. Could this be because I am not running CUDA?
The code is from here.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Imports
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# tf.logging.set_verbosity(tf.logging.INFO)
from tensorflow.keras.layers import Conv2D, Dense, Flatten
# Seed NumPy so any NumPy-driven randomness in this script is repeatable.
np.random.seed(1)

# Load MNIST as (images, labels) pairs for training and evaluation.
mnist = tf.keras.datasets.mnist
(train_data, train_labels), (eval_data, eval_labels) = mnist.load_data()

# Scale only the images by 1/255. The labels are class ids and must stay
# integers: the model's sparse softmax cross-entropy loss indexes classes
# with them, so dividing them by 255 would destroy the targets.
train_data = (train_data / 255.0).astype(np.float32)
eval_data = (eval_data / 255.0).astype(np.float32)
train_labels = train_labels.astype(np.int32)
eval_labels = eval_labels.astype(np.int32)

# Add a channels dimension
train_data = train_data[..., tf.newaxis]
train_labels = train_labels[..., tf.newaxis]

# Show one training sample and its label as a sanity check.
index = 7
plt.imshow(train_data[index].reshape(28, 28))
plt.show()
time.sleep(5)
print("y = " + str(np.squeeze(train_labels[index])))

# Report dataset sizes and array shapes.
print("number of training examples = " + str(train_data.shape[0]))
print("number of evaluation examples = " + str(eval_data.shape[0]))
print("X_train shape: " + str(train_data.shape))
print("Y_train shape: " + str(train_labels.shape))
print("X_test shape: " + str(eval_data.shape))
print("Y_test shape: " + str(eval_labels.shape))
print("done")
def cnn_model_fn(features, labels, mode):
    """Model function for a VGG-style classifier, for use with ``tf.estimator``.

    The network is five convolutional stages (two 3x3 ReLU convolutions
    followed by a 2x2 max-pool each), three ReLU dense layers, dropout and a
    10-unit logits layer.

    Args:
        features: dict whose ``"x"`` entry holds the images; it is reshaped
            to ``[-1, 28, 28, 1]``.
        labels: class ids handed to the sparse softmax cross-entropy loss.
        mode: a ``tf.estimator.ModeKeys`` value selecting which spec to build.

    Returns:
        The ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL.
    """
    # Input layer
    input_height, input_width = 28, 28
    input_channels = 1
    net = tf.reshape(features["x"], [-1, input_height, input_width, input_channels])

    # Convolutional stages #1-#5: two convolutions, then one pooling layer.
    for stage_filters in (64, 128, 256, 512, 512):
        for _ in range(2):
            net = tf.layers.conv2d(inputs=net, filters=stage_filters, kernel_size=[3, 3],
                                   padding="same", activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(inputs=net, pool_size=[2, 2], strides=2, padding="same")

    # FC layers
    net = tf.contrib.layers.flatten(net)
    for width in (4096, 4096, 1000):
        net = tf.layers.dense(inputs=net, units=width, activation=tf.nn.relu)

    # Dropout is only active when the model function is called in TRAIN mode.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    dropout = tf.layers.dropout(inputs=net, rate=0.4, training=is_training)

    # Output layer: a dense layer without activation, so it yields raw logits.
    logits = tf.layers.dense(inputs=dropout, units=10)

    # Predicted class (argmax over the logits) and per-class probabilities.
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    # PREDICT needs nothing beyond the predictions dict.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Loss (TRAIN and EVAL): softmax cross-entropy computed from the logits.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # TRAIN: plain gradient descent on the loss.
    if is_training:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # EVAL: report accuracy of the predicted classes.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels,
                                        predictions=predictions["classes"]),
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
print("done2")
# Estimator wrapping cnn_model_fn; model_dir is where its model files go.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                          model_dir="/tmp/mnist_vgg13_model")
print("done3")

# Both input functions use the tf.compat.v1 namespace so that training and
# evaluation resolve numpy_input_fn the same way.
train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(x={"x": train_data},
                                                              y=train_labels,
                                                              batch_size=100,
                                                              num_epochs=100,
                                                              shuffle=True)
print("done4")

# steps=None: train until the input function is exhausted (100 epochs).
mnist_classifier.train(input_fn=train_input_fn,
                       steps=None,
                       hooks=None)
print("done5")

# One unshuffled pass over the evaluation set.
eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(x={"x": eval_data},
                                                             y=eval_labels,
                                                             num_epochs=1,
                                                             shuffle=False)
print("done6")
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
The code you're using was written in Tensorflow v1.x, and is not compatible as it is with Tensorflow v2. The easiest solution is probably to downgrade to a version of tensorflow v1 to run the code as it is.
Another option would be to follow this guide to migrate the code from v1 to v2.
A third option would be to use the tf.compat module to get some retro-compatibility. For example, tf.layers does not exist anymore in Tensorflow v2. You can use tf.compat.v1.layers (see for example the Conv2D function) instead, but this is a temporary fix, as these functions will be removed in a future version.
You can use the compat.v1 prefix to make code written for TensorFlow 1.x work with newer versions.
In your case this can be achieved by changing:
tf.layers.conv2d
to
tf.compat.v1.layers.conv2d
You can read more about migrating tensorflow v1.x to tensorflow v2.x here:
https://www.tensorflow.org/guide/migrate
Use TensorFlow 1.x instead of TensorFlow 2.x. But remember that there is no 1.x release for Python 3.8, so use an older version of Python for which TensorFlow 1.x is available.
python3.6 -m pip install tensorflow==1.8.0
I have a CNN for CIFAR-10 dataset with layers:
[IN] -> [CONV] -> [POOL] -> [CONV] -> [POOL] -> [FC] -> [DROPOUT] -> [LOGITS] -> [OUT]
\-> [EMBEDDINGS]
Estimator code:
# Run configuration: save_checkpoints_secs=30 sets the checkpoint interval.
config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)

# Create the Estimator around the `inference` model function.
classifier = tf.estimator.Estimator(
    model_fn=inference,
    config=config,
    model_dir=LOG_DIR,
)

train_images, train_labels, train_labels_onehot = Utils.load_training_data()

# Hooks run alongside training; here a single hook that logs the tensors in
# tensors_to_log every 50 iterations.
hooks = [tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)]

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': train_images},
    y=train_labels,
)
classifier.train(input_fn=train_input_fn, steps=FLAGS.steps, hooks=hooks)
Inference function code:
def inference(self, features, labels, mode):
    """Model function: two conv/pool stages, a dense layer with dropout, and logits.

    Args:
        self: the object this function is attached to (not used in the body).
        features: dict; ``features['x']`` holds the images and is cast to float32.
        labels: integer class ids; one-hot encoded with depth 10 for the loss.
        mode: a ``tf.estimator.ModeKeys`` value selecting which spec to build.

    Returns:
        The ``tf.estimator.EstimatorSpec`` for PREDICT, TRAIN or EVAL.
    """
    images = tf.cast(features['x'], tf.float32)

    # Input Layer
    with tf.name_scope('Data'):
        input_layer = tf.reshape(images, [-1, img_width, img_height, num_channels])

    # Convolutional Layer 1
    with tf.variable_scope('ConvLayer1'):
        conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                                 padding="same", activation=tf.nn.relu)
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
        logging.info('Convolutional Layer 1 build successful..')

    # Convolutional Layer 2
    with tf.variable_scope('ConvLayer2'):
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                                 padding="same", activation=tf.nn.relu)
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
        logging.info('Convolutional Layer 2 build successful..')

    # Fully Connected Layer
    with tf.variable_scope('FullyConnectedLayer'):
        # assumes pool2 is 8x8x64 per image (e.g. 32x32 inputs) — TODO confirm
        pool2_flat = tf.reshape(pool2, [-1, 8 * 8 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
        # Dropout is only active in TRAIN mode.
        dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                    training=(mode == tf.estimator.ModeKeys.TRAIN))
        logging.info('Fully Connected Layer build successful..')
    tf.summary.histogram('dropout', dropout)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=10)
    tf.summary.histogram('logits', logits)
    logging.info('Logits Layer build successful..')

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                          evaluation_hooks=[])

    # Calculate Loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)
    tf.summary.histogram('loss', loss)
    logging.info('Losses build successful..')

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Learning rate is multiplied by 0.9 every 1000 global steps (staircase).
        learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                   tf.train.get_global_step(),
                                                   1000, 0.9, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op,
                                          scaffold=tf.train.Scaffold(
                                              summary_op=tf.summary.merge_all(),
                                          ))

    # Add evaluation metrics (for EVAL mode). No train_op exists in this mode,
    # so the spec carries the metric through eval_metric_ops instead.
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    # tf.metrics.accuracy returns a (value, update_op) pair; summarize the
    # scalar produced by the update op.
    tf.summary.scalar('accuracy', accuracy[1])
    logging.info('Accuracy metric build successful..')
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops={'accuracy': accuracy},
                                      scaffold=tf.train.Scaffold(
                                          summary_op=tf.summary.merge_all()
                                      ))
I am trying to work with Embeddings Visualization in tensorflow and here I want to visualize dropout output as embeddings.
Code that I've found to use embeddings:
sess = tf.InteractiveSession()

# Input set for Embedded TensorBoard visualization
# Performed with cpu to conserve memory and processing power
with tf.device("/cpu:0"):
    embedding = tf.Variable(self._data, trainable=False, name='embedding')
sess.run(embedding.initializer)

# Everything the projector needs is written under LOG_DIR/projector.
projector_dir = os.path.join(LOG_DIR, 'projector')
writer = tf.summary.FileWriter(projector_dir, sess.graph)

# Describe the embedding: which tensor to show, its metadata file, and the
# sprite image with the size of one thumbnail inside it.
config = projector.ProjectorConfig()
embed = config.embeddings.add()
embed.tensor_name = embedding.name
embed.metadata_path = os.path.join(projector_dir, 'metadata.tsv')
embed.sprite.image_path = os.path.join(DATA_DIR, 'cifar_10k_sprite.png')
embed.sprite.single_image_dim.extend([img_width, img_height])
projector.visualize_embeddings(writer, config)

# Save only the embedding variable as a checkpoint in the projector directory.
saver = tf.train.Saver([embedding])
saver.save(sess, os.path.join(projector_dir, 'a_model.ckpt'))
It does not work in my case, because I am using Estimator class and I have not access to the session.
Ways that I've tried:
Pass a numpy.array variable to the model_fn of the Estimator, where I can set its value, and then pass that variable to a SessionRunHook, where I can access the session and save the data to a file. This did not work because all arguments passed to the Estimator become tensors, so that approach does not help: I already have the dropout layer tensors.
Create global variable where I can put all values of dropout layer. Did not work too because for that approach I need to access tensors values.
As far as I understood Estimator architecture the main problem is to get dropout layer output signals to the out of the Estimator and pass it somehow to the SessionRunHook to save them as Embeddings. But I think it's not the best way.
What is the right way of using Embeddings in Estimator?
This is how I did it guys (but it may be not the most efficient way):
SessionRunHook:
import tensorflow as tf
from classes.Utils import Utils
class EmbeddingSaverHook(tf.train.SessionRunHook):
    """Session hook that collects a layer's outputs and labels on every run call.

    On each ``session.run`` the hook additionally fetches the tensors named
    ``dense3/BiasAdd:0`` and ``labels:0`` and appends their values, plus the
    matching class names, to the containers passed to the constructor.
    """

    def __init__(self, values, labels, captions):
        """Store the output containers (presumably lists; they are extended in place)."""
        self._emb_values = values
        self._emb_labels = labels
        self._emb_captions = captions
        # Class names used to turn a label into a caption.
        self._classes = Utils.get_classnames()
        # Tensors are looked up in begin(), once the graph exists.
        self._dense3 = None
        self._labels = None
        self._saver = None

    def begin(self):
        """Resolve the tensors to fetch from the default graph."""
        graph = tf.get_default_graph()
        self._dense3 = graph.get_tensor_by_name("dense3/BiasAdd:0")
        self._labels = graph.get_tensor_by_name("labels:0")

    def before_run(self, run_context):
        """Request both tensors as extra fetches for the upcoming run call."""
        fetches = [self._dense3, self._labels]
        return tf.train.SessionRunArgs(fetches)

    def after_run(self, run_context, run_values):
        """Append the fetched values, labels and class-name captions."""
        # run_values[0] holds the results for the fetches requested in before_run().
        batch_values, batch_labels = run_values[0]
        self._emb_values.extend(batch_values)
        self._emb_labels.extend(batch_labels)
        self._emb_captions.extend([self._classes[label] for label in batch_labels])

    def end(self, session):
        """Nothing to do when the session ends."""
        pass
Full code you can see in my github repo
I am using Windows 10 pro, python 3.6.2rc1, Visual Studio 2017, and Tensorflow. I am working with Tensorflow example in its tutorial in the following link:
https://www.tensorflow.org/tutorials/layers
I have added another layer of convolution and pooling before flattening the last layer (3rd layer) to see if the accuracy changes.
The code I have added is as follows:
# Third convolution + pooling stage, appended after pool2.
## Input Tensor Shape: [batch_size, 7, 7, 64]
## Output Tensor Shape: [batch_size, 7, 7, 64]
# NOTE(review): every other conv2d call in this file passes padding as the
# string "same" or "valid"; an integer here looks unsupported — confirm
# against the tf.layers.conv2d documentation.
conv3 = tf.layers.conv2d(
inputs=pool2,
filters=64,
kernel_size=[3, 3],
padding=1,
activation=tf.nn.relu)
# NOTE(review): a 2x2 pool with strides=1 and no explicit padding presumably
# yields 6x6 rather than 7x7 maps, which would not match the 7*7*64 reshape
# below — TODO confirm.
pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=1)
pool3_flat = tf.reshape(pool3, [-1, 7* 7 * 64])
The reason I have changed padding to 1 and stride to 1 is to make sure the size of output is the same as input. But after adding this new layer I get the following warnings and without showing any result the program ends:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From E:\Apps\DA2CNNTest\TFHWDetection WIth More Layers\TFClassification\TFClassification\TFClassification.py:179: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
est = Estimator(...) -> est = SKCompat(Estimator(...))
The thread 'MainThread' (0x5c8) has exited with code 0 (0x0).
The program '[13468] python.exe' has exited with code 1 (0x1).
Without adding this layer it works properly. In order to solve this problem I changed the conv3 and pool3 as follows:
# Third convolution stage: 64 filters, 5x5 kernel, "same" padding, ReLU.
conv3 = tf.layers.conv2d(
inputs=pool2,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
# Third pooling stage: 2x2 window, stride 2.
# Input Tensor Shape: [batch_size, 7, 7, 64]
# Output Tensor Shape: [batch_size, 3, 3, 64]
pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
# Flatten to [batch_size, 3 * 3 * 64] for the dense layer.
pool3_flat = tf.reshape(pool3, [-1, 3* 3 * 64])
but then I got a different error at
mnist_classifier.fit(
x=train_data,
y=train_labels,
batch_size=100,
steps=20000,
monitors=[logging_hook])
which is as follows:
tensorflow.python.framework.errors_impl.NotFoundError: Key conv2d_2/bias not found in checkpoint
[[Node: save/RestoreV2_5 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save/Const_0_0, save/RestoreV2_5/tensor_names, save/RestoreV2_5/shape_and_slices)]]
The error refers exactly to monitors=[logging_hook].
My whole code is as follow and as you see I have commented the previous one with padding=1.
I would really appreciate it if you could tell me what my mistake is and why it happens. Moreover, am I correct about the dimensions of the inputs and outputs of the 3rd layer?
Complete code:
"""Convolutional Neural Network Estimator for MNIST, built with tf.layers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
tf.logging.set_verbosity(tf.logging.INFO)
def cnn_model_fn(features, labels, mode):
    """Build the three-stage MNIST CNN and return its ``ModelFnOps``.

    Args:
        features: image batch; reshaped to ``[-1, 28, 28, 1]``.
        labels: integer class ids, one-hot encoded (depth 10) for the loss.
        mode: a ``learn.ModeKeys`` value.

    Returns:
        ``model_fn_lib.ModelFnOps`` with predictions, and with loss / train_op
        when the mode requires them (otherwise ``None``).
    """
    is_training = mode == learn.ModeKeys.TRAIN

    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Convolutional Layer #1
    # [batch_size, 28, 28, 1] -> [batch_size, 28, 28, 32]
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)
    # Pooling Layer #1
    # [batch_size, 28, 28, 32] -> [batch_size, 14, 14, 32]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2
    # [batch_size, 14, 14, 32] -> [batch_size, 14, 14, 64]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)
    # Pooling Layer #2
    # [batch_size, 14, 14, 64] -> [batch_size, 7, 7, 64]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # --- Adding a new layer of conv and pool ---
    # Previous attempt, kept for reference (disabled):
    ## Input Tensor Shape: [batch_size, 7, 7, 64]
    ## Output Tensor Shape: [batch_size, 7, 7, 64]
    #conv3 = tf.layers.conv2d(
    #    inputs=pool2,
    #    filters=64,
    #    kernel_size=[3, 3],
    #    padding=1,
    #    activation=tf.nn.relu)
    ## Input Tensor Shape: [batch_size, 7, 7, 64]
    ## Output Tensor Shape: [batch_size, 7, 7, 64]
    #pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=1)
    #pool3_flat = tf.reshape(pool3, [-1, 7* 7 * 64])

    # Convolutional Layer #3
    # [batch_size, 7, 7, 64] -> [batch_size, 7, 7, 64]
    conv3 = tf.layers.conv2d(inputs=pool2, filters=64, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)
    # Pooling Layer #3
    # [batch_size, 7, 7, 64] -> [batch_size, 3, 3, 64]
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    # --- End of manipulation ---

    # Flatten: [batch_size, 3, 3, 64] -> [batch_size, 3 * 3 * 64]
    pool3_flat = tf.reshape(pool3, [-1, 3 * 3 * 64])

    # Dense layer: [batch_size, 3 * 3 * 64] -> [batch_size, 1024]
    dense = tf.layers.dense(inputs=pool3_flat, units=1024, activation=tf.nn.relu)

    # Dropout with rate 0.4 (0.6 keep probability), active only while training.
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

    # Logits: raw scores, [batch_size, 10]
    logits = tf.layers.dense(inputs=dropout, units=10)

    loss = None
    train_op = None

    # Loss is needed for every mode except INFER.
    if mode != learn.ModeKeys.INFER:
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10),
            logits=logits)

    # Training op: SGD with a fixed learning rate.
    if is_training:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=0.001,
            optimizer="SGD")

    # Predictions derived from the [batch_size, 10] logits.
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    return model_fn_lib.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)
def main(unused_argv):
    """Train the MNIST estimator, evaluate it on 100 test samples and print the result.

    Args:
        unused_argv: command-line arguments forwarded by ``tf.app.run``; ignored.
    """
    # Load training and eval data (images as np.array, labels as int32 arrays).
    mnist = learn.datasets.load_dataset("mnist")
    train_data = mnist.train.images
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

    # Create the Estimator
    mnist_classifier = learn.Estimator(
        model_fn=cnn_model_fn,
        model_dir="/tmp/mnist_convnet_model")

    # Log the tensor named "softmax_tensor" under the label "probabilities"
    # every 50 iterations.
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"probabilities": "softmax_tensor"},
        every_n_iter=50)

    # Train the model
    mnist_classifier.fit(
        x=train_data,
        y=train_labels,
        batch_size=100,
        steps=20000,
        monitors=[logging_hook])

    # Accuracy metric, computed from the "classes" prediction.
    accuracy_spec = learn.MetricSpec(
        metric_fn=tf.metrics.accuracy, prediction_key="classes")
    eval_metrics = {"accuracy": accuracy_spec}

    # Evaluate on the first 100 test samples and print the results.
    eval_results = mnist_classifier.evaluate(
        x=eval_data[:100], y=eval_labels[:100], metrics=eval_metrics)
    print(eval_results)


if __name__ == "__main__":
    tf.app.run()
The error suggests that the trained model stored in model_dir conflicts with the current graph changes. The Estimator loads checkpoints from the saved model directory and continues training from the previously saved model. So whenever you make changes to the model, you need to delete the old model and start training again.
A simple fix for this would be to define a custom checkpoint directory for the model as follows.
tf.train.generate_checkpoint_state_proto("/tmp/","/tmp/mnist_convnet_model")
This fixes the problem with the MNIST example and also gives you access to a location where you can control checkpoints.
I'm trying to replicate the CNN described in
https://pdfs.semanticscholar.org/3b57/85ca3c29c963ae396c2f94ba1a805c787cc8.pdf
and I'm stuck at the last layer. I've modeled the cnn like this
# Model function for CNN
def cnn_model_fn(features, labels, mode):
    """Model function for the 150x150 single-channel image classifier.

    Two valid-padded conv/pool stages, two 1000-unit dense layers, dropout and
    an ``nClass``-unit logits layer.

    Args:
        features: image batch; reshaped to ``[-1, 150, 150, 1]``.
        labels: integer class ids, one-hot encoded (depth ``nClass``) for the loss.
        mode: a ``learn.ModeKeys`` value.

    Returns:
        ``model_fn_lib.ModelFnOps`` with predictions, and with loss / train_op
        when the mode requires them (otherwise ``None``).
    """
    is_training = mode == learn.ModeKeys.TRAIN

    # Input layer: 4-D tensor [batch_size, width, height, channels]
    input_layer = tf.reshape(features, [-1, 150, 150, 1])

    # Convolutional Layer #1: [batch_size, 150, 150, 1] -> [batch_size, 144, 144, 20]
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=20, kernel_size=[7, 7],
                             padding="valid", activation=tf.nn.relu)
    # Pooling Layer #1: [batch_size, 144, 144, 20] -> [batch_size, 36, 36, 20]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[4, 4], strides=4)

    # Convolutional Layer #2: [batch_size, 36, 36, 20] -> [batch_size, 32, 32, 50]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=50, kernel_size=[5, 5],
                             padding="valid", activation=tf.nn.relu)
    # Pooling Layer #2: [batch_size, 32, 32, 50] -> [batch_size, 8, 8, 50]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[4, 4], strides=4)

    # Flatten: [batch_size, 8, 8, 50] -> [batch_size, 8 * 8 * 50]
    pool2_flat = tf.reshape(pool2, [-1, 8 * 8 * 50])

    # Dense Layer #1: [batch_size, 8 * 8 * 50] -> [batch_size, 1000]
    dense1 = tf.layers.dense(inputs=pool2_flat, units=1000, activation=tf.nn.relu)
    # Dense Layer #2: [batch_size, 1000] -> [batch_size, 1000]
    dense2 = tf.layers.dense(inputs=dense1, units=1000, activation=tf.nn.relu)

    # Dropout with rate 0.5, active only while training.
    dropout = tf.layers.dropout(inputs=dense2, rate=0.5, training=is_training)

    # Logits layer: [batch_size, 1000] -> [batch_size, nClass]
    logits = tf.layers.dense(inputs=dropout, units=nClass)

    loss = None
    train_op = None

    # Loss is needed for every mode except INFER; the softmax is applied
    # inside softmax_cross_entropy, so it is fed the raw logits.
    if mode != learn.ModeKeys.INFER:
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.one_hot(indices=tf.cast(labels, tf.int32), depth=nClass),
            logits=logits)

    # Training op: SGD with a fixed learning rate.
    if is_training:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=0.001,
            optimizer="SGD")

    # Predicted class per example.
    predictions = {"classes": tf.argmax(input=logits, axis=1)}

    return model_fn_lib.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)
but the final accuracy is really poor (0.25). Then I realized that the paper actually states that the last layer is a softmax layer, so I tried changing my logits layer to
logits = tf.layers.softmax(dropout)
but when I run it, it says
ValueError: Shapes (?, 1000) and (?, 4) are incompatible
So, what am I missing here?
The original one was correct. The softmax activation is applied while calculating the loss with tf.losses.softmax_cross_entropy. If you want to calculate it separately you should add it after the logits calculation, but without replacing it as you did.
logits = tf.layers.dense(inputs=dropout, units=nClass)
softmax = tf.layers.softmax(logits)
Or you can combine both in one, but I wouldn't recommend it. It is better to calculate the softmax with the loss.
logits = tf.layers.dense(inputs=dropout, units=nClass, activation=tf.layers.softmax)
Your classifier is not doing better than random, so I would say that the problem lies somewhere else, maybe in the data loading and preprocessing.
I'm trying to use tf.nn.sparse_softmax_cross_entropy_with_logits and I have followed the answer by user Olivier Moindrot [here][1] but I'm getting a dimension error
I'm building a segmentation network, so the input image is 200x200 and the output image is 200x200. The classification is binary, so foreground and background.
After I build the CNN pred = conv_net(x, weights, biases, keep_prob)
pred looks like this <tf.Tensor 'Add_1:0' shape=(?, 40000) dtype=float32>
The CNN has a couple of conv layers followed by a fully connected layer. The fully connected layer is 40000 because it is 200x200 flattened.
According to the above link, I reshape pred like so...
(side note: I also tried packing tf.pack() two pred's -- like above -- together, but I thought that was wrong)
pred = tf.reshape(pred, [-1, 200, 200, 2])
...so that there are 2 classifications. Continuing the above link...
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
I have the following placeholders and batch data...
x = tf.placeholder(tf.float32, [None, 200, 200])
y = tf.placeholder(tf.int64, [None, 200, 200])
(Pdb) batch_x.shape
(10, 200, 200)
(Pdb) batch_y.shape
(10, 200, 200)
When I run a training session, I get the following dimension error:
tensorflow.python.framework.errors.InvalidArgumentError: logits first
dimension must match labels size. logits shape=[3200000,2] labels
shape=[400000]
My full code looks like this:
import tensorflow as tf
import pdb
import numpy as np
# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
# Training parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 10
display_step = 1
# Network Parameters
n_input = 200 # image side length; x and y below are [None, n_input, n_input]
n_classes = 2 # number of classes (not referenced elsewhere in the visible code)
n_output = 40000 # width of the final 'out' layer
#n_input = 200
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input: images (float32) and per-pixel integer labels (int64)
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.int64, [None, n_input, n_input])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias, and return its ReLU.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: convolution filter.
        b: bias added to the convolution output.
        strides: stride used for both middle dimensions.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window and stride k, using SAME padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
    """Build the network: three conv layers, one hidden dense layer, one output layer.

    Args:
        x: input images; reshaped to ``[-1, 200, 200, 1]``.
        weights: dict with keys 'wc1', 'wc2', 'wc3', 'wd1' and 'out'.
        biases: dict with keys 'bc1', 'bc2', 'bc3', 'bd1' and 'out'.
        dropout: keep probability handed to ``tf.nn.dropout``.

    Returns:
        The output-layer tensor (no activation applied).
    """
    # Reshape input picture
    images = tf.reshape(x, shape=[-1, 200, 200, 1])

    # Convolution Layer 1 (pooling / normalization currently disabled)
    conv1 = conv2d(images, weights['wc1'], biases['bc1'])
    # conv1 = tf.nn.local_response_normalization(conv1)
    # conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer 2 (pooling / normalization currently disabled)
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # conv2 = tf.nn.local_response_normalization(conv2)
    # conv2 = maxpool2d(conv2, k=2)

    # Convolution Layer 3 (pooling / normalization currently disabled)
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # conv3 = tf.nn.local_response_normalization(conv3)
    # conv3 = maxpool2d(conv3, k=2)
    # return conv3

    # Fully connected layer: flatten to the input width expected by 'wd1'.
    # NOTE(review): conv3 is built above but it is conv2 that gets flattened here.
    # NOTE(review): with the pooling calls disabled, conv2 presumably keeps the
    # 200x200 spatial size, so unless wd1's first dimension equals the number
    # of values per image the -1 below will not equal the batch size — verify.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    hidden = tf.reshape(conv2, [-1, flat_width])
    hidden = tf.nn.relu(tf.add(tf.matmul(hidden, weights['wd1']), biases['bd1']))

    # Apply Dropout
    hidden = tf.nn.dropout(hidden, dropout)

    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])
    # Output, class prediction
    # output = []
    # for i in xrange(2):
    #     # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #     output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #
    # return output
# Store layers weight & bias
# Layer weights, randomly initialized; keys match the lookups in conv_net.
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# 5x5 conv, 64 inputs, 128 outputs
'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
# fully connected, 50*50*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
# 1024 inputs, n_output outputs
'out': tf.Variable(tf.random_normal([1024, n_output]))
}
# Layer biases; one entry per output channel/unit of the matching weight.
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bc3': tf.Variable(tf.random_normal([128])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_output]))
}
# Construct model
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# Debugger breakpoint to inspect pred.
pdb.set_trace()
# pred = tf.pack(tf.transpose(pred,[1,2,0]))
# NOTE(review): each [n_input, n_input, 2] slice consumes 2 * n_input * n_input
# values, i.e. two rows of pred when n_output == n_input * n_input — confirm
# this is the intended layout.
pred = tf.reshape(pred, [-1, n_input, n_input, 2])
# Flatten to one row of 2 logits per pixel and one integer label per pixel.
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# NOTE(review): this compares y with (temp_pred2 - y) element-wise rather than
# comparing predicted classes with y — confirm this is the intended accuracy.
temp_pred2 = tf.reshape(pred, [-1,n_input,n_input])
correct_pred = tf.equal(tf.cast(y,tf.float32),tf.sub(temp_pred2,tf.cast(y,tf.float32)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
step = 1
from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
data = scroll_data.read_data('/home/kendall/Desktop/')
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y = data.train.next_batch(batch_size)
# Run optimization op (backprop)
batch_x = batch_x.reshape((batch_size, n_input, n_input))
batch_y = batch_y.reshape((batch_size, n_input, n_input))
batch_y = np.int64(batch_y)
# y = tf.reshape(y, [-1,n_input,n_input])
pdb.set_trace()
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
if step % display_step == 0:
# Calculate batch loss and accuracy
pdb.set_trace()
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: data.test.images[:256],
y: data.test.labels[:256],
keep_prob: 1.})
[1]: http://stackoverflow.com/questions/35317029/how-to-implement-pixel-wise-classification-for-scene-labeling-in-tensorflow/37294185?noredirect=1#comment63253577_37294185
Let's forget about softmax and use a simpler tf.nn.sigmoid_cross_entropy_with_logits here:
with sigmoid, you only need one prediction per pixel
if pred[pixel] > 0.5, you predict 1
if pred[pixel] < 0.5, you predict 0
the shape of prediction and target should then be [batch_size, 40000]
pred = conv_net(x, weights, biases, keep_prob)  # shape [batch_size, 40000]
# The sigmoid loss needs targets with the same dtype as the logits, so the
# int64 label placeholder is cast to float32 after flattening.
flattened_y = tf.cast(tf.reshape(y, [-1, 40000]), tf.float32)  # shape [batch_size, 40000]
loss = tf.nn.sigmoid_cross_entropy_with_logits(pred, flattened_y)
Using sparse softmax is going to be of help only if, after the last layer, you want to resize the image to the original size (200*200). In this case, using reshape as you have would ensure that the code is error-free.
But in your case you don't have to use sparse softmax. To see why check the dimensions of "pred".