I am trying to implement the convolutional LSTM network based on this paper: https://arxiv.org/abs/1506.04214.
I have implemented the encoder like this:
def new_convLSTM_layer(input,              # The previous layer.
                       num_input_channels, # Num. channels in prev. layer.
                       filter_size,        # Width and height of each filter.
                       num_filters,        # Number of filters.
                       img_size,           # Size of image (width or height).
                       hid_st,             # Hidden state of previous step.
                       Cell,               # Cell state of previous step.
                       use_pooling):
    # Shape of the filter-weights for the convolution.
    shape = [filter_size, filter_size, num_input_channels, num_filters]
    shape_2 = [filter_size, filter_size, num_filters, num_filters]
    # Create new weights, aka filters, with the given shape.
    # Filters (weights) for the input gate.
    W_xi = new_weights(shape=shape)
    W_hi = new_weights(shape=shape_2)
    # Filters (weights) for the forget gate.
    W_xf = new_weights(shape=shape)
    W_hf = new_weights(shape=shape_2)
    # Filters (weights) for the cell.
    W_xc = new_weights(shape=shape)
    W_hc = new_weights(shape=shape_2)
    # Filters (weights) for the output gate.
    W_xo = new_weights(shape=shape)
    W_ho = new_weights(shape=shape_2)
    # Weights from the cell to the gate vectors; these should be the size of
    # the input. The weight matrices from the cell to gate vectors (e.g. W_ci)
    # are diagonal, so element m in each gate vector only receives input from
    # element m of the cell vector -- one per filter.
    W_ci = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_cf = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_co = new_weights_dia(num_filters=num_filters, img_size=img_size)
    # Create new biases, one for each filter.
    biases_input = new_biases(length=num_filters)
    biases_forget = new_biases(length=num_filters)
    biases_cell = new_biases(length=num_filters)
    biases_output = new_biases(length=num_filters)
    # Convolutions of the weights with x(t).
    Wxi_x = tf.nn.conv2d(input=input, filter=W_xi, strides=[1, 1, 1, 1], padding='SAME')
    Wxf_x = tf.nn.conv2d(input=input, filter=W_xf, strides=[1, 1, 1, 1], padding='SAME')
    Wxc_x = tf.nn.conv2d(input=input, filter=W_xc, strides=[1, 1, 1, 1], padding='SAME')
    Wxo_x = tf.nn.conv2d(input=input, filter=W_xo, strides=[1, 1, 1, 1], padding='SAME')
    # Convolutions of the weights with h(t-1).
    Whi_h = tf.nn.conv2d(input=hid_st, filter=W_hi, strides=[1, 1, 1, 1], padding='SAME')
    Whf_h = tf.nn.conv2d(input=hid_st, filter=W_hf, strides=[1, 1, 1, 1], padding='SAME')
    Whc_h = tf.nn.conv2d(input=hid_st, filter=W_hc, strides=[1, 1, 1, 1], padding='SAME')
    Who_h = tf.nn.conv2d(input=hid_st, filter=W_ho, strides=[1, 1, 1, 1], padding='SAME')
    # Hadamard products with the cell state.
    Wci_c = tf.multiply(W_ci, Cell)
    Wcf_c = tf.multiply(W_cf, Cell)
    # Gates.
    I = tf.sigmoid(Wxi_x + Whi_h + Wci_c + biases_input)
    F = tf.sigmoid(Wxf_x + Whf_h + Wcf_c + biases_forget)
    Cell = tf.multiply(F, Cell) + tf.multiply(I, tf.tanh(Wxc_x + Whc_h + biases_cell))
    Wco_c = tf.multiply(W_co, Cell)
    O = tf.sigmoid(Wxo_x + Who_h + Wco_c + biases_output)
    hid_st = tf.multiply(O, tf.tanh(Cell))
    if use_pooling:
        hid_st = tf.nn.max_pool(value=hid_st,
                                ksize=[1, 2, 2, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        O = tf.nn.max_pool(value=O,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 1, 1, 1],
                           padding='SAME')
    layer = tf.nn.relu(hid_st)
    layer_Output = tf.nn.relu(O)
    return layer, layer_Output, Cell
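(The helpers new_weights, new_biases, and new_weights_dia are not shown above; a minimal sketch consistent with how they are used could look like the following. The initializers are assumptions, and the "diagonal" cell-to-gate weights are realized as one full-size map per filter, so that tf.multiply connects element m of the cell only to element m of the gate, as the comment in the function describes.)

def new_weights(shape):
    # Assumed initializer; any small random init would do.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def new_biases(length):
    return tf.Variable(tf.constant(0.05, shape=[length]))

def new_weights_dia(num_filters, img_size):
    # One [img_size, img_size] map per filter; the Hadamard product with the
    # cell state then acts like a diagonal weight matrix per gate element.
    return tf.Variable(tf.truncated_normal([img_size, img_size, num_filters], stddev=0.05))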
For the decoder, what I did is just use the above function: I fed it a tensor of zeros together with the hidden state and cell state of the encoder (after the last input was read), and after that I fed each predicted output as the input at the next time step.
But when I plot the cell state and hidden state of the decoder at each time step, I see that it does not learn and has the same values as the hidden and cell state of the encoder.
I would be really thankful if someone could tell me where I am wrong!
This is my decoder:
def deconvLSTM(input,              # Output of the previous layer.
               num_input_channels, # Num. channels in prev. layer.
               filter_size,        # Width and height of each filter.
               num_filters,        # Number of filters.
               img_size,           # Size of image (width or height).
               hid_st,             # Hidden state of the encoder.
               Cell,               # Cell state of the encoder.
               use_pooling):
    De_INPUT = input
    De_OUTPUT = tf.zeros([0, 64, 64, 16])
    De_Hidden = tf.zeros([0, 64, 64, 16])
    De_CELL = tf.zeros([0, 64, 64, 16])
    De_Hidden = tf.concat([De_Hidden, hid_st], 0)
    De_CELL = tf.concat([De_CELL, Cell], 0)
    for i in range(10):
        deconv_Hidden_State_1, deconv_Output_1, deconv_Cell_1 = new_convLSTM_layer(
            input=De_INPUT,
            num_input_channels=num_input_channels,
            filter_size=filter_size,
            num_filters=num_filters,
            img_size=img_size,
            hid_st=hid_st,
            Cell=Cell,
            use_pooling=True)
        De_OUTPUT = tf.concat([De_OUTPUT, deconv_Output_1], 0)
        De_Hidden = tf.concat([De_Hidden, deconv_Hidden_State_1], 0)
        De_CELL = tf.concat([De_CELL, deconv_Cell_1], 0)
        De_INPUT = deconv_Output_1
    return De_OUTPUT, De_Hidden, De_CELL
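(A structural note on unrolled loops like this: new_convLSTM_layer creates a fresh set of weights every time it is called, and the loop above passes the original hid_st and Cell into every iteration rather than the states returned by the previous step. A hedged sketch of the usual TF1 pattern, with one set of variables shared across time steps and the states fed back in; this assumes the weight helpers are rewritten with tf.get_variable, since with tf.Variable the reuse flag has no effect.)

with tf.variable_scope("convlstm", reuse=tf.AUTO_REUSE):
    hid, out, cell = hid_st, De_INPUT, Cell
    for i in range(10):
        # Note: after the first step the input carries num_filters channels,
        # so this sketch assumes num_input_channels == num_filters.
        hid, out, cell = new_convLSTM_layer(input=out,
                                            num_input_channels=num_input_channels,
                                            filter_size=filter_size,
                                            num_filters=num_filters,
                                            img_size=img_size,
                                            hid_st=hid,
                                            Cell=cell,
                                            use_pooling=True)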
I am getting this error when joining the pooled outputs when I connect two CNN layers in Python. How can I correct the error and normalize the values?
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import gen_array_ops

class TextCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
    """
    def __init__(self, sequence_length, num_classes, vocab_size,
                 embedding_size, filter_sizes, num_filters, fc_hidden_size, l2_reg_lambda=0.0):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        fc_hidden_size = 1024
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        initializer = tf.random_normal_initializer(stddev=0.1)
        self.initializer = initializer
        self.is_training_flag = True
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        def flatten_reshape(variable):
            dim = 1
            for d in variable.get_shape()[1:].as_list():
                dim *= d
            return tf.reshape(variable, shape=[-1, dim])

        def _highway_layer(input_, size, num_layers=1, bias=-2.0, f=tf.nn.relu):
            """
            Highway Network (cf. http://arxiv.org/abs/1505.00387).
            t = sigmoid(Wy + b)
            z = t * g(Wy + b) + (1 - t) * y
            where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
            """
            for idx in range(num_layers):
                g = f(_linear(input_, size, scope=("highway_lin_{0}".format(idx))))
                t = tf.sigmoid(_linear(input_, size, scope=("highway_gate_{0}".format(idx))) + bias)
                output = t * g + (1. - t) * input_
                input_ = output
            return output

        def _linear(input_, output_size, scope="SimpleLinear"):
            """
            Linear map: output[k] = sum_i(Matrix[k, i] * args[i]) + Bias[k]
            Args:
                input_: a tensor or a list of 2D, batch x n, Tensors.
                output_size: int, second dimension of W[i].
                scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
            Returns:
                A 2D Tensor with shape [batch, output_size] equal to
                sum_i(args[i] * W[i]), where the W[i]s are newly created matrices.
            Raises:
                ValueError: if some of the arguments has unspecified or wrong shape.
            """
            shape = input_.get_shape().as_list()
            if len(shape) != 2:
                raise ValueError("Linear is expecting 2D arguments: {0}".format(str(shape)))
            if not shape[1]:
                raise ValueError("Linear expects shape[1] of arguments: {0}".format(str(shape)))
            input_size = shape[1]
            # Now the computation.
            with tf.variable_scope(scope):
                W = tf.get_variable("W", [input_size, output_size], dtype=input_.dtype)
                b = tf.get_variable("b", [output_size], dtype=input_.dtype)
                return tf.nn.xw_plus_b(input_, W, b)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.sigmoid(tf.nn.bias_add(conv, b), name="sigmoid")
                # h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                h2 = tf.reshape(h, [-1, sequence_length, num_filters, 1])  # shape: [batch_size, sequence_length, num_filters, 1]
                print(h2)
                # self.initializer = tf.random_normal_initializer(stddev=0.1)
                filter2 = tf.get_variable("filter2-%s" % filter_size,
                                          [filter_size, num_filters, 1, num_filters],
                                          initializer=self.initializer)
                conv2 = tf.nn.conv2d(h2, filter2, strides=[1, 1, 1, 1], padding="SAME",
                                     name="conv2")  # shape: [batch_size, sequence_length - filter_size * 2 + 2, 1, num_filters]
                b2 = tf.get_variable("b2-%s" % filter_size, [num_filters])  # ADD 2017-06-09
                h3 = tf.nn.sigmoid(tf.nn.bias_add(conv2, b2), name="sigmoid")
                pooled = tf.nn.max_pool(
                    h3,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                # x_reshaped = tf.reshape(pooled, [-1, 3])
                s = flatten_reshape(pooled)
                pooled_outputs.append(s)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
        print("zzz")
        num_filters_total = num_filters * len(filter_sizes)
        self.pool = tf.concat(pooled_outputs, axis=3)
        self.pool_flat = tf.reshape(self.pool, shape=[-1, num_filters_total])

        # Fully Connected Layer
        with tf.name_scope("fc"):
            W = tf.Variable(tf.truncated_normal(shape=[num_filters_total, fc_hidden_size],
                                                stddev=0.1, dtype=tf.float32), name="W")
            b = tf.Variable(tf.constant(value=0.1, shape=[fc_hidden_size], dtype=tf.float32), name="b")
            self.fc = tf.nn.xw_plus_b(self.pool_flat, W, b)
            # Batch Normalization Layer
            self.fc_bn = tf.layers.batch_normalization(self.fc, training=self.is_training)
            # Apply nonlinearity
            self.fc_out = tf.nn.relu(self.fc_bn, name="relu")

        # Highway Layer
        with tf.name_scope("highway"):
            self.highway = _highway_layer(self.fc_out, self.fc_out.get_shape()[1], num_layers=1, bias=0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
ValueError: Dimension 1 in both shapes must be equal, but are 4 and 5. Shapes are [?,4,50] and [?,5,50]. for 'concat' (op: 'ConcatV2') with input shapes: [?,3,50,50], [?,4,50,50], [?,5,50,50], [] and with computed input tensors: input[3]
ValueError: Shape must be at least rank 4 but is rank 2 for 'concat' (op: 'ConcatV2') with input shapes: [?,7500], [?,10000], [?,12500], []
I want to implement average pooling in conv1d, but the tf.nn.avg_pool function can only be applied to 4-dimensional tensors. What should I do to overcome this problem?
def avg_pool(conv_out):
    return tf.nn.avg_pool(conv_out, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1], padding='SAME')

i = tf.constant([1, 0, 2, 3, 0, 1], dtype=tf.float32)
data = tf.reshape(i, [1, int(i.shape[0]), 1], name='data')
kernel = tf.Variable(tf.random_normal([2, 1, 1]))
conv_out = tf.nn.conv1d(data, kernel, 2, 'VALID')
pool_out = avg_pool(conv_out)
One option is to add an additional dimension to your data and then remove it:
def avg_pool(conv_out):
    conv_out_2d = conv_out[:, tf.newaxis]
    pool_out_2d = tf.nn.avg_pool(conv_out_2d,
                                 ksize=[1, 1, 2, 1],
                                 strides=[1, 1, 2, 1],
                                 padding='SAME')
    pool_out = pool_out_2d[:, 0]
    return pool_out
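For example, a quick shape check under these definitions (a sketch; it assumes the tensors from your snippet are in scope in a TF1 session):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(conv_out).shape)  # (1, 3, 1): VALID conv, width 2, stride 2 on length 6
    print(sess.run(pool_out).shape)  # (1, 2, 1): SAME avg pool, window 2, stride 2 on length 3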
Another possibility is to use the generic tf.nn.pool:
def avg_pool(conv_out):
    return tf.nn.pool(conv_out, window_shape=[2], pooling_type='AVG', padding='SAME')
Note that in this case I did not include the stride. tf.nn.pool defaults to a stride of 1 in each dimension, so to reproduce the stride-2 pooling from your example you should pass it explicitly, as in the sketch below.
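For example, with the stride made explicit (parameter names per the TF1 tf.nn.pool signature):

def avg_pool(conv_out):
    # Window of 2, stride of 2 along the single spatial dimension.
    return tf.nn.pool(conv_out, window_shape=[2], pooling_type='AVG',
                      padding='SAME', strides=[2])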
I am attempting to create a deep CNN that can classify each individual pixel in an image, replicating the architecture from this paper. In the paper it is mentioned that deconvolutions are used so that any size of input is possible.
Github Repository
Currently, I have hard coded my model to accept images of size 32x32x7, but I would like to accept any size of input. What changes would I need to make to my code to accept variable sized input?
x = tf.placeholder(tf.float32, shape=[None, 32*32*7])
y_ = tf.placeholder(tf.float32, shape=[None, 32*32*7, 3])
...
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter = w, output_shape = [1,32,32,7,1], strides = [1,2,2,2,1], padding = 'SAME')
...
final = tf.reshape(final, [1, 32*32*7])
W_final = weight_variable([32*32*7,32*32*7,3])
b_final = bias_variable([32*32*7,3])
final_conv = tf.tensordot(final, W_final, axes=[[1], [1]]) + b_final
Dynamic placeholders
TensorFlow allows placeholders to have multiple dynamic (a.k.a. None) dimensions. The engine won't be able to ensure correctness while the graph is built, hence the client is responsible for feeding the correct input, but it provides a lot of flexibility.
So I'm going from...
x = tf.placeholder(tf.float32, shape=[None, N*M*P])
y_ = tf.placeholder(tf.float32, shape=[None, N*M*P, 3])
...
x_image = tf.reshape(x, [-1, N, M, P, 1])
to...
# Nearly all dimensions are dynamic
x_image = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
label = tf.placeholder(tf.float32, shape=[None, None, 3])
Since you intend to reshape the input to 5D anyway, why not use 5D in x_image right from the start? At this point, the second dimension of label is arbitrary, but we promise TensorFlow that it will match x_image.
Dynamic shapes in deconvolution
Next, the nice thing about tf.nn.conv3d_transpose is that its output shape can be dynamic. So instead of this:
# Hard-coded output shape
DeConnv1 = tf.nn.conv3d_transpose(layer1, w, output_shape=[1,32,32,7,1], ...)
... you can do this:
# Dynamic output shape
DeConnv1 = tf.nn.conv3d_transpose(layer1, w, output_shape=tf.shape(x_image), ...)
This way the transpose convolution can be applied to any image and the result will take the shape of x_image that was actually passed in at runtime.
Note that static shape of x_image is (?, ?, ?, ?, 1).
All-Convolutional network
The final and most important piece of the puzzle is to make the whole network convolutional, and that includes your final dense layer too. A dense layer must define its dimensions statically, which forces the whole neural network to fix the input image dimensions.
Luckily for us, Springenberg et al. describe a way to replace an FC layer with a CONV layer in the paper "Striving for Simplicity: The All Convolutional Net". I'm going to use a convolution with 3 1x1x1 filters (see also this question):
final_conv = conv3d_s1(final, weight_variable([1, 1, 1, 1, 3]))
y = tf.reshape(final_conv, [-1, 3])
If we ensure that final has the same dimensions as DeConnv1 (and the others), it will make y exactly the shape we want: [-1, N * M * P, 3].
Combining it all together
Your network is pretty large, but all deconvolutions basically follow the same pattern, so I've simplified my proof-of-concept code to just one deconvolution. The goal is just to show what kind of network is able to handle images of arbitrary size. Final remark: image dimensions can vary between batches, but within one batch they have to be the same.
The full code:
import numpy as np
import tensorflow as tf

sess = tf.InteractiveSession()

def conv3d_dilation(tempX, tempFilter):
    return tf.layers.conv3d(tempX, filters=tempFilter, kernel_size=[3, 3, 1], strides=1, padding='SAME', dilation_rate=2)

def conv3d(tempX, tempW):
    return tf.nn.conv3d(tempX, tempW, strides=[1, 2, 2, 2, 1], padding='SAME')

def conv3d_s1(tempX, tempW):
    return tf.nn.conv3d(tempX, tempW, strides=[1, 1, 1, 1, 1], padding='SAME')

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def max_pool_3x3(x):
    return tf.nn.max_pool3d(x, ksize=[1, 3, 3, 3, 1], strides=[1, 2, 2, 2, 1], padding='SAME')

x_image = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
label = tf.placeholder(tf.float32, shape=[None, None, 3])

W_conv1 = weight_variable([3, 3, 1, 1, 32])
h_conv1 = conv3d(x_image, W_conv1)
# second convolution
W_conv2 = weight_variable([3, 3, 4, 32, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)
# third convolution path 1
W_conv3_A = weight_variable([1, 1, 1, 64, 64])
h_conv3_A = conv3d_s1(h_conv2, W_conv3_A)
# third convolution path 2
W_conv3_B = weight_variable([1, 1, 1, 64, 64])
h_conv3_B = conv3d_s1(h_conv2, W_conv3_B)
# fourth convolution path 1
W_conv4_A = weight_variable([3, 3, 1, 64, 96])
h_conv4_A = conv3d_s1(h_conv3_A, W_conv4_A)
# fourth convolution path 2
W_conv4_B = weight_variable([1, 7, 1, 64, 64])
h_conv4_B = conv3d_s1(h_conv3_B, W_conv4_B)
# fifth convolution path 2
W_conv5_B = weight_variable([1, 7, 1, 64, 64])
h_conv5_B = conv3d_s1(h_conv4_B, W_conv5_B)
# sixth convolution path 2
W_conv6_B = weight_variable([3, 3, 1, 64, 96])
h_conv6_B = conv3d_s1(h_conv5_B, W_conv6_B)
# concatenation
layer1 = tf.concat([h_conv4_A, h_conv6_B], 4)
w = tf.Variable(tf.constant(1., shape=[2, 2, 4, 1, 192]))
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter=w, output_shape=tf.shape(x_image), strides=[1, 2, 2, 2, 1], padding='SAME')

final = DeConnv1
final_conv = conv3d_s1(final, weight_variable([1, 1, 1, 1, 3]))
y = tf.reshape(final_conv, [-1, 3])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y))

print('x_image:', x_image)
print('DeConnv1:', DeConnv1)
print('final_conv:', final_conv)

def try_image(N, M, P, B=1):
    batch_x = np.random.normal(size=[B, N, M, P, 1])
    batch_y = np.ones([B, N * M * P, 3]) / 3.0
    deconv_val, final_conv_val, loss = sess.run([DeConnv1, final_conv, cross_entropy],
                                                feed_dict={x_image: batch_x, label: batch_y})
    print(deconv_val.shape)
    print(final_conv_val.shape)
    print(loss)
    print()

tf.global_variables_initializer().run()
try_image(32, 32, 7)
try_image(16, 16, 3)
try_image(16, 16, 3, 2)
Theoretically, it's possible. You need to set the image size of the input and label placeholders to None and let the graph dynamically infer the image size from the input data.
However, you have to be careful when you define the graph. You need to use tf.shape() instead of tf.get_shape(): the former infers the shape dynamically, only at session.run time, while the latter returns the shape known when you define the graph. But when the input size is set to None, the latter does not get the true shape (it may just return None).
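For example, a minimal illustration of the difference (hypothetical shapes; assumes import tensorflow as tf and import numpy as np):

x = tf.placeholder(tf.float32, shape=[None, None, 3])
print(x.get_shape())   # static shape, fixed at graph construction: (?, ?, 3)
dyn = tf.shape(x)      # dynamic shape: an int32 tensor evaluated at run time
with tf.Session() as sess:
    print(sess.run(dyn, feed_dict={x: np.zeros([4, 5, 3])}))  # [4 5 3]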
And to make things more complicated, if you use tf.layers.conv2d or upconv2d, sometimes these high-level functions do not like tf.shape(), because they seem to assume that the shape information is available during graph construction.
I hope to have a better working example to show the points above; I'll put this answer up as a placeholder and will come back and add more if I get a chance.
I'm using TensorFlow to process color images with a convolutional neural network. A code snippet is below.
My code runs, so I think I got the number of channels right. My question is: how do I correctly order the RGB data? Is it in the form rgbrgbrgb, or would it be rrrgggbbb? Presently I am using the latter. Thanks, any help would be appreciated.
c_output = 2
c_input = 784 * 3

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

self.c_x = tf.placeholder(tf.float32, shape=[None, c_input])
self.c_y_ = tf.placeholder(tf.float32, shape=[None, c_output])

self.W_conv1 = weight_variable([5, 5, 3, 32])
self.b_conv1 = bias_variable([32])
self.x_image = tf.reshape(self.c_x, [-1, 28, 28, 3])
self.h_conv1 = tf.nn.relu(conv2d(self.x_image, self.W_conv1) + self.b_conv1)
self.h_pool1 = max_pool_2x2(self.h_conv1)

self.W_conv2 = weight_variable([5, 5, 32, 64])
self.b_conv2 = bias_variable([64])
self.h_conv2 = tf.nn.relu(conv2d(self.h_pool1, self.W_conv2) + self.b_conv2)
self.h_pool2 = max_pool_2x2(self.h_conv2)

self.W_fc1 = weight_variable([7 * 7 * 64, 1024])
self.b_fc1 = bias_variable([1024])
self.h_pool2_flat = tf.reshape(self.h_pool2, [-1, 7 * 7 * 64])
self.h_fc1 = tf.nn.relu(tf.matmul(self.h_pool2_flat, self.W_fc1) + self.b_fc1)

self.keep_prob = tf.placeholder(tf.float32)
self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

self.W_fc2 = weight_variable([1024, c_output])
self.b_fc2 = bias_variable([c_output])
self.y_conv = tf.matmul(self.h_fc1_drop, self.W_fc2) + self.b_fc2

self.c_cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(self.y_conv, self.c_y_))
self.c_train_step = tf.train.AdamOptimizer(1e-4).minimize(self.c_cross_entropy)
self.c_correct_prediction = tf.equal(tf.argmax(self.y_conv, 1), tf.argmax(self.c_y_, 1))
self.c_accuracy = tf.reduce_mean(tf.cast(self.c_correct_prediction, tf.float32))
TL;DR: With your current program, the in-memory layout of the data should be R-G-B-R-G-B-R-G-B-R-G-B...
I assume from this line that you are passing in RGB images with 28x28 pixels:
self.x_image = tf.reshape(self.c_x, [-1, 28, 28, 3])
We can call the dimensions of self.x_image "batch", "height", "width", and "channel". This matches the default data format for tf.nn.conv2d() and tf.nn.max_pool().
In TensorFlow, the in-memory representation of a tensor is row-major order (or "C" ordering, because that is the representation of arrays in the C programming language). Essentially this means that the rightmost dimension is the fastest changing, and the elements of the tensor are packed together in memory in the following order (where ? stands for the unknown batch size, minus 1):
[0, 0, 0, 0]
[0, 0, 0, 1]
[0, 0, 0, 2]
[0, 0, 1, 0]
...
[?, 27, 27, 1]
[?, 27, 27, 2]
Therefore your program probably isn't interpreting the image data correctly. There are at least two options:
Reshape your data to match its true order ("batch", "channels", "height", "width"):
self.x_image = tf.reshape(self.c_x, [-1, 3, 28, 28])
In fact, this format is sometimes more efficient for convolutions. You can instruct tf.nn.conv2d() and tf.nn.max_pool() to use it without transposing by passing the optional argument data_format="NCHW", but you will also need to change the shape of your bias variables to match.
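For example, a hedged sketch of what option 1 could look like with your names (tf.nn.bias_add with data_format='NCHW' avoids reshaping the bias by hand, and ksize/strides move the 2x2 window to the last two dimensions):

# Sketch only: NCHW layout throughout.
self.x_image = tf.reshape(self.c_x, [-1, 3, 28, 28])
conv = tf.nn.conv2d(self.x_image, self.W_conv1,
                    strides=[1, 1, 1, 1], padding='SAME', data_format='NCHW')
self.h_conv1 = tf.nn.relu(tf.nn.bias_add(conv, self.b_conv1, data_format='NCHW'))
self.h_pool1 = tf.nn.max_pool(self.h_conv1, ksize=[1, 1, 2, 2],
                              strides=[1, 1, 2, 2], padding='SAME', data_format='NCHW')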
Transpose your image data to match the result of your program using tf.transpose():
self.x_image = tf.transpose(tf.reshape(self.c_x, [-1, 3, 28, 28]), [0, 2, 3, 1])
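To see the two memory layouts concretely, a small NumPy illustration (not part of your code):

import numpy as np
# A 2x2 RGB image in HWC order: the channel index changes fastest,
# so memory reads R, G, B, R, G, B, ...
hwc = np.arange(12).reshape(2, 2, 3)
print(hwc.ravel())
# The same pixels in CHW order: memory reads all R, then all G, then all B.
chw = np.ascontiguousarray(hwc.transpose(2, 0, 1))
print(chw.ravel())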
I'm trying to apply the expert portion of the tutorial to my own data but I keep running into dimension errors. Here's the code leading up to the error.
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

W_conv1 = weight_variable([1, 8, 1, 4])
b_conv1 = bias_variable([4])
x_image = tf.reshape(tf_in, [-1, 2, 8, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
And then when I try to run this command:
W_conv2 = weight_variable([1, 4, 4, 8])
b_conv2 = bias_variable([8])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
I get the following errors:
ValueError Traceback (most recent call last)
<ipython-input-41-7ab0d7765f8c> in <module>()
3
4 h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
----> 5 h_pool2 = max_pool_2x2(h_conv2)
ValueError: ('filter must not be larger than the input: ', 'Filter: [', Dimension(2), 'x', Dimension(2), '] ', 'Input: [', Dimension(1), 'x', Dimension(4), '] ')
Just for some background information, the data that I'm dealing with is a CSV file where each row contains 10 features and 1 empty column that can be a 1 or a 0. What I'm trying to get is a probability in the empty column that the column will equal a 1.
You have to shape the input so it is compatible with both the training tensor and the output. If your input is length 1, your output should be length 1 (length is substituted for dimension).
When you're dealing with-
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 1, 1, 1],
                          strides=[1, 1, 1, 1], padding='SAME')
Notice how I changed the strides and the ksize to [1, 1, 1, 1]. This will match the output to a 1-dimensional input and prevent errors down the road.
When you're defining your weight variable (see code below)-
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
you're going to have to make the first two numbers conform to the feature tensor that you are using to train your model; the last two numbers will be the dimension of the predicted output (same as the dimension of the input).
W_conv1 = weight_variable([1, 10, 1, 1])
b_conv1 = bias_variable([1])
Notice the [1, 10, in the beginning which signifies that the feature tensor is going to be a 1x10 feature tensor; the last two numbers 1, 1] correspond to the dimensions of the input and output tensors/predictors.
When you reshape your x_foo tensor (I call it x_ [x prime]), you, for whatever reason, have to define it like so-
x_ = tf.reshape(x, [-1,1,10,1])
Notice the 1 and 10 in the middle- ...1,10,.... Once again, these numbers correspond to the dimension of your feature tensor.
For every bias variable, you choose the final number of the previously defined variable. For example, if W_conv1 = weight_variable([1, 10, 1, 1]) appears like so, you take the final number and put that into your bias variable so it can match the dimensions of the input. This is done like so- b_conv1 = bias_variable([1]).
If you need any more explanation please comment below.
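Putting the pieces of this answer together, a sketch of the shapes it prescribes (x and the 10-feature rows come from the question; conv2d and max_pool_2x2 are the [1, 1, 1, 1] versions defined above):

x_ = tf.reshape(x, [-1, 1, 10, 1])        # one 1x10 feature row per example
W_conv1 = weight_variable([1, 10, 1, 1])  # filter spans the whole feature row
b_conv1 = bias_variable([1])              # matches the final number of W_conv1
h_conv1 = tf.nn.relu(conv2d(x_, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)           # with ksize/strides [1, 1, 1, 1] the shape is unchanged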
The dimensions you are using for the filter are not matching the output of the hidden layer.
Let me see if I understood you: your input is composed of 8 features, and you want to reshape it into a 2x4 matrix, right?
The weights you created with weight_variable([1, 8, 1, 4]) expect a 1x8 input, in one channel, and produce a 1x8 output in 4 channels (or hidden units). The filter you are using sweeps the input in 2x2 squares. However, since the result of the weights is 1x8, they won't match.
You should reshape the input as
x_image = tf.reshape(tf_in, [-1,2,4,1])
Now your input is actually 2x4 instead of 1x8. Then you need to change the weight shape to (2, 4, 1, hidden_units) to deal with the 2x4 input. It will also produce a 2x4 output, and the 2x2 filter can now be applied.
After that, the filter will match the output of the weights. Also note that you will have to change the shape of your second weight matrix to weight_variable([2, 4, hidden_units, hidden2_units]).
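For concreteness, a sketch of the reshaped pipeline this answer describes (hidden_units and hidden2_units stand in for whatever filter counts you choose):

hidden_units = 4    # assumed number of first-layer filters
hidden2_units = 8   # assumed number of second-layer filters
x_image = tf.reshape(tf_in, [-1, 2, 4, 1])                # 8 features as a 2x4 image
W_conv1 = weight_variable([2, 4, 1, hidden_units])
b_conv1 = bias_variable([hidden_units])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # SAME padding keeps 2x4
h_pool1 = max_pool_2x2(h_conv1)                           # the 2x2 filter now fits
W_conv2 = weight_variable([2, 4, hidden_units, hidden2_units])
b_conv2 = bias_variable([hidden2_units])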