Tensorflow, uninitialized variables despite running global_variable_initializer - python

I'm new to Tensorflow. I worked in Caffe previously. I'm trying to implement http://cvlab.cse.msu.edu/pdfs/Tai_Yang_Liu_CVPR2017.pdf in Tensorflow.
I'm having trouble with variables in Tensorflow, despite having them initialized. I tried using tf.get_variable instead of tf.Variable, but this didn't work. And setting initializer=tf.contrib.layers.xavier_initializer() did nothing.
My code:
import tensorflow as tf
import sys, os
import numpy as np
global xseed
def get_model(inp, train):
#create architecture
#db input, 128 batch size
with tf.name_scope('input'):
inl = tf.reshape(inp, [-1, 31, 31, 1])
with tf.name_scope('batchnorm_scale_relu'):
#batchnorm,scale,relu
normed = tf.contrib.layers.batch_norm(inl, is_training=train)
alpha = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="alpha")
beta = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="beta")
scaled = alpha * normed + beta
relud = tf.nn.relu(scaled, name="relu1")
with tf.name_scope('conv1'):
#??? padding ??? is correct
padded_input = tf.pad(relud, [[0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT")
#weight initializer msra
convolved = tf.layers.conv2d(inputs=padded_input, filters=128, kernel_size=(3, 3), strides=(1, 1), padding="VALID",
kernel_initializer=tf.contrib.layers.variance_scaling_initializer(seed=xseed))
with tf.name_scope('batchnorm_scale_relu_1a'):
#batchnorm,scale,relu
normed_1a = tf.contrib.layers.batch_norm(convolved, is_training=train)
alpha_1a = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="alpha_1a")
beta_1a = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="beta_1a")
scaled_1a = alpha_1a * normed_1a + beta_1a
relud_1a = tf.nn.relu(scaled_1a, name="relu1_1a")
with tf.name_scope('conv1_1a'):
padded_input_1a = tf.pad(relud_1a, [[0, 0], [1, 1], [1, 1], [0, 0]], "CONSTANT")
#weight initializer msra
convolved_1a = tf.layers.conv2d(inputs=padded_input_1a, filters=128, kernel_size=(3, 3), strides=(1, 1), padding="VALID",
kernel_initializer=tf.contrib.layers.variance_scaling_initializer(seed=xseed))
return convolved_1a
xseed = None
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
init_op = tf.global_variables_initializer()
init_op2 = tf.local_variables_initializer()
with tf.Session(config=tf.ConfigProto()) as sess:
m = get_model(inp, True)
sess.run(init_op)
sess.run(init_op2)
print(sess.run(tf.report_uninitialized_variables()))
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
And it reports uninitialized variables:
['BatchNorm/beta' 'BatchNorm/moving_mean' 'BatchNorm/moving_variance' 'batchnorm_scale_relu/alpha' 'batchnorm_scale_relu/beta' 'conv2d/kernel' 'conv2d/bias' 'BatchNorm_1/beta' 'BatchNorm_1/moving_mean' 'BatchNorm_1/moving_variance' 'batchnorm_scale_relu_1a/alpha_1a' 'batchnorm_scale_relu_1a/beta_1a' 'conv2d_1/kernel' 'conv2d_1/bias']
And gives me an exception when evaluating convolved tensor:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value batchnorm_scale_relu_1a/alpha_1a
[[Node: batchnorm_scale_relu_1a/alpha_1a/read = Identity[T=DT_FLOAT, _class=["loc:#batchnorm_scale_relu_1a/alpha_1a"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](batchnorm_scale_relu_1a/alpha_1a)]]
Please help me.

Solved it myself. Replaced this:
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
init_op = tf.global_variables_initializer()
init_op2 = tf.local_variables_initializer()
with tf.Session(config=tf.ConfigProto()) as sess:
m = get_model(inp, True)
sess.run(init_op)
sess.run(init_op2)
print(sess.run(tf.report_uninitialized_variables()))
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
With this:
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
with tf.Session(config=tf.ConfigProto()) as sess:
m = get_model(inp, True)
sess.run(tf.initialize_all_variables())
res = sess.run(tf.report_uninitialized_variables())
#print(res) -- outputs [] (none)
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
print(res)
The whole point is tf.initialize_all_variables() instead of tf.global_variables_initializer()

Related

pytorch: "multi-target not supported" error message

So I want to classify some (3, 50, 50) pictures. First I loaded the dataset from the file without a dataloader or batches, it worked. Now, after adding both things I get that error:
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
I found a lot of answers in the internet, mostly to use target.squeeze(1) but it doesn´t work for me.
My target-batch looks like following:
tensor([[1, 0],
[1, 0],
[1, 0],
[1, 0],
[1, 0],
[1, 0],
[1, 0],
[1, 0]], device='cuda:0')
Shouldn't that be okay?
Here the full code (notice that Im only creating the structure of the model on which Im going to apply the full and correct dataset afterwards, because I dont have the full data yet, only 32 pictures and no labels, thats why I added torch.tensor([1, 0]) as a placeholder for all labels):
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
from torch.autograd import Variable
import numpy as np
from PIL import Image
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
# model structur:
self.conv1 = nn.Conv2d(3, 10, kernel_size=(5,5), stride=(1,1))
self.conv2 = nn.Conv2d(10, 20, kernel_size=(5,5), stride=(1,1)) # with mapool: output = 20 * (9,9) feature-maps -> flatten
self.fc1 = nn.Linear(20*9*9, 250)
self.fc2 = nn.Linear(250, 100)
self.fc3 = nn.Linear(100, 2)
def forward(self, x):
# conv layers
x = F.relu(self.conv1(x)) # shape: 1, 10, 46, 46
x = F.max_pool2d(x, 2, 2) # shape: 1, 10, 23, 23
x = F.relu(self.conv2(x)) # shape: 1, 20, 19, 19
x = F.max_pool2d(x, 2, 2) # shape: 1, 20, 9, 9
# flatten to dense layer:
x = x.view(-1, 20*9*9)
# dense layers
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
output = F.log_softmax(self.fc3(x), dim=1)
return output
class Run:
def __init__(self, epochs, learning_rate, dropout, momentum):
# load model
self.model = Model().cuda()
# hyperparameters:
self.epochs = epochs
self.learning_rate = learning_rate
self.dropout = dropout
def preporcessing(self):
dataset_folder = "/media/theodor/hdd/Programming/BWKI/dataset/bilder/"
dataset = []
for i in range(0, 35):
sample_image = Image.open(dataset_folder + str(i) + ".png")
data = torch.from_numpy(np.array(sample_image)).type("torch.Tensor").reshape(3, 50, 50)
target = torch.tensor([[1, 0]])
sample = (data, target)
dataset.append(sample)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=8)
return train_loader
def train(self):
train_set = self.preporcessing()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
for epoch in range(self.epochs):
epoch_loss = 0
for i, data in enumerate(train_set, 0):
sample, target = data
# set data as cuda varibale
sample = Variable(sample.float().cuda())
target = Variable(target.cuda())
# initialize optimizer
optimizer.zero_grad()
# predict
output = self.model(sample)
# backpropagation
print(output, target.squeeze(1))
loss = criterion(output, target.squeeze(1)) # ERROR MESSAGE: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
loss.backward()
optimizer.step()
epoch_loss += loss.item()
print("loss after epoch [", epoch, "|", self.epochs, "] :", epoch_loss)
run = Run(10, 0.001, 0.5, 0.9)
run.train()
So I expected it to start training (of course not learning anything because the labels are wrong).
For nn.CrossEntropyLoss the target has to be a single number from the interval [0, #classes] instead of a one-hot encoded target vector. Your target is [1, 0], thus PyTorch thinks you want to have multiple labels per input which is not supported.
Replace your one-hot-encoded targets:
[1, 0] --> 0
[0, 1] --> 1

Tensorflow: Tensordot reproducible results

I am playing around with tf.tensordot in Tensorflow. However, I am experiencing some inconsistencies which are bugging me. Below is a reproducible example:
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)
X = np.random.rand(150, 196, 268).astype(np.float32)
W = tf.Variable(initial_value=tf.random_normal([268, 22], stddev=0.1))
dotted_150 = tf.tensordot(X, W, axes=[[2], [0]])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
output_150 = sess.run(dotted_150)
This returns a tensor that has dimensions (150, 196, 22)
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)
X = np.random.rand(1, 196, 268).astype(np.float32)
W = tf.Variable(initial_value=tf.random_normal([268, 22], stddev=0.1))
dotted_1 = tf.tensordot(X, W, axes=[[2], [0]])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
output_1 = sess.run(dotted_1)
This returns a tensor that has dimensions (1, 196, 22)
Now, if we test whether the first element from output_150 is almost equal to the first and only element from output_1, the result is a mismatch between the two arrays.
np.testing.assert_allclose(output_1[0], output_150[0])
On the other hand, if we do:
np.random.seed(42)
input_150 = np.random.rand(150, 196, 268).astype(np.float32)
np.random.seed(42)
input_1 = np.random.rand(1, 196, 268).astype(np.float32)
np.testing.assert_equal(input_150[0], input_1[0])
We see that the inputs are exactly the same. With that said, I would expect that the outputs from the tf.tensordot to be the same as well and they are not.
On the same note, here is a tf.tensordot equivalent using tf.reshape and tf.matmul:
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)
X = np.random.rand(150, 196, 268).astype(np.float32)
W = tf.Variable(initial_value=tf.random_normal([268, 22], stddev=0.1))
reshaped = tf.reshape(X, [-1, 268])
mulled_150 = tf.reshape(tf.matmul(reshaped, W), [-1, 196, 22])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
output_150 = sess.run(mulled_150)
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)
X = np.random.rand(1, 196, 268).astype(np.float32)
W = tf.Variable(initial_value=tf.random_normal([268, 22], stddev=0.1))
reshaped = tf.reshape(X, [-1, 268])
mulled_1 = tf.reshape(tf.matmul(reshaped, W), [-1, 196, 22])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
output_1 = sess.run(mulled_1)
np.testing.assert_allclose(output_1[0], output_150[0])
The outcome is exactly the same, a mismatch between the output arrays. How can that be?
Apparently, if I use tf.float64 precision instead of tf.float32 the results are identical.

How to make a custom activation function in tensorflow

I need to make an activation function which is not exist in tensorflow.How should I do? I ever saw this link,
How to make a custom activation function with only Python in Tensorflow?
but I still don't know how to implement the new type of activation funcation in the picture.
relu,leaky_relu and a new type of relu
I think this one could serve you. I have only used functions that incorporate tensorflow in that way it is he who manages the backpropagation.
If you use python functions you would have to program both the forward and the backward. But the problem is when you have to save the function's masks of the piecewise function in a "cache" (personally I do not know how it is done and it would be interesting to know).
import numpy as np
import tensorflow as tf
def new_relu(x, k=0.2):
part_1 = tf.to_float(tf.math.less_equal(0.0, x))
part_2 = tf.to_float(tf.math.logical_and(tf.math.less_equal(-1.0, x), tf.math.less(x, 0.0)))
part_3 = tf.to_float(tf.math.less(x, -1.0))
return part_1*x + part_2*x*k #+ part_3*0
def new_relu_test():
# create data
x = tf.random_normal([10])*10000
y = new_relu(x)
with tf.Session():
diff = tf.test.compute_gradient_error(x, [10], y, [10])
print(diff)
# use in dense
x = tf.placeholder(shape=[None, 3], dtype=tf.float32)
nn = tf.layers.dense(x, 3, activation=new_relu)
EDIT:
If you want the second parameter to be a tensor too, you must be the same size as the input.
import numpy as np
import tensorflow as tf
def new_relu(x, k=0.2):
part_1 = tf.to_float(tf.math.less_equal(0.0, x))
part_2 = tf.to_float(tf.math.logical_and(tf.math.less_equal(-1.0, x), tf.math.less(x, 0.0)))
part_3 = tf.to_float(tf.math.less(x, -1.0))
return part_1*x + part_2*x*k #+ part_3*0
def new_relu_test():
# create data
x = tf.random_normal([10])*10000
y = new_relu(x)
with tf.Session():
diff = tf.test.compute_gradient_error(x, [10], y, [10])
print(diff)
# use in dense
x = tf.placeholder(shape=[None, 3], dtype=tf.float32)
x_b = tf.placeholder(shape=[None], dtype=tf.float32)
nn_1 = tf.layers.dense(x, 3)
nn_2 = tf.layers.dense(x, 3)
nn = tf.layers.dense(nn_2, 1, activation=None)
new_r = new_relu(x, tf.tile(tf.expand_dims(x_b, -1), [1, 3]))
with tf.Session() as sess:
sess.run(tf.initializers.global_variables())
sess.run(new_r, feed_dict={x: np.random.rand(100, 3), x_b: np.random.rand(100)})
new_relu_test()
EDIT 2:
Using conv2d
import numpy as np
import tensorflow as tf
def new_relu(x, k=0.2):
part_1 = tf.to_float(tf.math.less_equal(0.0, x))
part_2 = tf.to_float(tf.math.logical_and(tf.math.less_equal(-1.0, x), tf.math.less(x, 0.0)))
part_3 = tf.to_float(tf.math.less(x, -1.0))
return part_1*x + part_2*x*k #+ part_3*0
def new_relu_test():
# create data
x = tf.random_normal([10])*10000
y = new_relu(x)
with tf.Session():
diff = tf.test.compute_gradient_error(x, [10], y, [10])
print(diff)
# use in dense
x = tf.placeholder(shape=[None, 28, 28, 3], dtype=tf.float32)
conv1_weights = tf.get_variable("weight",[3,3,3,32],initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable("bias", [32], initializer=tf.constant_initializer(0.0))
conv1 = tf.nn.conv2d(x, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
relu1 = new_relu(tf.nn.bias_add(conv1, conv1_biases))
with tf.Session() as sess:
sess.run(tf.initializers.global_variables())
sess.run(relu1, feed_dict={x: np.random.rand(100, 28, 28, 3)})
new_relu_test()

GRU same configurations but in two different ways produces two different output in tensorflow

I would like to do some sequence prediction in tensorflow using GRU. so I have created the same model in 2 different ways as follows:
In model 1 I have a 2 GRUs, one after the other, that is, the new_state1, the final hidden state of the first GRU, acts as the initial state to the second GRU. Therefore, the model outputs new_state1 and new_state2 consequentially. Note that this is not a 2 layer model, but only 1 layer. From the code below, I divided the input and the output into 2 parts where GRU1 takes the first part, and the second GRU takes the second part.
Also the random_seed is set and fixed for both model so that results can be comparable.
Model 1
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
labels = tf.concat([labels1, labels2], axis=1, name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
def model(input_feat1, input_feat2):
with tf.variable_scope("GRU"):
cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
cell2 = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("First50"):
# output1: shape=[1, time_steps1, 32]
output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("Second50"):
# output2: shape=[1, time_steps2, 32]
output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)
with tf.variable_scope("output"):
# output shape: [1, time_steps1 + time_steps2, 32] => [1, 100, 32]
output = tf.concat([output1, output2], axis=1)
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])
with tf.variable_scope("outputs_1_2_reshaped"):
output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])
print(output.get_shape().as_list(), "1")
print(output1.get_shape().as_list(), "2")
print(output2.get_shape().as_list(), "3")
return output, output1, output2, initial_state, new_state1, new_state2
output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
to_run_list = [new_state1, new_state2]
in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))
l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))
i_s = np.zeros([1, cell_size])
new_s1, new_s2 = sess.run(to_run_list, feed_dict={input_1: in1,
input_2: in2,
labels1: l1,
labels2: l2,
initial_state: i_s})
print(np.shape(new_s1), np.shape(new_s2))
print(np.mean(new_s1), np.mean(new_s2))
print(np.sum(new_s1), np.sum(new_s2))
In this model, Instead of having 2 different GRU, I created one, and I divided the input and labels into 2 different parts as well, and I used a for loop to iterate over my input dataset. Then the final state is taken and fed back into the same model as initial state.
Note that both model1 and model2 have the very first initial state of zeros.
Model 2
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps = 500
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
inputs = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="inputs")
labels = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
grads_initial_state = tf.placeholder(dtype=m_dtype, shape=[None, cell_size], name="prev_grads")
this_is_last_batch = tf.placeholder(dtype=tf.bool, name="this_is_last_batch")
def model(input_feat):
with tf.variable_scope("GRU"):
cell = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("cell"):
# output1: shape=[1, time_steps, 32]
output, new_state = tf.nn.dynamic_rnn(cell, input_feat, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("output"):
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
output = tf.reshape(output, shape=[1, time_steps, 1])
print(output.get_shape().as_list(), "1")
return output, new_state
output, new_state = model(inputs)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# 1000 // 500 = 2
num_iterations = seq_length // time_steps
print("num_iterations:", num_iterations)
final_states = []
to_run_list = [grads_wrt_initial_state, new_state]
for i in range(num_iterations):
current_xt = x_t[i * time_steps: (i + 1)*time_steps]
current_xt_plus_1 = x_t_plus_1[i*time_steps: (i + 1)*time_steps]
in1 = np.reshape(current_xt, newshape=(1, time_steps, 1))
l1 = np.reshape(current_xt_plus_1, newshape=(1, time_steps, 1))
i_s = np.zeros([1, cell_size])
if i == 0:
new_s = sess.run(new_state, feed_dict={inputs: in1,
labels: l1,
initial_state: i_s})
final_states.append(new_s)
print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
else:
new_s = sess.run(new_state, feed_dict={inputs: in1,
labels: l1,
initial_state: final_states[-1]})
final_states.append(new_s)
print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
Finally, after printing out the statistics of new_state1 and new_state2 in model1, they were different from the new_state, after each iteration, in model2.
I would like to know how to fix this problem and why is that happening.
Edit:
I have figured out that the weights values of the gru in both files are different
Now how can I reproduce the same results in 2 the different files even after setting the random seed?
Any help is much appreciated!!!
so to reproduce the same results in different files, tf.set_random_seed() is not enough. I figured out that we need to also set the seed for the intializers of the gru cells as well as the initializers of the weights in the dense layer at the output (this is at least acccording to my model); so the definition of the cell is now:
cell1 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=tf.glorot_normal_initializer(seed=123, dtype=m_dtype))
And for the dense layer:
output = tf.layers.dense(output, units=1, kernel_initializer=tf.glorot_uniform_initializer(seed=123, dtype=m_dtype))
Note that any other initializer could be used as long as we set the seed the dtype for it.

How to predict a specific image using Mnist

I am new to tensorflow, and I think I got the right answer, but I am missing something minimal, that I cant find online. I hope someone send me a reference or leads me to what I am missing.
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
batch_size = 128
test_size = 256
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
l1a = tf.nn.relu(tf.nn.conv2d(X, w, # l1a shape=(?, 28, 28, 32)
strides=[1, 1, 1, 1], padding='SAME'))
l1 = tf.nn.max_pool(l1a, ksize=[1, 2, 2, 1], # l1 shape=(?, 14, 14, 32)
strides=[1, 2, 2, 1], padding='SAME')
l1 = tf.nn.dropout(l1, p_keep_conv)
l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, # l2a shape=(?, 14, 14, 64)
strides=[1, 1, 1, 1], padding='SAME'))
l2 = tf.nn.max_pool(l2a, ksize=[1, 2, 2, 1], # l2 shape=(?, 7, 7, 64)
strides=[1, 2, 2, 1], padding='SAME')
l2 = tf.nn.dropout(l2, p_keep_conv)
l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, # l3a shape=(?, 7, 7, 128)
strides=[1, 1, 1, 1], padding='SAME'))
l3 = tf.nn.max_pool(l3a, ksize=[1, 2, 2, 1], # l3 shape=(?, 4, 4, 128)
strides=[1, 2, 2, 1], padding='SAME')
l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]]) # reshape to (?, 2048)
l3 = tf.nn.dropout(l3, p_keep_conv)
l4 = tf.nn.relu(tf.matmul(l3, w4))
l4 = tf.nn.dropout(l4, p_keep_hidden)
pyx = tf.matmul(l4, w_o)
return pyx
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
trX = trX.reshape(-1, 28, 28, 1) # 28x28x1 input img
teX = teX.reshape(-1, 28, 28, 1) # 28x28x1 input img
X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, 10])
w = init_weights([3, 3, 1, 32]) # 3x3x1 conv, 32 outputs
w2 = init_weights([3, 3, 32, 64]) # 3x3x32 conv, 64 outputs
w3 = init_weights([3, 3, 64, 128]) # 3x3x32 conv, 128 outputs
w4 = init_weights([128 * 4 * 4, 625]) # FC 128 * 4 * 4 inputs, 625 outputs
w_o = init_weights([625, 10]) # FC 625 inputs, 10 outputs (labels)
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)
# Launch the graph in a session
saver = tf.train.Saver()
with tf.Session() as sess:
# you need to initialize all variables
tf.global_variables_initializer().run()
for i in range(100):
training_batch = zip(range(0, len(trX), batch_size),
range(batch_size, len(trX)+1, batch_size))
for start, end in training_batch:
sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end],
p_keep_conv: 0.8, p_keep_hidden: 0.5})
test_indices = np.arange(len(teX)) # Get A Test Batch
np.random.shuffle(test_indices)
test_indices = test_indices[0:test_size]
print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==
sess.run(predict_op, feed_dict={X: teX[test_indices],
Y: teY[test_indices],
p_keep_conv: 1.0,
p_keep_hidden: 1.0})))
save_path = saver.save(sess, "tmp/model.ckpt")
print("Model saved in file: %s" % save_path)
After all of this, now I am trying to predict a single image from this array just as an example (I know its not a proper test), to give me the class using:
with tf.Session() as sess:
# Restore variables from disk.
saver.restore(sess, "tmp/model.ckpt")
print "...Model Loaded..."
prediction=tf.argmax(predict_op,1)
print prediction.eval(feed_dict={X: teX[2].reshape(1,28,28,1)}, session=sess)
But im getting this error:
InvalidArgumentError: You must feed a value for placeholder tensor
'Placeholder_3' with dtype
This previous problem has been solved by adding p_keep_conv: 1.0, p_keep_hidden: 1.0 to the dict.
After this another issue appeared:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-91-3e9ead14a8b3> in <module>()
4 print "...Model Loaded..."
5 prediction=tf.argmax(predict_op,1)
----> 6 classification = sess.run(tf.argmax(predict_op, 1), feed_dict={X: teX[3].reshape(1,28,28,1),p_keep_conv: 1.0,p_keep_hidden: 1.0})
7
....
InvalidArgumentError: Expected dimension in the range [-1, 1), but got 1
[[Node: ArgMax_21 = ArgMax[T=DT_INT64, Tidx=DT_INT32, output_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ArgMax/_37, ArgMax_21/dimension)]]
I'm summing up what we said in the comments in this answer.
Placeholder error:
Your prediction.eval() call has a feed_dict that doesn't contain a value for p_keep_conv and p_keep_hidden. Note that, since you don't provide a name=... argument whe defining your placholders, they get the default name Placeholder_N which is what the error message shows. It's a good practice to always give a meaningful name to variables, constants and placeholders in order to make debugging easier.
Argmax expected dimension:
tf.argmax's definition states:
axis: A Tensor. Must be one of the following types: int32, int64.
int32, 0 <= axis < rank(input). Describes which axis of the input
Tensor to reduce across.
It seems, then, that the only way to run argmax on the last axis of the tensor is by giving it axis=-1, because of the "strictly less than" sign in the definition of the function (I don't understand why they made this design choice).

Categories