I am playing around with tf.tensordot in Tensorflow. However, I am experiencing some inconsistencies which are bugging me. Below is a reproducible example:
# Start from an empty graph and pin both the TensorFlow and NumPy seeds.
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)

# 150 samples, each a (196, 268) matrix, cast to float32.
batch_shape = (150, 196, 268)
X = np.random.rand(*batch_shape).astype(np.float32)

# Weight matrix initialised from a normal distribution (stddev 0.1).
weights_init = tf.random_normal([268, 22], stddev=0.1)
W = tf.Variable(initial_value=weights_init)

# Contract axis 2 of X against axis 0 of W.
dotted_150 = tf.tensordot(X, W, axes=[[2], [0]])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output_150 = sess.run(dotted_150)
This returns a tensor that has dimensions (150, 196, 22)
# Same graph as before, rebuilt from scratch with identical seeds,
# but with a single sample in the batch.
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)

batch_shape = (1, 196, 268)
X = np.random.rand(*batch_shape).astype(np.float32)

weights_init = tf.random_normal([268, 22], stddev=0.1)
W = tf.Variable(initial_value=weights_init)

# Contract axis 2 of X against axis 0 of W.
dotted_1 = tf.tensordot(X, W, axes=[[2], [0]])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output_1 = sess.run(dotted_1)
This returns a tensor that has dimensions (1, 196, 22)
Now, if we test whether the first element from output_150 is almost equal to the first and only element from output_1, the result is a mismatch between the two arrays.
# Compare the single slice of output_1 with the first slice of output_150;
# no rtol/atol is passed, so assert_allclose uses its default tolerances.
np.testing.assert_allclose(output_1[0], output_150[0])
On the other hand, if we do:
def _seeded_input(batch_size):
    """Return a float32 array of shape (batch_size, 196, 268) drawn after re-seeding NumPy with 42."""
    np.random.seed(42)
    return np.random.rand(batch_size, 196, 268).astype(np.float32)


# Both inputs are generated from the same seed, so they share their leading values.
input_150 = _seeded_input(150)
input_1 = _seeded_input(1)

# The first sample of the large batch must equal the only sample of the small one.
np.testing.assert_equal(input_150[0], input_1[0])
We see that the inputs are exactly the same. With that said, I would expect the outputs of tf.tensordot to be the same as well, but they are not.
On the same note, here is a tf.tensordot equivalent using tf.reshape and tf.matmul:
# tensordot spelled out as reshape -> matmul -> reshape, for the 150-sample batch.
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)

batch_shape = (150, 196, 268)
X = np.random.rand(*batch_shape).astype(np.float32)

weights_init = tf.random_normal([268, 22], stddev=0.1)
W = tf.Variable(initial_value=weights_init)

# Collapse the two leading axes so a plain 2-D matmul can be used.
reshaped = tf.reshape(X, [-1, 268])
product = tf.matmul(reshaped, W)
# Restore the (batch, 196, 22) layout.
mulled_150 = tf.reshape(product, [-1, 196, 22])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output_150 = sess.run(mulled_150)
# Same reshape -> matmul -> reshape graph, rebuilt for a single-sample batch.
tf.reset_default_graph()
tf.set_random_seed(42)
np.random.seed(42)

batch_shape = (1, 196, 268)
X = np.random.rand(*batch_shape).astype(np.float32)

weights_init = tf.random_normal([268, 22], stddev=0.1)
W = tf.Variable(initial_value=weights_init)

# Collapse the two leading axes so a plain 2-D matmul can be used.
reshaped = tf.reshape(X, [-1, 268])
product = tf.matmul(reshaped, W)
# Restore the (batch, 196, 22) layout.
mulled_1 = tf.reshape(product, [-1, 196, 22])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output_1 = sess.run(mulled_1)

# Compare the single-sample result with the first slice of the 150-sample result.
np.testing.assert_allclose(output_1[0], output_150[0])
The outcome is exactly the same, a mismatch between the output arrays. How can that be?
Apparently, if I use tf.float64 precision instead of tf.float32 the results are identical.
Related
I would like to do some sequence prediction in tensorflow using GRU. so I have created the same model in 2 different ways as follows:
In model 1 I have 2 GRUs, one after the other; that is, new_state1, the final hidden state of the first GRU, acts as the initial state of the second GRU. Therefore, the model outputs new_state1 and new_state2 consecutively. Note that this is not a 2-layer model, but only 1 layer. In the code below, I divide the input and the output into 2 parts, where the first GRU takes the first part and the second GRU takes the second part.
Also the random_seed is set and fixed for both model so that results can be comparable.
Model 1
import tensorflow as tf
import numpy as np
# Size passed to each GRUCell and used as the width of the state placeholder.
cell_size = 32
# Total sequence length and how it is split between the two dynamic_rnn calls.
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1
# Toy data: the integers 1..seq_length, and the same sequence shifted by one
# (fed to the label placeholders further down).
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
# Graph-level seed; set before any op of the model is created.
tf.set_random_seed(123)
m_dtype = tf.float32
# One input and one label placeholder per half of the sequence.
input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
# Labels of both halves joined along the time axis (axis=1).
labels = tf.concat([labels1, labels2], axis=1, name="labels")
# State fed to the first dynamic_rnn call inside model().
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
# Build the Model 1 graph: two GRU cells run one after the other, followed by a
# single dense layer applied to the concatenated outputs.
#   input_feat1 / input_feat2: inputs of the first / second dynamic_rnn call.
#   Returns (output, output1, output2, initial_state, new_state1, new_state2).
# The first dynamic_rnn reads the module-level `initial_state` placeholder; the
# second one starts from new_state1.
# NOTE(review): the indentation of this snippet was lost, so the nesting of the
# variable scopes below cannot be determined from here -- confirm before running.
def model(input_feat1, input_feat2):
with tf.variable_scope("GRU"):
cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
cell2 = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("First50"):
# output1: shape=[1, time_steps1, 32]
output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("Second50"):
# output2: shape=[1, time_steps2, 32]
# The second GRU is seeded with the final state of the first one.
output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)
with tf.variable_scope("output"):
# output shape: [1, time_steps1 + time_steps2, 32]
# (time_steps1 + time_steps2 equals seq_length, so with the values used in this
# file that is [1, 1000, 32], not [1, 100, 32] as the original comment stated)
output = tf.concat([output1, output2], axis=1)
# Flatten to [-1, cell_size] so the dense layer is applied to every time step.
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
# The batch dimension is hard-coded to 1 here.
output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])
with tf.variable_scope("outputs_1_2_reshaped"):
# Split the dense output back into the two time ranges; output1/output2 are
# rebound here and no longer refer to the raw GRU outputs.
output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])
print(output.get_shape().as_list(), "1")
print(output1.get_shape().as_list(), "2")
print(output2.get_shape().as_list(), "3")
return output, output1, output2, initial_state, new_state1, new_state2
# Build the graph once, then create the initializer for all of its variables.
output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    to_run_list = [new_state1, new_state2]

    # Shapes of the two halves: (batch=1, time, feature=1).
    first_shape = (1, time_steps1, 1)
    second_shape = (1, time_steps2, 1)

    in1 = x_t[:time_steps1].reshape(first_shape)
    in2 = x_t[time_steps1:].reshape(second_shape)
    l1 = x_t_plus_1[:time_steps1].reshape(first_shape)
    l2 = x_t_plus_1[time_steps1:].reshape(second_shape)

    # The first GRU starts from an all-zero state.
    i_s = np.zeros([1, cell_size])

    feed = {
        input_1: in1,
        input_2: in2,
        labels1: l1,
        labels2: l2,
        initial_state: i_s,
    }
    new_s1, new_s2 = sess.run(to_run_list, feed_dict=feed)

    # Summary statistics of the two final states.
    print(np.shape(new_s1), np.shape(new_s2))
    print(np.mean(new_s1), np.mean(new_s2))
    print(np.sum(new_s1), np.sum(new_s2))
In this model, instead of having 2 different GRUs, I created one, divided the input and labels into 2 parts as well, and used a for loop to iterate over my input dataset. After each iteration, the final state is taken and fed back into the same model as the initial state.
Note that both model1 and model2 have the very first initial state of zeros.
Model 2
import tensorflow as tf
import numpy as np
# Size passed to the GRUCell and used as the width of the state placeholder.
cell_size = 32
seq_length = 1000
# Length of the chunk fed to the GRU in each sess.run call.
time_steps = 500
# Toy data: the integers 1..seq_length, and the same sequence shifted by one
# (fed to the label placeholder further down).
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
# Graph-level seed; set before any op of the model is created.
tf.set_random_seed(123)
m_dtype = tf.float32
inputs = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="inputs")
labels = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="labels")
# State fed to dynamic_rnn inside model().
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
# NOTE(review): the two placeholders below are not referenced anywhere else in
# the visible code.
grads_initial_state = tf.placeholder(dtype=m_dtype, shape=[None, cell_size], name="prev_grads")
this_is_last_batch = tf.placeholder(dtype=tf.bool, name="this_is_last_batch")
# Build the Model 2 graph: a single GRU cell followed by one dense layer.
#   input_feat: input of the dynamic_rnn call.
#   Returns (output, new_state).
# dynamic_rnn reads the module-level `initial_state` placeholder.
# NOTE(review): the indentation of this snippet was lost, so the nesting of the
# variable scopes below cannot be determined from here -- confirm before running.
def model(input_feat):
with tf.variable_scope("GRU"):
cell = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("cell"):
# output: shape=[1, time_steps, 32]
output, new_state = tf.nn.dynamic_rnn(cell, input_feat, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("output"):
# Flatten to [-1, cell_size] so the dense layer is applied to every time step.
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
# The batch dimension is hard-coded to 1 here.
output = tf.reshape(output, shape=[1, time_steps, 1])
print(output.get_shape().as_list(), "1")
return output, new_state
# Build the graph once, then create the initializer for all of its variables.
output, new_state = model(inputs)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # 1000 // 500 = 2
    num_iterations = seq_length // time_steps
    print("num_iterations:", num_iterations)
    final_states = []

    # The original also built `to_run_list = [grads_wrt_initial_state, new_state]`
    # here. `grads_wrt_initial_state` is not defined anywhere in this script (it
    # raised NameError) and the list was never used, so it has been removed.

    # The first chunk starts from an all-zero state; every later chunk starts
    # from the final state returned for the previous chunk.
    i_s = np.zeros([1, cell_size])
    for i in range(num_iterations):
        current_xt = x_t[i * time_steps: (i + 1) * time_steps]
        current_xt_plus_1 = x_t_plus_1[i * time_steps: (i + 1) * time_steps]
        in1 = np.reshape(current_xt, newshape=(1, time_steps, 1))
        l1 = np.reshape(current_xt_plus_1, newshape=(1, time_steps, 1))

        new_s = sess.run(new_state, feed_dict={inputs: in1,
                                               labels: l1,
                                               initial_state: i_s})
        final_states.append(new_s)
        print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)

        # Carry the final state over to the next chunk.
        i_s = new_s
Finally, after printing out the statistics of new_state1 and new_state2 in model1, they were different from the new_state, after each iteration, in model2.
I would like to know how to fix this problem and why is that happening.
Edit:
I have figured out that the weight values of the GRU in the two files are different.
Now, how can I reproduce the same results in the 2 different files even after setting the random seed?
Any help is much appreciated!!!
So, to reproduce the same results in different files, tf.set_random_seed() is not enough. I figured out that we also need to set the seed for the initializers of the GRU cells as well as for the initializers of the weights in the dense layer at the output (this is at least according to my model); so the definition of the cell is now:
# GRU cell whose kernel initializer is given an explicit seed and dtype.
cell1 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=tf.glorot_normal_initializer(seed=123, dtype=m_dtype))
And for the dense layer:
# Dense output layer whose kernel initializer is given an explicit seed and dtype.
output = tf.layers.dense(output, units=1, kernel_initializer=tf.glorot_uniform_initializer(seed=123, dtype=m_dtype))
Note that any other initializer could be used as long as we set the seed and the dtype for it.
I'm new to Tensorflow. I worked in Caffe previously. I'm trying to implement http://cvlab.cse.msu.edu/pdfs/Tai_Yang_Liu_CVPR2017.pdf in Tensorflow.
I'm having trouble with variables in Tensorflow, despite having them initialized. I tried using tf.get_variable instead of tf.Variable, but this didn't work. And setting initializer=tf.contrib.layers.xavier_initializer() did nothing.
My code:
import tensorflow as tf
import sys, os
import numpy as np
global xseed
def get_model(inp, train):
    """Build two (batch-norm -> scale -> ReLU -> 3x3 convolution) stages.

    Args:
        inp: input tensor; it is reshaped to [-1, 31, 31, 1].
        train: forwarded to batch_norm as ``is_training``.

    Returns:
        The output tensor of the second convolution.
    """
    # One-pixel zero border on the two spatial axes; with the VALID 3x3
    # convolutions below this keeps the spatial size unchanged.
    one_pixel_border = [[0, 0], [1, 1], [1, 1], [0, 0]]

    # db input, 128 batch size
    with tf.name_scope('input'):
        inl = tf.reshape(inp, [-1, 31, 31, 1])

    # First stage: batchnorm, learned scalar scale/shift, relu.
    with tf.name_scope('batchnorm_scale_relu'):
        normed = tf.contrib.layers.batch_norm(inl, is_training=train)
        alpha = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="alpha")
        beta = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="beta")
        relud = tf.nn.relu(alpha * normed + beta, name="relu1")

    with tf.name_scope('conv1'):
        padded_input = tf.pad(relud, one_pixel_border, "CONSTANT")
        # weight initializer msra
        convolved = tf.layers.conv2d(
            inputs=padded_input,
            filters=128,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding="VALID",
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(seed=xseed),
        )

    # Second stage: same layout as the first, with its own variables.
    with tf.name_scope('batchnorm_scale_relu_1a'):
        normed_1a = tf.contrib.layers.batch_norm(convolved, is_training=train)
        alpha_1a = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="alpha_1a")
        beta_1a = tf.Variable(tf.truncated_normal((1,), stddev=0.1), trainable=True, name="beta_1a")
        relud_1a = tf.nn.relu(alpha_1a * normed_1a + beta_1a, name="relu1_1a")

    with tf.name_scope('conv1_1a'):
        padded_input_1a = tf.pad(relud_1a, one_pixel_border, "CONSTANT")
        # weight initializer msra
        convolved_1a = tf.layers.conv2d(
            inputs=padded_input_1a,
            filters=128,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding="VALID",
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(seed=xseed),
        )

    return convolved_1a
# Value passed as seed= to the kernel initializers inside get_model().
xseed = None
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
# NOTE: both initializer ops are created here, before get_model() is called
# below, i.e. before any of the model's variables have been added to the graph.
init_op = tf.global_variables_initializer()
init_op2 = tf.local_variables_initializer()
with tf.Session(config=tf.ConfigProto()) as sess:
# The model (and therefore its variables) is only built at this point.
m = get_model(inp, True)
sess.run(init_op)
sess.run(init_op2)
print(sess.run(tf.report_uninitialized_variables()))
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
And it reports uninitialized variables:
['BatchNorm/beta' 'BatchNorm/moving_mean' 'BatchNorm/moving_variance' 'batchnorm_scale_relu/alpha' 'batchnorm_scale_relu/beta' 'conv2d/kernel' 'conv2d/bias' 'BatchNorm_1/beta' 'BatchNorm_1/moving_mean' 'BatchNorm_1/moving_variance' 'batchnorm_scale_relu_1a/alpha_1a' 'batchnorm_scale_relu_1a/beta_1a' 'conv2d_1/kernel' 'conv2d_1/bias']
And gives me an exception when evaluating convolved tensor:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value batchnorm_scale_relu_1a/alpha_1a
[[Node: batchnorm_scale_relu_1a/alpha_1a/read = Identity[T=DT_FLOAT, _class=["loc:#batchnorm_scale_relu_1a/alpha_1a"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](batchnorm_scale_relu_1a/alpha_1a)]]
Please help me.
Solved it myself. Replaced this:
# Original version: the initializer ops are created before get_model() is called.
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
init_op = tf.global_variables_initializer()
init_op2 = tf.local_variables_initializer()
with tf.Session(config=tf.ConfigProto()) as sess:
m = get_model(inp, True)
sess.run(init_op)
sess.run(init_op2)
print(sess.run(tf.report_uninitialized_variables()))
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
With this:
# Working version: the initializer op is created after get_model() has been called.
with tf.device("cpu"):
inp = tf.placeholder(tf.float32, [None, 31, 31, 1])
with tf.Session(config=tf.ConfigProto()) as sess:
m = get_model(inp, True)
# Created and run only after the model's variables exist in the graph.
sess.run(tf.initialize_all_variables())
res = sess.run(tf.report_uninitialized_variables())
#print(res) -- outputs [] (none)
res = sess.run(m, feed_dict={ inp: np.zeros((1, 31, 31, 1))})
print(res)
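The whole point is that the initializer op is now created after get_model() has built the graph, so it covers all of the model's variables. In the original code tf.global_variables_initializer() was called before the model existed, so the op it returned had no variables to initialize. tf.initialize_all_variables() is just the older, deprecated name for tf.global_variables_initializer(); either one works as long as it is called after the variables have been defined.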
I am trying to augment the MNIST dataset. This is what I tried. Can't get any success.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# NOTE(review): `mnist` is not defined and `np` is not imported in the visible
# part of this snippet.
X = mnist.train.images
y = mnist.train.labels
# Flip every image in X_imgs left-to-right and return the results as a float32 array.
def flip_images(X_imgs):
X_flip = []
tf.reset_default_graph()
# Placeholder for a single 28x28x1 image.
X = tf.placeholder(tf.float32, shape = (28, 28, 1))
# input_d is the result of tf.reshape, i.e. a tf.Tensor rather than a NumPy array.
input_d = tf.reshape(X_imgs, [-1, 28, 28, 1])
tf_img1 = tf.image.flip_left_right(X)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# This is the line the reported TypeError points at: it iterates over the tensor input_d.
for img in input_d:
flipped_imgs = sess.run([tf_img1], feed_dict = {X: img})
X_flip.extend(flipped_imgs)
X_flip = np.array(X_flip, dtype = np.float32)
return X_flip
flip = flip_images(X)
What am I doing wrong? I can't seem to figure out.
Error:
Line: for img in input_d:
raise TypeError("'Tensor' object is not iterable.")
TypeError: 'Tensor' object is not iterable
First, note that your tf.reshape changes the type from an ndarray to a tensor. It will take an .eval() call to bring it back down. In that for loop, you are trying to iterate over a tensor (not a list or a true iterable), consider indexing numerically as in:
X = mnist.train.images
y = mnist.train.labels


def flip_images(X_imgs):
    """Flip every image in ``X_imgs`` left-to-right.

    Args:
        X_imgs: array of images; it is reshaped to [-1, 28, 28, 1].

    Returns:
        A float32 NumPy array with one flipped 28x28x1 image per input image.
    """
    X_flip = []
    tf.reset_default_graph()
    # Placeholder for a single 28x28x1 image.
    X = tf.placeholder(tf.float32, shape=(28, 28, 1))
    input_d = tf.reshape(X_imgs, [-1, 28, 28, 1])
    tf_img1 = tf.image.flip_left_right(X)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Bring the reshaped tensor back to a NumPy array with a single .eval()
        # call. Calling input_d[img_ind].eval() inside the loop would add a new
        # slice op to the graph and run the session once per image.
        images = input_d.eval()
        for img_ind in range(images.shape[0]):
            img = images[img_ind]
            flipped_imgs = sess.run([tf_img1], feed_dict={X: img})
            X_flip.extend(flipped_imgs)
    X_flip = np.array(X_flip, dtype=np.float32)
    return X_flip


flip = flip_images(X)
Let me know if this resolves your issue! Might want to set the range to a small constant for testing, this could take a while if you don't have a GPU around.
I am learning TensorFlow. To understand how to build something, I tried to copy some code from a source and execute it, but I'm hitting an error message. I tried some solutions from this website, but they did not work (I kept my attempts in comments).
"""programme 1 """
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Single-layer softmax classifier on MNIST, trained with plain gradient descent.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
X = tf.placeholder(tf.float32,[None, 28, 28, 1]) # 28 * 28 image size, 1 = one value per pixel because the images are black and white; "X" holds the values
W = tf.Variable(tf.zeros([784, 10])) # 28*28 = 784, 10 -> digits 0 to 9; "W" = weights
b = tf.Variable(tf.zeros([10])) # digits 0 to 9 to recognise; "b" = constant
init = tf.initialize_all_variables()
# model
Y = tf.nn.softmax(tf.matmul(tf.reshape(X,[-1, 784]), W) + b) # "matmul" function: matrix product; "-1": "mandatory success" (author's note)
# placeholder for the one-hot labels
Y_ = tf.placeholder(tf.float32, [None, 10])
# loss function
cross_entropy = -1 * tf.reduce_sum(Y_ * tf.log(Y)) # formula
# % of correct answers found in batch
is_correct = tf.equal(tf.argmax(Y,1),tf.argmax(Y_,1))
accuracy = tf.reduce_mean(tf.cast(is_correct,tf.float32))
# training step
optimizer = tf.train.GradientDescentOptimizer(0.003) # small step
train_step = optimizer.minimize(cross_entropy)
sess = tf.Session()
sess.run(init)
for i in range(10000):
# load a batch of images and the correct answers
batch_X, batch_Y = mnist.train.next_batch(100)
# NOTE: the batch is reshaped to (-1, 784) here, while the placeholder X above
# is declared with shape [None, 28, 28, 1].
batch_X = np.reshape(batch_X, (-1, 784))
#batch_Y = np.reshape(batch_Y, (-1, 784))
train_data = {X: batch_X, Y_: batch_Y}
# train
sess.run(train_step, feed_dict=train_data)
# NOTE(review): the two calls below pass the data as `feed=`, whereas the
# training call above uses `feed_dict=` -- confirm the keyword.
a,c = sess.run([accuracy,cross_entropy],feed = train_data)
test_data = {X:mnist.test.images, Y_:mnist.test.labels}
a,c = sess.run([accuracy,cross_entropy],feed = test_data)
the log :
Traceback (most recent call last):
File "d:\tensorflow\test1.py", line 46, in <module>
sess.run(train_step, feed_dict=train_data)
File "C:\Users\Proprietaire\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Users\Proprietaire\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1100, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (100, 784) for Tensor 'Placeholder:0', which has shape '(?, 28, 28, 1)'
2017-08-30 19:07:37.406994: W C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\35\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
line 46 is
sess.run(train_step, feed_dict=train_data)
What can I do to resolve this error?
You are getting that error because there is a mismatch between the shape of what you are feeding in and what TensorFlow is expecting. To fix the issue, you might want to reshape the data you feed to Placeholder:0, which is batch_X, to (?, 28, 28, 1). For example, you would do the following:
# Reshape the fed batch to the placeholder's 4-D layout; -1 leaves the batch dimension to be inferred.
batch_X = np.reshape(batch_X, (-1, 28, 28, 1))
You need to reshape X.
X = tf.placeholder(tf.float32 , [None ,28 , 28 , 1])
# Rebinds X to the [-1, 784] reshape of the placeholder defined on the previous line.
X = tf.reshape(X , [-1 , 784])
When you define a placeholder in TensorFlow, the shape of the input during the session should be the same as the shape of the placeholder.
In batch_X, batch_Y = mnist.train.next_batch(100), the batch_x is a 2D array of pixel values, which will have a shape of [batch_size, 28*28].
In X = tf.placeholder(tf.float32,[None, 28, 28, 1]), the input placeholder is defined to have a 4D shape of [batch_size, 28, 28, 1]
You can either 1) reshape batch_x before the feeding to TensorFlow. e.g.batch_x = np.reshape(batch_x, [-1, 28, 28, 1]) or 2) Change the shape of the placeholder. e.g. X = tf.placeholder(tf.float32,[None, 784])
I would recommend 2), since this saves you from doing any reshaping operations both in and outside of the TensorFlow graph.
I have written some simple code to try out the TensorFlow summary feature. The code is below.
import tensorflow as tf
import numpy as np
# Minimal y = x.W + b graph with a scalar summary attached to the result.
graph = tf.Graph()
with graph.as_default():
x = tf.placeholder(tf.float32, [1, 2], name='x')
W = tf.ones([2, 1], tf.float32, name='W')
b = tf.constant([1.5], dtype=tf.float32, shape=(1, 1), name='bias')
# x is [1, 2] and W is [2, 1], so the matmul result (and y_) has shape [1, 1].
y_ = tf.add(tf.matmul(x, W, name='mul'), b, name='add')
# The [1, 1] tensor y_ is passed to the scalar summary as-is.
tf.summary.scalar('y', y_)
with tf.Session(graph=graph) as session:
merged = tf.summary.merge_all()
fw = tf.summary.FileWriter("/tmp/tensorflow/logs", graph=graph)
tf.global_variables_initializer().run()
x_var = np.array([1., 1.], np.float32).reshape([1, 2])
print(x_var)
# Evaluate the merged summaries and y_ in one run, then log the summary at step 0.
summary, y = session.run([merged, y_], feed_dict={x: x_var})
fw.add_summary(summary, 0)
print(y)
fw.close()
Basically, it tries to implement y=Wx + b.
The code works if I remove all the summary-related code. But if I add the summary-related code, I get the error below:
InvalidArgumentError (see above for traceback): tags and values not the same shape: [] != [1,1] (tag 'y')
[[Node: y = ScalarSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](y/tags, add)]]
I tried in both normal python, and IPython.
Tags and values do not have the same shape: tf.summary.scalar expects a scalar value, but y_ is a matrix of shape [1, 1] (the product of the [1, 2] input x and the [2, 1] weights W, plus b). You can simply use tf.reduce_mean to reduce it to a scalar:
with graph.as_default():
    # The batch dimension is left open (None) instead of being fixed to 1.
    x = tf.placeholder(tf.float32, [None, 2], name='x')
    W = tf.ones([2, 1], tf.float32, name='W')
    b = tf.constant([1.5], dtype=tf.float32, shape=(1, 1), name='bias')

    # y_ = x.W + b
    product = tf.matmul(x, W, name='mul')
    y_ = tf.add(product, b, name='add')

    # Reduce y_ to a single value before handing it to the scalar summary.
    y_scalar = tf.reduce_mean(y_)
    tf.summary.scalar('y', y_scalar)
This will create a scalar value.