I am wondering whether TF has a way to temporarily store data during the training phase. Below is an example:
import tensorflow as tf
import numpy as np

def loss_function(values, a, b):
    N = values.shape[0]
    i = tf.constant(0)
    values_array = tf.get_variable(
        "values", values.shape, initializer=tf.constant_initializer(values),
        dtype=tf.float32)  # The temporary data solution in this example
    result = tf.constant(0, dtype=tf.float32)

    def body1(i):
        op2 = tf.assign(values_array[i, 0], 234.0)  # Here is where it should be updated. The value being assigned is actually calculated from variables a and b.
        with tf.control_dependencies([op2]):
            return i + 1

    def condition1(i):
        return tf.less(i, N)

    i = tf.while_loop(condition1, body1, [i])

    op1 = tf.assign(values_array[0, 0], 9999.0)  # Here is where it should be updated
    result = result + tf.reduce_mean(values_array)  # The final cost is calculated based on the entire values_array
    with tf.control_dependencies([op1]):
        return result

# The parameters we want to calculate in the end
a = tf.Variable(tf.random_uniform([1], 0, 700), name='a')
b = tf.Variable(tf.random_uniform([1], -700, 700), name='b')
values = np.ones([2, 4], dtype=np.float32)

# cost function
cost_function = loss_function(values, a, b)

# training algorithm
optimizer = tf.train.MomentumOptimizer(0.1, momentum=0.9).minimize(cost_function)

# initializing the variables
init = tf.global_variables_initializer()

# starting the session
sess = tf.Session()
sess.run(init)

_, training_cost = sess.run([optimizer, cost_function])
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="values")[0].eval(session=sess))
Currently, what I get from the console is:
[[ 0.98750001 0.98750001 0.98750001 0.98750001]
[ 0.98750001 0.98750001 0.98750001 0.98750001]]
What I expected to get from this example (if the temporary data can be printed out) is:
[[ 9999.0 1.0 1.0 1.0]
[ 234.0 1.0 1.0 1.0]]
Overall, what I want is that the cost function calculates a temporary 2D array based on the input numpy 2D array and parameters a and b. Then, the final cost is calculated from the temporary 2D array. But I think using a TF variable as the temporary storage is probably not correct...
Any help?
Thanks!
Your while loop never runs because i is never used again. Use tf.control_dependencies to make it run.
Also, you are adding the mean of values_array, when you seem to just want to add the array as-is. Get rid of reduce_mean to get your desired output.
op1 = tf.assign(values_array[0, 0], 9999.0) wasn't being executed because no op was created inside the following control_dependencies context. Move an op into that context to ensure that the assignment is actually included in the graph.
def loss_function(values, a, b):
    N = values.shape[0]
    i = tf.constant(0)
    values_array = tf.get_variable(
        "values", values.shape, initializer=tf.constant_initializer(values),
        dtype=tf.float32, trainable=False)
    temp_values_array = tf.get_variable(
        "temp_values", values.shape, dtype=tf.float32)
    # copy previous values for calculations & gradients
    temp_values_array = tf.assign(temp_values_array, values_array)
    result = tf.constant(0, dtype=tf.float32)

    def body1(i):
        op2 = tf.assign(temp_values_array[i, 0], 234.0)  # Here is where it should be updated. The value being assigned is actually calculated from variables a and b.
        with tf.control_dependencies([op2]):
            return [i + 1]

    def condition1(i):
        return tf.less(i, N)

    i = tf.while_loop(condition1, body1, [i])

    with tf.control_dependencies([i]):
        op1 = tf.assign(temp_values_array[0, 0], 9999.0)  # Here is where it should be updated
    with tf.control_dependencies([op1]):
        result = result + temp_values_array  # The final cost is calculated based on the entire values_array
        # save the calculations for later
        op3 = tf.assign(values_array, temp_values_array)
    with tf.control_dependencies([op3]):
        return tf.identity(result)
Also, you are fetching optimizer in the same sess.run call, so the non-assigned elements of your output will already have been updated by the optimizer and will be smaller than you expect. Your results would be closer if you did:
training_cost = sess.run([cost_function])
_ = sess.run([optimizer])
This ensures that you don't optimize before getting the result of cost_function.
I have an LSTM network that outputs a decision via tf.tanh. Then I use tf.sign to get a discrete action of 1, -1, or 0. However, I do not want the model to take a non-zero action when the output of tf.tanh is small, so I want to apply a threshold T before tf.sign so that only outputs larger than T or smaller than -T are transformed to +1 or -1.
Example tensor output from tf.tanh:
logits = [0.6,0.4,-0.6,-0.4]
threshold = 0.5
Desired tensor output while preserving the gradient:
action = [1,0,-1,0]
It's sort of like a double-sided ReLU with a threshold, but I don't know how to do it inside a TensorFlow graph. Any help is appreciated.
Here is something that works for me. The code is pretty self-explanatory if you look at the variable names. Let me know if something is unclear.
import tensorflow as tf

def suppress_range(x, a, b):
    """Sets all the elements in the range (a, b] to 0."""
    assert a < b, "a must be less than b"
    significant = tf.logical_or(x <= a, x > b)
    zero = tf.zeros_like(x)
    return tf.where(significant, x, zero)

def main():
    logits = tf.placeholder(tf.float32)
    output = tf.sign(suppress_range(logits, -0.5, 0.5))
    with tf.Session() as sess:
        x = [[0.6, 0.4, -0.6, -0.4], [0.5, 0.1, -0.7, -0.2]]
        print(sess.run(output, feed_dict={logits: x}))

if __name__ == '__main__':
    main()
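As a side note (an equivalent formulation, not part of the original answer), the masking inside suppress_range can also be written with a cast instead of tf.where:

return x * tf.cast(tf.logical_or(x <= a, x > b), x.dtype)  # 1.0 where significant, 0.0 elsewhere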
I have some TensorFlow code that runs well and accurately, but it occupies a lot of memory. Specifically, in my code, I have a for-loop that looks something like this:
K = 10
myarray1 = tf.placeholder(tf.float32, shape=[None, 5, 5])  # shape = [None, 5, 5]
myarray2 = tf.Variable(np.zeros([K, 5, 5]), dtype=tf.float32)
vals = []
for k in range(0, K):
    tmp = tf.reduce_sum(myarray1 * myarray2[k], axis=(1, 2))
    vals.append(tmp)
result = tf.reduce_min(tf.stack(vals, axis=-1), axis=-1)
Unfortunately, that takes a lot of memory as K gets to be big in my application. So, I want to have a better way of doing it. For example, in numpy/python, you would just keep track of the minimum value as you iterate through the loops, and update it on each iteration. It seems like I could use tf.assign, as:
K = 10
myarray1 = tf.placeholder(tf.float32, shape=[None, 5, 5])  # shape = [None, 5, 5]
myarray2 = tf.Variable(np.zeros([K, 5, 5]), dtype=tf.float32)
min_value = tf.Variable(myarray1, validate_shape=False, trainable=False)
for k in range(0, K):
    tmp = myarray1 * myarray2[k]
    idx = tf.where(tmp < min_value)
    tf.scatter_nd_assign(min_value, idx, tmp[idx], use_locking=True)
result = min_value
While this code builds the graph (when validate_shape=False), it fails to run because it complains that min_value has not been initialized. The issue is, when I run the initializer as:
sess.run(tf.global_variables_initializer())
or
sess.run(tf.variables_initializer(tf.trainable_variables()))
it complains that I am not feeding in a placeholder. This actually makes sense because the definition of min_value depends on myarray1 in the graph.
What I would actually want to do is define a dummy variable that doesn't depend on myarray1's values, but does match its shape. I would like these values to be initialized as some number (in this case something large is fine), as I will manually ensure these are overwritten in the network.
Note: as far as I know, you currently cannot define a variable with an unknown shape unless you feed in another variable of the desired shape and set validate_shape=False. Maybe there is another way?
Any help / suggestions appreciated.
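For what it's worth, here is a minimal sketch (not the original poster's code) of the running-minimum idea described above that avoids the extra Variable entirely, assuming the same placeholder and weight shapes as in the snippets above. tf.foldl iterates over the K slices at run time and only carries the current elementwise minimum:

import numpy as np
import tensorflow as tf

K = 10
myarray1 = tf.placeholder(tf.float32, shape=[None, 5, 5])
myarray2 = tf.Variable(np.zeros([K, 5, 5]), dtype=tf.float32)

# sketch: start from the k = 0 term, then fold the remaining K - 1 slices into a running minimum
initial = tf.reduce_sum(myarray1 * myarray2[0], axis=(1, 2))
result = tf.foldl(
    lambda acc, w: tf.minimum(acc, tf.reduce_sum(myarray1 * w, axis=(1, 2))),
    myarray2[1:],
    initializer=initial)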
Try this; if you don't know how to feed a placeholder, read the tutorial.
K = 10
myarray1 = tf.placeholder(tf.float32, shape=[None, 5, 5])  # shape = [None, 5, 5]
################### ADD THIS ####################
sess = tf.Session()
FOO = sess.run(myarray1, feed_dict={myarray1: YOURDATA})  # get myarray1's value
# replace all myarray1 below with FOO
##################################################
myarray2 = tf.Variable(np.zeros([K, 5, 5]), dtype=tf.float32)
min_value = tf.Variable(FOO, validate_shape=False, trainable=False)
for k in range(0, K):
    tmp = FOO * myarray2[k]
    idx = tf.where(tmp < min_value)
    tf.scatter_nd_assign(min_value, idx, tmp[idx], use_locking=True)
result = min_value
-------above new 15.April.2018------
Since I don't know your input data, I will walk through some steps.
Step_1: make a place for input data
x = tf.placeholder(tf.float32, shape=[None,2])
Step_2: Get batches of data
batch_x=[[1,2],[3,4]] #example
# since x has shape [None, 2], this batch_x corresponds to a batch size of 2
Step_3: make a session
sess=tf.Session()
If you have variables, add the following code to initialize them before the calculation:
init = tf.global_variables_initializer()
sess.run(init)
Step_4: feed the placeholder and run
your_placeholder_dict = {x: batch_x}
sess.run(x, feed_dict=your_placeholder_dict)
Always feed your placeholders so they have the values needed for the calculation.
There is a very helpful PDF called "TensorFlow and Deep Learning without a PhD"; you can also find it on YouTube under the same title.
EDIT: Solved. It was the stupidity of using different training examples for the gradients vs. the optimizer update.
OK this has me totally stumped.
I have a parameter vector, let's call it w.
w = [-1.34554319, 0.86998659, 0.52366061, 2.6723526 , 0.18756115,
0.16547382]
I use compute_gradients to figure out the gradients to w, it tells me the gradient is:
dw = [-0.0251517 , 0.88050844, 0.80362262, 0.14870925, 0.10019595,
1.33597524]
My learning rate is 0.1. Ergo:
w_new = w - 0.1 * dw
w_new = [-1.34302802, 0.78193575, 0.44329835, 2.65748168, 0.17754156,
0.0318763 ]
You can check the math yourself but it should check out. However, if I run the tensorflow code and evaluate the value of w_new, I get:
w_new_tf = [-1.27643258, 0.9212401 , 0.09922112, 2.55617223, 0.38039282, 0.15450044]
I honestly have no idea why it's doing this.
Edit: Let me provide you the exact code to show you why it doesn't work. It might be due to indexing, as you will see.
Here is the boilerplate starter code.
import numpy as np
import tensorflow as tf
max_item = 331922
max_user = 1581603
k = 6
np.random.seed(0)
_item_biases = np.random.normal(size=max_item)
np.random.seed(0)
_latent_items = np.random.normal(size=(max_item, k))
np.random.seed(0)
_latent_users = np.random.normal(size=(max_user, k))
item_biases = tf.Variable(_item_biases, name='item_biases')
latent_items = tf.Variable(_latent_items, name='latent_items')
latent_users = tf.Variable(_latent_users, name='latent_users')
input_data = tf.placeholder(tf.int64, shape=[3], name='input_data')
Here is the custom objective function.
def objective(data, lam, item_biases, latent_items, latent_users):
    with tf.name_scope('indices'):
        user = data[0]
        rated_item = data[1]
        unrated_item = data[2]
    with tf.name_scope('input_slices'):
        rated_item_bias = tf.gather(item_biases, rated_item, name='rated_item_bias')
        unrated_item_bias = tf.gather(item_biases, unrated_item, name='unrated_item_bias')
        rated_latent_item = tf.gather(latent_items, rated_item, name='rated_latent_item')
        unrated_latent_item = tf.gather(latent_items, unrated_item, name='unrated_latent_item')
        latent_user = tf.gather(latent_users, user, name='latent_user')
    with tf.name_scope('bpr_opt'):
        difference = tf.subtract(rated_item_bias, unrated_item_bias, 'bias_difference')
        ld = tf.subtract(rated_latent_item, unrated_latent_item, 'latent_item_difference')
        latent_difference = tf.reduce_sum(tf.multiply(ld, latent_user), name='latent_difference')
        total_difference = tf.add(difference, latent_difference, name='total_difference')
    with tf.name_scope('obj'):
        obj = tf.sigmoid(total_difference, name='activation')
    with tf.name_scope('regularization'):
        reg = lam * tf.reduce_sum(rated_item_bias**2)
        reg += lam * tf.reduce_sum(unrated_item_bias**2)
        reg += lam * tf.reduce_sum(rated_latent_item**2)
        reg += lam * tf.reduce_sum(unrated_latent_item**2)
        reg += lam * tf.reduce_sum(latent_user**2)
    with tf.name_scope('final'):
        final_obj = -tf.log(obj) + reg
    return final_obj
Here is some boilerplate code to actually minimize the function. At two points I do a sess.run call on the tf.Variables to see how the values have changed.
obj = objective(input_data, 0.05, item_biases, latent_items, latent_users)
optimizer = tf.train.GradientDescentOptimizer(0.1)
trainer = optimizer.minimize(obj)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

citem_biases, clatent_items, clatent_users = \
    sess.run([item_biases, latent_items, latent_users])
print(clatent_users[1490103])  # [-1.34554319, 0.86998659, 0.52366061, 2.6723526, 0.18756115, 0.16547382]

cvalues = sess.run([trainer, obj], feed_dict={input_data: [1490103, 278755, 25729]})

citem_biases, clatent_items, clatent_users = \
    sess.run([item_biases, latent_items, latent_users])
print(clatent_users[1490103])  # [-1.27643258, 0.9212401, 0.09922112, 2.55617223, 0.38039282, 0.15450044]
Finally, here is some code to actually get the gradients. These gradients are double-checked against hand-derived gradients, so they are correct. Sorry for the ugliness of the code; it's a blatant copy & paste of another SO answer:
grads_and_vars = optimizer.compute_gradients(obj, tf.trainable_variables())
sess = tf.Session()
sess.run(tf.global_variables_initializer())
gradients_and_vars = sess.run(grads_and_vars, feed_dict={input_data:[1490103, 278830, 140306]})
print (gradients_and_vars[2][0]) #[-0.0251517 , 0.88050844, 0.80362262, 0.14870925, 0.10019595, 1.33597524]
You did not provide complete code, but I ran a similar example and it did work for me as it should. Here is my code:
with tf.Graph().as_default():
    ph = tf.constant([1., 2., 3.])
    v = tf.get_variable('v', (3,))
    loss = tf.square(ph - v)
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    trainer = optimizer.minimize(loss)
    gradients = optimizer.compute_gradients(loss)[0][0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        grad_mat = sess.run(gradients)
        v_0 = sess.run(v)
        sess.run(trainer)
        v_1 = sess.run(v)
        print(grad_mat)
        print(v_0)
        print(v_0 - 0.1 * grad_mat)
        print(v_1)
Here is the output (obviously it will be a bit different each time, due to the random initialization of get_variable):
[-2.01746035 -5.61006117 -6.7561307 ]
[-0.00873017 -0.80503058 -0.37806535]
[ 0.19301586 -0.24402446 0.29754776]
[ 0.19301586 -0.24402446 0.29754776]
The last two lines are identical, as you would expect.
Problem: I was feeding different inputs for gradients vs. the trainer.
Solution: Feed the same input.
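In code, a minimal sketch of that fix (reusing trainer and grads_and_vars from the question, with one of its feed triples purely for illustration, and assuming a single Session holds both ops):

feed = {input_data: [1490103, 278755, 25729]}
gradient_values = sess.run(grads_and_vars, feed_dict=feed)  # gradients for this exact triple
sess.run(trainer, feed_dict=feed)                           # apply the update for the same triple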
I want to create a function batch_rot90(batch_of_images) using TensorFlow's tf.image.rot90(); the latter only takes one image at a time, while the former should take a batch of n images at once (shape = [n, x, y, f]).
So naturally, one should just iterate through all images in the batch and rotate them one by one. In NumPy this would look like:
def batch_rot90(batch):
    for i in range(batch.shape[0]):
        batch[i] = np.rot90(batch[i, :, :, :])
    return batch
How is this done in TensorFlow?
Using tf.while_loop I got this far:
batch = tf.placeholder(tf.float32, shape=[2, 256, 256, 4])

def batch_rot90(batch, k, name=''):
    i = tf.constant(0)

    def cond(batch, i):
        return tf.less(i, tf.shape(batch)[0])

    def body(im, i):
        im[i] = tf.image.rot90(im[i], k)
        i = tf.add(i, 1)
        return im, i

    r = tf.while_loop(cond, body, [batch, i])
    return r
But the assignment to im[i] is not allowed, and I'm confused about what is returned with r.
I realize there might be a workaround for this particular case using tf.batch_to_space() but I believe it should be possible with a loop of some kind too.
Updated Answer:
x = tf.placeholder(tf.float32, shape=[2, 3])

def cond(batch, output, i):
    return tf.less(i, tf.shape(batch)[0])

def body(batch, output, i):
    output = output.write(i, tf.add(batch[i], 10))
    return batch, output, i + 1

# TensorArray is a data structure that supports dynamic writing
output_ta = tf.TensorArray(dtype=tf.float32,
                           size=0,
                           dynamic_size=True,
                           element_shape=(x.get_shape()[1],))

_, output_op, _ = tf.while_loop(cond, body, [x, output_ta, 0])
output_op = output_op.stack()

with tf.Session() as sess:
    print(sess.run(output_op, feed_dict={x: [[1, 2, 3], [0, 0, 0]]}))
I think you should consider using tf.scatter_update to update one image in the batch, instead of using batch[i] = .... In your case, I suggest changing the first line of body to:
tf.scatter_update(batch, i, tf.image.rot90(batch[i], k))
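For context (a minimal sketch, not from the original answer): tf.scatter_update only works on variables, not on placeholders, so the images would have to live in a tf.Variable for this suggestion to apply, e.g.:

# hypothetical: the images are held in a non-trainable Variable instead of a placeholder
batch_var = tf.Variable(tf.zeros([2, 256, 256, 4]), trainable=False)
i = tf.constant(0)
rotated = tf.expand_dims(tf.image.rot90(batch_var[i], k=1), 0)  # shape [1, 256, 256, 4]
update_op = tf.scatter_update(batch_var, [i], rotated)          # write the rotated image back at index i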
There is a map function in TF that will work:
def batch_rot90(batch, k, name=''):
    fun = lambda x: tf.image.rot90(x, k=k)
    return tf.map_fn(fun, batch)
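For example, hypothetical usage with the placeholder defined in the question:

rotated_batch = batch_rot90(batch, k=1)  # rotates every [256, 256, 4] image in the [2, 256, 256, 4] batch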
I am trying to produce a very simple example combining TensorArray and while_loop:
# 1000 sequences of length 100
matrix = tf.placeholder(tf.int32, shape=(100, 1000), name="input_matrix")
matrix_rows = tf.shape(matrix)[0]
ta = tf.TensorArray(tf.float32, size=matrix_rows)
ta = ta.unstack(matrix)
init_state = (0, ta)
condition = lambda i, _: i < n
body = lambda i, ta: (i + 1, ta.write(i,ta.read(i)*2))
# run the graph
with tf.Session() as sess:
    (n, ta_final) = sess.run(tf.while_loop(condition, body, init_state),
                             feed_dict={matrix: tf.ones(tf.float32, shape=(100, 1000))})
    print(ta_final.stack())
But I am getting the following error:
ValueError: Tensor("while/LoopCond:0", shape=(), dtype=bool) must be from the same graph as Tensor("Merge:0", shape=(), dtype=float32).
Does anyone have an idea what the problem is?
There are several things in your code to point out. First, you don't need to unstack the matrix into the TensorArray to use it inside the loop; you can safely reference the matrix Tensor inside the body and index it using matrix[i] notation. Another issue is the different data types between your matrix (tf.int32) and the TensorArray (tf.float32); based on your code you're multiplying the matrix ints by 2 and writing the result into the array, so it should be int32 as well. Finally, when you wish to read the final result of the loop, the correct operation is TensorArray.stack(), which is what you need to run in your session.run call.
Here's a working example:
import numpy as np
import tensorflow as tf
# 1000 sequence in the length of 100
matrix = tf.placeholder(tf.int32, shape=(100, 1000), name="input_matrix")
matrix_rows = tf.shape(matrix)[0]
ta = tf.TensorArray(dtype=tf.int32, size=matrix_rows)
init_state = (0, ta)
condition = lambda i, _: i < matrix_rows
body = lambda i, ta: (i + 1, ta.write(i, matrix[i] * 2))
n, ta_final = tf.while_loop(condition, body, init_state)
# get the final result
ta_final_result = ta_final.stack()
# run the graph
with tf.Session() as sess:
    # print the output of ta_final_result
    print(sess.run(ta_final_result, feed_dict={matrix: np.ones(shape=(100, 1000), dtype=np.int32)}))