Sorry if the title isn't very clear... I'm trying to solve for the value of "w" in the following problem with Tensorflow:
Y = X*B(w) + e
where Y is a 22x5 matrix, X is a 22x3 matrix, and B(w) is a 3x5 matrix with the following structure:
B = [[1, 1, 1, 1, 1],
[exp(-3w), exp(-6w), exp(-12w), exp(-24w), exp(-36w)],
[3*exp(-3w), 6*exp(-6w), 12*exp(-12w), 24*exp(-24w), 36*exp(-36w)]]
Here's my code:
# Parameters
learning_rate = 0.01
display_step = 50
tolerance = 0.0000000000000001
# Training Data
Y_T = df.values
X_T = factors.values
X = tf.placeholder("float32", shape = (22, 3))
Y = tf.placeholder("float32", shape = (22, 5))
w = tf.Variable(1.0, name="w")
def slope_loading(q):
    return tf.exp(tf.multiply(tf.negative(q), w))

def curve_loading(q):
    return tf.multiply(w, tf.exp(tf.multiply(tf.negative(q), w)))
B = tf.Variable([[1.0, 1.0, 1.0, 1.0, 1.0],
[slope_loading(float(x)) for x in [3, 6, 12, 24, 36]],
[curve_loading(float(x)) for x in [3, 6, 12, 24, 36]]])
pred = tf.matmul(X,B)
cost = tf.matmul(tf.transpose(Y-pred), (Y-pred))/22
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    # Set initial values for weights
    sess.run(init)

    # Set initial values for the error tolerance
    tol = abs(sess.run(cost, feed_dict={X: X_T, Y: Y_T})[0][0])

    iteration = 0
    while tol > tolerance:
        c_old = sess.run(cost, feed_dict={X: X_T, Y: Y_T})[0][0]
        sess.run(optimizer, feed_dict={X: X_T, Y: Y_T})
        c_new = sess.run(cost, feed_dict={X: X_T, Y: Y_T})[0][0]
        tol = abs(c_new - c_old)

        iteration = iteration + 1
        if iteration % display_step == 0:
            print("Iteration= ", iteration, "Gain= ", tol)

    training_cost = sess.run(cost, feed_dict={X: X_T, Y: Y_T})
But I'm getting the error "FailedPreconditionError (see above for traceback): Attempting to use uninitialized value w..."
I'm guessing this has to do with how I'm constructing B and passing it along to the cost function, but I'm too new to TensorFlow to see what I'm doing wrong.
Any help?
You can't use one variable (here, w) to define the initial value of another variable: TensorFlow ends up trying to initialize B by reading w before w itself has been initialized, which is the FailedPreconditionError you see. A better way to construct B is to build it as a plain tensor that depends on w, like this:
ones = tf.ones(5)                                  # first row of B
vals = tf.constant([3.0, 6.0, 12.0, 24.0, 36.0])   # the q values
slopes = slope_loading(vals)                       # second row: exp(-q*w), elementwise
curves = curve_loading(vals)                       # third row
B = tf.stack([ones, slopes, curves])               # 3x5 tensor that depends on w
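For example (a rough sketch reusing the placeholders from the question, with a scalar mean-squared-error cost just for illustration), the cost is now differentiable with respect to w, so w is the only variable the optimizer needs to initialize and update:
pred = tf.matmul(X, B)                       # B is an ordinary tensor built from w
cost = tf.reduce_mean(tf.square(Y - pred))   # scalar MSE, for illustration only
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

grad_w = tf.gradients(cost, w)[0]            # not None: gradients flow back to w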
I am training an autoencoder by feeding it 2 placeholders that hold the following:
x1 = [x1]
X = [x1,x2,x3...xn]
It holds that:
y1 = W*x1 + b_encoding1
Therefore, I have a variable named b_encoder1 (the b)
(When I print it I get: <tf.Variable 'b_encoder1:0' shape=(10,) dtype=float32_ref>)
But it also holds that:
Y = W*X + b_encoding1
The size of the second b_encoding1 has to be (10, n) instead of (10,). How can I augment it and pass it in TensorFlow?
Y = tf.compat.v1.nn.xw_plus_b(X, W1, b_encoder1, name='Y')
The whole code looks like this:
x1 = tf.compat.v1.placeholder( tf.float32, [None,input_shape], name = 'x1')
X = tf.compat.v1.placeholder( tf.float32, [None,input_shape,sp], name = 'X')
W1 = tf.Variable(tf.initializers.GlorotUniform()(shape=[input_shape,code_length]),name='W1')
b_encoder1 = tf.compat.v1.get_variable(name='b_encoder1',shape=[code_length],initializer=tf.compat.v1.initializers.zeros(), use_resource=False)
K = tf.Variable(tf.initializers.GlorotUniform()(shape=[code_length,code_length]),name='K')
b_decoder1 = tf.compat.v1.get_variable(name='b_decoder1',shape=[input_shape],initializer=tf.compat.v1.initializers.zeros(), use_resource=False)
y1 = tf.compat.v1.nn.xw_plus_b(x1, W1, b_encoder1, name='y1')
Y = tf.compat.v1.nn.xw_plus_b(X, W1, b_encoder1, name='Y')
I also declare the loss function and so on and then train with:
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for epoch_i in range(epochs):
        for batch_i in range(number_of_batches):
            batch_data = getBatch(shuffled_data, batch_i, batch_size)
            sess.run(optimizer, feed_dict={x1: batch_data[:,:,0], X: batch_data})
        train_loss = sess.run(loss, feed_dict={x1: aug_data[:,:,0], X: aug_data})
        print(epoch_i, train_loss)
You can consider X as a batch of x. X can take in an arbitrary number of samples:
import tensorflow as tf
import numpy as np
X = tf.placeholder(shape=(None, 100), dtype=tf.float32)
W = tf.get_variable('kernel', [100,10])
b = tf.get_variable('bias',[10])
Y = tf.nn.xw_plus_b(X, W,b, name='Y')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # tf version < 1.13
    out = sess.run(Y, {X: np.random.rand(128, 100)})  # here n=128
Note that the bias b is still 10-dimensional regardless of the value of n.
Please try:
b_encoding1 = tf.expand_dims(b_encoding1, axis = 1)
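Here is a standalone toy sketch (small shapes, not the question's actual variables) of what the expand_dims buys you: a (10, 1) bias broadcasts across all n columns of a (10, n) matrix, whereas a (10,) bias would not line up with the last axis.
import tensorflow as tf

WX = tf.zeros((10, 5))                    # stands in for W*X with n = 5 samples
b = tf.range(10, dtype=tf.float32)        # the (10,) bias
out = WX + tf.expand_dims(b, axis=1)      # (10, 1) broadcasts across the n columns

with tf.Session() as sess:
    print(sess.run(tf.shape(out)))        # [10  5]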
I'm running across a very weird issue and was hoping to get help from someone who might be a bit more familiar with this. I'm attempting a basic LSTM to do some binary classification with the following code:
class FakeData(object):
    def __init__(self, n):
        self.x = np.random.randint(4, size=(n, 90, 4))
        blah = np.random.randint(2, size=(n))
        self.y = np.zeros((n, 2))
        self.y[:, 0] = blah
        self.y[:, 1] = 1 - blah
        self.mask = np.arange(n)
        self.cnt = 0
        self.n = n

    def getdata(self, n):
        if self.cnt + n > self.n:
            np.random.shuffle(self.mask)
            self.cnt = 0
        mask = self.mask[self.cnt : self.cnt + n]
        return self.x[mask], self.y[mask]

n_data = 10000
batch_size = 10
fd = FakeData(n_data)
n_units = 200
n_classes = 2

x = tf.placeholder(tf.float32, shape=[None, 90, 4])
y_ = tf.placeholder(tf.float32, shape=[None, n_classes])
dropout = tf.placeholder(tf.float32)

w_out = tf.Variable(tf.truncated_normal([n_units, n_classes]))
b_out = tf.Variable(tf.truncated_normal([n_classes]))

lstm = tf.contrib.rnn.LSTMCell(n_units)
cell = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=1.0 - dropout)

new_x = tf.unstack(x, 90, 1)
new_x = tf.Print(new_x, [tf.shape(new_x)], message='newx is: ')
output, state = tf.nn.dynamic_rnn(cell, new_x, dtype=tf.float32)
output = tf.Print(output, [tf.shape(output)], message='output is: ')
logits = tf.matmul(output[-1], w_out) + b_out
logits = tf.Print(logits, [tf.shape(logits)], message='logits is: ')
preds = tf.nn.softmax(logits)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=y_))
training = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
correct = tf.equal(tf.argmax(preds, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10):
        batch_x, batch_y = fd.getdata(batch_size)
        sess.run([training], feed_dict={x: batch_x, y_: batch_y, dropout: 0})
        if i % 100 == 0:
            print "Accuracy {}".format(accuracy.eval(feed_dict={x: batch_x,
                                                                y_: batch_y, dropout: 0}))
The specific question I have is, for some reason, when I run the code without the tf.Print lines, I get some sort of weird shape transformation error
ValueError: Dimension must be 2 but is 3 for 'transpose' (op: 'Transpose') with shapes: [?,4], [3].
on line
output, state = tf.nn.dynamic_rnn(cell, new_x, dtype=tf.float32)
However, when I include the tf.Print lines, it correctly logs the shapes and is able to run the whole session. Am I missing something?
For clarity, the shapes should be:
input: n x 90 x 4
new_x: 90 x n x 4
output: 90 x n x 200
logits: n x 2
Adding the answer here in case anyone else runs across this problem in the future.
Turns out, a lot of old RNN examples floating around use unstack. However, that turns the input into a list of tensors, which dynamic_rnn cannot take as input (the list form is what static_rnn expects). The tf.Print call happened to convert the list of 2D tensors back into a single 3D tensor, which is why the graph built and ran when the print lines were present. The solution is to reorder the dimensions without unstacking, e.g.:
new_x = tf.transpose(x, perm=(1, 0, 2)) (thanks rvinas)
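As a rough sketch of how this fits the rest of the graph above, assuming you keep the time-major layout from the expected shapes (in that case dynamic_rnn needs time_major=True, since its default expects batch-major input):
new_x = tf.transpose(x, perm=(1, 0, 2))              # [n, 90, 4] -> [90, n, 4]
output, state = tf.nn.dynamic_rnn(cell, new_x, dtype=tf.float32,
                                  time_major=True)   # output: [90, n, 200]
logits = tf.matmul(output[-1], w_out) + b_out        # last time step -> [n, 2]
Alternatively, feed x unchanged (batch-major, the default) and take output[:, -1, :] as the last time step.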
I am new to TensorFlow and still have trouble understanding how it works. I have looked at some examples but am still not sure. I am trying to print the predictions and the accuracy.
I have this code:
def linear_function(x, w, b):
    y_est = tf.add(tf.matmul(w, x), b)
    y_est = tf.reshape(y_est, [])
    return y_est

def initialize_parameters():
    W = tf.get_variable('W', [1, num_of_features],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable("b1", [1, 1], initializer=tf.zeros_initializer())
    return W, b
if __name__ == '__main__':
    trainSetX, trainSetY = utils.load_train_set(num_of_examples)

    # create placeholders & variables
    X = tf.placeholder(tf.float32, shape=(num_of_features,))
    X_reshaped = tf.reshape(X, [num_of_features, 1])
    y = tf.placeholder(tf.float32, shape=())
    W, b = initialize_parameters()

    # prediction
    y_estim = linear_function(X_reshaped, W, b)
    y_pred = tf.sigmoid(y_estim)

    # set the optimizer
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)
    loss_mean = tf.reduce_mean(loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=alpha).minimize(loss_mean)

    # training phase
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for idx in range(num_of_examples):
            cur_x, cur_y = trainSetX[idx], trainSetY[idx]
            _, c = sess.run([optimizer, loss_mean], feed_dict={X: cur_x, y: cur_y})
So, now I want to actually read the values of y_pred and calculate the accuracy.
In some other sources I saw people adding these lines inside with tf.Session() as sess:
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={X: trainSetX.T, y: trainSetY}))
Clearly, it does not work for me, because my trainSetX has all the examples, while X is a placeholder for only 1 example at a time. I have tried to add correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) and modify the training loop like this:
for idx in range(num_of_examples):
    cur_x, cur_y = trainSetX[idx], trainSetY[idx]
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    _, c, acc = sess.run([optimizer, loss_mean, correct_prediction], feed_dict={X: cur_x, y: cur_y})
But it just gives the following error for ArgMax (why?):
InvalidArgumentError (see above for traceback): Expected dimension in the range [0, 0), but got 1
[[Node: ArgMax_1 = ArgMax[T=DT_FLOAT, Tidx=DT_INT32, output_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_Placeholder_1_0_1, ArgMax/dimension)]]
I am studying TensorFlow and ran into some problems. I want to minimize the loss function while approximating 2x + 2z - 3t = y (to get values a, b, c where a=2, b=2, c=-3), but it doesn't work. Where is my mistake?
This is my output:
a: [ 0.51013279] b: [ 0.51013279] c: [ 1.00953674] loss: 2.72952e+10
I need a:2 b:2 c:-3 and loss close to 0
import tensorflow as tf
import numpy as np
a = tf.Variable([1], dtype=tf.float32)
b = tf.Variable([1], dtype=tf.float32)
c = tf.Variable([0], dtype=tf.float32)
x = tf.placeholder(tf.float32)
z = tf.placeholder(tf.float32)
t = tf.placeholder(tf.float32)
linear_model = a * x + b * z + c * t
y = tf.placeholder(tf.float32)
loss = tf.reduce_sum(tf.square(linear_model - y)) # sum of the squares
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
x_train = np.arange(0, 5000, 1)
z_train = np.arange(0, 10000, 2)
t_train = np.arange(0, 5000, 1)
y_train = list(map(lambda x, z, t: 2 * x + 2 * z - 3 * t, x_train, z_train,
t_train))
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(10000):
    sess.run(train, {x: x_train, z: z_train, t: t_train, y: y_train})

curr_a, curr_b, curr_c, curr_loss = sess.run([a, b, c, loss],
                                             {x: x_train, z: z_train, t: t_train, y: y_train})
print("a: %s b: %s c: %s loss: %s" % (curr_a, curr_b, curr_c, curr_loss))
I changed Maxim's code a bit to see the values of a, b, c, like this:
_, loss_val, curr_a, curr_b, curr_c, model_val = sess.run(
    [optimizer, loss, a, b, c, linear_model],
    {x: x_train, z: z_train, t: t_train, y: y_train})
So my output is:
10 2.04454e-11 1.83333 0.666667 -0.166667
20 2.04454e-11 1.83333 0.666667 -0.166667
30 2.04454e-11 1.83333 0.666667 -0.166667
I expected a=2,b=2,c=-3
First up, there is no single solution, so the optimizer can converge to any one of many minima. In fact, your training data is collinear (z = 2x and t = x), so y is simply 3x, and any (a, b, c) with a + 2b + c = 3 fits it exactly; the values you got (1.83333, 0.666667, -0.166667) satisfy this. The exact result greatly depends on the initialization of your variables.
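A quick check of that, in plain numpy with the values from your output:
import numpy as np

a, b, c = 1.83333, 0.666667, -0.166667   # values the optimizer converged to
x = np.arange(0, 50, 1.0)
z, t = 2 * x, x                          # collinear training data: z = 2x, t = x
y_true = 2 * x + 2 * z - 3 * t           # = 3x
y_fit = a * x + b * z + c * t            # = (a + 2b + c) * x = 3x as well
print(np.max(np.abs(y_true - y_fit)))    # ~0, up to rounding of the printed values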
Short answer concerning your bug: be careful with the learning rate. Check out my version of your code:
a = tf.Variable(2, dtype=tf.float32)
b = tf.Variable(1, dtype=tf.float32)
c = tf.Variable(0, dtype=tf.float32)

x = tf.placeholder(shape=[None, 1], dtype=tf.float32)
z = tf.placeholder(shape=[None, 1], dtype=tf.float32)
t = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y = tf.placeholder(shape=[None, 1], dtype=tf.float32)

linear_model = a * x + b * z + c * t
loss = tf.reduce_mean(tf.square(linear_model - y))  # mean of the squares
optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(loss)

n = 50
x_train = np.arange(0, n, 1).reshape([-1, 1])
z_train = np.arange(0, 2*n, 2).reshape([-1, 1])
t_train = np.arange(0, n, 1).reshape([-1, 1])
y_train = np.array(list(map(lambda x, z, t: 2 * x + 2 * z - 3 * t,
                            x_train, z_train, t_train))).reshape([-1, 1])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(101):
        _, loss_val = sess.run([optimizer, loss], {x: x_train, z: z_train, t: t_train, y: y_train})
        if i % 10 == 0:
            a_val, b_val, c_val = sess.run([a, b, c])
            print('iteration %2i, loss=%f a=%.5f b=%.5f c=%.5f' % (i, loss_val, a_val, b_val, c_val))
If you run it, you'll notice that it converges very fast, in less than 10 iterations. However, if you increase the training size n from 50 to 75, the model is going to diverge. Decreasing the learning rate to 0.00001 will make it converge again, though not as fast as before. The more data you push to the optimizer, the more important an appropriate learning rate becomes.
You've tried a training size of 5000: I can't even imagine how small the learning rate would have to be to process that many points at once correctly.
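To see why, you can print the raw gradient magnitudes. Appended to the code above, a rough sketch like this shows that the gradients grow roughly with the square of the input values, which is what forces the ever-smaller learning rate:
# d(loss)/da is mean(2 * x * (linear_model - y)), so its magnitude grows
# roughly with the square of the input scale.
grads = tf.gradients(loss, [a, b, c])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grads, {x: x_train, z: z_train, t: t_train, y: y_train}))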
While studying TensorFlow, I ran into a problem: the cost function outputs 'nan'.
Also, if you find anything else wrong in the source code, please let me know.
I am trying to get the cost value out of the model while it trains, but it's not working.
tf.reset_default_graph()
tf.set_random_seed(777)

X = tf.placeholder(tf.float32, [None, 20, 20, 3])
Y = tf.placeholder(tf.float32, [None, 1])

with tf.variable_scope('conv1') as scope:
    W1 = tf.Variable(tf.random_normal([4, 4, 3, 32], stddev=0.01), name='weight1')
    L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    L1 = tf.nn.relu(L1)
    L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    L1 = tf.reshape(L1, [-1, 10 * 10 * 32])

    W1_hist = tf.summary.histogram('conv_weight1', W1)
    L1_hist = tf.summary.histogram('conv_layer1', L1)

with tf.name_scope('fully_connected_layer1') as scope:
    W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1], initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.random_normal([1]))
    hypothesis = tf.matmul(L1, W2) + b

    W2_hist = tf.summary.histogram('fully_connected_weight1', W2)
    b_hist = tf.summary.histogram('fully_connected_bias', b)
    hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)

with tf.name_scope('cost') as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
    cost_summary = tf.summary.scalar('cost', cost)

with tf.name_scope('train_optimizer') as scope:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)

train_data_batch, train_labels_batch = tf.train.batch([train_data, train_labels], enqueue_many=True, batch_size=100, allow_smaller_final_batch=True)

with tf.Session() as sess:
    # tensorboard --logdir=./logs/planesnet2_log
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs/planesnet2_log')
    writer.add_graph(sess.graph)

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    total_cost = 0
    for step in range(20):
        x_batch, y_batch = sess.run([train_data_batch, train_labels_batch])
        feed_dict = {X: x_batch, Y: y_batch}
        _, cost_val = sess.run([optimizer, cost], feed_dict=feed_dict)
        total_cost += cost_val
        print('total_cost: ', total_cost, 'cost_val: ', cost_val)

    coord.request_stop()
    coord.join(threads)
You use a cross-entropy loss without applying a sigmoid activation to hypothesis, so its values are not bounded in (0, 1]. The log function is not defined for negative values, and it most likely gets some. Add a sigmoid and an epsilon factor to avoid negative or zero values and you should be fine.
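A minimal sketch of that fix, reusing the names from the question (the epsilon value is just an example):
eps = 1e-7
probs = tf.sigmoid(hypothesis)   # bound the outputs to (0, 1)
cost = -tf.reduce_mean(Y * tf.log(probs + eps) + (1 - Y) * tf.log(1 - probs + eps))
Equivalently, tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=hypothesis) applies the sigmoid internally in a numerically stable way (wrap it in tf.reduce_mean to get a scalar cost).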
As far as I know, the cross-entropy cost function assumes that the hypothesis you want to predict is a probability, because it uses the log function and the (1 - Y) term. Therefore, cross-entropy loss should be used only when the hypothesis is a probability.
So you have to use the softmax function to turn the result of the hypothesis into a probability.
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
# hypothesis = tf.matmul(L1, W2) + b
hypothesis = tf.nn.softmax(tf.add(tf.matmul(L1, W2), b))
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
Or you can use this code
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.matmul(L1, W2) + b
cost = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis)
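Note that softmax_cross_entropy_with_logits returns one value per example, so it is usually wrapped in a reduction before being passed to minimize:
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis))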