Learn the sum of two numbers in Tensorflow - python

I'm trying to train a neural network to predict the sum of two numbers, but I don't understand what's wrong with my model. The model consists of an input layer (2 units), a hidden layer, and an output layer (1 unit). Every 1000 iterations I print a test prediction, but the result keeps getting smaller and smaller.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5*np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup
def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.sigmoid(tf.matmul(tf.sigmoid(x), w01))
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.sigmoid(tf.matmul(y1, w12))
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2
def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 1000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0])
(x, y_, y2) = create_network()
train(x, y_, y2)
Output every 1000 iterations:
0.37301352864927173 + 0.28949461772342683 = 0.49111518
0.050899466843458474 + 0.006174158992116541 = 0.0025260744
0.3974852369427063 + 0.22402098418952499 = 0.00090828544
0.15735921047969498 + 0.39645077887600294 = 0.0005903727
0.23560825884336228 + 0.29010766384718145 = 0.0004317883
0.4250063393420791 + 0.24181166029062096 = 0.00031525563
... and so on, smaller and smaller.

Cross-entropy loss is used for classification problems, while your task is clearly a regression. The computed cross_entropy value doesn't make sense, hence the result.
Change your loss to:
cross_entropy = tf.reduce_mean(
    tf.nn.l2_loss(y_ - y2)
)
... and you'll see much more sensible results.
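For reference, tf.nn.l2_loss already returns half the sum of squared differences (a scalar), so the surrounding reduce_mean is a no-op here. An equivalent plain mean-squared-error formulation (a sketch, assuming the same y_ and y2 tensors as above) would be:
# Mean squared error over the batch; same placeholders/outputs as in the snippet above
mse_loss = tf.reduce_mean(tf.square(y_ - y2))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(mse_loss)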

Maxim, thanks a lot. Now it works.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5*np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup
def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.matmul(x, w01)
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.matmul(y1, w12)
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2
def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.l2_loss(y_ - y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 2000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0], "|", batch_xs[inx][0] + batch_xs[inx][1])
(x, y_, y2) = create_network()
train(x, y_, y2)

If you consider predicting each digit to be a classification problem where you predict a value in "0123456789 ", you can use cross-entropy as your loss. For reference, see the Keras - Addition RNN Example.
But like Maxim said, it shouldn't be used for a regression problem.
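As a rough sketch of that framing (the vocabulary constant, helper name, and padding width below are my own illustration, not taken from the Keras example), the target sum would be encoded as a fixed-length string over "0123456789 " and each character position trained as a softmax class:
import numpy as np
VOCAB = "0123456789 "                               # one class per digit, plus a padding space
CHAR_TO_IDX = {c: i for i, c in enumerate(VOCAB)}
def encode_sum(a, b, width=3):
    # One-hot encode the digits of a+b, space-padded to a fixed width.
    text = str(a + b).ljust(width)                  # e.g. 7 + 15 -> "22 "
    onehot = np.zeros((width, len(VOCAB)), dtype=np.float32)
    for pos, ch in enumerate(text):
        onehot[pos, CHAR_TO_IDX[ch]] = 1.0
    return onehot                                   # shape (width, len(VOCAB))
print(encode_sum(7, 15).argmax(axis=1))             # class indices for "2", "2", " "
Each of the width positions is then an ordinary cross-entropy classification target.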

Related

Augment size of a tf.variable

I am training an autoencoder by feeding it 2 placeholders that store the following:
x1 = [x1]
X = [x1,x2,x3...xn]
It holds that:
y1 = W*x1 + b_encoding1
Therefore, I have a variable named b_encoder1 (the b)
(When I print it I get: <tf.Variable 'b_encoder1:0' shape=(10,) dtype=float32_ref>)
But it also holds that:
Y = W*X + b_encoding1
The size of the second b_encoding1 has to be (10, n) instead of (10,). How can I augment it and pass it in TensorFlow?
Y = tf.compat.v1.nn.xw_plus_b(X, W1, b_encoder1, name='Y')
The whole code looks like this:
x1 = tf.compat.v1.placeholder( tf.float32, [None,input_shape], name = 'x1')
X = tf.compat.v1.placeholder( tf.float32, [None,input_shape,sp], name = 'X')
W1 = tf.Variable(tf.initializers.GlorotUniform()(shape=[input_shape,code_length]),name='W1')
b_encoder1 = tf.compat.v1.get_variable(name='b_encoder1',shape=[code_length],initializer=tf.compat.v1.initializers.zeros(), use_resource=False)
K = tf.Variable(tf.initializers.GlorotUniform()(shape=[code_length,code_length]),name='K')
b_decoder1 = tf.compat.v1.get_variable(name='b_decoder1',shape=[input_shape],initializer=tf.compat.v1.initializers.zeros(), use_resource=False)
y1 = tf.compat.v1.nn.xw_plus_b(x1, W1, b_encoder1, name='y1')
Y = tf.compat.v1.nn.xw_plus_b(X, W1, b_encoder1, name='Y')
I also declare the loss function and so on and then train with:
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for epoch_i in range(epochs):
        for batch_i in range(number_of_batches):
            batch_data = getBatch(shuffled_data, batch_i, batch_size)
            sess.run(optimizer, feed_dict={x1: batch_data[:,:,0], X: batch_data})
        train_loss = sess.run(loss, feed_dict={x1: aug_data[:,:,0], X: aug_data})
        print(epoch_i, train_loss)
You can consider X as a batch of x. X can take in an arbitrary number of samples:
import tensorflow as tf
import numpy as np
X = tf.placeholder(shape=(None, 100), dtype=tf.float32)
W = tf.get_variable('kernel', [100,10])
b = tf.get_variable('bias',[10])
Y = tf.nn.xw_plus_b(X, W,b, name='Y')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # tf version < 1.13
    out = sess.run(Y, {X: np.random.rand(128, 100)})  # here n=128
Note that the bias b is still 10-dimensional regardless of the value of n.
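The (n, 10) + (10,) addition works by standard broadcasting; a quick NumPy check (my own illustration, not part of the question's graph) shows the shape behaviour:
import numpy as np
XW = np.random.rand(128, 10)   # stand-in for the X*W matmul result, n = 128
b = np.zeros(10)               # 10-dimensional bias
print((XW + b).shape)          # (128, 10): b is broadcast across the batch dimension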
Please try:
b_encoding1 = tf.expand_dims(b_encoding1, axis = 1)

Tensorflow: batch not shaping correctly for labels

I am using an LSTMBlockFusedCell for a network I'm working on.
When fed a single input of size [6, 3169] and an output of [-1, 3169] (which casts to the input size), it operates properly and predicts. The problem comes when I try to batch those same inputs. With a batch of 100, the input reshapes fine, but the output broadcasts into [600, 3169]. I have tried setting the placeholder shapes exactly to the specified input size, but the same error happened. I'm pretty confident my data is in the correct shape: I run the batch generator and print the output sizes afterward.
Here's my network:
def rnn(x, weight, bias, n_input, vocab):
    x = tf.reshape(x, [-1, n_input, vocab])
    rnn_cell = tf.contrib.rnn.LSTMBlockFusedCell(n_hidden)
    outputs, states = rnn_cell(x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weight['output']) + bias['output']
my batch generator:
def new_data(dat, dic, n_steps, batch_size=100):
    x = np.zeros(shape=(batch_size, n_steps, len(dic)))
    y = np.zeros(shape=(batch_size, n_steps, len(dic)))
    j = 0
    x_dat = np.zeros(shape=(n_steps, len(dic)))
    for sen in dat:
        if len(sen) - 1 > n_steps:
            for i, word in enumerate(sen[0:n_steps]):
                x_dat[i] = one_hot(word, dic)
            y_dat = one_hot(sen[n_steps], dic)
            x[j % batch_size] = x_dat
            y[j % batch_size] = y_dat
            if j % batch_size == 0:
                yield x, y
                x = np.zeros(shape=(batch_size, n_steps, len(dic)))
                y = np.zeros(shape=(batch_size, n_steps, len(dic)))
            j += 1
and my setup:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
weights = {
    "output": tf.Variable(tf.random_normal([n_hidden, vocab_size]), name="weight_output")
}
bias = {
    "output": tf.Variable(tf.random_normal([vocab_size]), name="bias_output")
}
pred = rnn(X, weights, bias, n_input, vocab)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
with tf.Session() as sess:
    sess.run(init)
    step = 0
    for epoch in range(n_epochs):
        for x, y in new_data(dat, dic, n_steps):
            _, c = sess.run([optimizer, cost], feed_dict={X: x, Y: y})

How to print the predicted 'y' or 'output' matrix in this TensorFlow code?

I have a training model in TensorFlow (see below code).
After training my model, the cumulative 'Test Accuracy' shows 0.92357, and I want to check it to be sure I didn't mess something up.
How do I print a predicted output matrix or 'y' after I train the model given the code I have below?
# x will be the input matrix flattened (28x29)
x = tf.placeholder(tf.float32, [None, 812])
# Define the weights (initial value doesn't matter since these will be learned)
W = tf.Variable(tf.random_uniform([812, 812], minval=0, dtype=tf.float32))
b = tf.Variable(tf.random_uniform([812], minval=0, dtype=tf.float32))
# Predict output matrix
y = tf.nn.softmax(tf.matmul(x, W) + b)
# Actual output matrix from the training set
y_ = tf.placeholder(tf.float32, [None, 812])
# Calculate loss and optimize
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.AdamOptimizer(0.025).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
a, b = get_batch()
train_len = len(a)
correct_prediction = tf.equal(y_, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Training
for i in range(train_len):
    batch_xs = a[i]
    batch_ys = b[i]
    _, loss, acc = sess.run([train_step, cross_entropy, accuracy], feed_dict={x: batch_xs, y_: batch_ys})
    print("Loss= " + "{:.6f}".format(loss) + " Accuracy= " + "{:.5f}".format(acc))
# Test trained model
cumulative_accuracy = 0.0
for i in range(train_len):
    acc_batch_xs = a[i]
    acc_batch_ys = b[i]
    cumulative_accuracy += accuracy.eval(feed_dict={x: acc_batch_xs, y_: acc_batch_ys})
print("Test Accuracy= {}".format(cumulative_accuracy / train_len))
The value of any tensor object can be obtained by using
tensorFlowObject.eval()
Hence, you can use y.eval() to get the value of y
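For example (a sketch reusing a test batch from the code above; since y depends on the placeholder x, eval() still needs a feed_dict):
predicted = y.eval(feed_dict={x: acc_batch_xs})           # predicted output matrix for one batch
# equivalently: predicted = sess.run(y, feed_dict={x: acc_batch_xs})
print(predicted.shape)                                    # (batch, 812)
print(predicted[0])                                       # prediction for the first example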

Function approximation with Neural Network - Loss 0

I'm trying to create a NN to approximate functions (sine, cosine, custom ...), but I'm struggling with the format: I don't want to use input-label pairs, but rather input-output pairs. How do I change it?
I'm following this tutorial
import tensorflow as tf
import random
from math import sin
import numpy as np
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_inputs = 1 # CHANGES HERE
n_outputs = 1 #CHANGES HERE
batch_size = 100
x = tf.placeholder('float', [None, n_inputs]) #CHANGES HERE
y = tf.placeholder('float', [None, n_outputs]) #CHANGES HERE
def neural_network_model(data):
    hidden_layer_1 = {'weights': tf.Variable(tf.random_normal([n_inputs, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}  #CHANGES HERE
    hidden_layer_2 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_layer_3 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_outputs])),
                    'biases': tf.Variable(tf.random_normal([n_outputs]))}  #CHANGES HERE
    l1 = tf.add(tf.matmul(data, hidden_layer_1['weights']), hidden_layer_1['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_layer_2['weights']), hidden_layer_2['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_layer_3['weights']), hidden_layer_3['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'])
    return output
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optmizer = tf.train.AdamOptimizer().minimize(cost)
    epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            loss = 0
            for _ in range(batch_size^2):  #CHANGES HERE (note: ^ is XOR in Python, so 100^2 == 102, not 100**2)
                batch_x, batch_y = generate_input_output(batch_size)  #CHANGES HERE
                a, c = sess.run([optmizer, cost], feed_dict={x: batch_x, y: batch_y})
                loss += c
            print("Epoch:", epoch+1, "out of", epochs, "- Loss:", loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        test_x, test_y = generate_input_output(batch_size)  #CHANGES HERE
        print('Accuracy', accuracy.eval({x: test_x, y: test_y}))
def desired_function(x):  #CHANGES HERE
    return sin(x)
def generate_input_output(batch_size):  #CHANGES HERE
    batch_x = [random.uniform(-10, 10) for _ in range(batch_size)]
    batch_y = [desired_function(x) for x in batch_x]
    batch_x = np.reshape(batch_x, (-1, 1))
    batch_y = np.reshape(batch_y, (-1, 1))
    return batch_x, batch_y
train_neural_network(x)
Your solution seems very verbose to me. I'll post a much simplified solution; could you point out which part of it you don't understand?
import tensorflow as tf
import numpy as np
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
batch_size = 100
x = tf.placeholder('float', [None, 1])
y = tf.placeholder('float', [None, 1])
x1 = tf.contrib.layers.fully_connected(x, n_nodes_hl1)
x2 = tf.contrib.layers.fully_connected(x1, n_nodes_hl2)
x3 = tf.contrib.layers.fully_connected(x2, n_nodes_hl3)
result = tf.contrib.layers.fully_connected(x3, 1, activation_fn=None)
loss = tf.nn.l2_loss(result - y)
train_op = tf.train.AdamOptimizer().minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10000):
        xpts = np.random.rand(batch_size) * 10
        ypts = np.sin(xpts)
        _, loss_result = sess.run([train_op, loss],
                                  feed_dict={x: xpts[:, None],
                                             y: ypts[:, None]})
        print('iteration {}, loss={}'.format(i, loss_result))
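After training, the fit can be inspected by running the result tensor on fresh inputs (a minimal usage sketch; these lines would sit inside the same with tf.Session() block):
    test_x = np.linspace(0, 10, 5)[:, None]          # a few test points in the trained range
    print(sess.run(result, feed_dict={x: test_x}))   # should approximate np.sin(test_x)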
It seems to me that your solution was intended for classification and you have not fully rewritten it for regression, since there are things like softmax_cross_entropy_with_logits left in there, which you certainly don't want in a regression network.
Have not tried it myself, so there might be more things you need to change to get the model to run, but you will definitely want to change this line:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
to something more like:
cost = tf.reduce_sum(tf.square(prediction - y))
Basically, your cost function is much simpler in this case: you just want to minimize the sum of squared differences between the network's output and the expected y value.
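Equivalently, the mean rather than the sum of the squared errors can be used; only the scale of the reported loss changes (a sketch):
cost = tf.reduce_mean(tf.square(prediction - y))  # mean-squared-error variant of the same cost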

tensorflow nn mnist example with modified layer dimensions

I modified this MNIST example so that it has two outputs and a middle layer of 10 nodes. It doesn't work: it gives me a 0.50 score all the time, and I think it just picks one of the outputs and responds with that no matter what the input is. How could I fix this so that it actually does some learning? The outputs are supposed to represent 0 for 'skin tone' and 1 for 'no skin tone'. I use PNG input.
def nn_setup(self):
    input_num = 784 * 3  # like mnist but with three channels
    mid_num = 10
    output_num = 2
    x = tf.placeholder(tf.float32, [None, input_num])
    W_1 = tf.Variable(tf.random_normal([input_num, mid_num], stddev=0.04))
    b_1 = tf.Variable(tf.random_normal([mid_num], stddev=0.5))
    y_mid = tf.nn.relu(tf.matmul(x, W_1) + b_1)
    W_2 = tf.Variable(tf.random_normal([mid_num, output_num], stddev=0.4))
    b_2 = tf.Variable(tf.random_normal([output_num], stddev=0.5))
    y_logits = tf.matmul(y_mid, W_2) + b_2
    y = tf.nn.softmax(y_logits)
    y_ = tf.placeholder(tf.float32, [None, output_num])
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_logits, y_))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    init = tf.initialize_all_variables()
    self.sess = tf.Session()
    self.sess.run(init)
    for i in range(1000):
        batch_xs, batch_ys = self.get_nn_next_train()
        self.sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    self.nn_test.images, self.nn_test.labels = self.get_nn_next_test()
    print(self.sess.run(accuracy, feed_dict={x: self.nn_test.images, y_: self.nn_test.labels}))
