Predicting sine with TensorFlow gives different results than with Keras - Python

Here is code written with Keras to regress a sine function. It works perfectly.
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
import math
import time
x = np.arange(0, math.pi*2*2, 0.1)
y = np.sin(x)
model = Sequential([Dense(10, input_shape=(1,)), Activation('tanh'), Dense(3),Activation('tanh'),Dense(1)])
model.compile(loss='mean_squared_error', optimizer='SGD', metrics=['mean_squared_error'])
t1 = time.clock()
for i in range(40):
    model.fit(x, y, epochs=1000, batch_size=len(x), verbose=0)
    predictions = model.predict(x)
    print i, " ", np.mean(np.square(predictions - y)), " t: ", time.clock() - t1
    plt.hold(False)
    plt.plot(x, y, 'b', x, predictions, 'r--')
    plt.hold(True)
    plt.ylabel('Y / Predicted Value')
    plt.xlabel('X Value')
    plt.title([str(i), " Loss: ", np.mean(np.square(predictions - y)), " t: ", str(time.clock() - t1)])
    plt.pause(0.001)
plt.savefig("fig2.png")
plt.show()
I am trying to write the same code with the lower-level TensorFlow API to understand how neural networks work.
Here is the code I wrote to regress a sine function with TensorFlow:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])
# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)
# Model parameters
W1 = tf.Variable(tf.ones([1,10])*.3, dtype=tf.float32)
b1 = tf.Variable(tf.ones([10])*(-.3), dtype=tf.float32)
W2 = tf.Variable(tf.ones([10,3])*.3, dtype=tf.float32)
b2 = tf.Variable(tf.ones([3])*(-.3), dtype=tf.float32)
W3 = tf.Variable(tf.ones([3,1])*.3, dtype=tf.float32)
b3 = tf.Variable(tf.ones([1])*(-.3), dtype=tf.float32)
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3), 1, keep_dims=True) + b3
# loss
loss = tf.reduce_sum(tf.square(linear_model - y)) # sum of the squares
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init) # reset values to wrong
fig, ax = plt.subplots()
for i in range(40000):
    y_train = sess.run(y_train_tf, {x: x_train})  # this could be removed; then only x_train would be fed in the next line, no y_train
    f_predict, _ = sess.run([linear_model, train], feed_dict={x: x_train, y: y_train})
    curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train, y: y_train})
    if i % 1000 == 999:
        print "step ", i
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print " "
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        plt.hold(False)
        ax.plot(x_plot, y_train[:])
        plt.hold(True)
        ax.plot(x_plot, f_predict, 'o-')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value',
               title=[str(i), " Loss: ", curr_loss])
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
But it is not working.
I can't understand where the difference is. The learning rate in the Keras code is 0.01 by default,
the optimizer is the same, and the network is the same. I don't know where my mistake is.
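For comparison, here is a minimal sketch that spells out the relevant Keras defaults explicitly (assuming Keras 2.x, where SGD defaults to lr=0.01 and Dense defaults to kernel_initializer='glorot_uniform'):
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.optimizers import SGD

# Same architecture as in the question, with the defaults written out:
# SGD with lr=0.01 and Dense's default 'glorot_uniform' kernel initializer.
model = Sequential([
    Dense(10, input_shape=(1,), kernel_initializer='glorot_uniform'),
    Activation('tanh'),
    Dense(3, kernel_initializer='glorot_uniform'),
    Activation('tanh'),
    Dense(1, kernel_initializer='glorot_uniform'),
])
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.01),
              metrics=['mean_squared_error'])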

Here is the answer! I had forgotten to pick the right starting weights: tf.random_normal([1,10], stddev=0.03)
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)
# Model parameters
W1 = tf.Variable(tf.random_normal([1,10], stddev=0.03), dtype=tf.float32, name='W1')
b1 = tf.Variable(tf.random_normal([10], stddev=0.03), dtype=tf.float32, name='b1')
W2 = tf.Variable(tf.random_normal([10,3], stddev=0.03), dtype=tf.float32, name='W2')
b2 = tf.Variable(tf.random_normal([3], stddev=0.03), dtype=tf.float32, name='b2')
W3 = tf.Variable(tf.random_normal([3,1], stddev=0.03), dtype=tf.float32, name='W3')
b3 = tf.Variable(tf.random_normal([1], stddev=0.03), dtype=tf.float32, name='b3')
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3) + b3, 1, keep_dims=True)
# loss
#loss = tf.reduce_sum(tf.square(linear_model - y_train_tf)) # sum of the squares
loss = tf.losses.mean_squared_error(y_train_tf,linear_model)
tf.summary.scalar('loss', loss)
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
# Merge all the summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train_tensorboard',sess.graph)
sess.run(init) # reset values to wrong
fig, ax = plt.subplots()
for i in range(40000):
    summary, f_predict, _ = sess.run([merged, linear_model, train], feed_dict={x: x_train})
    y_train, curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [y_train_tf, layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train})
    train_writer.add_summary(summary, i)
    if i % 1000 == 999:
        print "step ", i
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print " "
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        plt.hold(False)
        ax.plot(x_plot, y_train[:])
        plt.hold(True)
        ax.plot(x_plot, f_predict, 'g--')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value', title=[str(i), " Loss: ", curr_loss])
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
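The decisive difference was the weight initialization: the first TensorFlow attempt started every weight at the same constant (tf.ones(...) * 0.3), while Keras's Dense layers default to Glorot-uniform initialization. To match that Keras default exactly instead of using tf.random_normal, a minimal sketch with TF 1.x initializers (assuming a version that provides tf.glorot_uniform_initializer; otherwise tf.contrib.layers.xavier_initializer() is equivalent):
# Glorot-uniform ("Xavier") initialization, the same default Keras uses for Dense kernels,
# and zeros for the biases (also the Keras default).
glorot = tf.glorot_uniform_initializer()
W1 = tf.get_variable('W1', shape=[1, 10], initializer=glorot)
b1 = tf.get_variable('b1', shape=[10], initializer=tf.zeros_initializer())
W2 = tf.get_variable('W2', shape=[10, 3], initializer=glorot)
b2 = tf.get_variable('b2', shape=[3], initializer=tf.zeros_initializer())
W3 = tf.get_variable('W3', shape=[3, 1], initializer=glorot)
b3 = tf.get_variable('b3', shape=[1], initializer=tf.zeros_initializer())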

Related

Weird decision boundary using neural net in Tensorflow

I have generated a balanced dataset of 4000 examples, 2000 for the negative class and 2000 for the positive one. Then, I've built a neural net with a single hidden layer of 3 neurons with a ReLU activation function and an output layer with a sigmoid. The cost function is a standard cross-entropy function and I chose Adam as the optimizer. Using minibatches of 15 examples, after 1000 epochs of running the final accuracy is 96.37%, so I am assuming that the model is doing well on the test set. But when I want to display the decision boundary, this is what I get:
I cannot figure out if the problem is a code error or the model just needs more training. This is the script I'm using:
# implement a neural network that finds a decision boundary under a constraint on the second hidden layer with tensorflow
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tf_utils import random_mini_batches
import matplotlib.pyplot as plt
def generate_dataset():
    np.random.seed(2)
    # positive class samples
    d1_x = np.random.normal(5, 10, 1000)
    d1_y = np.random.normal(5, 2, 1000)
    d2_x = np.random.normal(40, 20, 1000)
    d2_y = np.random.normal(2, 1, 1000)
    # negative class samples
    d3_x = np.random.normal(60, 5, 2000)
    d3_y = np.random.normal(10, 1, 2000)
    plt.scatter(d1_x, d1_y, color='b')
    plt.scatter(d2_x, d2_y, color='b')
    plt.scatter(d3_x, d3_y, color='r')
    Y = np.zeros((4000, 1))
    d_x = np.concatenate([d1_x, d2_x, d3_x])
    d_y = np.concatenate([d1_y, d2_y, d3_y])
    d_x = d_x.reshape(d_x.shape[0], 1)
    d_y = d_y.reshape(d_y.shape[0], 1)
    X = np.concatenate([d_x, d_y], axis=1)
    Y[2000:] = 1
    return X, Y
# define a tensorflow model 5-3-1 with two hidden layers and a scalar output
costs = []
print_cost = True
learning_rate = .0009
minibatch_size = 15
num_epochs = 1000
XX, YY = generate_dataset()
XX, YY = shuffle(XX, YY)
X_norm = normalize(XX)
X_train, X_test, y_train, y_test = train_test_split(X_norm, YY,
                                                    test_size=0.2, random_state=42)
X_train = np.transpose(X_train)
y_train = np.transpose(y_train)
X_test = np.transpose(X_test)
y_test = np.transpose(y_test)
# define train and test sets
m = XX.shape[1] # input dimension
n = YY.shape[1] # output dimension
X = tf.placeholder(tf.float32, shape = [m, None], name = 'X')
y = tf.placeholder(tf.float32, shape = [n, None], name = 'y')
# model parameters
n1 = 3 # output dimension of the first hidden layer
#n2 = 4 # output dimension of the second hidden layer
#n3 = 2
W1 = tf.get_variable("W1", [n1, m],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1", [n1 ,1], initializer=tf.zeros_initializer)
#W2 = tf.get_variable("W2", [n2, n1],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b2 = tf.get_variable("b2", [n2, 1], initializer=tf.zeros_initializer)
#W3 = tf.get_variable("W3", [n3, n2],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b3 = tf.get_variable("b3", [n3, 1], initializer=tf.zeros_initializer)
W4 = tf.get_variable("W4", [n, n1],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
b4 = tf.get_variable("b4", [n, 1], initializer=tf.zeros_initializer)
# forward propagation
z1 = tf.add(tf.matmul(W1, X), b1)
a1 = tf.nn.relu(z1)
#z2 = tf.add(tf.matmul(W2, a1), b2)
#a2 = tf.nn.relu(z2)
#z3 = tf.add(tf.matmul(W3, a2), b3)
#a3 = tf.nn.relu(z3)
z4 = tf.add(tf.matmul(W4, a1), b4)
pred = tf.nn.sigmoid(z4)
# cost function
cost = tf.reduce_mean(tf.losses.log_loss(labels=y, predictions=pred))
# the "logit" is the probability estimate given by the model --> this is what is used inside the formula, not the net input z
# ADAM optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# metrics
correct_prediction = tf.less_equal(tf.abs(pred - y), 0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    seed = 1
    sess.run(init)
    for epoch in range(num_epochs):
        epoch_cost = 0
        seed += 1
        num_minibatches = int(X_train.shape[0] / minibatch_size)
        minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, y: minibatch_Y})
            epoch_cost += minibatch_cost / minibatch_size
        # Print the cost every epoch
        if print_cost == True and epoch % 100 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
        if print_cost == True and epoch % minibatch_size == 0:
            costs.append(epoch_cost)
    #plt.plot(costs)
    #plt.show()
    cp, val_accuracy = sess.run([correct_prediction, accuracy], feed_dict={X: X_test, y: y_test})
    # plot the cost
    # plt.plot(np.squeeze(costs))
    # plt.ylabel('cost')
    # plt.xlabel('iterations (per fives)')
    # plt.title("Learning rate =" + str(learning_rate))
    # plt.show()
    cmap = plt.get_cmap('Paired')
    # Define region of interest by data limits
    xmin, xmax = min(XX[:, 0]) - 1, max(XX[:, 0]) + 1
    ymin, ymax = min(XX[:, 1]) - 1, max(XX[:, 1]) + 1
    steps = 100
    x_span = np.linspace(xmin, xmax, steps)
    y_span = np.linspace(ymin, ymax, steps)
    xx, yy = np.meshgrid(x_span, y_span)
    A = np.concatenate([[xx.ravel()], [yy.ravel()]], axis=0)
    A = normalize(A, axis=0)
    # Make predictions across region of interest
    predictions = sess.run(pred, feed_dict={X: A})
    # Plot decision boundary in region of interest
    z = predictions.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap=cmap, alpha=.5)
    plt.show()
    # Get predicted labels on training data and plot
    #train_labels = model.predict(X)
    #ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)

Tensorflow outputs accuracy 1 but all the results are the same (and wrong)

I'm trying to build a TensorFlow neural network, but I can't get it to work correctly. It always outputs the same value for every observation. I've tried changing activation functions, changing the learning rate, and reshaping tensors and arrays, but I'm missing something here.
The dataset is entirely numerical.
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
df = pd.read_csv("data.csv")
X = np.array(df[["area","bathrooms", "sq_price"]])
y = df[["price"]]
y = np.array(y).reshape([47, 1])
normalizer = Normalizer()
X = normalizer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_test = np.array(X_test, dtype='float32')
y_test = np.array(y_test, dtype="float32")
X_test.shape
n_inputs = len(X_train[0])
n_hidden = 5
n_outputs = 1
lr = 0.001
epochs = 5000
batch_size = 1
xph = tf.placeholder(tf.float32, [None, n_inputs])
yph = tf.placeholder(tf.float32, [None, n_outputs])
W1 = tf.Variable(tf.truncated_normal([n_inputs, n_hidden], stddev=0.1))
W2 = tf.Variable(tf.truncated_normal([n_hidden, n_outputs], stddev=0.1))
b1 = tf.Variable(tf.ones([n_hidden]))
b2 = tf.Variable(tf.ones([n_outputs]))
def feed_forward(X, W1, W2, b1, b2):
    l1 = tf.tanh(tf.add(tf.matmul(X, W1), b1))
    l2 = tf.add(tf.matmul(l1, W2), b2)
    return l2
output = feed_forward(xph, W1, W2, b1, b2)
error = tf.reduce_mean((output - yph)**2)
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(error)
init = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(output,1), tf.argmax(yph,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(init)
    print("Weights 1 before training:\n ", sess.run([W1]), "\n")
    for i in range(epochs):
        rand_ind = np.random.randint(len(X_train), size=batch_size)
        loss = sess.run([optimizer], feed_dict={xph: X_train[rand_ind], yph: y_train[rand_ind]})
        if i == (epochs-1):
            print("Done!\n")
            print("Weights 1 after training: \n", sess.run([W1]), "\n")
            print("Accuracy: ", sess.run(accuracy, feed_dict={xph: X_test, yph: y_test}), "\n")
            print("Results for testing:\n ", sess.run(feed_forward(X_test, W1, W2, b1, b2), feed_dict={xph: X_test}), "\n")
            print("Expected values:\n ", y_test)
The output of the code:
Weights 1 before training:
[array([[ 0.02620826, -0.11837681, 0.01349821, 0.04195584, 0.14087772],
[-0.10512593, 0.1383599 , -0.0632275 , 0.07759375, -0.09907298],
[-0.14932911, 0.13720822, 0.15072195, -0.09748196, 0.08388615]],
dtype=float32)]
Done!
Weights 1 after training:
[array([[ 1.0387952e+01, 1.7232073e+01, 2.9152514e+01, -1.8471668e+01,
2.2215986e+01],
[-8.4001064e-02, 1.7373289e-01, -3.8207369e-03, 3.9849356e-02,
-5.4067656e-02],
[ 5.5025685e-01, 1.3086454e+00, 2.1180787e+00, -1.3474523e+00,
1.5743145e+00]], dtype=float32)]
Accuracy: 1.0
Results for testing:
[[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]]
Expected values:
[[349900.]
[369000.]
[573900.]
[252900.]
[299900.]
[329900.]
[449900.]
[285900.]
[212000.]
[229900.]]
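One observation about the metric in the code above: tf.argmax(output, 1) and tf.argmax(yph, 1) are both computed over a single column, so they are always 0 and the reported accuracy is trivially 1.0 no matter what the network predicts. For a regression target like price, an error measure along these lines seems more informative (a minimal sketch, reusing the placeholders defined above):
# Root-mean-squared error in price units, instead of the argmax-based accuracy.
rmse = tf.sqrt(tf.reduce_mean(tf.square(output - yph)))
# inside the session, after training:
#   print("Test RMSE: ", sess.run(rmse, feed_dict={xph: X_test, yph: y_test}))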

Single Layer Perceptron Error while optimizing and minimising cost using Tensorflow

I am trying to implement the single-layer perceptron concept using TensorFlow. The input, expected output, weights, and bias are all defined properly, but when running the program an error is thrown while optimizing and minimizing the cost.
Here is the code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
learning_rate = 0.001
training_epochs = 200
display_step = 50
# X must be 1X5
# Y must be 1X3
# w must be 5X3
# B must be 1X3
train_X = np.random.rand(1,5)
train_Y = np.random.rand(1,3)
#
X = tf.placeholder(tf.float32, [1, 5])
Y = tf.placeholder(tf.float32, [1, 3])
W = tf.Variable([[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0]])
b = tf.Variable([0.0 ,0.0,0.0])
pred = tf.add(tf.matmul(X, W), b)
cost = tf.reduce_sum(tf.pow(pred-Y,2))
optimizer =tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
avg_set = []
epoch_set = []
with tf.Session() as sess:
    for i in range(training_epochs):
        sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
        # Here the error is thrown from the line above
        if (i+1) % display_step == 0:
            cc = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            # Again, the error is also thrown from the line above
            print("Epoch:", '%04d' % (i + 1), "cost=", "{:.9f}".format(cc), \
                  "W=", sess.run(W), "b=", sess.run(b))
            avg_set.append(cc)
            epoch_set.append(i + 1)
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    plt.plot(epoch_set, avg_set, 'o', label='SLP Training phase')
    plt.ylabel('cost')
    plt.xlabel('epochs')
    plt.legend()
    plt.show()
Please help me get past this error. Thanks.
I found that the error came from the with tf.Session() as sess: block before the for loop. Removing it, declaring the session separately, and running sess.run(init) before the loop (which the original version never did) made it work.
Here is the code that works.
main.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
learning_rate = 0.001
training_epochs = 1000
display_step = 50
# taking input as array from numpy package and converting it into tensor
# X must be 1X5
# Y must be 1X3
# w must be 5X3
# B must be 1X3
train_X = np.random.rand(1,5)
train_Y = np.random.rand(1,3)
#
X = tf.placeholder(tf.float32, [1, 5])
Y = tf.placeholder(tf.float32, [1, 3])
W = tf.Variable([[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0]])
b = tf.Variable([0.0 ,0.0,0.0])
pred = tf.add(tf.matmul(X, W), b)
cost = tf.reduce_sum(tf.pow(pred-Y,2))
optimizer =tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
avg_set = []
epoch_set = []
sess = tf.Session()
sess.run(init)
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
    if (i+1) % display_step == 0:
        cc = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
        # check what it thinks when you give it the input data
        print("Epoch:", '%04d' % (i + 1), "cost=", "{:.9f}".format(cc), \
              "W=", sess.run(W), "b=", sess.run(b))
        avg_set.append(cc)
        epoch_set.append(i + 1)
print("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
plt.plot(epoch_set,avg_set,'o',label = 'SLP Training phase')
plt.ylabel('cost')
plt.xlabel('epochs')
plt.legend()
plt.show()
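For what it's worth, the original with tf.Session() as sess: structure should also work as long as the variables are initialized inside the block before the first optimizer step; a minimal sketch of that variant, using the same graph as above:
with tf.Session() as sess:
    sess.run(init)  # initialize W and b before running the optimizer
    for i in range(training_epochs):
        sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
        if (i + 1) % display_step == 0:
            cc = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            avg_set.append(cc)
            epoch_set.append(i + 1)
    print("Optimization Finished!")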

Learn the sum of two numbers in Tensorflow

I'm trying to train a neural network to predict the sum of two numbers, but I don't understand what's wrong with my model. The model consists of 2 inputs, 2 hidden layers, and 1 output layer. Every 1000 iterations I print a test prediction, but the result keeps getting smaller and smaller.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5 * np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup
def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.sigmoid(tf.matmul(tf.sigmoid(x), w01))
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.sigmoid(tf.matmul(y1, w12))
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2
def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 1000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0])
(x, y_, y2) = create_network()
train(x, y_, y2)
Output every 1000 iterations:
0.37301352864927173 + 0.28949461772342683 = 0.49111518
0.050899466843458474 + 0.006174158992116541 = 0.0025260744
0.3974852369427063 + 0.22402098418952499 = 0.00090828544
0.15735921047969498 + 0.39645077887600294 = 0.0005903727
0.23560825884336228 + 0.29010766384718145 = 0.0004317883
0.4250063393420791 + 0.24181166029062096 = 0.00031525563
... and so on: the prediction keeps getting smaller and smaller.
Cross-entropy loss is used for classification problems, while your task is clearly a regression. The computed cross_entropy value doesn't make sense, hence the result.
Change your loss to:
cross_entropy = tf.reduce_mean(
    tf.nn.l2_loss(y_ - y2)
)
... and you'll see much more sensible results.
Maxim, thanks a lot. Now it works.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5 * np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup
def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.matmul(x, w01)
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.matmul(y1, w12)
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2
def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.l2_loss(y_ - y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 2000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0], "|", batch_xs[inx][0] + batch_xs[inx][1])
(x, y_, y2) = create_network()
train(x, y_, y2)
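A small note on the loss in the corrected code: tf.nn.l2_loss(t) computes sum(t ** 2) / 2, so tf.reduce_mean around that scalar just returns half the sum of squared errors. A plain mean squared error is the more conventional formulation; a minimal sketch with the same y_ and y2 tensors:
# Mean squared error over the batch. The scale differs from sum-of-squares / 2,
# which effectively rescales the gradient (and therefore the learning rate).
mse = tf.reduce_mean(tf.square(y_ - y2))
# equivalently, with the built-in helper:
# mse = tf.losses.mean_squared_error(labels=y_, predictions=y2)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(mse)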
If you consider predicting each digit to be a classification problem where you predict a value in "0123456789 ", you can use cross-entropy as your loss. For reference, see the Keras - Addition RNN Example.
But like Maxim said, it shouldn't be used for a regression problem.

How to concat linear models in TensorFlow

I'm trying to build a model in TensorFlow out of sub-models like f_i(x) = m_i x + b_i such that:
f(x) = [f_1(x), f_2(x)]^T [x, x] + b
This is just an exercise. My difficulty is in understanding how to concatenate two tensors:
# Model 1
f1 = tf.add(tf.mul(X, W), b)
# Model 2
f2 = tf.add(tf.mul(X, W2), b2)
# Concatenate 1 & 2
fi = tf.concat(0, [f1, f2])
# Final model
pred = tf.add(tf.mul(fi, W3), b3)
Unfortunately this doesn't seem to work.
Here's the full example:
'''
A linear regression learning algorithm example using TensorFlow library.
Author: Aymeric Damien (original author) # I am altering it though
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import print_function
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt
rng = numpy.random
# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50
# Training Data
train_X = numpy.asarray(
[3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
train_Y = numpy.asarray(
[1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
n_samples = train_X.shape[0]
# tf Graph Input
X = tf.placeholder("float")
Y = tf.placeholder("float")
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
W2 = tf.Variable(rng.randn(), name="weight2")
b2 = tf.Variable(rng.randn(), name="bias2")
W3 = tf.Variable([rng.randn(), rng.randn()], name="weight3")
b3 = tf.Variable(rng.randn(), name="bias3")
# Model 1
f1 = tf.add(tf.mul(X, W), b)
# Model 2
f2 = tf.add(tf.mul(X, W2), b2)
# Concatenate 1 & 2
fi = tf.concat(0, [f1, f2])
# Final model
pred = tf.add(tf.mul(fi, W3), b3)
# Mean squared error
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b),
          '\n')
    # Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
    # Testing example, as requested (Issue #2)
    test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])
    test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])
    print("Testing... (Mean square loss Comparison)")
    testing_cost = sess.run(
        tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]),
        feed_dict={X: test_X, Y: test_Y})  # same function as cost above
    print("Testing cost=", testing_cost)
    print("Absolute mean square loss difference:", abs(
        training_cost - testing_cost))
    plt.plot(test_X, test_Y, 'bo', label='Testing data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()
One way to achieve a similar result without the headache of tf.concat is
pred = tf.add(tf.add(f1, f2), b3)
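If the intent is to keep the learned combination weights W3 from the original attempt rather than an unweighted sum, one way to avoid the concatenation entirely is to index into W3 directly; a minimal sketch assuming the same f1, f2, W3 and b3 defined above:
# Weighted combination of the two sub-models: W3[0] * f1(x) + W3[1] * f2(x) + b3,
# without ever building the concatenated tensor fi.
pred = tf.add(W3[0] * f1 + W3[1] * f2, b3)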
