I implemented a basic MLP and I want it to predict a user-generated set of data, but the prediction looks as follows:
.
I am not sure why... I have nonlinearities in the hidden layers, and I tried multiple activations (ReLU, tanh, sigmoid), tried different optimisers, different learning rates, various architectures (more layers, fewer layers, dropout), but I never got this right.
Please note that I do believe it may be because of how I compute the predictions at the end (pred = sess.run(out, feed_dict={inputs:X.reshape(n_input, 1)})) as it may be incorrect, but I wouldn't know why. I also tried other methods like extracting the weights with w = sess.run(weights) and then feeding them to the model() function along with the input, but nothing worked.
Also, when monitoring the error, the error decreases between epochs.
Any ideas?
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Architecture
input_size = 1
output_size = 1
h1_size = 20
h2_size = 50
# 2 hidden layers network
def model(inputs, weights):
out1 = tf.nn.relu(tf.matmul(inputs, weights['h1']))
out2 = tf.nn.relu(tf.matmul(out1, weights['h2']))
return tf.matmul(out2, weights['h3'])
# Inputs/label placeholders
inputs = tf.placeholder('float', shape=(None, input_size))
labels = tf.placeholder('float', shape=(None, output_size))
# Learnable weights
weights = {
'h1': tf.Variable(tf.random_normal(shape=(input_size, h1_size))),
'h2': tf.Variable(tf.random_normal(shape=(h1_size, h2_size))),
'h3': tf.Variable(tf.random_normal(shape=(h2_size, output_size))),
}
# Stores the result from the net
out = model(inputs, weights)
# Cost and optimisation
cost = tf.reduce_mean(tf.square(out - labels))
opt = tf.train.AdadeltaOptimizer()
opt_operation = opt.minimize(cost)
# Generate some data
n_input = 1000
X = np.linspace(0, 1, n_input).astype('f')
y = X + 5 * np.sin(X * 10)
y /= max(y)
# Train
epochs = 2000
lr = 0.0000001
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(epochs):
_, c = sess.run([opt_operation, cost], feed_dict={
inputs: X.reshape(n_input, 1),
labels: y.reshape(n_input, 1),
})
if not epoch % int(epochs/20):
print(c)
pred = sess.run(out, feed_dict={inputs:X.reshape(n_input, 1)})
plt.scatter(X, pred, color='red', label='prediction')
plt.scatter(X, y, label='data')
plt.legend()
plt.show()
Forgot bias terms: new graph
It works now but not sure if this fixed it?
New code uses:
weights = {
'h1': tf.Variable(tf.random_normal(shape=(input_size, h1_size))),
'h2': tf.Variable(tf.random_normal(shape=(h1_size, h2_size))),
'h3': tf.Variable(tf.random_normal(shape=(h2_size, output_size))),
'b1': tf.Variable(tf.zeros(shape=[1])),
'b2': tf.Variable(tf.zeros(shape=[1])),
'b3': tf.Variable(tf.zeros(shape=[1])),
}
and
def model(inputs, weights):
out1 = tf.nn.relu(tf.matmul(inputs, weights['h1']) + weights['b1'])
out2 = tf.nn.relu(tf.matmul(out1, weights['h2']) + weights['b2'])
return tf.matmul(out2, weights['h3'] + weights['b3'])
Related
I want to use the dropout function of tensorflow to check if I can improve my results (TPR, FPR) of my recurrent neural network.
However I implemented it by following a guide. So I am not sure if I did any mistakes. But if I train my model with with e.g. 10 epochs I get nearly the same results after validation. Thats why I am not sure if I use the dropout function correctly. Is this the right implementation in the following code or did I made something wrong? If I did everything right why do I get nearly the same result then?
hm_epochs = 10
n_classes = 2
batch_size = 128
chunk_size = 341
n_chunks = 5
rnn_size = 32
dropout_prop = 0.5 # Dropout, probability to drop a unit
batch_size_validation = 65536
x = tf.placeholder('float', [None, n_chunks, chunk_size])
y = tf.placeholder('float')
def recurrent_neural_network(x):
layer = {'weights':tf.Variable(tf.random_normal([rnn_size, n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, chunk_size])
x = tf.split(x, n_chunks, 0)
lstm_cell = rnn.BasicLSTMCell(rnn_size)
outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
#DROPOUT Implementation -> is this code really working?
#The result is nearly the same after 20 epochs...
output_layer = tf.layers.dropout(output, rate=dropout_prop)
return output
def train_neural_network(x):
prediction = recurrent_neural_network(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(1,hm_epochs+1):
epoch_loss = 0
for i in range(0, training_data.shape[0], batch_size):
epoch_x = np.array(training_data[i:i+batch_size, :, :], dtype='float')
epoch_y = np.array(training_labels[i:i+batch_size, :], dtype='float')
if len(epoch_x) != batch_size:
epoch_x = epoch_x.reshape((len(epoch_x), n_chunks, chunk_size))
else:
epoch_x = epoch_x.reshape((batch_size, n_chunks, chunk_size))
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
train_neural_network(x)
print("rnn - finished!")
In its most basic form, dropout should happen inside the cell and applied to the weights. You only applied it afterwards. This article explained it pretty well with some good visualization and few variations.
To use it in your code, you can either
Implement your own RNN cell where the keep probability is a parameter that initializes the cell or is a parameter that got passed in when it's called every time.
Use an rnn dropout wrapper here.
I am new to tensorflow and i am tasked to design a feedforward neural network which consists of: an input layer, one hidden perceptron layer of 10 neurons and an output softmax layer. Assume a learning rate of 0.01, L2 regularization with weight decay parameter of 0.000001, and batch size of 32.
I would like to know if there is anyway to know if the network that I have created is what intend to create. Like a graph showing the nodes?
The following is attempt on the task but I am not sure if it is correct.
import math
import tensorflow as tf
import numpy as np
import pylab as plt
# scale data
def scale(X, X_min, X_max):
return (X - X_min)/(X_max-X_min)
def tfvariables(start_nodes, end_nodes):
W = tf.Variable(tf.truncated_normal([start_nodes, end_nodes], stddev=1.0/math.sqrt(float(start_nodes))))
b = tf.Variable(tf.zeros([end_nodes]))
return W, b
NUM_FEATURES = 36
NUM_CLASSES = 6
learning_rate = 0.01
beta = 10 ** -6
epochs = 10000
batch_size = 32
num_neurons = 10
seed = 10
np.random.seed(seed)
#read train data
train_input = np.loadtxt('sat_train.txt',delimiter=' ')
trainX, train_Y = train_input[:, :36], train_input[:, -1].astype(int)
trainX = scale(trainX, np.min(trainX, axis=0), np.max(trainX, axis=0))
# There are 6 class-labels 1,2,3,4,5,7
train_Y[train_Y == 7] = 6
trainY = np.zeros((train_Y.shape[0], NUM_CLASSES))
trainY[np.arange(train_Y.shape[0]), train_Y-1] = 1 #one matrix
# experiment with small datasets
trainX = trainX[:1000]
trainY = trainY[:1000]
n = trainX.shape[0]
# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])
# Build the graph for the deep net
W1, b1 = tfvariables(NUM_FEATURES, num_neurons)
W2, b2 = tfvariables(num_neurons, NUM_CLASSES)
logits_1 = tf.matmul(x, W1) + b1
perceptron_layer = tf.nn.sigmoid(logits_1)
logits_2 = tf.matmul(perceptron_layer, W2) + b2
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits_2)
# Standard Loss
loss = tf.reduce_mean(cross_entropy)
# Loss function with L2 Regularization with beta
regularizers = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)
loss = tf.reduce_mean(loss + beta * regularizers)
# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(cross_entropy)
correct_prediction = tf.cast(tf.equal(tf.argmax(logits_2, 1), tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
sess.run(tf.global_variables_initializer())
train_acc = []
train_loss = []
for i in range(epochs):
train_op.run(feed_dict={x: trainX, y_: trainY})
train_acc.append(accuracy.eval(feed_dict={x: trainX, y_: trainY}))
train_loss.append(loss.eval(feed_dict={x: trainX, y_: trainY}))
if i % 500 == 0:
print('iter %d: accuracy %g loss %g'%(i, train_acc[i], train_loss[i]))
# plot learning curves
plt.figure(1)
plt.plot(range(epochs), train_acc)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train accuracy')
# plot learning curves
plt.figure(1)
plt.plot(range(epochs), train_loss)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train loss')
plt.show()
plt.show()
You can utitilize Tensorboard to visualize the graph you created. Basically, you have to follow a few steps to do this:
declare a writer as writer = tf.summary.FileWriter('PATH/TO/A/LOGDIR')
add the graph to the writer with writer.add_graph(sess.graph) with sess being your current tf.Session() in which you execute the graph
possibly you have to use writer.flush() to write it to disk immediately
Note that you have to add these lines AFTER building your graph.
You can view the graph by executing this command in your shell:
tensorboard --logdir=PATH/TO/A/LOGDIR
Then you are presented an address (usually something like localhost:6006) on which you can view the graph with your browser (Chrome and Firefox are guaranteed to work).
Tensorboard (in TensorFlow) is useful tool.
Use tf.summary.FileWriter for writing the graph into a folder and run tensorboard from the corresponding directory.
Check the following links:
https://www.tensorflow.org/guide/graphs
https://www.tensorflow.org/guide/summaries_and_tensorboard
I am making a simple generative adverserial network on mnist dataset.
This is my implementation :
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)
def noise(batch_size):
return np.random.uniform(-1, 1, (batch_size, 100))
learning_rate = 0.001
batch_size = 128
input = tf.placeholder('float', [None, 100])
real_data = tf.placeholder('float', [None, 784])
def generator(x):
weights = {
'hl1' : tf.Variable(tf.random_normal([100, 200])),
'ol' : tf.Variable(tf.random_normal([200, 784]))
}
biases = {
'hl1' : tf.Variable(tf.random_normal([200])),
'ol' : tf.Variable(tf.random_normal([784]))
}
hl1 = tf.add(tf.matmul(x, weights['hl1']), biases['hl1'])
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl1, weights['ol']), biases['ol']))
return ol
def discriminator(x):
weights = {
'hl1' : tf.Variable(tf.random_normal([784, 200])),
'ol' : tf.Variable(tf.random_normal([200, 1]))
}
biases = {
'hl1' : tf.Variable(tf.random_normal([200])),
'ol' : tf.Variable(tf.random_normal([1]))
}
hl1 = tf.add(tf.matmul(x, weights['hl1']), biases['hl1'])
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl1, weights['ol']), biases['ol']))
return ol
with tf.variable_scope("G"):
G = generator(input)
with tf.variable_scope("D"):
D_real = discriminator(real_data)
with tf.variable_scope("D", reuse = True):
D_gen = discriminator(G)
generator_parameters = [x for x in tf.trainable_variables() if x.name.startswith('G/')]
discriminator_parameters = [x for x in tf.trainable_variables() if x.name.startswith('D/')]
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_gen, labels=tf.ones_like(D_gen)))
D_real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real, labels=tf.ones_like(D_real)))
D_fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_gen, labels=tf.zeros_like(D_gen)))
D_total_loss = tf.add(D_fake_loss, D_real_loss)
G_train = tf.train.AdamOptimizer(learning_rate).minimize(G_loss,var_list=generator_parameters)
D_train = tf.train.AdamOptimizer(learning_rate).minimize(D_total_loss,var_list=discriminator_parameters)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
loss_g_function = []
loss_d_function = []
for epoch in range(200):
for iteratiion in range(int(len(mnist.train.images)/batch_size)):
real_batch, _ = mnist.train.next_batch(batch_size)
_, d_err = sess.run([D_train, D_total_loss], feed_dict = {real_data : real_batch, input : noise(batch_size)})
_, g_err = sess.run([G_train, G_loss], feed_dict = {input : noise(batch_size)})
print("Epoch = ", epoch)
print("D_loss = ", d_err)
print("G_loss = ", g_err)
loss_g_function.append(g_err)
loss_d_function.append(d_err)
# Visualizing
import matplotlib.pyplot as plt
test_noise = noise(1)
plt.subplot(2, 2, 1)
plt.plot(test_noise[0])
plt.title("Noise")
plt.subplot(2, 2, 2)
plt.imshow(np.reshape(sess.run(G, feed_dict = {input : test_noise})[0], [28, 28]))
plt.title("Generated Image")
plt.subplot(2, 2, 3)
plt.plot(loss_d_function, 'r')
plt.xlabel("Epochs")
plt.ylabel("Discriminator Loss")
plt.title("D-Loss")
plt.subplot(2, 2, 4)
plt.plot(loss_g_function, 'b')
plt.xlabel("Epochs")
plt.ylabel("Generator Loss")
plt.title("G_Loss")
plt.show()
I have tried lr = 0.001 lr = 0.0001 and lr = 0.00003.
These are my results : https://imgur.com/a/6KUnO1H
What could be the reason? My weights initialization are randomly drawn from the normal distribution. Also, please check the loss function, are they correct?
Issues:
It has just a single layer:
hl1 = tf.add(tf.matmul(x, weights['hl1']), biases['hl1'])
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl1, weights['ol']), biases['ol']))
Above network defined for both discriminator and generator has no activation defined for the first layer. This literally means the network is just one layer: y = act(w2(x*w1+b1)+b2) = act(x*w+b)
Sigmoid applied twice:
ol = tf.nn.sigmoid(tf.add(tf.matmul(hl1, weights['ol']) ...
D_real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(...)
As mentioned in the comments, activation is applied twice.
Weight initializations:
tf.Variable(tf.random_normal([784, 200]))
In case of sigmoid activation if the weights are large, the gradients will be small, which means the weights are effectively not changing values. (Bigger w + very small delta(w)). May be the reason why when i run the above code, the loss seems to not change much. Its better to adopt industry best practices and use something like: xavier_initializer().
Dynamic range inconsistencies:
The input to the generator is in the dynamic range of [-1, 1], it gets multipled by a weight of [-1, 1] but gets outputed to a [ 0 1] range. There is nothing wrong with this, a bias can learn to map the output range. But its better to use a activation layer, that outputs [-1, 1] like a tanh, so the network can learn faster. If tanh is used as activation for the generator, then the input images feed to the descriminator need to be scaled to [-1 1] for training consistency.
With the above changes, you can get something similar to:
The above network is a really simple one and the output quality is not great. I have deliberately not changed the complexity to find out what kind of output one can get out of a simple network.
You can build a bigger network (that includes CNN) and as well try out recent GAN models to obtain better quality results.
Code for reproducing the above can be obtained from here.
I have been trying to use an LSTM for regression in TensorFlow, but it doesn't fit the data. I have successfully fit the same data in Keras (with the same size network). My code for trying to overfit a sine wave is below:
import tensorflow as tf
import numpy as np
yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])[...,None]
yt = yt[-xt.shape[0]:]
g = tf.Graph()
with g.as_default():
x = tf.constant(xt, dtype=tf.float32)
y = tf.constant(yt, dtype=tf.float32)
lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
outputs, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
pred = tf.layers.dense(outputs[:,-1], 1)
loss = tf.reduce_mean(tf.square(pred-y))
train_op = tf.train.AdamOptimizer().minimize(loss)
init = tf.global_variables_initializer()
sess = tf.InteractiveSession(graph=g)
sess.run(init)
for i in range(200):
_, l = sess.run([train_op, loss])
print(l)
This results in a MSE of 0.436067 (while Keras got to 0.0022 after 50 epochs), and the predictions range from -0.1860 to -0.1798. What am I doing wrong here?
Edit:
When I change my loss function to the following, the model fits properly:
def pinball(y_true, y_pred):
tau = np.arange(1,100).reshape(1,-1)/100
pin = tf.reduce_mean(tf.maximum(y_true[:,None] - y_pred, 0) * tau +
tf.maximum(y_pred - y_true[:,None], 0) * (1 - tau))
return pin
I also change the assignments of pred and loss to
pred = tf.layers.dense(outputs[:,-1], 99)
loss = pinball(y, pred)
This results in a decrease of loss from 0.3 to 0.003 as it trains, and seems to properly fit the data.
Looks like a shape/broadcasting issue. Here's a working version:
import tensorflow as tf
import numpy as np
yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])
yt = yt[-xt.shape[0]:]
g = tf.Graph()
with g.as_default():
x = tf.constant(xt, dtype=tf.float32)
y = tf.constant(yt, dtype=tf.float32)
lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
outputs, state = tf.nn.dynamic_rnn(lstm, x[None, ...], dtype=tf.float32)
pred = tf.squeeze(tf.layers.dense(outputs, 1), axis=[0, 2])
loss = tf.reduce_mean(tf.square(pred-y))
train_op = tf.train.AdamOptimizer().minimize(loss)
init = tf.global_variables_initializer()
sess = tf.InteractiveSession(graph=g)
sess.run(init)
for i in range(200):
_, l = sess.run([train_op, loss])
print(l)
x gets a batch dimension of 1 before going into dynamic_rnn, since with time_major=False the first dimension is expected to be a batch dimension. It's important that the last dimension of the output of tf.layers.dense get squeezed off so that it doesn't broadcast with y (TensorShape([256, 1]) and TensorShape([256]) broadcast to TensorShape([256, 256])). With those fixes it converges:
5.78507e-05
You are not passing-on the state from one call of dynamic_rnn to next. That's the problem for sure.
Also, why take only last item of the output through the dense layer and onward?
Could someone help or guide me through what I should do better in order for this to work?
I changed the number of inputs to 2 and generated some random data, "x1" and "x2" (one number to be added to another). The idea is to use variables "add" and "mul" as the real output and base the cost (variable "Y") off of that, but I'm having trouble manipulating the data so it inputs properly.
I tried to make another variable with
x = tf.Variable([100 * np.random.random_sample([100]), 100 * np.random.random_sample([100]))
and a few other alternative ways, but that caused errors. Also, if there's anything else wrong in my code, please critique it! Anything helps.
Thank you.
'''
A Recurrent Neural Network implementation example using TensorFlow Library.
Author: *********
'''
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
# Parameters
training_iters = 1000
n_epochs = 1000
batch_size = 128
display_step = 100
learning_rate = 0.001
n_observations = 100
n_input = 2 # Input data (Num + Num)
n_steps = 28 # timesteps
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_classes = 1 # Output
X = tf.placeholder("float", [None, n_input])
X1 = tf.placeholder(tf.float32)
X2 = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# Random input data
x1 = 100 * np.random.random_sample([100,])
x2 = 100 * np.random.random_sample([100,])
add = tf.add(x1, x2)
mul = tf.mul(X1, X2)
weights = {
'hidden1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
#'hidden2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'hidden1': tf.Variable(tf.random_normal([n_hidden_1])),
#'hidden2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(_X1, _weights, _biases):
# Layer 1.1
layer_1 = tf.add(tf.matmul(_X1, weights['hidden1']), biases['hidden1'])
layer_1 = tf.nn.relu(layer_1)
# Layer 1.2
# layer_1_2 = tf.add(tf.matmul(_X2, weights['hidden2']), biases['hidden2'])
# layer_1_2 = tf.nn.relu(layer_1_2)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])
output = tf.nn.relu(layer_2)
return output
pred = RNN(X1, weights, biases)
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (n_observations - 1)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(Y,1))
init = tf.initialize_all_variables()
# initData = tf.initialize_variables(x1.all(), x2.all())
with tf.Session() as sess:
# Here we tell tensorflow that we want to initialize all
# the variables in the graph so we can use them
sess.run(init)
# Fit all training data
prev_training_cost = 0.0
for epoch_i in range(n_epochs) :
for (_x1) in x1:
for (_x2) in x2:
print("Input 1:")
print(_x1)
print("Input 2:")
print(_x2)
print("Add function: ")
print(sess.run(add, feed_dict={X1: x1, X2: x2}))
y = sess.run(add, feed_dict={X1: x1, X2: x2})
print(y)
sess.run(optimizer, feed_dict={X: x, Y: y})
training_cost = sess.run(
cost, feed_dict={X: xs, Y: ys})
print(training_cost)
if epoch_i % 20 == 0:
ax.plot(X1, X2, pred.eval(
feed_dict={X1: x1, X2: x2}, session=sess),
'k', alpha=epoch_i / n_epochs)
fig.show()
plt.draw()
# Allow the training to quit if we've reached a minimum
if np.abs(prev_training_cost - training_cost) < 0.000001:
break
prev_training_cost = training_cost
So are you training a feed forward network or a recurrent neural network?
The code you write within RNN() remind me of a simple neural network (feedforward network). Yet your tittle says you are working on RNN's
You might find this implementation interesting. Like you, it generates vectors of integers and uses an RNN to do the addition