I have developed deep sparse auto encoders cost function with Tensorflow and I have download the autoencoder structure from the following link:
https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/autoencoder.py .
I have the following cost function in simple AutoEncoder:
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
I have developed sparsity in AutoEncoders by using the following mathematical functions:
I have developed these mathematical function with the following code:
learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 10
lambda_ = 3e-3
beta = 3
Nv = batch_size
def KL_divergence(x1, y1):
return x1* tf.log(x1 / y1) + (1 - x1) * tf.log((1 - x1) / (1 - y1))
#Weights
W1 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if
'encoder_' in var.name)
W2 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if
'decoder_' in var.name)
## Sparsity
rho_hat = (1+tf.reduce_mean(encoder(X),axis=0))/2
rho = np.tile(sparsity_param, n_output)
cost = tf.reduce_sum(tf.pow(y_true - y_pred, 2))/(2*Nv) + (lambda_/2)*(W1+W2)
+ beta * tf.reduce_sum(KL_divergence(rho,rho_hat))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
the name of paper that i have used the mathematical functions:
"Visualization of Driving Behavior Based on Hidden Feature Extraction by Using Deep Learning"
Thanks
Hi I have developed the final version of Deep sparse AutoEncoder with the following python code:
it is ok and ready for using:
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def next_batch(num, data, labels):
'''
Return a total of `num` random samples and labels.
'''
idx = np.arange(0 , len(data))
np.random.shuffle(idx)
idx = idx[:num]
data_shuffle = [data[ i] for i in idx]
labels_shuffle = [data[ i] for i in idx]
return np.asarray(data_shuffle), np.asarray(labels_shuffle)
# Parameters
learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 10
lambda_ = 3e-3
beta = 3
# tf Graph input (only pictures)
X = tf.placeholder("float", [None, n_input])
# Network Parameters
n_input = 60 # number of input layers
n_hidden_1 = 30 # 1st layer num features
n_hidden_2 = 10 # 2nd layer num features
n_output = 3 # output layer num features
sparsity_param = 0.05
weights = {
'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_output])),
'decoder_h1': tf.Variable(tf.random_normal([n_output, n_hidden_2])),
'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
biases = {
'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
'encoder_b3': tf.Variable(tf.random_normal([n_output])),
'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
'decoder_b3': tf.Variable(tf.random_normal([n_input])),
}
# Building the encoder
def encoder(x):
# Encoder Hidden layer with sigmoid activation #1
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
biases['encoder_b1']))
# Decoder Hidden layer with sigmoid activation #2
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
biases['encoder_b2']))
# Decoder Hidden layer with sigmoid activation #3
layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
biases['encoder_b3']))
return layer_3
# Building the decoder
def decoder(x):
# Encoder Hidden layer with sigmoid activation #1
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
biases['decoder_b1']))
# Decoder Hidden layer with sigmoid activation #2
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
biases['decoder_b2']))
# Decoder Hidden layer with sigmoid activation #3
layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
biases['decoder_b3']))
return layer_3
def KL_divergence(x1, y1):
return x1* tf.log(x1 / y1) + (1 - x1) * tf.log((1 - x1) / (1 - y1))
# Construct model
Nv = batch_size
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
#Weights
W1 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if 'encoder_' in var.name)
W2 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if 'decoder_' in var.name)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
## Sparsity
rho_hat = tf.reduce_mean(encoder(X),axis=0)
#rho_hat = (1+tf.reduce_mean(encoder(X),axis=0))/2
rho = np.tile(sparsity_param, n_output)
# Define loss and optimizer, minimize the squared error
size = tf.shape(tf.pow(y_true - y_pred, 2))
cost = tf.reduce_sum(tf.pow(y_true - y_pred, 2))/(2*Nv) + (lambda_/2)*(W1+W2) + beta * tf.reduce_sum(KL_divergence(rho,rho_hat))
#(lambda_/2)*(tf.reduce_sum(W1**2) + tf.reduce_sum(W1**2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
total_batch = int(len(data)/batch_size)
# Training cycle
for epoch in range(training_epochs):
# Loop over all batches
for i in range(total_batch):
batch_xs, batch_ys = next_batch(batch_size,data[:,0:60], data[:,60:] )
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1),
"cost=", "{:.9f}".format(c))
print("Optimization Finished!")
tr, label = next_batch(200000,data[:,0:60], data[:,60:])
encode_decode = sess.run(
encoder_op, feed_dict={X: tr})
Here is the code for a 3 layer sparse autoencoder, implemented in Tensorflow 2.1.
The input and the output, in this case, are 1D arrays (496).
I would like to give credit to Dr. Zhiwei Lin at Ulster University for providing the initial implementation on github
https://github.com/zhiweiuu/sparse-autoencoder-tensorflow/blob/master/SparseAutoEncoder.py
I have wrapped it in a class, where each layer is now an instance variable. This makes it easier to get different outputs for each layer.
You will notice that I have used only the first layer output for the sparsity constraint.
This architecture is similar to the one used in this article: https://pubmed.ncbi.nlm.nih.gov/29302382/
My implementation is simple and the training and it can be improved :)
to train the model
model = my_model() then you loop for i in range(1000): model.network_learn(X,Y)
class my_model:
def __init__(self):
xavier=tf.keras.initializers.GlorotUniform()
self.l1 = tf.keras.layers.Dense(496,kernel_initializer=xavier,activation=tf.nn.sigmoid,input_shape=(496,))
self.l2 = tf.keras.layers.Dense(496,kernel_initializer=xavier,activation=tf.nn.sigmoid)
self.l3 = tf.keras.layers.Dense(496,kernel_initializer=xavier,activation=tf.nn.sigmoid)
self.train_op = tf.keras.optimizers.SGD(learning_rate=0.01)
self.rho = 0.05
self.alpha= 0.001
self.beta = 4
def kl_divergence(self, rho, rho_hat):
return rho * tf.math.log(rho) - rho * tf.math.log(rho_hat) + (1 - rho) * tf.math.log(1 - rho) - (1 - rho) * tf.math.log(1 - rho_hat)
def run(self,X):
out1=self.l1(X)
out2=self.l2(out1)
out3 = self.l3(out2)
return out3
def get_loss(self,X,Y):
rho_hat = tf.reduce_mean(self.l1(X),axis=0)
kl = self.kl_divergence(self.rho,rho_hat)
out1=self.l1(X)
out2=self.l2(out1)
X_prime=self.l3(out2)
diff = X-X_prime
W1 = self.l1.variables[0]
W2 = self.l2.variables[0]
W3 = self.l3.variables[0]
cost= 0.5*tf.reduce_mean(tf.reduce_sum(diff**2,axis=1)) \
+0.5*self.alpha*(tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)) \
+self.beta*tf.reduce_sum(kl)
return cost
return tf.math.square(boom2-Y)
def get_grad(self,X,Y):
with tf.GradientTape() as tape:
tape.watch(self.l1.variables)
tape.watch(self.l2.variables)
tape.watch(self.l3.variables)
L = self.get_loss(X,Y)
g = tape.gradient(L, [self.l1.variables[0],self.l1.variables[1],self.l2.variables[0],self.l2.variables[1],self.l3.variables[0],self.l3.variables[1]])
return g
def network_learn(self,X,Y):
g = self.get_grad(X,Y)
self.train_op.apply_gradients(zip(g, [self.l1.variables[0],self.l1.variables[1],self.l2.variables[0],self.l2.variables[1],self.l3.variables[0],self.l3.variables[1]]))
Here is how you would train a network like this
Related
I want to train an RNN model to connect an article and an image. The input and the output are two arrays.
I define the parameters of RNN as follow:
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Network Parameters
n_input = 128
n_steps = 168 # timesteps
n_hidden = 512 # hidden layer num of features
output = 200
the image is 128*168 and the article is 200
cost = tf.reduce_mean(pow(pred-y,2)/2)
#cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
For the end result, I want to train a network to transform an image to an article. However, when I try to train the model, the cost is returned as NaN.
Here is the code:
# coding=utf-8
from __future__ import print_function
from tensorflow.contrib import rnn
import scipy.io as scio
import tensorflow as tf
import numpy as np
import os
TextPath = 'F://matlab_code//readtxt//ImageTextVector.mat';
ImageDirPath = 'F://matlab_code//CVPR10-LLC//features//1';
Text = scio.loadmat(TextPath)
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Network Parameters
n_input = 128 #
n_steps = 168 # timesteps
n_hidden = 512 # hidden layer num of features
output = 200 #
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, output])
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, output]))
}
biases = {
'out': tf.Variable(tf.random_normal([output]))
}
def RNN(x, weights, biases):
lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(pow(pred-y,2)/2)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
train_count=0;
with tf.Session() as sess:
sess.run(init)
step = 0
while step* batch_size < training_iters:
iter = step*batch_size
batch_x = []
batch_y = []
while iter < (step+1)*batch_size:
ImagePath = ImageDirPath + '//' + Text['X'][train_count][0][0] +'.mat'
if os.path.exists(ImagePath):
batch_xx=[]
batch_yy=[]
Image = scio.loadmat(ImagePath)
i=0
while i<21504 :
batch_xx.append(Image['fea'][i][0])
i=i+1
batch_yy = Text['X'][train_count][1][0]
batch_xx = np.array(batch_xx)
batch_x=np.hstack((batch_x,batch_xx))
batch_y=np.hstack((batch_y,batch_yy))
iter = iter+1
train_count=train_count+1
batch_x = batch_x.reshape((batch_size,n_steps, n_input))
batch_y = batch_y.reshape((batch_size,output))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print("Iter " + str(step* batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) )
step += 1
print("Optimization Finished!")
when you pass tensor including nan values to lstm, the value in the cell of lstm's will be "forced" to nan because the numerical operation between number and nan. Check whether your data have nan value or just use numpy.nan_to_num to fill your nan data.
python 3.5.2, tensorflow 1.0.0
Somewhat new in programming with autoencoders. I am trying to implement a simple network to get familiarize from here. I have used the same input data in which a CNN is able to classify perfectly with accuracy of 98%. My data have 2000 row data and each row is a signal. I am trying with 3 stacked layers of auto encoders with 512 256 and 64 nodes.
class dimensions:
input_width, input_height = 1,1024
BATCH_SIZE = 50
layer = [input_width*input_height, 512, 256, 64]
learningrate = 0.001
def myencoder(x,corrupt_prob,dimensions):
current_input = corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)
encoder = []
for layer_i, n_output in enumerate(dimensions.layer[1:]):
n_input = int(current_input.get_shape()[1])
W = tf.Variable(
tf.random_uniform([n_input, n_output],
-1.0 / math.sqrt(n_input),
1.0 / math.sqrt(n_input)))
b = tf.Variable(tf.zeros([n_output]))
encoder.append(W)
output = tf.nn.tanh(tf.matmul(current_input, W) + b)
current_input = output
z = current_input
encoder.reverse()
# Build the decoder using the same weights
for layer_i, n_output in enumerate(model.layer[:-1][::-1]):
W = tf.transpose(encoder[layer_i])
b = tf.Variable(tf.zeros([n_output]))
output = tf.nn.tanh(tf.matmul(current_input, W) + b)
current_input = output
# now have the reconstruction through the network
y = current_input
# cost function measures pixel-wise difference
cost = tf.sqrt(tf.reduce_mean(tf.square(y - x)))
return z,y,cost
sess = tf.Session()
model = dimensions()
data_train,data_test,label_train,label_test = load_data(Datainfo,folder)
x = tf.placeholder(tf.float32,[model.BATCH_SIZE,model.input_height*model.input_width])
corrupt_prob = tf.placeholder(tf.float32,[1])
z,y,cost = myencoder(x,corrupt_prob,dimensions)
train_step = tf.train.AdamOptimizer(model.learningrate).minimize(cost)
lossfun = np.zeros(STEPS)
sess.run(tf.global_variables_initializer())
for i in range(STEPS):
train_data = batchdata(data_train, model.BATCH_SIZE)
epoch_loss = 0
for j in range(model.BATCH_SIZE):
sess.run(train_step,feed_dict={x:train_data,corrupt_prob:[1.0]})
c = sess.run(cost, feed_dict={x: train_data, corrupt_prob: [1.0]})
epoch_loss += c
lossfun[i] = epoch_loss
print('Epoch', i, 'completed out of', STEPS, 'loss:', epoch_loss)
my loss function appears like this
xaxis - no of iterations, y axis - loss
the loss doesn't decrease and the network doesn't learn anything.
any help appreciated !
In the function myencoder, the weight variables W and b are initialized in every training step.
I am trying to reproduce a deep learning regression result in Tensorflow. If I train a neural network with the MLPRegressor class from sklearn I get very nice results of 98% validation.
The MLPRegressor:
http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
I am trying to reproduce the model in Tensorflow. By copying the default values of the MLPRegressor class in a Tensorflow model. However I cannot get the same result. I only get 75% most of the time.
My TF model:
tf.reset_default_graph()
graph = tf.Graph()
n_input = 3 # n variables
n_hidden_1 = 100
n_hidden_2 = 1
n_output = 1
beta = 0.001
learning_rate = 0.001
with graph.as_default():
tf_train_feat = tf.placeholder(tf.float32, shape=(None, n_input))
tf_train_label = tf.placeholder(tf.float32, shape=(None))
tf_test_feat = tf.constant(test_feat, tf.float32)
"""
Weights and biases. The weights matix' columns will be the output vector.
* ndarray([rows, columns])
* ndarray([in, out])
tf.placeholder(None) and tf.placeholder([None, 3]) means that the row's size is not set. In the second
placeholder the columns are prefixed at 3.
"""
W = {
"layer_1": tf.Variable(tf.truncated_normal([n_input, n_hidden_1])),
"layer_2": tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
"layer_3": tf.Variable(tf.truncated_normal([n_hidden_2, n_output])),
}
b = {
"layer_1": tf.Variable(tf.zeros([n_hidden_1])),
"layer_2": tf.Variable(tf.zeros([n_hidden_2])),
}
def computation(X):
layer_1 = tf.nn.relu(tf.matmul(X, W["layer_1"]) + b["layer_1"])
layer_2 = tf.nn.relu(tf.matmul(layer_1, W["layer_2"]) + b["layer_2"])
return layer_2
tf_prediction = computation(tf_train_feat)
tf_test_prediction = computation(tf_test_feat)
tf_loss = tf.reduce_mean(tf.pow(tf_train_label - tf_prediction, 2))
tf_loss = tf.reduce_mean( tf_loss + beta * tf.nn.l2_loss(W["layer_2"]) )
tf_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
#tf_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_loss)
init = tf.global_variables_initializer()
My TF session:
def accuracy(y_pred, y):
a = 0
for i in range(y.shape[0]):
a += abs(1 - y_pred[i][0] / y[i])
return round((1 - a / y.shape[0]) * 100, 3)
def accuracy_tensor(y_pred, y):
a = 0
for i in range(y.shape[0]):
a += abs(1 - y_pred[i][0] / y[i])
return round((1 - a / y.shape[0]) * 100, 3)
# Shuffles two arrays.
def shuffle_in_unison(a, b):
assert len(a) == len(b)
shuffled_a = np.empty(a.shape, dtype=a.dtype)
shuffled_b = np.empty(b.shape, dtype=b.dtype)
permutation = np.random.permutation(len(a))
for old_index, new_index in enumerate(permutation):
shuffled_a[new_index] = a[old_index]
shuffled_b[new_index] = b[old_index]
return shuffled_a, shuffled_b
train_epoch = int(5e4)
batch = int(200)
n_batch = int(X.shape[0] // batch)
prev_acc = 0
stable_count = 0
session = tf.InteractiveSession(graph=graph)
session.run(init)
print("Initialized.\n No. of epochs: %d.\n No. of batches: %d." % (train_epoch, n_batch))
for epoch in range(train_epoch):
offset = (epoch * n_batch) % (Y.shape[0] - n_batch)
for i in range(n_batch):
x = X[offset:(offset + n_batch)]
y = Y[offset:(offset + n_batch)]
x, y = shuffle_in_unison(x, y)
feed_dict = {tf_train_feat: x, tf_train_label: y}
_, l, pred, pred_label = session.run([tf_optimizer, tf_loss, tf_prediction, tf_train_label], feed_dict=feed_dict)
if epoch % 1 == 0:
print("Epoch: %d. Batch' loss: %f" %(epoch, l))
test_pred = tf_test_prediction.eval(session=session)
acc_test = accuracy(test_pred, test_label)
acc_train = accuracy_tensor(pred, pred_label)
print("Accuracy train set %s%%" % acc_train)
print("Accuracy test set: %s%%" % acc_test)
Am I missing something in the Tensorflow code? Thanks!
Unless you have a very good reason to not use them, regression should have linear output units. I ran into a similar problem a while back and ended up using linear outputs and linear hidden units which seemed to mirror the mlpregressor in my case.
There is a great section in Goodfellow's Deep Learning Book in chapter 6, starting at page 181, that goes over the activation functions.
At the very least try this for your output layer
layer_2 = tf.matmul(layer_1, W["layer_2"]) + b["layer_2"]
I'm having a hard time trying to set up a multilayer perceptron neural network to predict the next value of a time-series using Tensorflow.
I read the time-series from a file, split it into three arrays and use those arrays to train, test and validate the network. Unfortunately, my network answers 0.9999 to every input I give to it.
The image below shows the values I expect my network to outcome, note that they range from 2.8 to 4.2
Now, these are the values my network predicts. Though they seem all the same, they're actually 0.9999... (and some difference in the 9th decimal place).
import csv
import numpy as np
from statsmodels.tsa.tsatools import lagmat
import tensorflow as tf
# Data split (values represent percentage)
perc_train = 0.5
perc_test = 0.4
perc_eval = 0.1
# Parameters
learning_rate = 10 ** -3
min_step_size_train = 10 ** -5
training_epochs = 250
display_step = 1
# Network Parameters
n_input = 15
n_classes = 1
n_hidden = (n_input + n_classes) / 2
def get_nn_sets(pmX, pmY):
'''
Splits data into three subsets
'''
trainningIndex = int(len(pmX) * perc_train)
validationIndex = int(len(pmX) * perc_test) + trainningIndex
pmXFit = pmX[:trainningIndex, :]
pmYFit = pmY[:trainningIndex]
pmXTest = pmX[trainningIndex:validationIndex, :]
pmYTest = pmY[trainningIndex:validationIndex]
pmxEvaluate = pmX[validationIndex:, :]
pmYEvaluate = pmY[validationIndex:]
return pmXFit, pmYFit, pmXTest, pmYTest, pmxEvaluate, pmYEvaluate
def read_dollar_file(clip_first = 4000):
'''
Reads the CSV file containing the dollar value for Brazilian real during the years
-----
RETURNS:
A matrix with the file contents
'''
str_vals = []
with open('dolar.csv', 'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',')
for row in spamreader:
# retrieving the first column of the file (the dollar value)
str_vals.append(row[1])
# removing title
str_vals = str_vals[1:]
# removing the empty strings (sunday and holidays have no values)
y = filter(None, str_vals)
# converting from string to float values
y = np.array(y).astype(np.float)
# checking if initial elements should be discarded
if (clip_first > 0):
y = y[clip_first:]
return y
# Create model
def get_multilayer_perceptron(x):
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64))
}
# Hidden layer with relu activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with tanh activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
out_layer = tf.nn.tanh(out_layer)
return out_layer
def run_mlp(inp, outp):
pmXFit, pmYFit, pmXTest, pmYTest, pmXEvaluate, pmYEvaluate = get_nn_sets(inp, outp)
# tf Graph input
x = tf.placeholder("float64", [None, n_input])
y = tf.placeholder("float64", [None, n_classes])
# Construct model
pred = get_multilayer_perceptron(x)
# Define loss and optimizer
cost = tf.nn.l2_loss(tf.sub(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
last_cost = min_step_size_train + 1
for epoch in range(training_epochs):
# Trainning data
for i in range(len(pmXFit)):
batch_x = np.reshape(pmXFit[i,:], (1, n_input))
batch_y = np.reshape(pmYFit[i], (1, n_classes))
# Run optimization
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
# Calculating data error
c = 0.0
for i in range(len(pmXTest)):
batch_x = np.reshape(pmXTest[i,:], (1, n_input))
batch_y = np.reshape(pmYTest[i], (1, n_classes))
# Run Cost function
c += sess.run(cost, feed_dict={x: batch_x, y: batch_y})
c /= len(pmXTest)
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.30f}".format(c))
if abs(c - last_cost) < min_step_size_train:
break
last_cost = c
nn_predictions = np.array([])
for i in range(len(pmXEvaluate)):
batch_x = np.reshape(pmXEvaluate[i,:], (1, n_input))
nn_predictions = np.append(nn_predictions, sess.run(pred, feed_dict={x: batch_x})[0])
print("Optimization Finished!")
nn_predictions.flatten()
return [pmYEvaluate, nn_predictions]
inp = lagmat(read_dollar_file(), n_input, trim='both')
outp = inp[1:, 0]
inp = inp[:-1]
real_value, predicted_value = run_mlp(inp, outp)
I also tried different cost functions and it didn't work. I know I may be missing something really stupid, so I really appreciate your help.
Thanks.
From your code:
out_layer = tf.nn.tanh(out_layer)
tanh can only output values between (-1.0, 1.0), remove this line will make it do better.
Could someone help or guide me through what I should do better in order for this to work?
I changed the number of inputs to 2 and generated some random data, "x1" and "x2" (one number to be added to another). The idea is to use variables "add" and "mul" as the real output and base the cost (variable "Y") off of that, but I'm having trouble manipulating the data so it inputs properly.
I tried to make another variable with
x = tf.Variable([100 * np.random.random_sample([100]), 100 * np.random.random_sample([100]))
and a few other alternative ways, but that caused errors. Also, if there's anything else wrong in my code, please critique it! Anything helps.
Thank you.
'''
A Recurrent Neural Network implementation example using TensorFlow Library.
Author: *********
'''
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
# Parameters
training_iters = 1000
n_epochs = 1000
batch_size = 128
display_step = 100
learning_rate = 0.001
n_observations = 100
n_input = 2 # Input data (Num + Num)
n_steps = 28 # timesteps
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_classes = 1 # Output
X = tf.placeholder("float", [None, n_input])
X1 = tf.placeholder(tf.float32)
X2 = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# Random input data
x1 = 100 * np.random.random_sample([100,])
x2 = 100 * np.random.random_sample([100,])
add = tf.add(x1, x2)
mul = tf.mul(X1, X2)
weights = {
'hidden1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
#'hidden2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'hidden1': tf.Variable(tf.random_normal([n_hidden_1])),
#'hidden2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(_X1, _weights, _biases):
# Layer 1.1
layer_1 = tf.add(tf.matmul(_X1, weights['hidden1']), biases['hidden1'])
layer_1 = tf.nn.relu(layer_1)
# Layer 1.2
# layer_1_2 = tf.add(tf.matmul(_X2, weights['hidden2']), biases['hidden2'])
# layer_1_2 = tf.nn.relu(layer_1_2)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])
output = tf.nn.relu(layer_2)
return output
pred = RNN(X1, weights, biases)
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (n_observations - 1)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(Y,1))
init = tf.initialize_all_variables()
# initData = tf.initialize_variables(x1.all(), x2.all())
with tf.Session() as sess:
# Here we tell tensorflow that we want to initialize all
# the variables in the graph so we can use them
sess.run(init)
# Fit all training data
prev_training_cost = 0.0
for epoch_i in range(n_epochs) :
for (_x1) in x1:
for (_x2) in x2:
print("Input 1:")
print(_x1)
print("Input 2:")
print(_x2)
print("Add function: ")
print(sess.run(add, feed_dict={X1: x1, X2: x2}))
y = sess.run(add, feed_dict={X1: x1, X2: x2})
print(y)
sess.run(optimizer, feed_dict={X: x, Y: y})
training_cost = sess.run(
cost, feed_dict={X: xs, Y: ys})
print(training_cost)
if epoch_i % 20 == 0:
ax.plot(X1, X2, pred.eval(
feed_dict={X1: x1, X2: x2}, session=sess),
'k', alpha=epoch_i / n_epochs)
fig.show()
plt.draw()
# Allow the training to quit if we've reached a minimum
if np.abs(prev_training_cost - training_cost) < 0.000001:
break
prev_training_cost = training_cost
So are you training a feed forward network or a recurrent neural network?
The code you write within RNN() remind me of a simple neural network (feedforward network). Yet your tittle says you are working on RNN's
You might find this implementation interesting. Like you, it generates vectors of integers and uses an RNN to do the addition