I'm having a hard time trying to set up a multilayer perceptron neural network to predict the next value of a time-series using Tensorflow.
I read the time-series from a file, split it into three arrays and use those arrays to train, test and validate the network. Unfortunately, my network answers 0.9999 to every input I give to it.
The image below shows the values I expect my network to outcome, note that they range from 2.8 to 4.2
Now, these are the values my network predicts. Though they seem all the same, they're actually 0.9999... (and some difference in the 9th decimal place).
import csv
import numpy as np
from statsmodels.tsa.tsatools import lagmat
import tensorflow as tf
# Data split (values represent percentage)
perc_train = 0.5
perc_test = 0.4
perc_eval = 0.1
# Parameters
learning_rate = 10 ** -3
min_step_size_train = 10 ** -5
training_epochs = 250
display_step = 1
# Network Parameters
n_input = 15
n_classes = 1
n_hidden = (n_input + n_classes) / 2
def get_nn_sets(pmX, pmY):
'''
Splits data into three subsets
'''
trainningIndex = int(len(pmX) * perc_train)
validationIndex = int(len(pmX) * perc_test) + trainningIndex
pmXFit = pmX[:trainningIndex, :]
pmYFit = pmY[:trainningIndex]
pmXTest = pmX[trainningIndex:validationIndex, :]
pmYTest = pmY[trainningIndex:validationIndex]
pmxEvaluate = pmX[validationIndex:, :]
pmYEvaluate = pmY[validationIndex:]
return pmXFit, pmYFit, pmXTest, pmYTest, pmxEvaluate, pmYEvaluate
def read_dollar_file(clip_first = 4000):
'''
Reads the CSV file containing the dollar value for Brazilian real during the years
-----
RETURNS:
A matrix with the file contents
'''
str_vals = []
with open('dolar.csv', 'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',')
for row in spamreader:
# retrieving the first column of the file (the dollar value)
str_vals.append(row[1])
# removing title
str_vals = str_vals[1:]
# removing the empty strings (sunday and holidays have no values)
y = filter(None, str_vals)
# converting from string to float values
y = np.array(y).astype(np.float)
# checking if initial elements should be discarded
if (clip_first > 0):
y = y[clip_first:]
return y
# Create model
def get_multilayer_perceptron(x):
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64))
}
# Hidden layer with relu activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with tanh activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
out_layer = tf.nn.tanh(out_layer)
return out_layer
def run_mlp(inp, outp):
pmXFit, pmYFit, pmXTest, pmYTest, pmXEvaluate, pmYEvaluate = get_nn_sets(inp, outp)
# tf Graph input
x = tf.placeholder("float64", [None, n_input])
y = tf.placeholder("float64", [None, n_classes])
# Construct model
pred = get_multilayer_perceptron(x)
# Define loss and optimizer
cost = tf.nn.l2_loss(tf.sub(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
last_cost = min_step_size_train + 1
for epoch in range(training_epochs):
# Trainning data
for i in range(len(pmXFit)):
batch_x = np.reshape(pmXFit[i,:], (1, n_input))
batch_y = np.reshape(pmYFit[i], (1, n_classes))
# Run optimization
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
# Calculating data error
c = 0.0
for i in range(len(pmXTest)):
batch_x = np.reshape(pmXTest[i,:], (1, n_input))
batch_y = np.reshape(pmYTest[i], (1, n_classes))
# Run Cost function
c += sess.run(cost, feed_dict={x: batch_x, y: batch_y})
c /= len(pmXTest)
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.30f}".format(c))
if abs(c - last_cost) < min_step_size_train:
break
last_cost = c
nn_predictions = np.array([])
for i in range(len(pmXEvaluate)):
batch_x = np.reshape(pmXEvaluate[i,:], (1, n_input))
nn_predictions = np.append(nn_predictions, sess.run(pred, feed_dict={x: batch_x})[0])
print("Optimization Finished!")
nn_predictions.flatten()
return [pmYEvaluate, nn_predictions]
inp = lagmat(read_dollar_file(), n_input, trim='both')
outp = inp[1:, 0]
inp = inp[:-1]
real_value, predicted_value = run_mlp(inp, outp)
I also tried different cost functions and it didn't work. I know I may be missing something really stupid, so I really appreciate your help.
Thanks.
From your code:
out_layer = tf.nn.tanh(out_layer)
tanh can only output values between (-1.0, 1.0), remove this line will make it do better.
Related
Im doing a tutorial on tensorflow and I have following task:
I need to create 10 logits in tensorflow with 2 hidden layers and calculate the loss of them using the functions
tf.nn.softmax()
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
My code looks like this:
import tensorflow as tf, numpy as np
trainx = np.array(trainx) / 300.0
trainy = np.array(trainy)
testx = np.array(testx) / 300.0
testy = np.array(testy)
print ('train x shape is {}'.format(trainx.shape))
print ('train y shape is {}'.format(trainy.shape))
print ('test x shape is {}'.format(testx.shape))
print ('tesy y shape is {}'.format(testy.shape))
learning_rate = 0.001
def get_train_batch():
indices = np.random.randint(low=0, high=60000, size=[64])
return trainx[indices], trainy[indices]
g = tf.Graph()
with g.as_default():
x = tf.placeholder(float32, shape=[None, 784])
y = tf.placeholder(float32, shape=[None, 784])
def fc(tensor, outdim, name):
w = tf.get_variable(name + "w", shape=[1, outdim], dtype=float32)
b = tf.get_variable(name + "b", shape=[], dtype=float32, initializer=tf.constant_initializer(0.0) to tf.get_variable)
return tf.add(tf.linalg.matmul(tensor, w), b)
first = fc(trainx, trainx.shape, "first")
first = tf.nn.relu(first)
second = fc(first, trainx.shape, "second")
second = tf.nn.relu(second)
logits = []
names = [some names]
logits.append(tf.log_sigmoid(first))
logits.append(tf.log_sigmoid(second))
for i in range(10):
logits.append(fc(trainx, trainx.shape, names[0]))
tf.nn.softmax() // how do I use this paragraph to get the loss
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = ? // how do i calculate the loss here?
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
minimize_op = optimizer.minimize(loss, var_list=tf.trainable_variables())
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(50000):
randomX, randomY = get_train_batch()
sess.run(minimize_op, feed_dict={x: randomX, y: randomY})
if epoch % 50 == 0:
trainloss = sess.run(loss, feed_dict={x: testx, y:testy})
print("Current loss: ", trainloss)
My questions are:
How do I use the mentioned functions correctly to calculate the loss
Where do I get the labels from?
I am new to Tensorflow and I am working for training with LSTM-RNN in Tensorflow.
I need to save the model so that I can restore and run with Test data again.
I am not sure what to save.
I need to save sess or I need to save pred
When I save sess, restore and test the Test data as
one_hot_predictions, accuracy, final_loss = sess.run(
[pred, accuracy, cost],
feed_dict={
x: X_test,
y: one_hot(y_test)
}
)
Then the error is unknown for pred.
Since I am new to Tensorflow, I am not sure what to save and what to restore to test with new data?
X_train = load_X(X_train_path)
X_test = load_X(X_test_path)
y_train = load_y(y_train_path)
y_test = load_y(y_test_path)
# proof that it actually works for the skeptical: replace labelled classes with random classes to train on
#for i in range(len(y_train)):
# y_train[i] = randint(0, 5)
# Input Data
training_data_count = len(X_train) # 4519 training series (with 50% overlap between each serie)
test_data_count = len(X_test) # 1197 test series
n_input = len(X_train[0][0]) # num input parameters per timestep
n_hidden = 34 # Hidden layer num of features
n_classes = 6
#updated for learning-rate decay
# calculated as: decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
decaying_learning_rate = True
learning_rate = 0.0025 #used if decaying_learning_rate set to False
init_learning_rate = 0.005
decay_rate = 0.96 #the base of the exponential in the decay
decay_steps = 100000 #used in decay every 60000 steps with a base of 0.96
global_step = tf.Variable(0, trainable=False)
lambda_loss_amount = 0.0015
training_iters = training_data_count *300 # Loop 300 times on the dataset, ie 300 epochs
batch_size = 512
display_iter = batch_size*8 # To show test set accuracy during training
#Utility functions for training:
def LSTM_RNN(_X, _weights, _biases):
# model architecture based on "guillaume-chevalier" and "aymericdamien" under the MIT license.
_X = tf.transpose(_X, [1, 0, 2]) # permute n_steps and batch_size
_X = tf.reshape(_X, [-1, n_input])
# Rectifies Linear Unit activation function used
_X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
# Split data because rnn cell needs a list of inputs for the RNN inner loop
_X = tf.split(_X, n_steps, 0)
# Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
# A single output is produced, in style of "many to one" classifier, refer to http://karpathy.github.io/2015/05/21/rnn-effectiveness/ for details
lstm_last_output = outputs[-1]
# Linear activation
return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']
def extract_batch_size(_train, _labels, _unsampled, batch_size):
# Fetch a "batch_size" amount of data and labels from "(X|y)_train" data.
# Elements of each batch are chosen randomly, without replacement, from X_train with corresponding label from Y_train
# unsampled_indices keeps track of sampled data ensuring non-replacement. Resets when remaining datapoints < batch_size
shape = list(_train.shape)
shape[0] = batch_size
batch_s = np.empty(shape)
batch_labels = np.empty((batch_size,1))
for i in range(batch_size):
# Loop index
# index = random sample from _unsampled (indices)
index = random.choice(_unsampled)
batch_s[i] = _train[index]
batch_labels[i] = _labels[index]
_unsampled.remove(index)
return batch_s, batch_labels, _unsampled
def one_hot(y_):
# One hot encoding of the network outputs
# e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]
y_ = y_.reshape(len(y_))
n_values = int(np.max(y_)) + 1
return np.eye(n_values)[np.array(y_, dtype=np.int32)] # Returns FLOATS
# Graph input/output
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Graph weights
weights = {
'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
}
biases = {
'hidden': tf.Variable(tf.random_normal([n_hidden])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = LSTM_RNN(x, weights, biases)
# Loss, optimizer and evaluation
l2 = lambda_loss_amount * sum(
tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 loss prevents this overkill neural network to overfit the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Softmax loss
if decaying_learning_rate:
learning_rate = tf.train.exponential_decay(init_learning_rate, global_step*batch_size, decay_steps, decay_rate, staircase=True)
#decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) #exponentially decayed learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost,global_step=global_step) # Adam Optimizer
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
#Train the network:
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
sess.run(init)
# Perform Training steps with "batch_size" amount of data at each loop.
# Elements of each batch are chosen randomly, without replacement, from X_train,
# restarting when remaining datapoints < batch_size
step = 1
time_start = time.time()
unsampled_indices = range(0,len(X_train))
while step * batch_size <= training_iters:
#print (sess.run(learning_rate)) #decaying learning rate
#print (sess.run(global_step)) # global number of iterations
if len(unsampled_indices) < batch_size:
unsampled_indices = range(0,len(X_train))
batch_xs, raw_labels, unsampled_indicies = extract_batch_size(X_train, y_train, unsampled_indices, batch_size)
batch_ys = one_hot(raw_labels)
# check that encoded output is same length as num_classes, if not, pad it
if len(batch_ys[0]) < n_classes:
temp_ys = np.zeros((batch_size, n_classes))
temp_ys[:batch_ys.shape[0],:batch_ys.shape[1]] = batch_ys
batch_ys = temp_ys
# Fit training using batch data
_, loss, acc = sess.run(
[optimizer, cost, accuracy],
feed_dict={
x: batch_xs,
y: batch_ys
}
)
train_losses.append(loss)
train_accuracies.append(acc)
# Evaluate network only at some steps for faster training:
if (step*batch_size % display_iter == 0) or (step == 1) or (step * batch_size > training_iters):
# To not spam console, show training accuracy/loss in this "if"
print("Iter #" + str(step*batch_size) + \
": Learning rate = " + "{:.6f}".format(sess.run(learning_rate)) + \
": Batch Loss = " + "{:.6f}".format(loss) + \
", Accuracy = {}".format(acc))
# Evaluation on the test set (no learning made here - just evaluation for diagnosis)
loss, acc = sess.run(
[cost, accuracy],
feed_dict={
x: X_test,
y: one_hot(y_test)
}
)
test_losses.append(loss)
test_accuracies.append(acc)
print("PERFORMANCE ON TEST SET: " + \
"Batch Loss = {}".format(loss) + \
", Accuracy = {}".format(acc))
step += 1
print("Optimization Finished!")
EDIT:
I can save the model as
print("Optimization Finished!")
save_path = saver.save(sess, "/home/test/venv/TFCodes/HumanActivityRecognition/model.ckpt")
Then I tried to restore and ok, I can restore. But I don't know how to test with test data.
My restore code is
X_test = load_X(X_test_path)
with tf.Session() as sess:
saver = tf.train.import_meta_graph('/home/nyan/venv/TFCodes/HumanActivityRecognition/model.ckpt.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
print("Model restored.")
all_vars = tf.trainable_variables()
for i in range(len(all_vars)):
name = all_vars[i].name
values = sess.run(name)
print('name', name)
#print('value', values)
print('shape',values.shape)
result = sess.run(prediction, feed_dict={X: X_test})
print("loss:", l, "prediction:", result, "true Y:", y_data)
# print char using dic
result_str = [idx2char[c] for c in np.squeeze(res
ult)]
print("\tPrediction str:", ''.join(result_str))
The output is
Model restored.
('name', u'Variable_1:0')
('shape', (36, 34))
('name', u'Variable_2:0')
('shape', (34, 6))
('name', u'Variable_3:0')
('shape', (34,))
('name', u'Variable_4:0')
('shape', (6,))
('name', u'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0')
('shape', (68, 136))
('name', u'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0')
('shape', (136,))
('name', u'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0')
('shape', (68, 136))
('name', u'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0')
('shape', (136,))
Traceback (most recent call last):
File "restore.py", line 74, in <module>
result = sess.run(prediction, feed_dict={X: X_test})
NameError: name 'prediction' is not defined
How to test the model restored?
What I find the easiest is the tf.saved_model.simple_save() function. It saves the computation graph you use, the weights, the input and the output in a .pb model and the weight variables.
You can later restore this model or even put it on ml-engine or use tf serving.
An example code snippit with a keras model and applied on YOLO:
inputs = {"image_bytes": model.input,
"shape": image_shape}
outputs = {"boxes": boxes,
"scores": scores,
"classes": classes}
tf.saved_model.simple_save(sess, "saved_model/", inputs, outputs)
I want to train an RNN model to connect an article and an image. The input and the output are two arrays.
I define the parameters of RNN as follow:
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Network Parameters
n_input = 128
n_steps = 168 # timesteps
n_hidden = 512 # hidden layer num of features
output = 200
the image is 128*168 and the article is 200
cost = tf.reduce_mean(pow(pred-y,2)/2)
#cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
For the end result, I want to train a network to transform an image to an article. However, when I try to train the model, the cost is returned as NaN.
Here is the code:
# coding=utf-8
from __future__ import print_function
from tensorflow.contrib import rnn
import scipy.io as scio
import tensorflow as tf
import numpy as np
import os
TextPath = 'F://matlab_code//readtxt//ImageTextVector.mat';
ImageDirPath = 'F://matlab_code//CVPR10-LLC//features//1';
Text = scio.loadmat(TextPath)
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Network Parameters
n_input = 128 #
n_steps = 168 # timesteps
n_hidden = 512 # hidden layer num of features
output = 200 #
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, output])
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, output]))
}
biases = {
'out': tf.Variable(tf.random_normal([output]))
}
def RNN(x, weights, biases):
lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(pow(pred-y,2)/2)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
train_count=0;
with tf.Session() as sess:
sess.run(init)
step = 0
while step* batch_size < training_iters:
iter = step*batch_size
batch_x = []
batch_y = []
while iter < (step+1)*batch_size:
ImagePath = ImageDirPath + '//' + Text['X'][train_count][0][0] +'.mat'
if os.path.exists(ImagePath):
batch_xx=[]
batch_yy=[]
Image = scio.loadmat(ImagePath)
i=0
while i<21504 :
batch_xx.append(Image['fea'][i][0])
i=i+1
batch_yy = Text['X'][train_count][1][0]
batch_xx = np.array(batch_xx)
batch_x=np.hstack((batch_x,batch_xx))
batch_y=np.hstack((batch_y,batch_yy))
iter = iter+1
train_count=train_count+1
batch_x = batch_x.reshape((batch_size,n_steps, n_input))
batch_y = batch_y.reshape((batch_size,output))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print("Iter " + str(step* batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) )
step += 1
print("Optimization Finished!")
when you pass tensor including nan values to lstm, the value in the cell of lstm's will be "forced" to nan because the numerical operation between number and nan. Check whether your data have nan value or just use numpy.nan_to_num to fill your nan data.
python 3.5.2, tensorflow 1.0.0
Somewhat new in programming with autoencoders. I am trying to implement a simple network to get familiarize from here. I have used the same input data in which a CNN is able to classify perfectly with accuracy of 98%. My data have 2000 row data and each row is a signal. I am trying with 3 stacked layers of auto encoders with 512 256 and 64 nodes.
class dimensions:
input_width, input_height = 1,1024
BATCH_SIZE = 50
layer = [input_width*input_height, 512, 256, 64]
learningrate = 0.001
def myencoder(x,corrupt_prob,dimensions):
current_input = corrupt(x) * corrupt_prob + x * (1 - corrupt_prob)
encoder = []
for layer_i, n_output in enumerate(dimensions.layer[1:]):
n_input = int(current_input.get_shape()[1])
W = tf.Variable(
tf.random_uniform([n_input, n_output],
-1.0 / math.sqrt(n_input),
1.0 / math.sqrt(n_input)))
b = tf.Variable(tf.zeros([n_output]))
encoder.append(W)
output = tf.nn.tanh(tf.matmul(current_input, W) + b)
current_input = output
z = current_input
encoder.reverse()
# Build the decoder using the same weights
for layer_i, n_output in enumerate(model.layer[:-1][::-1]):
W = tf.transpose(encoder[layer_i])
b = tf.Variable(tf.zeros([n_output]))
output = tf.nn.tanh(tf.matmul(current_input, W) + b)
current_input = output
# now have the reconstruction through the network
y = current_input
# cost function measures pixel-wise difference
cost = tf.sqrt(tf.reduce_mean(tf.square(y - x)))
return z,y,cost
sess = tf.Session()
model = dimensions()
data_train,data_test,label_train,label_test = load_data(Datainfo,folder)
x = tf.placeholder(tf.float32,[model.BATCH_SIZE,model.input_height*model.input_width])
corrupt_prob = tf.placeholder(tf.float32,[1])
z,y,cost = myencoder(x,corrupt_prob,dimensions)
train_step = tf.train.AdamOptimizer(model.learningrate).minimize(cost)
lossfun = np.zeros(STEPS)
sess.run(tf.global_variables_initializer())
for i in range(STEPS):
train_data = batchdata(data_train, model.BATCH_SIZE)
epoch_loss = 0
for j in range(model.BATCH_SIZE):
sess.run(train_step,feed_dict={x:train_data,corrupt_prob:[1.0]})
c = sess.run(cost, feed_dict={x: train_data, corrupt_prob: [1.0]})
epoch_loss += c
lossfun[i] = epoch_loss
print('Epoch', i, 'completed out of', STEPS, 'loss:', epoch_loss)
my loss function appears like this
xaxis - no of iterations, y axis - loss
the loss doesn't decrease and the network doesn't learn anything.
any help appreciated !
In the function myencoder, the weight variables W and b are initialized in every training step.
How can I log (with SummaryWriter, e.g. for TensorBoard) of individual scalar elements of a tensor Variable? For example, how can I log individual weights of a given layer or node in a network?
In my example code, I've pressed a general feed-forward neural network into service to do simple linear regression, and want (in that case) to log the weights of the lone node in the lone hidden layer as learning progresses.
I can get these values explicitly during a session with, for example
sess.run(layer_weights)[0][i][0]
for the i-th weight, where layer_weights is a list of the weight Variables; but I can't figure out how to log the corresponding scalar values. If I try
w1 = tf.slice(layer_weights[0], [0], [1])[0]
tf.scalar_summary('w1', w1)
or
w1 = layer_weights[0][1][0]
tf.scalar_summary('w1', w1)
I get
ValueError: Shape (5, 1) must have rank 1
How can I log individual scalar values from a TensorFlow Variable?
from __future__ import (absolute_import, print_function, division, unicode_literals)
import numpy as np
import tensorflow as tf
# Basic model parameters as external flags
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('network_nodes', [5, 1], 'The number of nodes in each layer, including input and output.')
flags.DEFINE_float('epochs', 250, 'Epochs to run')
flags.DEFINE_float('learning_rate', 0.15, 'Initial learning rate.')
flags.DEFINE_string('data_dir', './data', 'Directory to hold training and test data.')
flags.DEFINE_string('train_dir', './_tmp/train', 'Directory to log training (and the network def).')
flags.DEFINE_string('test_dir', './_tmp/test', 'Directory to log testing.')
def variable_summaries(var, name):
with tf.name_scope("summaries"):
mean = tf.reduce_mean(var)
tf.scalar_summary('mean/' + name, mean)
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
tf.scalar_summary('sttdev/' + name, stddev)
tf.scalar_summary('max/' + name, tf.reduce_max(var))
tf.scalar_summary('min/' + name, tf.reduce_min(var))
tf.histogram_summary(name, var)
def add_layer(input_tensor, input_dim, output_dim, neuron_fn, layer_name):
with tf.name_scope(layer_name):
with tf.name_scope("weights"):
weights = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
with tf.name_scope("biases"):
biases = tf.Variable(tf.constant(0.1, shape=[output_dim]))
with tf.name_scope('activations'):
with tf.name_scope('weighted_inputs'):
weighted_inputs = tf.matmul(input_tensor, weights) + biases
tf.histogram_summary(layer_name + '/weighted_inputs', weighted_inputs)
output = neuron_fn(weighted_inputs)
return output, weights, biases
def make_ff_network(nodes, input_activation, hidden_activation_fn=tf.nn.sigmoid, output_activation_fn=tf.nn.softmax):
layer_activations = [input_activation]
layer_weights = []
layer_biases = []
n_layers = len(nodes)
for l in range(1, n_layers):
a, w, b = add_layer(layer_activations[l - 1], nodes[l - 1], nodes[l],
output_activation_fn if l == n_layers - 1 else hidden_activation_fn,
'output_layer' if l == n_layers - 1 else 'hidden_layer' + (
'_{}'.format(l) if n_layers > 3 else ''))
layer_activations += [a]
layer_weights += [w]
layer_biases += [b]
with tf.name_scope('output'):
net_activation = tf.identity(layer_activations[-1], name='network_activation')
return net_activation, layer_weights, layer_biases
# Inputs and outputs
with tf.name_scope('data'):
x = tf.placeholder(tf.float32, shape=[None, FLAGS.network_nodes[0]], name='inputs')
y_ = tf.placeholder(tf.float32, shape=[None, FLAGS.network_nodes[-1]], name='correct_outputs')
# Network structure
y, layer_weights, layer_biases = make_ff_network(FLAGS.network_nodes, x, output_activation_fn=tf.identity)
# Metrics and operations
with tf.name_scope('accuracy'):
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.square(y - y_))
# NONE OF THESE WORK:
#w1 = tf.slice(layer_weights[0], [0], [1])[0]
#tf.scalar_summary('w1', w1)
#w1 = layer_weights[0][1][0]
#tf.scalar_summary('w1', w1)
tf.scalar_summary('loss', loss)
train_step = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(loss)
# Logging
train_writer = tf.train.SummaryWriter(FLAGS.train_dir, tf.get_default_graph())
test_writer = tf.train.SummaryWriter(FLAGS.test_dir)
merged = tf.merge_all_summaries()
W = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
train_x = np.random.rand(100000, FLAGS.network_nodes[0])
train_y = np.array([np.dot(W, train_x.T)+ 6.0]).T
test_x = np.random.rand(1000, FLAGS.network_nodes[0])
test_y = np.array([np.dot(W, test_x.T)+ 6.0]).T
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for ep in range(FLAGS.epochs):
sess.run(train_step, feed_dict={x: train_x, y_: train_y})
summary = sess.run(merged, feed_dict={x: test_x, y_: test_y})
test_writer.add_summary(summary, ep+1)
# THESE WORK
print('w1 = {}'.format(sess.run(layer_weights)[0][0][0]))
print('w2 = {}'.format(sess.run(layer_weights)[0][1][0]))
print('w3 = {}'.format(sess.run(layer_weights)[0][2][0]))
print('w4 = {}'.format(sess.run(layer_weights)[0][3][0]))
print('w5 = {}'.format(sess.run(layer_weights)[0][4][0]))
print(' b = {}'.format(sess.run(layer_biases)[0][0]))
There are different errors in the code.
The main problem is that you are passing a python list of tensors to the scalar_summary.
The error says that your are passing a tensor that does not have Rank 1 is related to the slice operation.
You want to pass the weights and log them layer per layer. One way to do that is to log each weight on each layer:
for weight in layer_weights:
tf.scalar_summary([ ['%s_w%d%d' % (weight.name, i,j) for i in xrange(len(layer_weights))] for j in xrange(5) ], weight)
This will produce in tensorboard tensorboard --logdir=./_tmp/test these nice graphs