Fix accuracy in Python TensorFlow

Background: We are trying to build an RNN model that predicts whether an event happens or not, based on numeric input data.
The problem: We have reached a situation in which the loss decreases but the accuracy stays at 0.5.
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

tf.initialize_all_variables()
ops.reset_default_graph()
sess = tf.InteractiveSession()

np_labels = np.array(labels)
np_labels = np_labels.reshape([np_labels.shape[0], 1])
print([np_labels.shape[0], 1])

############################ model ############
cellsize = 100
possible_chars = 1

x = tf.placeholder(tf.float32, [None, 91, 10])
y = tf.placeholder(tf.float32, [None, 1])

lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(cellsize, forget_bias=0.0)
output, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
output = tf.transpose(output, [1, 0, 2])
last = output[-1]

W = tf.Variable(tf.truncated_normal([cellsize, possible_chars], stddev=0.1))
b = tf.Variable(tf.constant(0.1))
z = tf.matmul(last, W) + b
res = tf.nn.sigmoid(z)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(res), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

correct_prediction = tf.greater(res, 0.1)
correct = tf.equal(correct_prediction, tf.equal(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

############################ startTheTrain ################
num_of_epochs = 10
for epoch in range(num_of_epochs):
    _, loss = sess.run([train_step, cross_entropy], feed_dict={x: fvecs, y: np_labels})
    a = accuracy.eval(feed_dict={x: fvecs, y: np_labels})
    print("step %d, loss: %g, training accuracy %g" % (epoch, loss, a))
After I run the code I get this output:
step 0, loss: 0.338716, training accuracy 0.5
step 1, loss: 0.130198, training accuracy 0.5
step 8, loss: 0.0502876, training accuracy 0.5
step 9, loss: 0.0468232, training accuracy 0.5
I am trying to fix the code so that the accuracy works.
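A likely culprit (a hedged reading of the code above, not a fix verified on this data): the loss -sum(y * log(res)) only penalizes the positive class, since every term vanishes when y = 0, so the network can drive the loss down without learning to separate the two classes; in addition, the accuracy op thresholds at 0.1 instead of the usual 0.5 decision boundary. A minimal sketch of the adjusted loss and accuracy, reusing the tensors defined above:
# Sketch (assumes the graph above): numerically stable binary cross-entropy
# computed from the logits z, plus an accuracy that thresholds at 0.5.
cross_entropy = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=z))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

predicted_positive = tf.greater(res, 0.5)            # res = sigmoid(z)
correct = tf.equal(predicted_positive, tf.equal(y, 1.0))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))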

Related

predict label for new text input on trained model

I'm trying to do a prediction on new text examples, where I want the prediction to return a probability output for each example.
This is my learning model:
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    drops = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_v = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embeddings = tf.nn.embedding_lookup(params=embeddings_v, ids=input_x)

# Bi-directional LSTM
with tf.name_scope('Bi_directional_LSTM'):
    rnn_outputs, _ = bi_rnn(LSTMCell(hidden_unit), LSTMCell(hidden_unit), inputs=embeddings, dtype=tf.float32)
    tf.summary.histogram('Bi_directional_LSTM', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs, attention_size, return_alphas=True)
    tf.summary.histogram('alphas', alphas)

with tf.name_scope('Dropout'):
    drop = tf.nn.dropout(attention_output, drops)

with tf.name_scope('Fully_connected_layer'):
    W = tf.Variable(tf.truncated_normal([hidden_unit * 2, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0., shape=[n_classes]))
    y_hat = tf.nn.xw_plus_b(drop, W, b)
    predictions = tf.argmax(input=y_hat, axis=1, name='predictions')

with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=input_y))
    global_step = tf.Variable(0.1, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)
    probs = tf.nn.sigmoid(y_hat)
    tf.summary.scalar('loss', loss)

with tf.name_scope('Accuracy'):
    correct_pred = tf.equal(tf.argmax(y_hat, 1), tf.argmax(input_y, 1))
    accuracy = tf.reduce_mean(input_tensor=tf.cast(correct_pred, tf.float32), name='accuracy')
    num_correct = tf.reduce_sum(input_tensor=tf.cast(correct_pred, 'float'), name='correct_predictions')
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()
I then run the following piece of code to train the model:
def batch_generator(X, y, batch_size):
    """batch generator"""
    size = X.shape[0]
    X_copy = X.copy()
    y_copy = y.copy()
    indices = np.arange(size)
    np.random.shuffle(indices)
    X_copy = X_copy[indices]
    y_copy = y_copy[indices]
    i = 0
    while True:
        if i + batch_size <= size:
            yield X_copy[i:i + batch_size], y_copy[i:i + batch_size]
            i += batch_size
        else:
            i = 0
            indices = np.arange(size)
            np.random.shuffle(indices)
            X_copy = X_copy[indices]
            y_copy = y_copy[indices]
            continue
train_batch_generator = batch_generator(x_train, y_train, batch_size)
test_batch_generator = batch_generator(x_dev, y_dev, batch_size)
predict_generator = batch_generator(x_test, y_test, batch_size)
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
    sess.run(tf.global_variables_initializer())
    print("Start learning...")
    for epoch in range(epochs):
        loss_train = 0
        loss_val = 0
        loss_test = 0
        accuracy_train = 0
        accuracy_val = 0
        accuracy_test = 0
        train_loss_l = []
        val_loss_l = []
        print("epoch: {}\t".format(epoch), end="")

        # Training
        num_batches = x_train.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(train_batch_generator)
            loss_tr, acc, _, summary = sess.run([loss, accuracy, optimizer, merged],
                                                feed_dict={input_x: x_batch,
                                                           input_y: y_batch,
                                                           drops: 0.5})
            train_loss_l.append(loss_tr)
            accuracy_train += acc
            loss_train = loss_tr * DELTA + loss_train * (1 - DELTA)
        accuracy_train /= num_batches

        # Validation
        num_batches = x_dev.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(test_batch_generator)
            val_loss, val_acc, summary = sess.run([loss, accuracy, merged],
                                                  feed_dict={input_x: x_batch,
                                                             input_y: y_batch,
                                                             drops: 0.5})
            val_loss_l.append(val_loss)
            accuracy_val += val_acc
            loss_val += val_loss
        accuracy_val /= num_batches
        loss_val /= num_batches

        print("loss: {:.3f}, val_loss: {:.3f}, acc: {:.3f}, val_acc: {:.3f}".format(loss_train, loss_val, accuracy_train, accuracy_val))

    # predict x_test
    num_batches = x_test.shape[0] // batch_size
    print("n batches", num_batches)
    predict_correct = 0
    for batch in tqdm(range(num_batches)):
        x_batch, yx_batch = next(predict_generator)
        y_true = np.argmax(yx_batch, 1)
        loss_pred, acc_pred, n_correct, y_pred = sess.run([loss, accuracy, num_correct, predictions],
                                                          feed_dict={input_x: x_batch, input_y: y_batch, drops: 0.5})
        print("Precision", sk.metrics.precision_score(y_true, y_pred, average='weighted'))
        print("Recall", sk.metrics.recall_score(y_true, y_pred, average='weighted'))
        print("f1_score", sk.metrics.f1_score(y_true, y_pred, average='weighted'))
        print("confusion_matrix")
        print(sk.metrics.confusion_matrix(y_true, y_pred))

    saver.save(sess, MODEL_PATH)
    sess.run(predictions, feed_dict={x: x_test})
The code runs fine until it hits the prediction part where I get the following error:
TypeError: unhashable type: 'numpy.ndarray'
Any chance someone could explain this to me please?
Just answered my own question...
This is just in case someone else is attempting to do this: the keys in feed_dict must be the actual placeholder tensors you defined in the graph, not some other variable holding a numpy array.
In my case this would be:
print(sess.run(predictions, feed_dict={input_x: x_test,drops:0.5}))
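Since the original goal was a probability for each example, you can also fetch the probs tensor defined above instead of predictions (a sketch, assuming the graph as written). Note that drops is the keep probability passed to tf.nn.dropout, so at inference time it should be 1.0 to disable dropout:
# Sketch: per-example sigmoid probabilities, dropout disabled at inference.
probabilities = sess.run(probs, feed_dict={input_x: x_test, drops: 1.0})
print(probabilities)  # shape (num_examples, n_classes)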

Why isn't my XOR tensorflow network learning?

I can't figure out what I'm doing wrong with this XOR neural network. Maybe I'm not computing the loss correctly? The loss improves slightly at the beginning, and then the accuracy converges to 50% very quickly. Could someone please point out what I'm doing wrong?
Here's a minimal, self-contained example:
import numpy as np
import tensorflow as tf

n_inputs = 2
n_hidden = 3
n_outputs = 1

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.float32, shape=(None), name='y')

def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

with tf.name_scope('nn'):
    hidden = neuron_layer(X, n_hidden, name='hidden', activation=tf.nn.sigmoid)
    prediction_probabilities = neuron_layer(hidden, n_outputs, name='outputs', activation=tf.nn.sigmoid)

with tf.name_scope('loss'):
    mse_loss = tf.reduce_mean(tf.squared_difference(y, prediction_probabilities), name='loss')

learning_rate = 0.1

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(mse_loss)

with tf.name_scope('eval'):
    correct = tf.equal(tf.greater_equal(prediction_probabilities, 0.5), tf.cast(y, tf.bool))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

X_train = [
    (0, 0),
    (0, 1),
    (1, 0),
    (1, 1)
]
y_train = [0, 1, 1, 0]

with tf.Session() as sess:
    init.run()
    for epoch in range(500):
        _, mse, acc = sess.run([training_op, mse_loss, accuracy],
                               feed_dict={X: np.array(X_train), y: np.array(y_train)})
        print("mse: %.4f, accuracy: %.2f" % (mse, acc))
Your code is fine; the problem is the shape of your labels. You need to pass a 2D list, not a 1D one, so that each inner list holds the label for a single example. Otherwise y (shape (4,)) broadcasts against the (4, 1) predictions inside tf.squared_difference, and the loss is computed over a 4x4 matrix instead of per example.
y_train = [[0], [1], [1], [0]]
Your code works nicely after that.
...
mse: 0.0002, accuracy: 1.00
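Equivalently (a sketch, not part of the original answer), you can keep the flat label list and reshape it into a column vector before feeding, which gives the same (4, 1) shape:
# Same effect as the nested-list fix above.
y_train = np.array([0, 1, 1, 0], dtype=np.float32).reshape(-1, 1)  # shape (4, 1)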

Trouble predicting with tensorflow model

I've trained a Deep Neural Network on the MNIST dataset. Here is the code for training.
n_classes = 10
batch_size = 100

x = tf.placeholder(tf.float32, [None, 784], name='Xx')
y = tf.placeholder(tf.float32, [None, 10], name='Yy')

input = 784
n_nodes_1 = 300
n_nodes_2 = 300

def neural_network_model(data):
    variables = {'w1': tf.Variable(tf.random_normal([input, n_nodes_1])),
                 'w2': tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
                 'w3': tf.Variable(tf.random_normal([n_nodes_2, n_classes])),
                 'b1': tf.Variable(tf.random_normal([n_nodes_1])),
                 'b2': tf.Variable(tf.random_normal([n_nodes_2])),
                 'b3': tf.Variable(tf.random_normal([n_classes]))}
    output1 = tf.add(tf.matmul(data, variables['w1']), variables['b1'])
    output2 = tf.nn.relu(output1)
    output3 = tf.add(tf.matmul(output2, variables['w2']), variables['b2'])
    output4 = tf.nn.relu(output3)
    output5 = tf.add(tf.matmul(output4, variables['w3']), variables['b3'], name='last')
    return output5

def train_neural_network(x):
    prediction = neural_network_model(x)
    name_of_final_layer = 'fin'
    final = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
                                                       labels=y, name=name_of_final_layer)
    cost = tf.reduce_mean(final)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 3
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print("Epoch", epoch + 1, "Completed Total Loss:", epoch_loss)
            correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            print('Accuracy on val_set:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
        path = saver.save(sess, "net/network")
        print("Saved to", path)
Here is my code for evaluating a single datapoint
def eval_neural_network():
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph('net/network.meta')
        new_saver.restore(sess, "net/network")
        sing = np.reshape(mnist.test.images[0], (-1, 784))
        output = sess.run([y], feed_dict={x: sing})
        print(output)

eval_neural_network()
The error that popped up is :
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Yy' with dtype float and shape [?,10]
[[Node: Yy = Placeholder[dtype=DT_FLOAT, shape=[?,10], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I've researched this online for multiple days now and still could not get it to work. Any advice?
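One likely cause (a hedged reading of the snippet above, not a confirmed answer from this thread): sess.run([y], ...) asks TensorFlow to evaluate the label placeholder Yy itself, which can only be satisfied by feeding it. Fetching the network's output tensor instead avoids the error. A minimal sketch, reusing the names defined above ('Xx' and 'last'):
def eval_neural_network():
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph('net/network.meta')
        new_saver.restore(sess, "net/network")
        graph = tf.get_default_graph()
        x_t = graph.get_tensor_by_name('Xx:0')       # the input placeholder
        logits = graph.get_tensor_by_name('last:0')  # output of the layer named 'last'
        sing = np.reshape(mnist.test.images[0], (-1, 784))
        output = sess.run(logits, feed_dict={x_t: sing})
        print(np.argmax(output, axis=1))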
The losses oscillate like this, but the predictions don't seem bad; it works.
It also re-extracts the MNIST archive on every run, and the accuracy can reach 0.98 with a simpler network.
Epoch 1 Completed Total Loss: 47.47844
Accuracy on val_set: 0.8685
Epoch 2 Completed Total Loss: 10.217445
Accuracy on val_set: 0.9
Epoch 3 Completed Total Loss: 14.013474
Accuracy on val_set: 0.9104
[2]
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import matplotlib.pyplot as plt

n_classes = 10
batch_size = 100

x = tf.placeholder(tf.float32, [None, 784], name='Xx')
y = tf.placeholder(tf.float32, [None, 10], name='Yy')

input = 784
n_nodes_1 = 300
n_nodes_2 = 300

mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

def neural_network_model(data):
    variables = {'w1': tf.Variable(tf.random_normal([input, n_nodes_1])),
                 'w2': tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
                 'w3': tf.Variable(tf.random_normal([n_nodes_2, n_classes])),
                 'b1': tf.Variable(tf.random_normal([n_nodes_1])),
                 'b2': tf.Variable(tf.random_normal([n_nodes_2])),
                 'b3': tf.Variable(tf.random_normal([n_classes]))}
    output1 = tf.add(tf.matmul(data, variables['w1']), variables['b1'])
    output2 = tf.nn.relu(output1)
    output3 = tf.add(tf.matmul(output2, variables['w2']), variables['b2'])
    output4 = tf.nn.relu(output3)
    output5 = tf.add(tf.matmul(output4, variables['w3']), variables['b3'], name='last')
    return output5

def train_neural_network(x):
    prediction = neural_network_model(x)
    name_of_final_layer = 'fin'
    final = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
                                                       labels=y, name=name_of_final_layer)
    cost = tf.reduce_mean(final)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 3
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
            print("Epoch", epoch + 1, "Completed Total Loss:", c)
            correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            print('Accuracy on val_set:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
        # path = saver.save(sess, "net/network")
        # print("Saved to", path)
    return prediction

def eval_neural_network(prediction):
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph('net/network.meta')
        new_saver.restore(sess, "net/network")
        singleprediction = tf.argmax(prediction, 1)
        sing = np.reshape(mnist.test.images[1], (-1, 784))
        output = singleprediction.eval(feed_dict={x: sing}, session=sess)
        digit = mnist.test.images[1].reshape((28, 28))
        plt.imshow(digit, cmap='gray')
        plt.show()
        print(output)

prediction = train_neural_network(x)
eval_neural_network(prediction)
This complete example, based on the TensorFlow GitHub tutorial, worked for me. (I modified a few lines by removing the name scope around x and keep_prob and switching to tf.placeholder_with_default; there is probably a cleaner way to do this.)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
import argparse
import sys
import tempfile

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None


def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is the
        number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
      equal to the logits of classifying the digit into one of 10 classes (the
      digits 0-9). keep_prob is a scalar placeholder for the probability of
      dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images are
    # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    keep_prob = tf.placeholder_with_default(1.0, ())
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


# Import data
mnist = input_data.read_data_sets("/tmp")

# Create the model
x = tf.placeholder(tf.float32, [None, 784], name='x')

# Define loss and optimizer
y_ = tf.placeholder(tf.int64, [None])

# Build the graph for the deep net
y_conv, keep_prob = deepnn(x)

with tf.name_scope('loss'):
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

with tf.name_scope('adam_optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
    correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

graph_location = tempfile.mkdtemp()
print('Saving graph to: %s' % graph_location)
train_writer = tf.summary.FileWriter(graph_location)
train_writer.add_graph(tf.get_default_graph())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuracy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

    sing = np.reshape(mnist.test.images[0], (-1, 784))
    output = sess.run(y_conv, feed_dict={x: sing, keep_prob: 1.0})
    print(tf.argmax(output, 1).eval())

    saver = tf.train.Saver()
    saver.save(sess, "/tmp/network")
Extracting /tmp/train-images-idx3-ubyte.gz
Extracting /tmp/train-labels-idx1-ubyte.gz
Extracting /tmp/t10k-images-idx3-ubyte.gz
Extracting /tmp/t10k-labels-idx1-ubyte.gz
Saving graph to: /tmp/tmp17hf_6c7
step 0, training accuracy 0.2
step 100, training accuracy 0.86
step 200, training accuracy 0.8
step 300, training accuracy 0.94
step 400, training accuracy 0.94
step 500, training accuracy 0.96
step 600, training accuracy 0.88
step 700, training accuracy 0.98
step 800, training accuracy 0.98
step 900, training accuracy 0.98
test accuracy 0.9663
[7]
If you want to restore from a new python run:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import argparse
import sys
import tempfile
from tensorflow.examples.tutorials.mnist import input_data
sess = tf.Session()
saver = tf.train.import_meta_graph('/tmp/network.meta')
saver.restore(sess,tf.train.latest_checkpoint('/tmp'))
graph = tf.get_default_graph()
mnist = input_data.read_data_sets("/tmp")
simg = np.reshape(mnist.test.images[0],(-1,784))
op_to_restore = graph.get_tensor_by_name("fc2/MatMul:0")
x = graph.get_tensor_by_name("x:0")
output = sess.run(op_to_restore,feed_dict= {x:simg})
print("Result = ", np.argmax(output))

Weird behaviour of dropout layer in tensorflow

I made a CNN model in TensorFlow that uses dropout layers.
I pass an is_training argument to the network function so that dropout is disabled during the testing phase, and I realised that the errors are significantly higher when I disable it.
If I test the model with dropout still enabled (which makes no sense), I get an average error of 0.01, while if I test it with is_training set to False (but still train with dropout), I get an error of 0.8.
I can't see where my mistake is.
Here is the model function:
def conv_net(x, arch, is_training=False):
    # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
    # Reshape to match picture format [Height x Width x Channel]
    # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    ### YOUR CODE STARTS HERE ###
    # Convolution Layer with F1 filters, a kernel size of K1 and ReLU activations
    pad = 'same'
    conv1 = tf.layers.conv2d(x, arch['conv1'][0], arch['conv1'][1], activation=tf.nn.relu)
    conv2 = tf.layers.conv2d(conv1, arch['conv2'][0], arch['conv2'][1], activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv2, arch['pool1'][0], arch['pool1'][0])
    drop1 = tf.layers.dropout(pool1, arch['dropout1'], training=is_training)

    conv3 = tf.layers.conv2d(drop1, arch['conv3'][0], arch['conv3'][1], activation=tf.nn.relu)  # TODO: add padding
    drop1_2 = tf.layers.dropout(conv3, arch['dropout1'], training=is_training)
    conv4 = tf.layers.conv2d(drop1_2, arch['conv4'][0], arch['conv4'][1], activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(conv4, arch['pool2'][0], arch['pool2'][0])
    drop2 = tf.layers.dropout(pool2, arch['dropout2'], training=is_training)

    flat = tf.contrib.layers.flatten(drop2)
    fc1 = tf.layers.dense(flat, arch['N'])
    out = tf.layers.dense(fc1, n_classes)
    ### YOUR CODE ENDS HERE ###

    return out
And the training function:
def train_test_model(hypers, save_final_model=False):
    # Running the training session
    print("Starting training session...")
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init)
        total_batch = int(mnist.train.num_examples / hypers.batch_size)
        # Training cycle
        try:
            for epoch in range(hypers.n_epochs):
                avg_cost = 0.
                # Loop over all batches
                for i in range(total_batch):
                    batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                    # Run optimization op (backprop) and cost op (to get loss value)
                    _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                                  y: batch_y})
                    # Compute average loss
                    avg_cost += c / total_batch
                # Display logs per epoch step
                if epoch % display_step == 0:
                    # Test model
                    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                    # Calculate accuracy
                    # ORIGINAL:
                    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    # train_err = 1-accuracy.eval({x: mnist.train.images, y: mnist.train.labels})
                    # valid_err = 1-accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
                    # WITH BATCHES FOR LESS MEM ALLOC
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
                    train_acc = 0
                    for i in range(total_batch):
                        batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                        train_acc += accuracy.eval(feed_dict={x: batch_x,
                                                              y: batch_y})
                    train_acc /= total_batch
                    train_err = 1 - train_acc
                    valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
                    # Display accuracy
                    print("Epoch:", '%05d' % (epoch + 1), ", cost=",
                          "{:.9f}".format(avg_cost), ", train_err=", "{:.4f}".format(train_err), ", valid_err=",
                          "{:.4f}".format(valid_err))
                if epoch % 5 == 0:
                    v = input('Do you want to stop the model? [Y/n]')
                    if 'y' in v.lower():
                        raise KeyboardInterrupt
        except KeyboardInterrupt:
            hypers.n_epochs = epoch
            print("SIGINT Received, interrupting the training")

        print("\nOptimization Finished!\n")

        # Test model
        correct_prediction = tf.equal(tf.argmax(test_pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # modified to batches
        train_acc = 0
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
            train_acc += accuracy.eval(feed_dict={x: batch_x,
                                                  y: batch_y})
        train_acc /= total_batch
        train_err = 1 - train_acc
        #
        valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
        print("Optimized for ", '%05d' % (epoch + 1), "epochs, to obtain training error", "{:.4f}".format(train_err),
              ", and validation error", "{:.4f}".format(valid_err))
        confusion = tf.confusion_matrix(tf.argmax(pred, 1), tf.argmax(y, 1))
        print("\nValidation Confusion matrix:\n",
              confusion.eval({x: mnist.validation.images, y: mnist.validation.labels}))
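For reference, a generic sketch (not the asker's code and not a confirmed diagnosis) of the usual way to toggle tf.layers.dropout between training and evaluation within a single graph: a boolean placeholder with a default, so the same pred tensor can be evaluated in both modes. Note that tf.layers.dropout takes the drop rate (the fraction of units zeroed), not a keep probability. Names like pool1, arch, x, y, optimizer and accuracy are assumed from the code above:
# Sketch: one graph, dropout controlled at feed time.
training = tf.placeholder_with_default(False, shape=(), name='training')
drop1 = tf.layers.dropout(pool1, rate=arch['dropout1'], training=training)
# ... build the rest of the network on top of drop1 ...

# Training step: dropout active.
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, training: True})
# Evaluation: dropout off (the default), same tensors.
acc = accuracy.eval(feed_dict={x: mnist.validation.images, y: mnist.validation.labels})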

Cost function with Nan values in Tensorflow

I'm implementing a neural network to map one vector of 194 values to another vector of 5 values. The values are floats from the interval [-10, 10].
The problem with my code below is that the avg_cost printed at the end is always NaN. I've read somewhere that this happened to someone because they didn't set the activation function of the output layer correctly, but I've left mine as the default (linear) activation. I'm a newbie in this area and to TensorFlow, so I'm not sure what I'm doing wrong here. Is a linear activation function the right choice? Or is something else wrong in my code?
def neural_net(x):
    layer_1 = tf.layers.dense(inputs=x, units=97, activation=tf.nn.relu)
    out_layer = tf.layers.dense(inputs=layer_1, units=5)
    return out_layer

train_x = pd.read_csv("x_data.csv", sep=" ")
train_y = pd.read_csv("y_data.csv", sep=" ")

train_size = 0.9
train_cnt = int(floor(train_x.shape[0] * train_size))
x_train = train_x.iloc[0:train_cnt].values
y_train = train_y.iloc[0:train_cnt].values
x_test = train_x.iloc[train_cnt:].values
y_test = train_y.iloc[train_cnt:].values

x = tf.placeholder("float", [None, 194])
y = tf.placeholder("float", [None, 5])

nn_output = neural_net(x)

cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y, predictions=nn_output))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(cost)

training_epochs = 5000
display_step = 1000
batch_size = 30

keep_prob = tf.placeholder("float")
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        avg_cost = 0.0
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)
        for i in range(total_batch):
            batch_x, batch_y = x_batches[i], y_batches[i]
            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                keep_prob: 0.8
                            })
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=",
                  "{:.9f}".format(avg_cost))
