I'm implementing a neural network to map one vector of 194 values to another vector of 5 values. The values are floats in the interval [-10, 10].
The problem with my code below is that the avg_cost printed at the end is always NaN. I've read that this can happen when the activation function of the output layer isn't set correctly, but mine uses the default linear activation. I'm new to this area and to TensorFlow, so I'm not sure what I'm doing wrong. Is a linear activation function the right choice for the output layer, or is something else wrong in my code?
import numpy as np
import pandas as pd
import tensorflow as tf
from math import floor

def neural_net(x):
    layer_1 = tf.layers.dense(inputs=x, units=97, activation=tf.nn.relu)
    out_layer = tf.layers.dense(inputs=layer_1, units=5)
    return out_layer
train_x = pd.read_csv("x_data.csv", sep=" ")
train_y = pd.read_csv("y_data.csv", sep=" ")
train_size = 0.9
train_cnt = int(floor(train_x.shape[0] * train_size))
x_train = train_x.iloc[0:train_cnt].values
y_train = train_y.iloc[0:train_cnt].values
x_test = train_x.iloc[train_cnt:].values
y_test = train_y.iloc[train_cnt:].values
x = tf.placeholder("float", [None, 194])
y = tf.placeholder("float", [None, 5])
nn_output = neural_net(x)
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y, predictions=nn_output))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(cost)
training_epochs = 5000
display_step = 1000
batch_size = 30
keep_prob = tf.placeholder("float")
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        avg_cost = 0.0
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)
        for i in range(total_batch):
            batch_x, batch_y = x_batches[i], y_batches[i]
            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                keep_prob: 0.8
                            })
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=",
                  "{:.9f}".format(avg_cost))
I'm trying to make predictions on new text examples, and I want the prediction to return a probability output for each example.
This is my learning model:
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    drops = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_v = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embeddings = tf.nn.embedding_lookup(params=embeddings_v, ids=input_x)

# Bi_directional LSTM
with tf.name_scope('Bi_directional_LSTM'):
    rnn_outputs, _ = bi_rnn(LSTMCell(hidden_unit), LSTMCell(hidden_unit), inputs=embeddings, dtype=tf.float32)
    tf.summary.histogram('Bi_directional_LSTM', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs, attention_size, return_alphas=True)
    tf.summary.histogram('alphas', alphas)

with tf.name_scope('Dropout'):
    drop = tf.nn.dropout(attention_output, drops)

with tf.name_scope('Fully_connected_layer'):
    W = tf.Variable(tf.truncated_normal([hidden_unit * 2, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0., shape=[n_classes]))
    y_hat = tf.nn.xw_plus_b(drop, W, b)
    predictions = tf.argmax(input=y_hat, axis=1, name='predictions')

with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=input_y))
    global_step = tf.Variable(0.1, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)
    probs = tf.nn.sigmoid(y_hat)
    tf.summary.scalar('loss', loss)

with tf.name_scope('Accuracy'):
    correct_pred = tf.equal(tf.argmax(y_hat, 1), tf.argmax(input_y, 1))
    accuracy = tf.reduce_mean(input_tensor=tf.cast(correct_pred, tf.float32), name='accuracy')
    num_correct = tf.reduce_sum(input_tensor=tf.cast(correct_pred, 'float'), name='correct_predictions')
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()
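Since the question asks for a probability per example, note that the probs tensor defined above already applies a sigmoid to the logits. A minimal sketch of how it could be evaluated on new examples once a session like the one further below is running (x_new is a hypothetical integer matrix shaped like input_x; a keep probability of 1.0 switches dropout off at inference):
# Hypothetical inference call; x_new is assumed to have shape [batch, sequence_length].
probabilities = sess.run(probs, feed_dict={input_x: x_new, drops: 1.0})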
I then run the following piece of code to train the model:
def batch_generator(X, y, batch_size):
    """batch generator"""
    size = X.shape[0]
    X_copy = X.copy()
    y_copy = y.copy()
    indices = np.arange(size)
    np.random.shuffle(indices)
    X_copy = X_copy[indices]
    y_copy = y_copy[indices]
    i = 0
    while True:
        if i + batch_size <= size:
            yield X_copy[i:i + batch_size], y_copy[i:i + batch_size]
            i += batch_size
        else:
            i = 0
            indices = np.arange(size)
            np.random.shuffle(indices)
            X_copy = X_copy[indices]
            y_copy = y_copy[indices]
            continue

train_batch_generator = batch_generator(x_train, y_train, batch_size)
test_batch_generator = batch_generator(x_dev, y_dev, batch_size)
predict_generator = batch_generator(x_test, y_test, batch_size)
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
    sess.run(tf.global_variables_initializer())
    print("Start learning...")
    for epoch in range(epochs):
        loss_train = 0
        loss_val = 0
        loss_test = 0
        accuracy_train = 0
        accuracy_val = 0
        accuracy_test = 0
        train_loss_l = []
        val_loss_l = []
        print("epoch: {}\t".format(epoch), end="")

        # Training
        num_batches = x_train.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(train_batch_generator)
            loss_tr, acc, _, summary = sess.run([loss, accuracy, optimizer, merged],
                                                feed_dict={input_x: x_batch,
                                                           input_y: y_batch,
                                                           drops: 0.5})
            train_loss_l.append(loss_tr)
            accuracy_train += acc
            loss_train = loss_tr * DELTA + loss_train * (1 - DELTA)
        accuracy_train /= num_batches

        # Validation
        num_batches = x_dev.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(test_batch_generator)
            val_loss, val_acc, summary = sess.run([loss, accuracy, merged],
                                                  feed_dict={input_x: x_batch,
                                                             input_y: y_batch,
                                                             drops: 0.5})
            val_loss_l.append(val_loss)
            accuracy_val += val_acc
            loss_val += val_loss
        accuracy_val /= num_batches
        loss_val /= num_batches

        print("loss: {:.3f}, val_loss: {:.3f}, acc: {:.3f}, val_acc: {:.3f}".format(loss_train, loss_val, accuracy_train, accuracy_val))

    # predict x_test
    num_batches = x_test.shape[0] // batch_size
    print("n batches", num_batches)
    predict_correct = 0
    for batch in tqdm(range(num_batches)):
        x_batch, yx_batch = next(predict_generator)
        y_true = np.argmax(yx_batch, 1)
        loss_pred, acc_pred, n_correct, y_pred = sess.run([loss, accuracy, num_correct, predictions],
                                                          feed_dict={input_x: x_batch, input_y: y_batch, drops: 0.5})
        print("Precision", sk.metrics.precision_score(y_true, y_pred, average='weighted'))
        print("Recall", sk.metrics.recall_score(y_true, y_pred, average='weighted'))
        print("f1_score", sk.metrics.f1_score(y_true, y_pred, average='weighted'))
        print("confusion_matrix")
        print(sk.metrics.confusion_matrix(y_true, y_pred))

    saver.save(sess, MODEL_PATH)
    sess.run(predictions, feed_dict={x: x_test})
The code runs fine until it hits the prediction part, where I get the following error:
TypeError: unhashable type: 'numpy.ndarray'
Any chance someone could explain this to me please?
Just answered my own question...
This is just in case someone else attempts to do this. When you build the feed_dict, use the actual placeholders you defined before as the keys.
In my case this would be:
print(sess.run(predictions, feed_dict={input_x: x_test,drops:0.5}))
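For context on the error itself (my explanation, not from the original post): feed_dict keys must be graph tensors such as placeholders. In the failing call, the name x referred to a NumPy array rather than a placeholder, and NumPy arrays are unhashable, so they cannot be used as dictionary keys. A minimal illustration with made-up values:
x = np.zeros((3, 4))                          # a NumPy array, not a placeholder
sess.run(predictions, feed_dict={x: x_test})  # TypeError: unhashable type: 'numpy.ndarray'
sess.run(predictions, feed_dict={input_x: x_test,
                                 drops: 1.0})  # OK: keys are the graph placeholders
As a side note, passing drops: 1.0 at prediction time disables dropout, which is usually what you want for inference.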
I just checked that my computer uses the GPU to run it, but the running time is roughly the same as on my CPU.
I'm using Windows 10, an i7-7700, an NVIDIA GTX 1050, Python 3.6, and CUDA 9.0.
Is there anything in my code that doesn't support the GPU, or how should I fix it? Thanks!
X = tf.placeholder(tf.float32,[None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
lstm_cells = [tf.contrib.rnn.LSTMCell(num_units = n_neurons, use_peepholes=True) for layer in range(n_layers)]
multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)
outputs, states = tf.nn.dynamic_rnn(multi_cell,X, dtype= tf.float32)
top_layer_h_state = states[-1][1]
logits = tf.layers.dense(top_layer_h_state, n_outputs, name="softmax")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 2000
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_examples):
            X_batch, y_batch = next_batch(iteration)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0:
            X_test, y_test, batch_num = test_batch(n_examples)
            acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
            print(epoch, "Test accuracy:", acc_test)
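Two hedged suggestions, not taken from the original post: first, logging device placement confirms whether the ops actually land on the GPU; second, tf.contrib.rnn.LSTMCell runs the recurrence step by step inside dynamic_rnn (and peephole connections rule out the fused cuDNN kernel), so small models often see little speedup over the CPU. The sketch below assumes a TF 1.x release where CudnnLSTM is a callable layer and that the batch-major input X is transposed to time-major.
# 1) Verify that the ops are placed on /device:GPU:0.
config = tf.ConfigProto(log_device_placement=True)
with tf.Session(config=config) as sess:
    init.run()

# 2) A cuDNN-backed LSTM (no peephole support); it expects time-major input
#    of shape [n_steps, batch, n_inputs].
cudnn_lstm = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers=n_layers, num_units=n_neurons)
outputs, _ = cudnn_lstm(tf.transpose(X, [1, 0, 2]))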
I made a CNN model with TensorFlow that implements dropout layers.
I pass an is_training argument to the network function so that dropout is disabled during the testing phase, and I realized that the errors are significantly higher when I disable it.
If I test the model with dropout still enabled (which makes no sense), I get an average error of 0.01, while if I test it with is_training set to False (but still train it with dropout), I get an error of 0.8.
I can't see where my error is.
Here is the model function:
def conv_net(x, arch, is_training=False):
    # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
    # Reshape to match picture format [Height x Width x Channel]
    # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    ### YOUR CODE STARTS HERE ###
    # Convolution Layer with F1 filters, a kernel size of K1 and ReLU activations
    pad = 'same'
    conv1 = tf.layers.conv2d(x, arch['conv1'][0], arch['conv1'][1], activation=tf.nn.relu)
    conv2 = tf.layers.conv2d(conv1, arch['conv2'][0], arch['conv2'][1], activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv2, arch['pool1'][0], arch['pool1'][0])
    drop1 = tf.layers.dropout(pool1, arch['dropout1'], training=is_training)

    conv3 = tf.layers.conv2d(drop1, arch['conv3'][0], arch['conv3'][1], activation=tf.nn.relu)  # TODO: add padding
    drop1_2 = tf.layers.dropout(conv3, arch['dropout1'], training=is_training)
    conv4 = tf.layers.conv2d(drop1_2, arch['conv4'][0], arch['conv4'][1], activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(conv4, arch['pool2'][0], arch['pool2'][0])
    drop2 = tf.layers.dropout(pool2, arch['dropout2'], training=is_training)

    flat = tf.contrib.layers.flatten(drop2)
    fc1 = tf.layers.dense(flat, arch['N'])
    out = tf.layers.dense(fc1, n_classes)
    ### YOUR CODE ENDS HERE ###
    return out
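One thing worth double-checking here (an assumption on my part, not a confirmed diagnosis): tf.layers.dropout takes rate, the fraction of units to drop, whereas tf.nn.dropout takes keep_prob, the fraction of units to keep. If the values stored in arch were meant as keep probabilities, training would drop far more units than intended, which would fit a model that behaves better with dropout left on at test time. A hypothetical illustration, where p stands in for a value like arch['dropout1']:
p = 0.8
drop_a = tf.layers.dropout(pool1, rate=p, training=is_training)  # drops 80% of the units
drop_b = tf.nn.dropout(pool1, keep_prob=p)                       # keeps 80% of the units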
And the training function:
def train_test_model(hypers, save_final_model=False):
    # Running the training session
    print("Starting training session...")
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init)
        total_batch = int(mnist.train.num_examples / hypers.batch_size)
        # Training cycle
        try:
            for epoch in range(hypers.n_epochs):
                avg_cost = 0.
                # Loop over all batches
                for i in range(total_batch):
                    batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                    # Run optimization op (backprop) and cost op (to get loss value)
                    _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                                  y: batch_y})
                    # Compute average loss
                    avg_cost += c / total_batch
                # Display logs per epoch step
                if epoch % display_step == 0:
                    # Test model
                    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                    # Calculate accuracy
                    # ORIGINAL:
                    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    # train_err = 1-accuracy.eval({x: mnist.train.images, y: mnist.train.labels})
                    # valid_err = 1-accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
                    # WITH BATCHES FOR LESS MEM ALLOC
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
                    train_acc = 0
                    for i in range(total_batch):
                        batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                        train_acc += accuracy.eval(feed_dict={x: batch_x,
                                                              y: batch_y})
                    train_acc /= total_batch
                    train_err = 1 - train_acc
                    valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
                    # Display accuracy
                    print("Epoch:", '%05d' % (epoch + 1), ", cost=",
                          "{:.9f}".format(avg_cost), ", train_err=", "{:.4f}".format(train_err), ", valid_err=",
                          "{:.4f}".format(valid_err))
                if epoch % 5 == 0:
                    v = input('Do you want to stop the model? [Y/n]')
                    if 'y' in v.lower():
                        raise KeyboardInterrupt
        except KeyboardInterrupt:
            hypers.n_epochs = epoch
            print("SIGINT Received, interrupting the training")

        print("\nOptimization Finished!\n")

        # Test model
        correct_prediction = tf.equal(tf.argmax(test_pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # modified to batches
        train_acc = 0
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
            train_acc += accuracy.eval(feed_dict={x: batch_x,
                                                  y: batch_y})
        train_acc /= total_batch
        train_err = 1 - train_acc
        #
        valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
        print("Optimized for ", '%05d' % (epoch + 1), "epochs, to obtain training error", "{:.4f}".format(train_err),
              ", and validation error", "{:.4f}".format(valid_err))
        confusion = tf.confusion_matrix(tf.argmax(pred, 1), tf.argmax(y, 1))
        print("\nValidation Confusion matrix:\n",
              confusion.eval({x: mnist.validation.images, y: mnist.validation.labels}))
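For reference, the training function above evaluates both pred (built with dropout active) and test_pred. A minimal sketch of one common way to build the two heads so that they share the same weights (an assumed pattern, not necessarily the one used in the original code):
with tf.variable_scope('convnet'):
    pred = conv_net(x, arch, is_training=True)        # dropout active
with tf.variable_scope('convnet', reuse=True):
    test_pred = conv_net(x, arch, is_training=False)  # same weights, dropout off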
The input does not contain any NaN values, but the accuracy is always 0.
n_nodes_hl1 = 3000
n_nodes_hl2 = 1500
n_nodes_hl3 = 1000
n_classes = 3
batch_size = 5
hm_epochs = 5
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            # while i < len(train_x):
            t = len(train_x)
            f = t % batch_size
            while i < (t - f):
                start = i
                end = i + batch_size
                batch_x = np.array(train_x[start:end])
                batch_y = np.array(train_y[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                epoch_loss += c
                # epoch_loss = epoch_loss + c
                i += batch_size
                # i = i + batch_size
            print('Epoch =', epoch+1, '/', hm_epochs, 'loss:', epoch_loss)
            save_path = saver.save(sess, "sesionestensorflow/model1802.ckpt")
            print("Model saved in path: %s" % save_path)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: test_x, y: test_y}))
I understand that it could be the loss function, and if that is the case, the network may need a clip-by-value step.
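A hedged sketch of the two changes I would try first (assumptions, not a confirmed diagnosis): softmax_cross_entropy_with_logits_v2 returns one loss value per example, so reducing it to a scalar mean keeps the printed epoch_loss meaningful, and clipping is usually applied to the gradients rather than to the loss value itself.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))

# Optional gradient clipping instead of calling minimize() directly.
opt = tf.train.AdamOptimizer(learning_rate=0.001)
gvs = opt.compute_gradients(cost)
capped = [(tf.clip_by_norm(g, 5.0), v) for g, v in gvs if g is not None]
optimizer = opt.apply_gradients(capped)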
I have been using this implementation of AlexNet training that I found on GitHub.
I have 8 input classes. Each class contains some images (e.g. class 1: bear, class 2: tiger, class 3: horse, ...).
The problem is that when I run the following code, the training accuracy is always equal to 1 divided by the number of classes (in this case training accuracy = 0.125; if I had only 2 classes, the training accuracy would be 0.5).
I find this really weird, and I could not figure out where the mistake is in the following code:
from importData import Dataset
import inference
training = Dataset('wxb_pic/pic', '.jpg')
testing = Dataset('wxb_pic/pic_test', '.jpg')
import tensorflow as tf
# Parameters
learn_rate = 0.001
decay_rate = 0.1
batch_size = 64
display_step = 20
n_classes = training.num_labels # we got mad kanji
dropout = 0.8 # Dropout, probability to keep units
imagesize = 227
img_channel = 3
x = tf.placeholder(tf.float32, [None, imagesize, imagesize, img_channel])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
pred = inference.alex_net(x, keep_prob, n_classes, imagesize, img_channel)
cost =tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(learn_rate, global_step, 1000, decay_rate, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost, global_step=global_step)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.initialize_all_variables()
saver = tf.train.Saver()
tf.add_to_collection("x", x)
tf.add_to_collection("y", y)
tf.add_to_collection("keep_prob", keep_prob)
tf.add_to_collection("pred", pred)
tf.add_to_collection("accuracy", accuracy)
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step < 3000:
        batch_ys, batch_xs = training.nextBatch(batch_size)
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        if step % display_step == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            rate = sess.run(lr)
            print "lr " + str(rate) + " Iter " + str(step) + ", Minibatch Loss= " + "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc)
        if step % 1000 == 0:
            saver.save(sess, 'save/model.ckpt', global_step=step*batch_size)
        step += 1
    print "Optimization Finished!"
    step_test = 1
    while step_test * batch_size < len(testing):
        testing_ys, testing_xs = testing.nextBatch(batch_size)
        print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: testing_xs, y: testing_ys, keep_prob: 1.})
        step_test += 1
I am stuck with this, and I want to train the AlexNet model to test the performance of my machine.
Thanks ^^
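An accuracy pinned at 1/n_classes usually means the network predicts the same class for every image. A small sanity check that could be added inside the training session above (my own sketch, assuming NumPy is imported as np) is to compare the distribution of predicted classes against the distribution of the labels for one batch; a single repeated prediction typically points at a learning rate that is too high or at images and labels being paired in the wrong order.
batch_ys, batch_xs = training.nextBatch(batch_size)
pred_classes = sess.run(tf.argmax(pred, 1), feed_dict={x: batch_xs, keep_prob: 1.})
true_classes = np.argmax(batch_ys, 1)
print(np.bincount(pred_classes, minlength=n_classes))  # how often each class is predicted
print(np.bincount(true_classes, minlength=n_classes))  # how often each class appears in the labels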