I just checked that my computer is using the GPU to run it, but the running time is roughly the same as on my CPU.
I'm on Windows 10 with an i7-7700, an NVIDIA GTX 1050, Python 3.6, and CUDA 9.0.
Is there any code here that doesn't support the GPU? Or how should I fix it? Thanks!
X = tf.placeholder(tf.float32,[None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])
lstm_cells = [tf.contrib.rnn.LSTMCell(num_units = n_neurons, use_peepholes=True) for layer in range(n_layers)]
multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)
outputs, states = tf.nn.dynamic_rnn(multi_cell,X, dtype= tf.float32)
top_layer_h_state = states[-1][1]
logits = tf.layers.dense(top_layer_h_state, n_outputs, name="softmax")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 2000
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_examples):
            X_batch, y_batch = next_batch(iteration)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0:
            X_test, y_test, batch_num = test_batch(n_examples)
            acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
            print(epoch, "Test accuracy:", acc_test)
I'm trying to make predictions on new text examples, where I want the model to return a probability output for each example.
This is my learning model:
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    drops = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_v = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embeddings = tf.nn.embedding_lookup(params=embeddings_v, ids=input_x)

# Bi-directional LSTM
with tf.name_scope('Bi_directional_LSTM'):
    rnn_outputs, _ = bi_rnn(LSTMCell(hidden_unit), LSTMCell(hidden_unit), inputs=embeddings, dtype=tf.float32)
    tf.summary.histogram('Bi_directional_LSTM', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs, attention_size, return_alphas=True)
    tf.summary.histogram('alphas', alphas)

with tf.name_scope('Dropout'):
    drop = tf.nn.dropout(attention_output, drops)

with tf.name_scope('Fully_connected_layer'):
    W = tf.Variable(tf.truncated_normal([hidden_unit * 2, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0., shape=[n_classes]))
    y_hat = tf.nn.xw_plus_b(drop, W, b)
    predictions = tf.argmax(input=y_hat, axis=1, name='predictions')

with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=input_y))
    global_step = tf.Variable(0.1, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)
    probs = tf.nn.sigmoid(y_hat)
    tf.summary.scalar('loss', loss)

with tf.name_scope('Accuracy'):
    correct_pred = tf.equal(tf.argmax(y_hat, 1), tf.argmax(input_y, 1))
    accuracy = tf.reduce_mean(input_tensor=tf.cast(correct_pred, tf.float32), name='accuracy')
    num_correct = tf.reduce_sum(input_tensor=tf.cast(correct_pred, 'float'), name='correct_predictions')
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()
I then run the following piece of code to train the model:
def batch_generator(X, y, batch_size):
    """batch generator"""
    size = X.shape[0]
    X_copy = X.copy()
    y_copy = y.copy()
    indices = np.arange(size)
    np.random.shuffle(indices)
    X_copy = X_copy[indices]
    y_copy = y_copy[indices]
    i = 0
    while True:
        if i + batch_size <= size:
            yield X_copy[i:i + batch_size], y_copy[i:i + batch_size]
            i += batch_size
        else:
            i = 0
            indices = np.arange(size)
            np.random.shuffle(indices)
            X_copy = X_copy[indices]
            y_copy = y_copy[indices]
            continue
train_batch_generator = batch_generator(x_train, y_train, batch_size)
test_batch_generator = batch_generator(x_dev, y_dev, batch_size)
predict_generator = batch_generator(x_test, y_test, batch_size)
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
    sess.run(tf.global_variables_initializer())
    print("Start learning...")
    for epoch in range(epochs):
        loss_train = 0
        loss_val = 0
        loss_test = 0
        accuracy_train = 0
        accuracy_val = 0
        accuracy_test = 0
        train_loss_l = []
        val_loss_l = []
        print("epoch: {}\t".format(epoch), end="")

        # Training
        num_batches = x_train.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(train_batch_generator)
            loss_tr, acc, _, summary = sess.run([loss, accuracy, optimizer, merged],
                                                feed_dict={input_x: x_batch,
                                                           input_y: y_batch,
                                                           drops: 0.5})
            train_loss_l.append(loss_tr)
            accuracy_train += acc
            loss_train = loss_tr * DELTA + loss_train * (1 - DELTA)
        accuracy_train /= num_batches

        # Validation
        num_batches = x_dev.shape[0] // batch_size
        for b in tqdm(range(num_batches)):
            x_batch, y_batch = next(test_batch_generator)
            val_loss, val_acc, summary = sess.run([loss, accuracy, merged],
                                                  feed_dict={input_x: x_batch,
                                                             input_y: y_batch,
                                                             drops: 0.5})
            val_loss_l.append(val_loss)
            accuracy_val += val_acc
            loss_val += val_loss
        accuracy_val /= num_batches
        loss_val /= num_batches

        print("loss: {:.3f}, val_loss: {:.3f}, acc: {:.3f}, val_acc: {:.3f}".format(loss_train, loss_val, accuracy_train, accuracy_val))

    # predict x_test
    num_batches = x_test.shape[0] // batch_size
    print("n batches", num_batches)
    predict_correct = 0
    for batch in tqdm(range(num_batches)):
        x_batch, yx_batch = next(predict_generator)
        y_true = np.argmax(yx_batch, 1)
        loss_pred, acc_pred, n_correct, y_pred = sess.run([loss, accuracy, num_correct, predictions],
                                                          feed_dict={input_x: x_batch,
                                                                     input_y: yx_batch,
                                                                     drops: 0.5})
        print("Precision", sk.metrics.precision_score(y_true, y_pred, average='weighted'))
        print("Recall", sk.metrics.recall_score(y_true, y_pred, average='weighted'))
        print("f1_score", sk.metrics.f1_score(y_true, y_pred, average='weighted'))
        print("confusion_matrix")
        print(sk.metrics.confusion_matrix(y_true, y_pred))

    saver.save(sess, MODEL_PATH)
    sess.run(predictions, feed_dict={x: x_test})
The code runs fine until it hits the prediction part where I get the following error:
TypeError: unhashable type: 'numpy.ndarray'
Any chance someone could explain this to me please?
Just answered my own question...
This is just in case someone else is attempting to do this: when you build the feed_dict, key it by the actual placeholder tensors you defined earlier, not by the arrays holding your data.
In my case this would be:
print(sess.run(predictions, feed_dict={input_x: x_test,drops:0.5}))
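For anyone hitting the same error: feed_dict keys must be the placeholder tensors themselves, since NumPy arrays are not hashable. A minimal sketch (hypothetical names, TF 1.x) of the difference:
import numpy as np
import tensorflow as tf

inp = tf.placeholder(tf.float32, [None, 3], name='inp')
out = inp * 2.0
data = np.ones((2, 3), dtype=np.float32)

with tf.Session() as sess:
    print(sess.run(out, feed_dict={inp: data}))  # OK: keyed by the placeholder
    # sess.run(out, feed_dict={data: data})      # TypeError: unhashable type: 'numpy.ndarray'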
I am trying to build a deep neural network with the low-level TensorFlow API. When I train and test the model, the loss and MAE on the test set and the training set are very similar, and both are very high compared to other models I have tried (e.g. Random Forest, AdaBoost decision trees). I even built the same DNN in Keras and it gave me much better results. I don't understand the problem; I don't have much experience in machine learning.
Tensorflow code
# load libraries
import tensorflow as tf
# reset graph
tf.reset_default_graph()
# define variables
n_hidden1 = 200
n_outputs = 1
with tf.device("/gpu:0"):
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.float32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
activation=tf.nn.leaky_relu)
logits = tf.layers.dense(hidden1, n_outputs, name="logits")
with tf.device("/cpu:0"):
with tf.name_scope("loss"):
loss = tf.reduce_mean(tf.abs(logits - y), name="loss")
with tf.device("cpu:0"):
with tf.name_scope("learning_rate"):
learning_rate = 0.001
with tf.device("/gpu:0"):
with tf.name_scope("train"):
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.device("/gpu:0"):
with tf.name_scope("eval"):
mae = tf.reduce_mean(tf.abs(logits - y), name="mae")
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
n_epochs = 200
batch_size = 1000
n_batches = int(np.ceil(X_train.shape[0] / batch_size))
# create graph variables initializer
init = tf.global_variables_initializer()
# create model saver
saver = tf.train.Saver()
# set device to gpu
with tf.device("/gpu:0"):
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            print("Epoch:", str(epoch) + "/" + str(n_epochs))
            batch_index = 0
            for X_batch, y_batch in shuffle_batch(X_train, np.array(y_train).reshape(-1), batch_size):
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            acc_batch = mae.eval(feed_dict={X: X_batch, y: y_batch})
            acc_val = mae.eval(feed_dict={X: X_test, y: np.array(y_test).reshape(-1)})
            loss_batch = loss.eval(feed_dict={X: X_batch, y: y_batch})
            loss_val = loss.eval(feed_dict={X: X_test, y: np.array(y_test).reshape(-1)})
            print("Batch mae:", acc_batch, "Val mae:", acc_val)
            print("Batch loss:", loss_batch, "Val loss:", loss_val)
Keras Code
import tensorflow as tf
from keras import layers
from keras import models
from keras import optimizers
from keras import initializers
from keras import regularizers
network = models.Sequential()
network.add(layers.Dense(units=200,
                         activation=tf.nn.leaky_relu,
                         input_shape=(X_train.shape[1], )))
# output layer
network.add(layers.Dense(units=1))
network.compile(loss="mae",
                optimizer=optimizers.Adam(lr=0.001),
                metrics=["mae"])
history = network.fit(X_train,
                      np.array(y_train).reshape(-1),
                      epochs=200,
                      verbose=1,
                      batch_size=1000,
                      validation_data=(X_test, np.array(y_test).reshape(-1)),
                      shuffle=True)
In a weight tensor (say a matrix), how do I select some of the elements, add them to a list of variables to be frozen, and leave the rest of the matrix trainable in TensorFlow?
Example: I created a variable W of size 20x20.
How can I pick out a few elements, like W[0][1] and W[13][15], and freeze them in the optimizer?
........
def rnn_cell(rnn_input, state, weight):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [n_inputs + n_neurons, n_neurons])
        b = tf.get_variable('b', [1, n_neurons],
                            initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat([rnn_input, state], 1), weight) + b)
part_W = tf.scatter_nd([[0,0]], [W[0][0]], [178,150])
W_2 = part_W + tf.stop_gradient(-part_W + W)
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state, W_2)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]
logits = fully_connected(final_state, n_outputs, activation_fn=None)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            h = np.zeros([batch_size, n_neurons])
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, p: h})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch, p: h})
        q = np.zeros([10000, n_neurons])
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test, p: q})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
I would recombine the weights with a copy of themselves that goes through tf.stop_gradient. For example,
import tensorflow as tf
w = tf.Variable(tf.zeros((10, 10)))
mask = tf.cast(tf.random_uniform((10, 10), 0, 2, dtype=tf.int32), tf.bool)
w = tf.where(mask, w, tf.stop_gradient(w))
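And a quick sketch (my own toy example, TF 1.x, hypothetical names) of how one might check that gradients really are blocked where the mask is False:
import tensorflow as tf

w_var = tf.Variable(tf.ones((3, 3)))
mask = tf.constant([[True, False, True],
                    [False, True, False],
                    [True, False, True]])
# Entries where the mask is False only see the stop_gradient copy.
w_mixed = tf.where(mask, w_var, tf.stop_gradient(w_var))

loss = tf.reduce_sum(tf.square(w_mixed))
grad = tf.gradients(loss, w_var)[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grad))  # 2.0 where the mask is True, 0.0 where it is False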
train.py
y = cnn_model.CNN(x)
...
print("Optimization Finished!")
tf.reset_default_graph()
saver.restore(sess, MODEL_DIRECTORY)
# Calculate accuracy for all mnist test images
test_size = test_labels.shape[0]
batch_size = 50
total_batch = int(test_size / batch_size)
acc_buffer = []
# Loop over all batches
for i in range(total_batch):
    # Compute the offset of the current minibatch in the data.
    offset = (i * batch_size) % (test_size)
    batch_xs = test_data[offset:(offset + batch_size), :]
    batch_ys = test_labels[offset:(offset + batch_size), :]
    y_final = sess.run(y, feed_dict={x: batch_xs, y_: batch_ys, is_training: False})
    correct_prediction = numpy.equal(numpy.argmax(y_final, 1), numpy.argmax(batch_ys, 1))
    acc_buffer.append(numpy.sum(correct_prediction) / batch_size)

print("******Test Results **********")
print("test accuracy for the current model: %g" % numpy.mean(acc_buffer))
test accuracy for the current model:0.9756
sess.close()
tf.reset_default_graph()
test.test_org(MODEL_DIRECTORY, batch_size)
test.py
def test_org(model_directory, batch_size):
    train_total_data, train_size, validation_data, validation_labels, test_data, test_labels = \
        mnist_data.prepare_MNIST_data(False)

    is_training = tf.placeholder(tf.bool, name='MODE')

    # tf Graph input
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])  # answer
    y = cnn_model.CNN(x, is_training=is_training)

    # Add ops to save and restore all the variables
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer(), feed_dict={is_training: True})

    # Restore variables from disk
    saver = tf.train.Saver()

    # Calculate accuracy for all mnist test images
    test_size = test_labels.shape[0]
    print(test_size)
    total_batch = int(test_size / batch_size)
    saver.restore(sess, model_directory)
    acc_buffer = []

    # Loop over all batches
    for i in range(total_batch):
        # Compute the offset of the current minibatch in the data.
        offset = (i * batch_size) % (test_size)
        batch_xs = test_data[offset:(offset + batch_size), :]
        batch_ys = test_labels[offset:(offset + batch_size), :]
        y_final = sess.run(y, feed_dict={x: batch_xs, y_: batch_ys, is_training: False})
        correct_prediction = numpy.equal(numpy.argmax(y_final, 1), numpy.argmax(batch_ys, 1))
        acc_buffer.append(numpy.sum(correct_prediction) / batch_size)

    print("******Test Results **********")
    print("test accuracy for the stored model: %g" % numpy.mean(acc_buffer))
test accuracy for the stored model: .9838
It appears that the steps to calculate accuracy are identical, but I can't understand why the values are different. In train.py I just finished training my model; should I not use the same session when I'm restoring?
Found the issue. In my train.py, I didn't pass in the is_training variable when initializing my model. But in my test.py, I do pass in the variable.
y = cnn_model.CNN(x, is_training=is_training)
So when I run my accuracy check in train.py, the is_training=False has no effect, causing the results to be slightly off.
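To illustrate the point (a hypothetical sketch, not the actual cnn_model code): if the network contains dropout or batch norm, the is_training placeholder has to be wired through for is_training=False to actually switch them off at evaluation time.
import tensorflow as tf

is_training = tf.placeholder(tf.bool, name='MODE')
x = tf.placeholder(tf.float32, [None, 784])

# When the placeholder is passed through, dropout is only active during training.
h = tf.layers.dropout(x, rate=0.5, training=is_training)

# If the model is built without it (e.g. cnn_model.CNN(x)), whatever default the
# model uses is baked in, and feeding is_training: False at eval time changes nothing.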
I have been using an implementation of AlexNet training that I found on GitHub.
I have 8 input classes, and each class contains some images (e.g. class 1: bear, class 2: tiger, class 3: horse, ...).
The problem is that when I run the following code, the training accuracy is always equal to 1 divided by the number of classes (in this case the training accuracy is 0.125; with only 2 classes it would be 0.5).
I find this really weird and could not figure out where the mistake is in the following code:
from importData import Dataset
import inference
training = Dataset('wxb_pic/pic', '.jpg')
testing = Dataset('wxb_pic/pic_test', '.jpg')
import tensorflow as tf
# Parameters
learn_rate = 0.001
decay_rate = 0.1
batch_size = 64
display_step = 20
n_classes = training.num_labels # we got mad kanji
dropout = 0.8 # Dropout, probability to keep units
imagesize = 227
img_channel = 3
x = tf.placeholder(tf.float32, [None, imagesize, imagesize, img_channel])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
pred = inference.alex_net(x, keep_prob, n_classes, imagesize, img_channel)
cost =tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(learn_rate, global_step, 1000, decay_rate, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost, global_step=global_step)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.initialize_all_variables()
saver = tf.train.Saver()
tf.add_to_collection("x", x)
tf.add_to_collection("y", y)
tf.add_to_collection("keep_prob", keep_prob)
tf.add_to_collection("pred", pred)
tf.add_to_collection("accuracy", accuracy)
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step < 3000:
        batch_ys, batch_xs = training.nextBatch(batch_size)
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        if step % display_step == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            rate = sess.run(lr)
            print "lr " + str(rate) + " Iter " + str(step) + ", Minibatch Loss= " + "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc)
        if step % 1000 == 0:
            saver.save(sess, 'save/model.ckpt', global_step=step*batch_size)
        step += 1
    print "Optimization Finished!"
    step_test = 1
    while step_test * batch_size < len(testing):
        testing_ys, testing_xs = testing.nextBatch(batch_size)
        print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: testing_xs, y: testing_ys, keep_prob: 1.})
        step_test += 1
I am stuck with this, and I want to train the AlexNet model to test the performance of my machine.
Thanks ^^