Why does my retrained model have poor accuracy? - python

I'm trying to retrain the final layer of a pretrained model using the same dataset (the MNIST handwritten digit dataset), but the accuracy of the retrained model is much worse than that of the initial model. My initial model reaches an accuracy of ~98%, while the retrained model's accuracy varies between 40% and 80% depending on the run. I get similar results when I don't bother to train the first 2 layers at all.
Here's a visualization of what I'm trying to do.
And the code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
epochs1 = 150
epochs2 = 300
batch_size = 11000
learning_rate1 = 1e-3
learning_rate2 = 1e-4
# Base model
def base_model(input, reuse=False):
    with tf.variable_scope('base_model', reuse=reuse):
        layer1 = tf.contrib.layers.fully_connected(input, 300)
        features = tf.contrib.layers.fully_connected(layer1, 300)
        return features
mnist = input_data.read_data_sets('./mnist/', one_hot=True)
image = tf.placeholder(tf.float32, [None, 784])
label = tf.placeholder(tf.float32, [None, 10])
features1 = base_model(image, reuse=False)
features2 = base_model(image, reuse=True)
# Logits1 trained with the base model
with tf.variable_scope('logits1', reuse=False):
    logits1 = tf.contrib.layers.fully_connected(features1, 10, tf.nn.relu)
# Logits2 trained while the base model is frozen
with tf.variable_scope('logits2', reuse=False):
    logits2 = tf.contrib.layers.fully_connected(features2, 10, tf.nn.relu)
# Var Lists
var_list_partial1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='logits1')
var_list_partial2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='base_model')
var_list1 = var_list_partial1 + var_list_partial2
var_list2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='logits2')
# Sanity check
print("var_list1:", var_list1)
print("var_list2:", var_list2)
# Cross Entropy Losses
loss1 = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=label)
loss2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=label)
# Train the final logits layer
train1 = tf.train.AdamOptimizer(learning_rate1).minimize(loss1, var_list=var_list1)
train2 = tf.train.AdamOptimizer(learning_rate2).minimize(loss2, var_list=var_list2)
# Accuracy operations
correct_prediction1 = tf.equal(tf.argmax(logits1, 1), tf.argmax(label, 1))
correct_prediction2 = tf.equal(tf.argmax(logits2, 1), tf.argmax(label, 1))
accuracy1 = tf.reduce_mean(tf.cast(correct_prediction1, "float"))
accuracy2 = tf.reduce_mean(tf.cast(correct_prediction2, "float"))
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    batches = int(len(mnist.train.images) / batch_size)
    # Train base model and logits1
    for epoch in range(epochs1):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train1, feed_dict={image: batch_xs, label: batch_ys})
    # Train logits2 keeping the base model frozen
    for epoch in range(epochs2):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train2, feed_dict={image: batch_xs, label: batch_ys})
    # Print both models' accuracy after training
    accuracy = sess.run(accuracy1, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Initial Model Accuracy After training final model:", accuracy)
    accuracy = sess.run(accuracy2, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Final Model Accuracy After Training:", accuracy)
Thanks in advance!

Try removing the non-linearity (the ReLU activation) from "logits1" and "logits2".
I changed your code to:
# Logits1 trained with the base model
with tf.variable_scope('logits1', reuse=False):
    #logits1 = tf.contrib.layers.fully_connected(features1, 10, tf.nn.relu)
    logits1 = tf.contrib.layers.fully_connected(features1, 10, None)
# Logits2 trained while the base model is frozen
with tf.variable_scope('logits2', reuse=False):
    #logits2 = tf.contrib.layers.fully_connected(features2, 10, tf.nn.relu)
    logits2 = tf.contrib.layers.fully_connected(features2, 10, None)
and the results changed to:
Initial Model Accuracy After training final model: 0.9805
Final Model Accuracy After Training: 0.9658
P.S. 300 + 300 neurons is too many for an MNIST classifier, but I think your point is not to classify MNIST :)
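To see why the non-linearity hurts here: ReLU clips every negative logit to exactly 0, so the final layer can never push a class's score below zero, and the gradient through a clipped unit is also 0, so a run can get stuck at whatever state it happens to reach (hence the 40-80% variance between runs). A small illustrative sketch with hypothetical numbers, not from the original post:
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

# hypothetical pre-activations where the true class (index 0) scores highest
raw = np.array([-0.5, -2.0, -3.0, -1.0])
print(softmax(raw))                   # raw logits still rank class 0 first
print(softmax(np.maximum(raw, 0.0)))  # relu clips all four to 0 -> uniform 0.25 each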

Related

Is doing a prediction like this in Tensorflow 1.9 correct?

So I have a CNN in Tensorflow 1.9 and want to make a prediction for a picture. I have this code:
# ...
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3], name='images_in')
y = tf.placeholder(tf.float32, [None, 10], name='labels_in')
def cnn(x):
    # define my cnn
    ...
# build the network, input comes from the 'x' placeholder
logits = cnn(x)
prediction = tf.nn.softmax(logits, name='prediction')
# softmax cross entropy loss function
loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=y))
# Adaptive Momentum optimizer - minimize the loss
optimizer = tf.train.AdamOptimizer(learning_rate=LEARN_RATE, name='Adam').minimize(loss)
# Check to see if the prediction matches the label
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
# Calculate accuracy as mean of the correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# TensorBoard data collection
tf.summary.scalar('cross_entropy_loss', loss)
tf.summary.scalar('accuracy', accuracy)
tf.summary.image('input_images', x)
# set up saver object
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.initializers.global_variables())
    # ... batch up, etc.
    _, s = sess.run([optimizer, tb_summary], feed_dict={x: batch_x, y: batch_y})
    # ...
    # Save chkpts, graph, etc.
and then, inside the with tf.Session() as sess: block, I would like to predict on my x_train or x_test, or better, on an image I've loaded myself. Is this the right code?
predictions = sess.run(correct_prediction, feed_dict={x: x_valid, y: y_valid})
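For what it's worth, a sketch of how one might read out actual class predictions (assuming x_valid is already loaded with shape [N, 32, 32, 3]): correct_prediction only yields per-example booleans and needs labels, while class predictions come from the softmax output alone.
# booleans: was each example classified correctly? (requires labels)
matches = sess.run(correct_prediction, feed_dict={x: x_valid, y: y_valid})
# class predictions: no labels needed, run the softmax and take the argmax
probs = sess.run(prediction, feed_dict={x: x_valid})
predicted_classes = probs.argmax(axis=1)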

'tuple' object has no attribute 'train'

I am getting an error saying 'tuple' object has no attribute 'train'. I cannot understand this error (I am using Google Colab). Please help me, and explain the training part in as much detail as you can. My code is below. Thanks a lot in advance.
%tensorflow_version 1.x
## loading nessecery functions and CIFAR10 dataset
from __future__ import print_function
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
tf.__version__
((train_X, train_y), (test_X, test_y)) = cifar10.load_data()
print(f"train_X: {train_X.shape}, test_X = {test_X.shape}")
cifar10 = cifar10.load_data()
# define placeholder for inputs to network
X = tf.placeholder(tf.float32, [None, 3072])/255.0 # 32x32x3
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
learning_rate = 0.001
training_epochs =10
batch_size = 30
# weights & bias for nn layers
W = tf.Variable(tf.random_normal([3072, 10]))
b = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(X, W) + b
# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# train my model
for epoch in range(training_epochs):
    avg_cost = 0
    num_examples = 50000
    total_batch = int(num_examples / batch_size)
    # My question is here
    for i in range(total_batch):
        batch_xs, batch_ys = cifar10.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
print('Learning Finished!')
# Test model and check accuracy
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={X: cifar10.test.images, Y: cifar10.test.labels}))
You're trying to access a train attribute on the tuple returned by cifar10.load_data(): the line cifar10 = cifar10.load_data() rebinds the name cifar10 from the dataset module to that tuple. You had already loaded the data correctly in a previous step:
(train_X, train_y), (test_X, test_y) = cifar10.load_data()
cifar10.load_data() is a data loader, which returns the train and test sets of the dataset.
If you want to implement a next_batch method like the one used above, you'll need to define a custom helper class; this is quite commonly done. Here's an example sketch:
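A minimal sketch, assuming the arrays are reshaped to match the placeholders above (images flattened to 3072 floats, labels one-hot with 10 classes); the class name is illustrative, not from any library:
import numpy as np

class DataFeeder:
    # tiny stand-in for the old mnist-style next_batch interface
    def __init__(self, images, labels):
        self.images, self.labels = images, labels
        self.pos = 0

    def next_batch(self, batch_size):
        if self.pos + batch_size > len(self.images):
            # reshuffle and restart once the epoch is exhausted
            perm = np.random.permutation(len(self.images))
            self.images, self.labels = self.images[perm], self.labels[perm]
            self.pos = 0
        start, self.pos = self.pos, self.pos + batch_size
        return self.images[start:self.pos], self.labels[start:self.pos]

# flatten the images and one-hot the labels to fit the X and Y placeholders
train = DataFeeder(train_X.reshape(-1, 3072).astype('float32'),
                   np.eye(10)[train_y.reshape(-1)])
batch_xs, batch_ys = train.next_batch(30)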

Tensorflow DNN always underfits my dataset

I am trying to make a deep neural network with the low-level API of TensorFlow, but when I train the model and test it, the loss and MAE of the testing set and the training set are very similar and very high compared to other models I have tried (e.g. Random Forest, AdaBoost Decision Tree). I even made the same DNN using Keras and it gave me much better results. I don't understand the problem; I do not have too much experience in machine learning.
Tensorflow code
# load libraries
import tensorflow as tf
import numpy as np  # needed for shuffle_batch and the feed reshapes
# reset graph
tf.reset_default_graph()
# define variables
n_hidden1 = 200
n_outputs = 1
with tf.device("/gpu:0"):
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
y = tf.placeholder(tf.float32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
activation=tf.nn.leaky_relu)
logits = tf.layers.dense(hidden1, n_outputs, name="logits")
with tf.device("/cpu:0"):
with tf.name_scope("loss"):
loss = tf.reduce_mean(tf.abs(logits - y), name="loss")
with tf.device("cpu:0"):
with tf.name_scope("learning_rate"):
learning_rate = 0.001
with tf.device("/gpu:0"):
with tf.name_scope("train"):
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.device("/gpu:0"):
with tf.name_scope("eval"):
mae = tf.reduce_mean(tf.abs(logits - y), name="mae")
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
yield X_batch, y_batch
n_epochs = 200
batch_size = 1000
n_batches = int(np.ceil(X_train.shape[0] / batch_size))
# create graph variables initializer
init = tf.global_variables_initializer()
# create model saver
saver = tf.train.Saver()
# set device to gpu
with tf.device("/gpu:0"):
with tf.Session() as sess:
sess.run(init)
for epoch in range(n_epochs):
print("Epoch:", str(epoch) + "/" + str(n_epochs))
batch_index = 0
for X_batch, y_batch in shuffle_batch(X_train, np.array(y_train).reshape(-1), batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_batch = mae.eval(feed_dict={X: X_batch, y: y_batch})
acc_val = mae.eval(feed_dict={X: X_test, y: np.array(y_test).reshape(-1)})
loss_batch = loss.eval(feed_dict={X: X_batch, y: y_batch})
loss_val = loss.eval(feed_dict={X: X_test, y: np.array(y_test).reshape(-1)})
print("Batch mae:", acc_batch, "Val mae:", acc_val)
print("Batch loss:", loss_batch, "Val loss:", loss_val)
Keras Code
import tensorflow as tf
from keras import layers
from keras import models
from keras import optimizers
from keras import initializers
from keras import regularizers
network = models.Sequential()
network.add(layers.Dense(units=200,
                         activation=tf.nn.leaky_relu,
                         input_shape=(X_train.shape[1], )))
# output layer
network.add(layers.Dense(units=1))
network.compile(loss="mae",
                optimizer=optimizers.Adam(lr=0.001),
                metrics=["mae"])
history = network.fit(X_train,
                      np.array(y_train).reshape(-1),
                      epochs=200,
                      verbose=1,
                      batch_size=1000,
                      validation_data=(X_test, np.array(y_test).reshape(-1)),
                      shuffle=True)
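One thing worth checking in the low-level version (an observation, not from the original post): y is declared with shape (None) while logits has shape (None, 1), so logits - y broadcasts to a (batch_size, batch_size) matrix, and the "loss" being minimized is the mean over all prediction/target pairs rather than the per-example MAE. That pulls every prediction toward the overall target median, which looks exactly like severe underfitting; Keras avoids this because it validates target shapes. A hedged sketch of the fix, assuming the rest of the graph stays the same:
# declare the target with the same rank as the network output...
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# ...and feed column vectors instead of flat arrays
sess.run(training_op, feed_dict={X: X_batch,
                                 y: np.array(y_batch).reshape(-1, 1)})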

How to use tf.contrib.model_pruning on MNIST?

I'm struggling to use Tensorflow's pruning library and haven't found many helpful examples, so I'm looking for help pruning a simple model trained on the MNIST dataset. If anyone can either help fix my attempt or provide an example of how to use the library on MNIST, I would be very grateful.
The first half of my code is pretty standard, except that my model has 2 hidden layers, 300 units wide, built with layers.masked_fully_connected so they can be pruned.
import tensorflow as tf
from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import layers
from tensorflow.examples.tutorials.mnist import input_data
# Import dataset
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Define Placeholders
image = tf.placeholder(tf.float32, [None, 784])
label = tf.placeholder(tf.float32, [None, 10])
# Define the model
layer1 = layers.masked_fully_connected(image, 300)
layer2 = layers.masked_fully_connected(layer1, 300)
logits = tf.contrib.layers.fully_connected(layer2, 10, tf.nn.relu)
# Loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=label))
# Training op
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)
# Accuracy ops
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Then I attempt to define the necessary pruning operations, but I get an error.
############ Pruning Operations ##############
# Create global step variable
global_step = tf.contrib.framework.get_or_create_global_step()
# Create a pruning object using the pruning specification
pruning_hparams = pruning.get_pruning_hparams()
p = pruning.Pruning(pruning_hparams, global_step=global_step)
# Mask Update op
mask_update_op = p.conditional_mask_update_op()
# Set up the specification for model pruning
prune_train = tf.contrib.model_pruning.train(train_op=train_op, logdir=None, mask_update_op=mask_update_op)
Error on this line:
prune_train = tf.contrib.model_pruning.train(train_op=train_op, logdir=None, mask_update_op=mask_update_op)
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_1' with dtype float and shape [?,10]
[[Node: Placeholder_1 = Placeholder[dtype=DT_FLOAT, shape=[?,10], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[Node: global_step/_57 = _Recv[_start_time=0, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_71_global_step", tensor_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I assume it wants a different type of operation in place of train_op, but I haven't found any adjustments that work.
Again, if you have a different working example that prunes a model trained on MNIST, I would consider that an answer.
This is the simplest pruning library example I could get working; figured I'd post it here in case it helps some other noobie who has a hard time with the documentation.
import tensorflow as tf
from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import layers
from tensorflow.examples.tutorials.mnist import input_data
epochs = 250
batch_size = 55000 # Entire training set
# Import dataset
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
batches = int(len(mnist.train.images) / batch_size)
# Define Placeholders
image = tf.placeholder(tf.float32, [None, 784])
label = tf.placeholder(tf.float32, [None, 10])
# Define the model
layer1 = layers.masked_fully_connected(image, 300)
layer2 = layers.masked_fully_connected(layer1, 300)
logits = layers.masked_fully_connected(layer2, 10)
# Create global step variable (needed for pruning)
global_step = tf.train.get_or_create_global_step()
reset_global_step_op = tf.assign(global_step, 0)
# Loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=label))
# Training op, the global step is critical here, make sure it matches the one used in pruning later
# running this operation increments the global_step
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=global_step)
# Accuracy ops
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Get, Print, and Edit Pruning Hyperparameters
pruning_hparams = pruning.get_pruning_hparams()
print("Pruning Hyperparameters:", pruning_hparams)
# Change hyperparameters to meet our needs
pruning_hparams.begin_pruning_step = 0
pruning_hparams.end_pruning_step = 250
pruning_hparams.pruning_frequency = 1
pruning_hparams.sparsity_function_end_step = 250
pruning_hparams.target_sparsity = .9
# Create a pruning object using the pruning specification, sparsity seems to have priority over the hparam
p = pruning.Pruning(pruning_hparams, global_step=global_step, sparsity=.9)
prune_op = p.conditional_mask_update_op()
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # Train the model before pruning (optional)
    for epoch in range(epochs):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={image: batch_xs, label: batch_ys})
        # Calculate Test Accuracy every 10 epochs
        if epoch % 10 == 0:
            acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
            print("Un-pruned model step %d test accuracy %g" % (epoch, acc_print))
    acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Pre-Pruning accuracy:", acc_print)
    print("Sparsity of layers (should be 0)", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))
    # Reset the global step counter and begin pruning
    sess.run(reset_global_step_op)
    for epoch in range(epochs):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Prune and retrain
            sess.run(prune_op)
            sess.run(train_op, feed_dict={image: batch_xs, label: batch_ys})
        # Calculate Test Accuracy every 10 epochs
        if epoch % 10 == 0:
            acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
            print("Pruned model step %d test accuracy %g" % (epoch, acc_print))
            print("Weight sparsities:", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))
    # Print final accuracy
    acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Final accuracy:", acc_print)
    print("Final sparsity by layer (should be 0.9)", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))
Roman Nikishin requested code that could save the model; it's a slight extension of my original answer.
import tensorflow as tf
from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import layers
from tensorflow.examples.tutorials.mnist import input_data
epochs = 250
batch_size = 55000 # Entire training set
model_path_unpruned = "Model_Saves/Unpruned.ckpt"
model_path_pruned = "Model_Saves/Pruned.ckpt"
# Import dataset
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
batches = int(len(mnist.train.images) / batch_size)
# Define Placeholders
image = tf.placeholder(tf.float32, [None, 784])
label = tf.placeholder(tf.float32, [None, 10])
# Define the model
layer1 = layers.masked_fully_connected(image, 300)
layer2 = layers.masked_fully_connected(layer1, 300)
logits = layers.masked_fully_connected(layer2, 10)
# Create global step variable (needed for pruning)
global_step = tf.train.get_or_create_global_step()
reset_global_step_op = tf.assign(global_step, 0)
# Loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=label))
# Training op, the global step is critical here, make sure it matches the one used in pruning later
# running this operation increments the global_step
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=global_step)
# Accuracy ops
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Get, Print, and Edit Pruning Hyperparameters
pruning_hparams = pruning.get_pruning_hparams()
print("Pruning Hyperparameters:", pruning_hparams)
# Change hyperparameters to meet our needs
pruning_hparams.begin_pruning_step = 0
pruning_hparams.end_pruning_step = 250
pruning_hparams.pruning_frequency = 1
pruning_hparams.sparsity_function_end_step = 250
pruning_hparams.target_sparsity = .9
# Create a pruning object using the pruning specification, sparsity seems to have priority over the hparam
p = pruning.Pruning(pruning_hparams, global_step=global_step, sparsity=.9)
prune_op = p.conditional_mask_update_op()
# Create a saver for writing training checkpoints.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Uncomment the following if you don't have a trained model yet
    sess.run(tf.initialize_all_variables())
    # Train the model before pruning (optional)
    for epoch in range(epochs):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={image: batch_xs, label: batch_ys})
        # Calculate Test Accuracy every 10 epochs
        if epoch % 10 == 0:
            acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
            print("Un-pruned model step %d test accuracy %g" % (epoch, acc_print))
    acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Pre-Pruning accuracy:", acc_print)
    print("Sparsity of layers (should be 0)", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))
    # Saves the model before pruning
    saver.save(sess, model_path_unpruned)
    # Resets the session and restores the saved model
    sess.run(tf.initialize_all_variables())
    saver.restore(sess, model_path_unpruned)
    # Reset the global step counter and begin pruning
    sess.run(reset_global_step_op)
    for epoch in range(epochs):
        for batch in range(batches):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Prune and retrain
            sess.run(prune_op)
            sess.run(train_op, feed_dict={image: batch_xs, label: batch_ys})
        # Calculate Test Accuracy every 10 epochs
        if epoch % 10 == 0:
            acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
            print("Pruned model step %d test accuracy %g" % (epoch, acc_print))
            print("Weight sparsities:", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))
    # Saves the model after pruning
    saver.save(sess, model_path_pruned)
    # Print final accuracy
    acc_print = sess.run(accuracy, feed_dict={image: mnist.test.images, label: mnist.test.labels})
    print("Final accuracy:", acc_print)
    print("Final sparsity by layer (should be 0.9)", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))

How to mix variable sequence length and variable batch size in RNN

I'm trying to train an RNN classifier with a training set that has variable-length sequences. I've read around, and the suggested solution is to bucket the training data, feed batches out of each bucket, and exit the RNN at a certain sequence_length. However, the results from this process are much worse than from just trimming the input sequences to length 10, and my loss function and accuracy move up and down erratically.
Some more information:
I have a variable batch size because sometimes there aren't enough samples of a certain sequence length in a bucket
A sequence is a series of words converted into word embeddings
If I have a bucket of sequences of length 4, the batch would still be of width max_seq_length, with everything padded on the right with vectors full of 0's
batches have the following shape (batch_size,number_steps,embedding_dimensions)
batch(128,48,100)
sequence_length = 4
classes(128,188)
batch(128,48,100)
sequence_length = 8
classes(128,188)
...
batch(30,48,100)
sequence_length = 40
classes(30,188)
I feed the sequence_length as the early_stop variable to my RNN.
What is the best way to mix variable sequence length with variable batch size?
My Graph:
# Network Parameters
n_input = embeddings_dim # word embeddings dimensions : 100
n_steps = max_seq_len # timesteps = maximum sequence length in my training = 47
n_classes = total_labels # total classes = 188
graph = tf.Graph()
with graph.as_default():
    # tf Graph input
    x = tf.placeholder("float", [None, n_steps, n_input])
    # Tensorflow LSTM cell requires 2x n_hidden length (state & cell)
    istate = tf.placeholder("float", [None, 2*n_hidden])
    y = tf.placeholder("float", [None, n_classes])
    # at what step we should read out the value of the RNN
    early_stop = tf.placeholder(tf.int32)
    tf.scalar_summary('early_stop', early_stop)
    # Define weights
    weights = {
        'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([n_hidden])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }
    def RNN(_X, _istate, _weights, _biases):
        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
        # Define a lstm cell with tensorflow
        lstm_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=0.25)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)
        # Get lstm cell output
        outputs, states = rnn.rnn(lstm_cell, _X,
                                  initial_state=_istate,
                                  sequence_length=early_stop)
        # Linear activation
        # Get inner loop last output
        return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
    pred = RNN(x, istate, weights, biases)
    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))  # Softmax loss
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate_2 = tf.train.exponential_decay(learning_rate, global_step, 300, 0.96, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_2).minimize(cost)  # Adam Optimizer
    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
My Training loop:
with tf.Session(graph=graph) as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        #batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        batch_xs, batch_ys, batch_sl = train_batches.next()
        ins_batch_size = len(batch_xs)
        # Fit training using batch data
        summ, acc = sess.run([merged, optimizer], feed_dict={x: batch_xs, y: batch_ys,
                                                             istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                             early_stop: batch_sl})
        train_writer.add_summary(summ, step)
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys,
                                                istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                early_stop: batch_sl})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys,
                                             istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                             early_stop: batch_sl})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    test_data, test_label, test_sl = test_batches.next()
    test_len = len(test_data)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label,
                                                             istate: np.zeros((test_len, 2*n_hidden)),
                                                             early_stop: test_sl}))
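One approach worth sketching (an addition, not from the original post, and assuming a later TF 1.x API is available): instead of bucketing, pad every batch to max_seq_len and pass a per-example length vector to tf.nn.dynamic_rnn rather than a single scalar early_stop. The cell then stops unrolling each row at its own length, so one graph handles mixed sequence lengths and any batch size. A simplified sketch that skips the hidden projection layer:
# per-example lengths instead of one scalar early_stop for the whole batch
seq_len = tf.placeholder(tf.int32, [None])
cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=0.25)
# x keeps its (batch_size, n_steps, n_input) shape; no transpose/split needed
outputs, state = tf.nn.dynamic_rnn(cell, x,
                                   sequence_length=seq_len,
                                   dtype=tf.float32)
# state.h holds the last valid output of every row, whatever its length
pred = tf.matmul(state.h, weights['out']) + biases['out']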
