I am new to TensorFlow and trying to train the following two-layer network. It does not seem to be working: the cross entropy is not decreasing over iterations. I think I made a mistake connecting the hidden layer to the output layer. Please help me if you can see the problem.
import tensorflow as tf
from scipy.io import loadmat
import numpy as np
import sys
x = loadmat('../mnist_data/ex4data1.mat')
X = x['X']
# one hot conversion
y_temp = x['y']
y_temp = np.reshape(y_temp, (len(y_temp),))
y = np.zeros((len(y_temp),10))
y[np.arange(len(y_temp)), y_temp-1] = 1.
input_size = 400
hidden1_size = 25
output_size = 10
num_iters = 50
reg_alpha = 0.05
x = tf.placeholder(tf.float32, [None, input_size], name='data')
W1 = tf.Variable(tf.zeros([hidden1_size, input_size], tf.float32, name='weights_1st_layer'))
b1 = tf.Variable(tf.zeros([hidden1_size], tf.float32), name='bias_layer_1')
W2 = tf.Variable(tf.zeros([output_size, hidden1_size], tf.float32, name='weights_2nd_layer'))
b2 = tf.Variable(tf.zeros([output_size], tf.float32), name='bias_layer_2')
hidden_op = tf.nn.relu(tf.add(tf.matmul(x, W1, transpose_b=True), b1))
output_op = tf.matmul(hidden_op, W2, transpose_b=True) + b2
pred = tf.nn.softmax(output_op)
y_ = tf.placeholder(tf.float32, [None, 10], name='actual_labels')
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
labels=y_, logits=output_op))
train_step = tf.train.GradientDescentOptimizer(reg_alpha).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(num_iters):
    print('training..', _)
    print(sess.run([train_step, cross_entropy], feed_dict={x: X, y_: y}))
corr_pred = tf.equal(tf.argmax(pred, axis=1), tf.argmax(y_, axis=1))
acc = tf.reduce_mean(tf.cast(corr_pred, tf.float32))
print (sess.run(acc, feed_dict={x:X, y_:y}))
sess.close()
Try initialising your weights randomly, not with zeros.
So instead of:
W1 = tf.Variable(tf.zeros([hidden1_size, input_size], tf.float32, name='weights_1st_layer'))
W2 = tf.Variable(tf.zeros([output_size, hidden1_size], tf.float32, name='weights_2nd_layer'))
use:
W1 = tf.Variable(tf.truncated_normal([hidden1_size, input_size], stddev=0.1, dtype=tf.float32), name='weights_1st_layer')
W2 = tf.Variable(tf.truncated_normal([output_size, hidden1_size], stddev=0.1, dtype=tf.float32), name='weights_2nd_layer')
Check this nice summary of why initialising all the weights to zero prevents the network from training.
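To see the symmetry problem concretely, here is a minimal NumPy sketch (my own illustration, not part of the original answer): with all-zero weights, every hidden ReLU unit computes exactly the same value (zero), so every unit receives exactly the same gradient and they can never differentiate from one another.
import numpy as np
np.random.seed(0)
W1 = np.zeros((25, 400))            # all-zero first-layer weights
X = np.random.rand(5, 400)          # a dummy batch of 5 examples
h = np.maximum(0, X @ W1.T)         # hidden activations: all zero
print(np.unique(h))                 # [0.] -- every unit is identical, and so are their gradients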
I've tried to figure things out myself and not fall back to actually creating an account here, but as a self-taught beginner I've reached a wall with this code.
I'm having two major issues, besides optimizing the net architecture once everything is working:
Every time I've tried to create a new dataset for a test batch I've run into an 'xTensor is not a Tensor' error and couldn't run a session through it, unlike with the iterator, which works just fine. I'm loading custom data with directory names as labels, with no manually created train and test directories. I'm probably missing a proper method for tf.
I can't work around the first error I currently get, which is:
ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)' when feeding feed_dict={y: batch_y}. I've tried some of the solutions posted on SO but couldn't get them to work.
I'm pasting the whole thing; the ########## lines mark the problem-triggering zones, at the very bottom in the session.
import tensorflow as tf
import numpy as np
import os
# load custom imageset directory
data_path = r"..\datasets\images\flowers"
# setup hypervariables for labels and images format
n_classes = 5
img_width = 64
img_length = 64
channels = 3
# setup hypervariables for network
learning_rate = 0.0001
epochs = 2
batch_size = 100
drop_rate = 0.6
imagepaths = list()
labels = list()
label = 0
classes = sorted(os.walk(data_path).__next__()[1])
# List each sub-directory (the classes)
for c in classes:
    c_dir = os.path.join(data_path, c)
    walk = os.walk(c_dir).__next__()
    # Add each image to the training set
    for sample in walk[2]:
        imagepaths.append(os.path.join(c_dir, sample))
        labels.append(label)
    label += 1
total_input = len(labels)
# Convert to Tensor
imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.int32)
# Build a TF Queue, shuffle data
dataset = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
# read, decode, resize and normalize images on RGB range
def parse(imagepath, label):
    image = tf.read_file(imagepath)
    image = tf.image.decode_jpeg(image, channels=channels)
    image = tf.image.resize_images(image, [img_length, img_width])
    image = image * 1.0/255
    return image, label
dataset = dataset.map(parse)
dataset = dataset.shuffle(buffer_size=batch_size*10)
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
# hypervariables for layers' output size
K = 16
L = 32
M = 200
x = tf.placeholder(tf.float32, [None, 4326])
x_shaped = tf.reshape(x, [-1, img_length, img_width, 3])
y = tf.placeholder(tf.float32, [None, 1])
# weight, bias with stride size and activation method after convolution for layer 1
W1 = tf.Variable(tf.truncated_normal([5, 5, 3, K], stddev=0.03))
b1 = tf.Variable(tf.truncated_normal([K], stddev=0.01))
stride = 1
y1 = tf.nn.relu(tf.nn.conv2d(x_shaped, W1, strides=[1, stride, stride, 1], padding='SAME') + b1)
# weight, bias with stride size and activation method after convolution for layer 2
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.03))
b2 = tf.Variable(tf.truncated_normal([L], stddev=0.01))
stride = 2 # output is 14x14
y2 = tf.nn.relu(tf.nn.conv2d(y1, W2, strides=[1, stride, stride, 1], padding='SAME') + b2)
yflat = tf.reshape(y2, [-1, 7 * 7 * L])
W3 = tf.Variable(tf.truncated_normal([7 * 7 * L, M], stddev=0.1))
b3 = tf.Variable(tf.truncated_normal([M], stddev=0.01))
y3 = tf.nn.relu(tf.matmul(yflat, W3) + b3)
W4 = tf.Variable(tf.truncated_normal([M, 10], stddev=0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev=0.01))
ylogits = tf.matmul(y3, W4) + b4
y_ = tf.nn.softmax(ylogits)
# add cross entropy for back prop
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=ylogits, labels=y_))
# add an optimiser for back prop
optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
# define an accuracy assessment operation
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ########## temporary solution for test_x, test_y
    test_x, test_y = sess.run(next_batch)
    total_batch = int(total_input / batch_size)
    # define the iterator for the network
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(total_batch):
            batch_x, batch_y = sess.run(next_batch)
            ########## ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0' -> y: batch_y
            _, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
            avg_cost += c / total_batch
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost), " test accuracy: {:.3f}".format(test_acc))
        summary = sess.run(merged, feed_dict={x: test_x, y: test_y})
    print("\nTraining complete!")
    print(sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
Are you sure that this part:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
doesn't have to be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x: batch_x, y: batch_y})
Furthermore, you have a batch size of 100; the data in the array is right, but its shape is not complete.
What you have (dummy example)
np.zeros((100,)).shape
>>> (100,)
Here 100 matches the '?' of the required shape '(?, 1)'; the 1 can easily be added, and it often happens that NumPy does not do this automatically. See the following code:
np.expand_dims(np.zeros((100,)), axis=-1).shape
>>> (100, 1)
axis=-1 stands for the last axis; you are basically telling NumPy to add a dimension at the end. This does not affect the data itself, only the shape of the array. So your code should be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y:np.expand_dims(batch_y, axis=-1)})
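Equivalently, a small alternative sketch (not from the original answer): np.reshape produces the same (100, 1) result if you prefer to state both axes explicitly:
batch_y = np.reshape(batch_y, (-1, 1))  # (100,) -> (100, 1): same data, explicit trailing axis
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})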
I would like to do some sequence prediction in TensorFlow using a GRU, so I have created the same model in 2 different ways, as follows:
In model 1 I have 2 GRUs, one after the other; that is, new_state1, the final hidden state of the first GRU, acts as the initial state of the second GRU. Therefore, the model outputs new_state1 and new_state2 consecutively. Note that this is not a 2-layer model, but only 1 layer. In the code below, I divided the input and the output into 2 parts, where GRU1 takes the first part and the second GRU takes the second part.
Also, the random seed is set and fixed for both models so that the results are comparable.
Model 1
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
labels = tf.concat([labels1, labels2], axis=1, name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
def model(input_feat1, input_feat2):
    with tf.variable_scope("GRU"):
        cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
        cell2 = tf.nn.rnn_cell.GRUCell(cell_size)
        with tf.variable_scope("First50"):
            # output1: shape=[1, time_steps1, 32]
            output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)
        with tf.variable_scope("Second50"):
            # output2: shape=[1, time_steps2, 32]
            output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)
        with tf.variable_scope("output"):
            # output shape: [1, time_steps1 + time_steps2, 32] => [1, 1000, 32]
            output = tf.concat([output1, output2], axis=1)
            output = tf.reshape(output, shape=[-1, cell_size])
            output = tf.layers.dense(output, units=1)
            output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])
        with tf.variable_scope("outputs_1_2_reshaped"):
            output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
            output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])
        print(output.get_shape().as_list(), "1")
        print(output1.get_shape().as_list(), "2")
        print(output2.get_shape().as_list(), "3")
        return output, output1, output2, initial_state, new_state1, new_state2
output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    to_run_list = [new_state1, new_state2]
    in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
    in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))
    l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
    l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))
    i_s = np.zeros([1, cell_size])
    new_s1, new_s2 = sess.run(to_run_list, feed_dict={input_1: in1,
                                                      input_2: in2,
                                                      labels1: l1,
                                                      labels2: l2,
                                                      initial_state: i_s})
    print(np.shape(new_s1), np.shape(new_s2))
    print(np.mean(new_s1), np.mean(new_s2))
    print(np.sum(new_s1), np.sum(new_s2))
In this model, instead of having 2 different GRUs, I created one, divided the input and labels into 2 parts as well, and used a for loop to iterate over my input dataset. The final state is then taken and fed back into the same model as the initial state.
Note that both model1 and model2 start from the very same initial state of zeros.
Model 2
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps = 500
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
inputs = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="inputs")
labels = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
grads_initial_state = tf.placeholder(dtype=m_dtype, shape=[None, cell_size], name="prev_grads")
this_is_last_batch = tf.placeholder(dtype=tf.bool, name="this_is_last_batch")
def model(input_feat):
    with tf.variable_scope("GRU"):
        cell = tf.nn.rnn_cell.GRUCell(cell_size)
        with tf.variable_scope("cell"):
            # output: shape=[1, time_steps, 32]
            output, new_state = tf.nn.dynamic_rnn(cell, input_feat, dtype=m_dtype, initial_state=initial_state)
        with tf.variable_scope("output"):
            output = tf.reshape(output, shape=[-1, cell_size])
            output = tf.layers.dense(output, units=1)
            output = tf.reshape(output, shape=[1, time_steps, 1])
        print(output.get_shape().as_list(), "1")
        return output, new_state
output, new_state = model(inputs)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # 1000 // 500 = 2
    num_iterations = seq_length // time_steps
    print("num_iterations:", num_iterations)
    final_states = []
    for i in range(num_iterations):
        current_xt = x_t[i * time_steps: (i + 1) * time_steps]
        current_xt_plus_1 = x_t_plus_1[i * time_steps: (i + 1) * time_steps]
        in1 = np.reshape(current_xt, newshape=(1, time_steps, 1))
        l1 = np.reshape(current_xt_plus_1, newshape=(1, time_steps, 1))
        i_s = np.zeros([1, cell_size])
        if i == 0:
            new_s = sess.run(new_state, feed_dict={inputs: in1,
                                                   labels: l1,
                                                   initial_state: i_s})
            final_states.append(new_s)
            print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
        else:
            new_s = sess.run(new_state, feed_dict={inputs: in1,
                                                   labels: l1,
                                                   initial_state: final_states[-1]})
            final_states.append(new_s)
            print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
Finally, after printing out the statistics of new_state1 and new_state2 in model1, they were different from new_state in model2 after each iteration.
I would like to know how to fix this problem and why it is happening.
Edit:
I have figured out that the weight values of the GRU in the two files are different.
Now, how can I reproduce the same results in the 2 different files, even after setting the random seed?
Any help is much appreciated!!!
So, to reproduce the same results in different files, tf.set_random_seed() is not enough. I figured out that we also need to set the seed for the initializers of the GRU cells, as well as the initializers of the weights in the dense layer at the output (this is at least according to my model); so the definition of the cell is now:
cell1 = tf.nn.rnn_cell.GRUCell(cell_size, kernel_initializer=tf.glorot_normal_initializer(seed=123, dtype=m_dtype))
And for the dense layer:
output = tf.layers.dense(output, units=1, kernel_initializer=tf.glorot_uniform_initializer(seed=123, dtype=m_dtype))
Note that any other initializer could be used, as long as we set the seed and the dtype for it.
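As a sanity check, here is a minimal sketch (my own illustration, not from the original answer) that builds the same seeded GRU twice in fresh graphs and compares the initialized gate kernels; with the seeded initializer in place, the values should match:
import numpy as np
import tensorflow as tf

def seeded_gru_kernel():
    # build a seeded GRU in a fresh default graph and return its gate kernel values
    tf.reset_default_graph()
    tf.set_random_seed(123)
    cell = tf.nn.rnn_cell.GRUCell(
        32, kernel_initializer=tf.glorot_normal_initializer(seed=123, dtype=tf.float32))
    inputs = tf.placeholder(tf.float32, [None, 10, 1])
    _output, _state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        kernel = [v for v in tf.trainable_variables() if "gates/kernel" in v.name][0]
        return sess.run(kernel)

print(np.allclose(seeded_gru_kernel(), seeded_gru_kernel()))  # expected: True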
I have a problem restoring my model. I trained a model and saved it using the code below. I'm not really sure if this is the proper method; I would be grateful for suggestions. The problem occurs when I'm trying to restore the model. I need it only to predict; it won't be further trained. It takes forever to restore the parameters from the model. How can I improve my model saver or model restorer to make it quick, under the assumption that I need it only for predicting?
X = tf.placeholder(tf.float32, [None, 56, 56, 1])
Y_ = tf.placeholder(tf.float32, [None, 36])
L1 = 432
L2 = 72
L3 = 36
W1 = tf.Variable(tf.truncated_normal([3136, L1], stddev=0.1))
b1 = tf.Variable(tf.zeros([L1]))
W2 = tf.Variable(tf.truncated_normal([L1, L2], stddev=0.1))
b2 = tf.Variable(tf.zeros([L2]))
W3 = tf.Variable(tf.truncated_normal([L2, L3], stddev=0.1))
b3 = tf.Variable(tf.zeros([L3]))
XX = tf.reshape(X, [-1, 3136])
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + b1)
Y1 = tf.nn.dropout(Y1, keep_prob=0.8)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + b2)
Y2 = tf.nn.dropout(Y2, keep_prob=0.8)
Ylogits = tf.matmul(Y2, W3) + b3
Y = tf.nn.softmax(Ylogits)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(cross_entropy)
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1])], 0)
allbiases = tf.concat([tf.reshape(b1, [-1]), tf.reshape(b2, [-1]), tf.reshape(b3, [-1])], 0)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
def next_batch(x, y, batch, step):
    x_temp = x[step:(step + batch)]
    y_temp = np.squeeze(y[step:(step + batch)])
    return x_temp, y_temp
with tf.Session() as sess:
    sess.run(init)
    cur_step = 0
    for i in range(NUM_ITERS + 1):
        batch_X, batch_Y = next_batch(train_xx, train_yy, BATCH, cur_step)
        if i % DISPLAY_STEP == 0:
            acc_trn, loss_trn, w, b = sess.run([accuracy, cross_entropy, allweights, allbiases], feed_dict={X: batch_X, Y_: batch_Y})
            acc_tst, loss_tst = sess.run([accuracy, cross_entropy], feed_dict={X: test_xx, Y_: test_yy})
        sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})
    save_path = saver.save(sess, "abc/model")
Restore:
X = tf.placeholder(tf.float32, [None, 56, 56, 1])
Y_ = tf.placeholder(tf.float32, [None, 36])
L1 = 432
L2 = 72
L3 = 36
W1 = tf.Variable(tf.truncated_normal([3136, L1], stddev=0.1))
b1 = tf.Variable(tf.zeros([L1]))
W2 = tf.Variable(tf.truncated_normal([L1, L2], stddev=0.1))
b2 = tf.Variable(tf.zeros([L2]))
W3 = tf.Variable(tf.truncated_normal([L2, L3], stddev=0.1))
b3 = tf.Variable(tf.zeros([L3]))
XX = tf.reshape(X, [-1, 3136])
Y1 = tf.nn.sigmoid(tf.matmul(XX, W1) + b1)
Y1 = tf.nn.dropout(Y1, keep_prob=0.8)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + b2)
Y2 = tf.nn.dropout(Y2, keep_prob=0.8)
Ylogits = tf.matmul(Y2, W3) + b3
Y = tf.nn.softmax(Ylogits)
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver = tf.train.import_meta_graph('model.meta')
    saver.restore(sess, 'model')
EDIT: Maybe a fact that model is trained using Google Colab's GPU and I'm restoring it on my PC is important.
It's a duplicate of: Tensorflow: how to save/restore a model?.
Your saving of the model is right, but not your restoring. What you are doing is trying to create a new graph with the same nodes as the saved model, instead of restoring it from the saved graph. The following steps should address your question of how to restore a model:
# Start by resetting the default graph
tf.reset_default_graph()
with tf.Session() as sess:
    # Nodes: before loading the graph
    print([n.name for n in tf.get_default_graph().as_graph_def().node])
    # Output is [] as no graph is loaded yet.
    # First let's load the meta graph
    saver = tf.train.import_meta_graph("abc/model.meta")
    # Nodes: after loading the graph
    print([n.name for n in tf.get_default_graph().as_graph_def().node])
    # Output is ['save/RestoreV2/shape_and_slices', 'save/RestoreV2/tensor_ ...]
    # The above step doesn't load the weights; that can be checked by:
    print(sess.run('Variable_1:0'))
    # Error: attempting to use uninitialized value
    # Load the weights
    saver.restore(sess, tf.train.latest_checkpoint('./abc/'))
    print(sess.run('Variable_1:0'))
    # Output: [-2.80421402e-04 3.53254407e-04 ...]
Now that we have the nodes loaded and ready, you need to access some of them for inference. But since the nodes are not named properly, it's not easy to figure out which nodes are the inputs and outputs. To avoid this, you need to name the tensors/ops properly when saving the model, using the name argument, like:
X = tf.placeholder(tf.float32, [None, 56, 56, 1], name='X')
Y = tf.identity(tf.nn.softmax(Ylogits), name='logits')
In your inference graph once you have loaded the graph and weights, you can get these tensors using get_tensor_by_name:
with tf.Session() as sess:
    # Load the graph and weights as above
    ....
    graph = tf.get_default_graph()
    X_infer = graph.get_tensor_by_name('X:0')
    Y_infer = graph.get_tensor_by_name('logits:0')
    sess.run(Y_infer, {X_infer: new_input})
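Since you only need the model for prediction, one further option to speed up loading (a sketch of a common approach, not something from the original answer) is to freeze the graph once, baking the variables into constants, and then load the single frozen file at prediction time with no variable restore at all. This assumes an output node named 'logits' as above; the file path is just an example:
# one-off, inside the session where the graph and weights were restored:
frozen = tf.graph_util.convert_variables_to_constants(
    sess, tf.get_default_graph().as_graph_def(), ['logits'])  # 'logits' per the naming above
with tf.gfile.GFile('abc/frozen_model.pb', 'wb') as f:        # example path
    f.write(frozen.SerializeToString())

# at prediction time: load the frozen graph, no checkpoint needed
graph_def = tf.GraphDef()
with tf.gfile.GFile('abc/frozen_model.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name='')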
Below is the code from the TensorFlow website regarding using the Dataset API for consuming data from TFRecords:
filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"]
dataset = tf.contrib.data.TFRecordDataset(filenames)
dataset = dataset.map(...)
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(32)
dataset = dataset.repeat(num_epochs)
iterator = dataset.make_one_shot_iterator()
next_example, next_label = iterator.get_next()
loss = model_function(next_example, next_label)
training_op = tf.train.AdagradOptimizer(...).minimize(loss)
with tf.train.MonitoredTrainingSession(...) as sess:
    while not sess.should_stop():
        sess.run(training_op)
Normally I define my network as
x = tf.placeholder(tf.float32, [None, INPUT_SIZE], name='INPUT')
y_ = tf.placeholder(tf.float32, [None, OUTPUT_SIZE], name='OUTPUT')
w1 = tf.Variable(tf.truncated_normal([INPUT_SIZE, L1_SIZE], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[L1_SIZE]))
w2 = tf.Variable(tf.truncated_normal([L1_SIZE, L2_SIZE], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[L2_SIZE]))
w3 = tf.Variable(tf.truncated_normal([L2_SIZE, OUTPUT_SIZE], stddev=0.1))
b3 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_SIZE]))
input_layer = tf.nn.relu(tf.matmul(x, w1) + b1)
hidden_layer1_dropout = tf.nn.dropout(input_layer, DROPOUT1)
hidden_layer2 = tf.nn.relu(tf.matmul(hidden_layer1_dropout, w2) + b2)
hidden_layer2_dropout = tf.nn.dropout(hidden_layer2, DROPOUT2)
y = tf.nn.softmax(tf.matmul(hidden_layer2_dropout, w3) + b3)
and my loss function as
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
But now it looks like it is no longer necessary to use feed_dict, and I'm quite confused about how to define the loss function in this new way; the sample code only shows a single line:
loss = model_function(next_example, next_label)
Can anyone give a detailed example of how to define the loss function and how to map the features and labels to the placeholders? Thanks a lot.
Placeholders are not needed any more when using the Dataset API, because reading the data is already part of the tf.Graph.
We do not need to read files in Python code and feed them during training; instead, the data is read as a TensorFlow op inside the tf.Graph, which is much more efficient since TensorFlow ops mainly run in C++.
In your case, these two lines:
x = tf.placeholder(tf.float32, [None, INPUT_SIZE], name='INPUT')
y_ = tf.placeholder(tf.float32, [None, OUTPUT_SIZE], name='OUTPUT')
change into:
x = next_example
y_ = next_label
And remove the feed_dict when calling Session.run.
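To make this concrete, here is a minimal sketch of how your network and loss could be wired directly to the iterator output (my own illustration built from your layer names; it assumes next_example is a flat feature vector with INPUT_SIZE columns and next_label is one-hot with OUTPUT_SIZE entries, and it omits the dropout layers for brevity; it also swaps your manual -sum(y_ * log(y)) for the numerically stable built-in cross entropy):
def model_function(next_example, next_label):
    # same network as before, built on the pipeline tensors instead of placeholders
    x = tf.reshape(next_example, [-1, INPUT_SIZE])
    y_ = next_label
    w1 = tf.Variable(tf.truncated_normal([INPUT_SIZE, L1_SIZE], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[L1_SIZE]))
    w2 = tf.Variable(tf.truncated_normal([L1_SIZE, L2_SIZE], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[L2_SIZE]))
    w3 = tf.Variable(tf.truncated_normal([L2_SIZE, OUTPUT_SIZE], stddev=0.1))
    b3 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_SIZE]))
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
    logits = tf.matmul(h2, w3) + b3
    # stable equivalent of -sum(y_ * log(softmax(logits))), averaged over the batch
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))

loss = model_function(next_example, next_label)
training_op = tf.train.AdagradOptimizer(0.01).minimize(loss)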
So far I have written the following code:
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
# load pickled objects (x and y)
x_input, y_actual = pickle.load(open('sample_input.pickle', 'rb'))
x_input = np.reshape(x_input, (50, 1))
y_actual = np.reshape(y_actual, (50, 1))
# parameters
batch_size = 50
hidden_size = 100
# create network graph
input_data = tf.placeholder(tf.float32, [batch_size, 1])
output_data = tf.placeholder(tf.float32, [batch_size, 1])
cell = tf.nn.rnn_cell.GRUCell(hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
hidden_state = initial_state
output_of_cell, hidden_state = cell(inputs=input_data, state=hidden_state)
init_op = tf.initialize_all_variables()
softmax_w = tf.get_variable("softmax_w", [hidden_size, 1], )
softmax_b = tf.get_variable("softmax_b", [1])
logits = tf.matmul(output_of_cell, softmax_w) + softmax_b
probabilities = tf.nn.softmax(logits)
sess = tf.Session()
sess.run(init_op)
something = sess.run([probabilities, hidden_state], feed_dict={input_data:x_input, output_data:y_actual})
#cost = tf.nn.sigmoid_cross_entropy_with_logits(logits, output_data)
#sess.close()
But I am getting an error for softmax_w/b as uninitialized variables.
I do not understand how I should use these W and b and carry out the training operation. Something like the following:
## some cost function
## training operation minimizing cost function using gradient descent optimizer
tf.initialize_all_variables() gets the "current" set of variables from the graph. Since you are creating softmax_w and softmax_b after your call to tf.initialize_all_variables(), they are not in the list that tf.initialize_all_variables() consults, and hence are not initialized when you run sess.run(init_op). The following should work:
softmax_w = tf.get_variable("softmax_w", [hidden_size, 1], )
softmax_b = tf.get_variable("softmax_b", [1])
init_op = tf.initialize_all_variables()
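Building on that, here is a minimal sketch of the cost function and training op you asked for (my own illustration, not a definitive implementation; it assumes your targets output_data are binary, since logits has a single output unit, and uses sigmoid cross entropy with plain gradient descent):
# cost: mean sigmoid cross entropy between the single-unit logits and the targets
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=output_data))

# training operation minimizing the cost with gradient descent
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# create init_op only after ALL variables exist, including any the optimizer adds
init_op = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init_op)
    _, cost_val = sess.run([train_op, cost],
                           feed_dict={input_data: x_input, output_data: y_actual})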