I'm doing a TensorFlow tutorial and have the following task:
I need to create 10 logits in TensorFlow with 2 hidden layers and calculate their loss using the functions
tf.nn.softmax()
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
My code looks like this:
import tensorflow as tf
import numpy as np
trainx = np.array(trainx) / 300.0
trainy = np.array(trainy)
testx = np.array(testx) / 300.0
testy = np.array(testy)
print ('train x shape is {}'.format(trainx.shape))
print ('train y shape is {}'.format(trainy.shape))
print ('test x shape is {}'.format(testx.shape))
print ('test y shape is {}'.format(testy.shape))
learning_rate = 0.001
def get_train_batch():
indices = np.random.randint(low=0, high=60000, size=[64])
return trainx[indices], trainy[indices]
g = tf.Graph()
with g.as_default():
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 784])
def fc(tensor, outdim, name):
w = tf.get_variable(name + "w", shape=[int(tensor.shape[-1]), outdim], dtype=tf.float32)  # weights: [input_dim, outdim]
b = tf.get_variable(name + "b", shape=[], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
return tf.add(tf.linalg.matmul(tensor, w), b)
first = fc(x, 256, "first")  # feed the x placeholder rather than the numpy array; 256 is an arbitrary hidden size
first = tf.nn.relu(first)
second = fc(first, 256, "second")
second = tf.nn.relu(second)
logits = []
names = [some names]
logits.append(tf.log_sigmoid(first))
logits.append(tf.log_sigmoid(second))
for i in range(10):
logits.append(fc(second, 1, names[i]))  # one logit per iteration, built on the second hidden layer
tf.nn.softmax()  # how do I use this function to get the loss?
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = ?  # how do I calculate the loss here?
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
minimize_op = optimizer.minimize(loss, var_list=tf.trainable_variables())
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(50000):
randomX, randomY = get_train_batch()
sess.run(minimize_op, feed_dict={x: randomX, y: randomY})
if epoch % 50 == 0:
trainloss = sess.run(loss, feed_dict={x: testx, y:testy})
print("Current loss: ", trainloss)
My questions are:
How do I use the mentioned functions correctly to calculate the loss?
Where do I get the labels from?
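For reference, here is a minimal sketch (TF 1.x) of how the two functions fit together. It uses tf.layers.dense in place of the hand-rolled fc helper, and the hidden sizes (128, 64) are arbitrary choices, not something prescribed by the tutorial:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 784])
# the labels are simply trainy/testy: one integer class index (0-9) per
# example, fed through this placeholder
y = tf.placeholder(tf.int64, shape=[None])

hidden1 = tf.layers.dense(x, 128, activation=tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, 64, activation=tf.nn.relu)
logits = tf.layers.dense(hidden2, 10)  # 10 raw scores, no activation here

# sparse_softmax_cross_entropy_with_logits applies softmax internally,
# so it takes the raw logits and the integer labels directly
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))

# tf.nn.softmax is only needed when you want the class probabilities
# themselves, e.g. for prediction; it is not part of the loss
probs = tf.nn.softmax(logits)

With integer labels of shape [batch], the sparse variant is the natural fit; tf.nn.softmax_cross_entropy_with_logits would instead expect one-hot labels of shape [batch, 10].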
A complete rookie here, trying to run the code below. The problem is that my shapes' dimensions do not coincide. Does anyone know which variables' dimensions should be changed?
I tried changing the x or y dimensions right after assigning values to x and y, but I still keep getting the error:
np.expand_dims(x, axis=1)
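(Note that np.expand_dims returns a new array rather than modifying its argument, so on its own that call has no effect; the result has to be assigned. A quick illustration:)

import numpy as np

a = np.zeros(100)
b = np.expand_dims(a, axis=1)  # returns a new array; a itself is unchanged
print(a.shape, b.shape)        # (100,) (100, 1)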
The main method:
def main():
# tf.reset_default_graph()
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, HEIGHT, WIDTH], name="input")
y = tf.placeholder(tf.float32, shape=[None, NUM_LABELS], name="labels")
dropout = tf.placeholder(tf.float32, name="dropout")
np.expand_dims(input, axis=1)
logits = get_model(x, dropout)
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y), name=None)
tf.summary.scalar('loss', loss)
with tf.name_scope('train'):
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
with tf.name_scope('accuracy'):
predicted = tf.argmax(logits, 1)
truth = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted, truth)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
confusion_matrix = tf.confusion_matrix(truth, predicted, num_classes=NUM_LABELS)
tf.summary.scalar('accuracy', accuracy)
summ = tf.summary.merge_all()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR)
writer.add_graph(sess.graph)
test_writer = tf.summary.FileWriter(TEST_LOGDIR)
print('Starting training\n')
batch = get_batch(BATCH_SIZE, PATH_TRAIN)
start_time = time.time()
for i in range(1, ITERATIONS + 1):
X, Y = next(batch)
if i % EVAL_EVERY == 0:
[train_accuracy, train_loss, s] = sess.run([accuracy, loss, summ], feed_dict={x: X, y: Y, dropout: 0.5})
acc_and_loss = [i, train_loss, train_accuracy * 100]
print('Iteration # {}. Train Loss: {:.2f}. Train Acc: {:.0f}%'.format(*acc_and_loss))
writer.add_summary(s, i)
if i % (EVAL_EVERY * 20) == 0:
train_confusion_matrix = sess.run(confusion_matrix, feed_dict={x: X, y: Y, dropout: 1.0})
header = list(LABEL_TO_INDEX_MAP.keys())
df = pd.DataFrame(np.reshape(train_confusion_matrix, (NUM_LABELS, NUM_LABELS)), index=header, columns=header)
print('\nConfusion Matrix:\n {}\n'.format(df))
saver.save(sess, os.path.join(LOGDIR, "model.ckpt"), i)
sess.run(train_step, feed_dict={x: X, y: Y, dropout:0.5})
print('\nTotal training time {:.0f} seconds\n'.format(time.time() - start_time))
batch = get_batch(BATCH_SIZE, PATH_TEST)
total_accuracy = 0
for i in range(ITERATIONS_TEST):
X, Y = next(batch)
test_accuracy, s = sess.run([accuracy, summ], feed_dict={x: X, y: Y, dropout:1.0})
print('Iteration # {}. Test Accuracy {:.0f}%'.format(i+1, test_accuracy * 100))
total_accuracy += (test_accuracy / ITERATIONS_TEST)
test_writer.add_summary(s, i)
print('\nFinal Test Accuracy: {:.0f}%'.format(total_accuracy * 100))
if __name__ == '__main__':
init(PATH_TRAIN)
main()
The Result I get:
ValueError: Cannot feed value of shape (100,) for Tensor 'input_19:0', which has shape '(?, 20, 44)'
It seems to be complaining about feeding X, which has shape (100,), into x, which is required to have shape (anything, 20, 44); that placeholder carries the name “input” noted in the error.
x and y are TensorFlow placeholders rather than numpy arrays, and their shapes are not changed that way. A placeholder's shape tells TensorFlow what shape of numpy array to expect (in your case, presumably X and Y). Since the shapes mismatch, you might be using the wrong data, and simply reshaping X could silently give you wrong results.
You will have to figure out what the shapes of X and Y actually are, and where the 20x44 data should be coming from in your dataset (or, if it should not require 20x44 data, what it should require).
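For example, a quick diagnostic along these lines (a sketch reusing the get_batch generator from the code above) usually shows where the shapes diverge:

# sketch: inspect what the generator actually yields before feeding it
X, Y = next(batch)
print('X:', np.asarray(X).shape)  # the "input" placeholder wants (?, 20, 44)
print('Y:', np.asarray(Y).shape)  # the "labels" placeholder wants (?, NUM_LABELS)
# a shape of (100,) for X means each batch element is a scalar, so the fix
# belongs in get_batch / the data pipeline, not in the placeholders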
After playing with TensorFlow, I finally got my code to run, but the loss goes negative, even after only a few epochs. Why is this happening? I have built simple neural nets with numpy alone and never had this problem; can someone explain what I am doing wrong?
Here is the input file if anyone wants to test it: input file
Additional information: the labels are not one-hot arrays. Is my loss function the correct one, given that? I followed a tutorial that used one-hot labels.
import tensorflow as tf
import numpy as np
data = np.load("test_data.npz")
trng_input = np.array(data['Input'], dtype=np.float64)
trng_output = np.array(data['Output'], dtype=np.float64)
nhl1 = 12
nhl2 = 8
n_classes = 4
x = tf.placeholder(dtype=tf.float64, shape=[len(trng_input),24])
y = tf.placeholder(dtype=tf.float64, shape=[len(trng_output),n_classes])
def NN(data):
hl1 = {"weights":tf.Variable(tf.random_normal([24, nhl1], dtype=tf.float64)),
"biases":tf.Variable(tf.random_normal([nhl1], dtype=tf.float64))}
hl2 = {"weights":tf.Variable(tf.random_normal([nhl1, nhl2], dtype=tf.float64)),
"biases":tf.Variable(tf.random_normal([nhl2], dtype=tf.float64))}
output_layer = {"weights":tf.Variable(tf.random_normal([nhl2, n_classes], dtype=tf.float64)),
"biases":tf.Variable(tf.random_normal([n_classes], dtype=tf.float64))}
l1 = tf.add(tf.matmul(data, hl1["weights"]), hl1["biases"])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hl2["weights"]), hl2["biases"])
l2 = tf.nn.relu(l2)
output = tf.add(tf.matmul(l2, output_layer["weights"]), output_layer["biases"])
output = tf.nn.relu(output)
return output
def train(data, epochs):
prediction = NN(data)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(epochs):
_, c = sess.run([optimizer, cost], feed_dict={x: trng_input, y: trng_output})
if not epoch % 20:
print(F"Epoch {epoch} completed out of {epochs}. Loss:{c}")
correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct,"float"))
Eval = accuracy.eval({x:trng_input, y:trng_output})
print(F"Accuracy:{Eval}")
train(trng_input, 200)
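One note on why this can happen: with softmax_cross_entropy_with_logits_v2 the per-example loss is -sum(label_i * log(softmax_i)), and since log(softmax_i) <= 0, each term is non-negative as long as the label values are non-negative. A negative loss therefore means some label values are negative; the function expects each row of labels to be a valid probability distribution over the classes (e.g. one-hot). If each example really belongs to exactly one of the 4 classes, a minimal sketch of the usual setup (assuming integer class indices, with the same layer sizes as above) is:

import tensorflow as tf

x = tf.placeholder(tf.float64, shape=[None, 24])
y_idx = tf.placeholder(tf.int64, shape=[None])  # one class index (0-3) per example

l1 = tf.layers.dense(x, 12, activation=tf.nn.relu)
l2 = tf.layers.dense(l1, 8, activation=tf.nn.relu)
# the output layer stays linear: a ReLU here clamps negative logits to 0
# and throws information away
logits = tf.layers.dense(l2, 4)

cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_idx))

If trng_output stores per-class scores, y_idx can be fed with np.argmax(trng_output, axis=1).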
I'm trying to create a neural network that takes 13 features as input from multiple csv files one at a time and measure accuracy after each iteration. Here is my code snippet:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
import os
import pandas as pd
n_inputs = 13
n_hidden1 = 30
n_hidden2 = 10
n_outputs = 2
learning_rate = 0.01
n_epochs = 40
batch_size = 1
patient_id = os.listdir('./subset_numerical')
output = pd.read_csv('output.csv')
sepsis_pat = output['output'].tolist()
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
y = tf.placeholder(tf.int64, shape=[None], name="y")
def data_processor(n):
id = pd.read_csv('./subset_numerical/'+patient_id[n])
id_input = np.array([id['VALUE'].tolist()])
for s in sepsis_pat:
if str(s) == str(patient_id[n].split('.')[0]):
a = 1
try:
if a == 1:
a = 0
return [id_input, np.array([1])]
except:
return [id_input, np.array([0])]
def test_set():
id_combined = []
out = []
for p in range(300, len(patient_id)):
try:
id1 = pd.read_csv('./subset_numerical/' + patient_id[p])
id_input1 = np.array(id1['VALUE'].tolist())
id_combined.append(id_input1)
for s in sepsis_pat:
if str(s) == str(patient_id[p].split('.')[0]):
a = 1
try:
if a == 1:
a = 0
out.append([1, 0])
except:
out.append([0, 1])
except:
pass
return [np.array(id_combined), np.array(out)]
# Declaration of hidden layers and calculation of loss goes here
# Construction phase begins
with tf.name_scope("dnn"):
hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None) # We will apply softmax here later
# Calculating loss
with tf.name_scope("loss"):
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
# Training with gradient descent optimizer
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
# Measuring accuracy
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
# Variable initialization and saving model goes here
# Construction is finished. Let's get this to work.
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
a = 0
for iteration in range(300 // batch_size):
X_batch, y_batch = data_processor(iteration)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
X_test, y_test = test_set()
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
save_path = saver.save(sess, "./my_model_final.ckpt")
But I'm stuck with this error:
logits and labels must be same size: logits_size=[1,2] labels_size=[1,1]
The error seems to occur at this line:
correct = tf.nn.in_top_k(logits, y, 1)
What am I doing wrong?
Based on the error log you provided, the problem is in this line of your code:
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
Ensure that both of them have the same shape and dtype: the shape should be [batch_size, num_classes], and the dtype should be float16, float32, or float64. Check the documentation of softmax_cross_entropy_with_logits for more details.
Since you've defined n_outputs = 2, the shape of logits is [?, 2] (? being the batch size), while the shape of y is just [?]. tf.nn.softmax_cross_entropy_with_logits expects one-hot labels of shape [batch_size, num_classes], so the two are incompatible as written.
Solution: either one-hot encode y to shape [batch_size, 2], or keep the integer labels and switch to tf.nn.sparse_softmax_cross_entropy_with_logits, which takes labels of shape [batch_size] directly.
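A sketch of both options (here logits is a stand-in for the output of the dnn scope above):

import tensorflow as tf

logits = tf.placeholder(tf.float32, shape=[None, 2])  # stand-in for the network output

# (a) keep integer labels of shape [batch_size]:
y_sparse = tf.placeholder(tf.int64, shape=[None])
xentropy_a = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_sparse, logits=logits)

# (b) one-hot labels of shape [batch_size, 2]:
y_onehot = tf.placeholder(tf.float32, shape=[None, 2])
xentropy_b = tf.nn.softmax_cross_entropy_with_logits(labels=y_onehot, logits=logits)

Option (a) also keeps tf.nn.in_top_k(logits, y, 1) working as written, since in_top_k expects integer class ids.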
I am currently trying to train my model to classify the CIFAR-10 dataset. I read the data like this:
def convert_images(raw):
raw_float = np.array(raw, dtype = float)
images = raw_float.reshape([-1,3,32,32])
images = images.transpose([0,2,3,1])
return images
def load_data(filename):
data = unpickle(filename)
raw_images = data[b'data']
labels = np.array(data[b'labels'])
images = convert_images(raw_images)
return images, labels
def load_training_data():
images = np.zeros(shape=[50000,32,32,3], dtype = float)
labels = np.zeros(shape = [50000], dtype = int)
begin = 0
for i in range(5):
filename = "data_batch_" + str(i+1)
images_batch, labels_batch = load_data(filename)
num_images = len(images_batch)
end = begin + num_images
images[begin:end, :] = images_batch
labels[begin:end] = labels_batch
begin = end
return images, labels, OneHotEncoder(categorical_features=labels, n_values=10)
This reshapes the data into a 4-D array holding 32x32x3 values for the pixels and RGB channels. I define my model like this (I first flatten X into row vectors because the 4-D array causes errors):
X = tf.placeholder(tf.float32, [None,32,32,3])
Y_labeled = tf.placeholder(tf.int32, [None])
data = load_training_data()
with tf.name_scope('dnn'):
XX = tf.reshape(X, [-1,3072])
hidden1 = tf.layers.dense(XX, 300, name = 'hidden1', activation = tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, 200, name = 'hidden2', activation = tf.nn.relu)
hidden3 = tf.layers.dense(hidden2, 200, name = 'hidden3', activation = tf.nn.relu)
hidden4 = tf.layers.dense(hidden3, 100, name = 'hidden4', activation = tf.nn.relu)
logits = tf.layers.dense(hidden4, 10, name = 'outputs')
with tf.name_scope('loss'):
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = (Y_labeled), logits = logits)
loss = tf.reduce_mean(cross_entropy, name = 'loss')
learning_rate = 0.01
with tf.name_scope('train'):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope('eval'):
correct = tf.nn.in_top_k(logits,Y_labeled, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
batch_size = 100
n_epochs = 50
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(50000 // batch_size):
X_batch = data[0][iteration*batch_size:(iteration+1)*batch_size]
y_batch = data[1][iteration*batch_size:(iteration+1)*batch_size]
#X_batch, y_batch = data.train.next_batch(batch_size)
sess.run(training_op, feed_dict = {X: X_batch,Y_labeled: y_batch})
acc_train = accuracy.eval(feed_dict = {X: X_batch,Y_labeled: y_batch})
print(epoch, "train accuracy:", acc_train, "loss", loss)
I want to define a simple model that has 4 hidden layers. When I run this it compiles with no errors and starts "training", but the accuracy is 0.0 and it does not print any losses. I am not sure if the error is in my calculation of accuracy and loss or in my definition of the model.
There seems to be a problem with the way you feed your labels. If you create the placeholder as Y_labeled = tf.placeholder(tf.int32, [None, 10]), it expects a vector of dimension 10 per example, but later, when you create the label numpy tensor labels = np.zeros(shape=[50000], dtype=int), each label is a scalar.
This is why you get the error: the placeholder needs to be fed a tensor of shape (batch_size, 10), but you feed it one of shape (batch_size,).
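If the placeholder is meant to be [None, 10], a sketch of the numpy-side fix is to one-hot encode the integer class ids (assuming classes 0-9, as in CIFAR-10):

import numpy as np

labels = np.array([3, 0, 9])                          # integer class ids
labels_onehot = np.eye(10, dtype=np.float32)[labels]  # shape (3, 10)

Alternatively, keep Y_labeled with shape [None] and integer labels, which is exactly what tf.nn.sparse_softmax_cross_entropy_with_logits in the question's loss already expects.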
I'm having a hard time trying to set up a multilayer perceptron neural network to predict the next value of a time-series using Tensorflow.
I read the time-series from a file, split it into three arrays, and use those arrays to train, test and validate the network. Unfortunately, my network outputs 0.9999 for every input I give it.
The image below shows the values I expect my network to output; note that they range from 2.8 to 4.2.
These are the values my network predicts instead. Though they all look the same, they are actually 0.9999... (with some difference in the 9th decimal place).
import csv
import numpy as np
from statsmodels.tsa.tsatools import lagmat
import tensorflow as tf
# Data split (values represent percentage)
perc_train = 0.5
perc_test = 0.4
perc_eval = 0.1
# Parameters
learning_rate = 10 ** -3
min_step_size_train = 10 ** -5
training_epochs = 250
display_step = 1
# Network Parameters
n_input = 15
n_classes = 1
n_hidden = (n_input + n_classes) // 2  # integer division: the layer size must be an int
def get_nn_sets(pmX, pmY):
'''
Splits data into three subsets
'''
trainningIndex = int(len(pmX) * perc_train)
validationIndex = int(len(pmX) * perc_test) + trainningIndex
pmXFit = pmX[:trainningIndex, :]
pmYFit = pmY[:trainningIndex]
pmXTest = pmX[trainningIndex:validationIndex, :]
pmYTest = pmY[trainningIndex:validationIndex]
pmxEvaluate = pmX[validationIndex:, :]
pmYEvaluate = pmY[validationIndex:]
return pmXFit, pmYFit, pmXTest, pmYTest, pmxEvaluate, pmYEvaluate
def read_dollar_file(clip_first = 4000):
'''
Reads the CSV file containing the dollar value for Brazilian real during the years
-----
RETURNS:
A matrix with the file contents
'''
str_vals = []
with open('dolar.csv', 'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',')
for row in spamreader:
# retrieving the first column of the file (the dollar value)
str_vals.append(row[1])
# removing title
str_vals = str_vals[1:]
# removing the empty strings (sunday and holidays have no values)
y = filter(None, str_vals)
# converting from string to float values
y = np.array(y).astype(np.float)
# checking if initial elements should be discarded
if (clip_first > 0):
y = y[clip_first:]
return y
# Create model
def get_multilayer_perceptron(x):
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64))
}
# Hidden layer with relu activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with tanh activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
out_layer = tf.nn.tanh(out_layer)
return out_layer
def run_mlp(inp, outp):
pmXFit, pmYFit, pmXTest, pmYTest, pmXEvaluate, pmYEvaluate = get_nn_sets(inp, outp)
# tf Graph input
x = tf.placeholder("float64", [None, n_input])
y = tf.placeholder("float64", [None, n_classes])
# Construct model
pred = get_multilayer_perceptron(x)
# Define loss and optimizer
cost = tf.nn.l2_loss(tf.subtract(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
last_cost = min_step_size_train + 1
for epoch in range(training_epochs):
# Trainning data
for i in range(len(pmXFit)):
batch_x = np.reshape(pmXFit[i,:], (1, n_input))
batch_y = np.reshape(pmYFit[i], (1, n_classes))
# Run optimization
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
# Calculating data error
c = 0.0
for i in range(len(pmXTest)):
batch_x = np.reshape(pmXTest[i,:], (1, n_input))
batch_y = np.reshape(pmYTest[i], (1, n_classes))
# Run Cost function
c += sess.run(cost, feed_dict={x: batch_x, y: batch_y})
c /= len(pmXTest)
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.30f}".format(c))
if abs(c - last_cost) < min_step_size_train:
break
last_cost = c
nn_predictions = np.array([])
for i in range(len(pmXEvaluate)):
batch_x = np.reshape(pmXEvaluate[i,:], (1, n_input))
nn_predictions = np.append(nn_predictions, sess.run(pred, feed_dict={x: batch_x})[0])
print("Optimization Finished!")
nn_predictions = nn_predictions.flatten()
return [pmYEvaluate, nn_predictions]
inp = lagmat(read_dollar_file(), n_input, trim='both')
outp = inp[1:, 0]
inp = inp[:-1]
real_value, predicted_value = run_mlp(inp, outp)
I also tried different cost functions and it didn't work. I know I may be missing something really stupid, so I really appreciate your help.
Thanks.
From your code:
out_layer = tf.nn.tanh(out_layer)
tanh can only output values in (-1.0, 1.0); removing this line will let the network reach your target range (roughly 2.8 to 4.2) and should make it do much better.
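In other words, a sketch of the fix, reusing the names from get_multilayer_perceptron:

# Hidden layer with relu activation
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
# Output layer left linear: tanh squashes everything into (-1, 1),
# while the targets lie between roughly 2.8 and 4.2
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']

If you do want a bounded output activation, the targets first have to be rescaled into the activation's range (e.g. normalized to [0, 1] for a sigmoid) and mapped back afterwards.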