ValueError: too many values to unpack - python

1 # coding: utf-8
2 # 0. Import modules, generate a simulated dataset
3 import tensorflow as tf
4 import numpy as np
5 BATCH_SIZE = 8
6 seed = 23455
7
8 # Generate random numbers based on the seed
9 rng = np.random.RandomState(seed)
10 # Return a random 32-row, 2-column matrix: 32 samples of volume and weight as the input dataset
11 X = rng.rand(32,3)
12
13 Y = [[int(x0+x1<1)] for (x0,x1) in X]
14 print "X:\n", X
15 print "Y:\n", Y
16
17 # 1. Define the network's inputs, parameters and outputs; define the forward pass
18 x = tf.placeholder(tf.float32, shape=(None, 2))
19 y_ = tf.placeholder(tf.float32, shape=(None, 1))
20
21 w1 = tf.Variable(tf.random_normal([2,3], stddev=1, seed=1))
22 w2 = tf.Variable(tf.random_normal([3,1], stddev=1, seed=1))
23
24 a = tf.matmul(x, w1)
25 y = tf.matmul(a, w2)
26
27 # 2. Define the loss function and the backpropagation method
28 loss = tf.reduce_mean(tf.square(y-y_))
29 #train_step = tf.train.MomentumOptimizer(0.001,0.9).minimize(loss)
30 #train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
31
32 # 3. Create a session, train for STEPS rounds
33 with tf.Session() as sess:
34     init_op = tf.global_variables_initializer()
35     sess.run(init_op)
36     # Print the parameter values before training
37     print "w1:\n", sess.run(w1)
38     print "w2:\n", sess.run(w2)
39     print "\n"
40
41     # train the model
42     STEPS = 3000
43     for i in range(STEPS):
44         start = (i*BATCH_SIZE) % 32
45         end = start + BATCH_SIZE
46         sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
47         if i % 500 == 0:
48             total_loss = sess.run(loss, feed_dict={x: X, y_: Y})
49             print("After %d training step(s), loss on all data is %g" % (i, total_loss))
50
51     # output the trained values of the variables
52     print "\n"
53     print "w1:\n", sess.run(w1)
54     print "w2:\n", sess.run(w2)
File "tf3_6.py", line 13, in
Y = [[int(x0+x1<1)] for (x0,x1) in X]
ValueError: too many values to unpack.
I don't think the code is wrong, but I still get this ValueError, so I hope you can help me sort this question out. Thanks a lot.

The shape of X is (32, 3), but in your list comprehension you are trying to unpack only 2 values per row:
Y = [[int(x0+x1<1)] for (x0,x1) in X]
Either change the shape of your array of rands (which also matches your placeholder x, declared with shape=(None, 2)):
X = rng.rand(32,2)
Or throw away the third rand in your list comp:
Y = [[int(x0+x1<1)] for (x0,x1, _) in X]
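To see the failure in isolation, here is a minimal reproduction (a sketch, independent of TensorFlow): each row of X has three values, and Python refuses to unpack three values into two names.
import numpy as np
row = np.random.RandomState(23455).rand(3)  # one row of a (32, 3) matrix
try:
    x0, x1 = row  # three values, two targets
except ValueError as e:
    print(e)  # too many values to unpack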

Related

TensorFlow dataset splitting does not work

I recently tried to use the tf.data API. I created an image dataset and had to split it into train/val/test. I'm using the method below with ds.take and ds.skip, but I always get train_ds correctly and no data in test_ds and val_ds.
DATASET_SIZE = 2000
train_size = int(0.7 * DATASET_SIZE) # 1400
val_size = int(0.15 * DATASET_SIZE) # 300
test_size = int(0.15 * DATASET_SIZE) # 300
train_ds = ds.take(train_size)
val_ds = ds.skip(train_size).take(val_size)
test_ds = ds.skip(train_size+val_size).take(test_size)
When I run the below:
for image, label in train_ds.take(1):
    print("Image shape: ", image.shape)
    print("Label: ", label.numpy())
I see the output as:
Image shape: (32, 400, 400, 3)
Label: [39 23 21 27 28 18 28 30 28 44 34 37 21 39 35 26 48 37 41 30 22 36 46 28
34 38 33 32 36 35 25 24]
But if I use test_ds.take(1) or val_ds.take(1) in the loop above, there is no output; test_ds and val_ds seem to be empty datasets. Also, when I use val_ds later in my model.fit() call, I don't see val_loss because of this.
I could use other techniques that would work for me, but I want to understand the reason: what am I doing wrong here?
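A likely cause, judging from the (32, 400, 400, 3) batch shape in the output above (an assumption, since the question does not show how ds was built): the dataset appears to have been batched before splitting, so take and skip count batches rather than individual images. 2000 images in batches of 32 yield only about 63 batches, so take(1400) consumes everything and the later skip calls produce empty datasets. A sketch of splitting on unbatched elements first, using a stand-in dataset:
import tensorflow as tf
DATASET_SIZE = 2000
ds = tf.data.Dataset.from_tensor_slices(tf.range(DATASET_SIZE))  # stand-in for the image dataset
train_size = int(0.7 * DATASET_SIZE)   # 1400
val_size = int(0.15 * DATASET_SIZE)    # 300
train_ds = ds.take(train_size).batch(32)
val_ds = ds.skip(train_size).take(val_size).batch(32)
test_ds = ds.skip(train_size + val_size).batch(32)
print(sum(1 for _ in val_ds))  # 10 batches, not an empty dataset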

IndexError: index 440 is out of bounds for axis 0 with size 440 in Python

IndexError                                Traceback (most recent call last)
in <module>
----> 1 create_tf_record_2D(training_set, train_tfrec2D, LABELS)
      2 create_tf_record_2D(test_set, test_tfrec2D, LABELS)
      3 create_tf_record_2D(validation_set, val_tfrec2D, LABELS)

in create_tf_record_2D(img_filenames, tf_rec_filename, labels)
     19
     20     # load the image and label
---> 21     img, label = load_image_2D(meta_data, labels)
     22
     23     # create a feature

in load_image_2D(abs_path, labels)
     27
     28     # make the 2D image
---> 29     img = slices_matrix_2D(img)
     30
     31     return img, label

in slices_matrix_2D(img)
     44     for i in range(cut.shape[0]):
     45         for j in range(cut.shape[1]):
---> 46             image_2D[i + row_it, j + col_it] = cut[i, j]
     47     row_it += cut.shape[0]
     48

IndexError: index 440 is out of bounds for axis 0 with size 440

Understanding the TensorFlow slice operation

I am confused by the following code:
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import dtypes
'''
Randomly crop a tensor, then return the crop position
'''
def random_crop(value, size, seed=None, name=None):
    with ops.name_scope(name, "random_crop", [value, size]) as name:
        value = ops.convert_to_tensor(value, name="value")
        size = ops.convert_to_tensor(size, dtype=dtypes.int32, name="size")
        shape = array_ops.shape(value)
        check = control_flow_ops.Assert(
            math_ops.reduce_all(shape >= size),
            ["Need value.shape >= size, got ", shape, size],
            summarize=1000)
        shape = control_flow_ops.with_dependencies([check], shape)
        limit = shape - size + 1
        begin = tf.random_uniform(
            array_ops.shape(shape),
            dtype=size.dtype,
            maxval=size.dtype.max,
            seed=seed) % limit
        return tf.slice(value, begin=begin, size=size, name=name), begin

sess = tf.InteractiveSession()
size = [10]
a = tf.constant(np.arange(0, 100, 1))
print(a.eval())
a_crop, begin = random_crop(a, size=size, seed=0)
print("offset: {}".format(begin.eval()))
print("a_crop: {}".format(a_crop.eval()))
a_slice = tf.slice(a, begin=begin, size=size)
print("a_slice: {}".format(a_slice.eval()))
assert (tf.reduce_all(tf.equal(a_crop, a_slice)).eval() == True)
sess.close()
outputs:
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
96 97 98 99]
offset: [46]
a_crop: [89 90 91 92 93 94 95 96 97 98]
a_slice: [27 28 29 30 31 32 33 34 35 36]
There are two tf.slice calls:
(1) called inside the function random_crop, as tf.slice(value, begin=begin, size=size, name=name)
(2) called as a_slice = tf.slice(a, begin=begin, size=size)
The parameters (value, begin and size) of these two slice operations are the same.
However, why are the printed values of a_crop and a_slice different, yet tf.reduce_all(tf.equal(a_crop, a_slice)).eval() is True?
Thanks
EDIT1
Thanks @xdurch0, I understand the first question now.
TensorFlow's random_uniform seems to act like a random generator.
import tensorflow as tf
import numpy as np
sess = tf.InteractiveSession()
size = [10]
np_begin = np.random.randint(0, 50, size=1)
tf_begin = tf.random_uniform(shape = [1], minval=0, maxval=50, dtype=tf.int32, seed = 0)
a = tf.constant(np.arange(0, 100, 1))
a_slice = tf.slice(a, np_begin, size = size)
print ("a_slice: {}".format(a_slice.eval()))
a_slice = tf.slice(a, np_begin, size = size)
print ("a_slice: {}".format(a_slice.eval()))
a_slice = tf.slice(a, tf_begin, size = size)
print ("a_slice: {}".format(a_slice.eval()))
a_slice = tf.slice(a, tf_begin, size = size)
print ("a_slice: {}".format(a_slice.eval()))
sess.close()
output
a_slice: [42 43 44 45 46 47 48 49 50 51]
a_slice: [42 43 44 45 46 47 48 49 50 51]
a_slice: [41 42 43 44 45 46 47 48 49 50]
a_slice: [29 30 31 32 33 34 35 36 37 38]
The confusing thing here is that tf.random_uniform (like every random operation in TensorFlow) produces a new, different value on each evaluation call (each call to .eval() or, in general, each call to tf.Session.run). So if you evaluate a_crop you get one thing, and if you then evaluate a_slice you get a different thing; but if you evaluate tf.reduce_all(tf.equal(a_crop, a_slice)) you get True, because everything is computed in a single evaluation step, so only one random value is produced and it determines the value of both a_crop and a_slice. As another example, if you run tf.stack([a_crop, a_slice]).eval() you will get a tensor with two equal rows; again, only one random value was produced. More generally, if you call tf.Session.run with multiple tensors to evaluate, all the computations in that call will use the same random values.
As a side note, if you actually need a random value in a computation that you want to maintain for a later computation, the easiest thing would be to retrieve it with tf.Session.run, along with any other needed computation, and feed it back later through feed_dict; alternatively, you could use a tf.Variable and store the random value there. A more advanced possibility would be to use partial_run, an experimental API that allows you to evaluate part of the computation graph and continue evaluating it later, while maintaining the same state (i.e. the same random values, among other things).
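Here is a small sketch of that behaviour (TF 1.x assumed; the tensor names a, b and r are made up for illustration):
import tensorflow as tf
sess = tf.InteractiveSession()
r = tf.random_uniform(shape=[], minval=0, maxval=100, dtype=tf.int32)
a = r + 0  # both tensors depend on the same random op
b = r + 0
print("a: {} b: {}".format(sess.run(a), sess.run(b)))  # two separate runs: values usually differ
print("together: {}".format(sess.run([a, b])))         # one run: the same random value feeds both
sess.close()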

How to implement TensorFlow's next_batch for your own data

In the TensorFlow MNIST tutorial the mnist.train.next_batch(100) function comes in very handy. I am now trying to implement a simple classification myself. I have my training data in a numpy array. How could I implement a similar function for my own data that gives me the next batch?
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
Xtr, Ytr = loadData()
for it in range(1000):
    batch_x = Xtr.next_batch(100)
    batch_y = Ytr.next_batch(100)
The link you posted says: "we get a 'batch' of one hundred random data points from our training set". In my example I use a global function (not a method as in your example), so there will be a difference in syntax.
In my function you'll need to pass the number of samples wanted and the data array.
Here is the correct code, which ensures the samples have correct labels:
import numpy as np
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels.
    '''
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)
Xtr, Ytr = np.arange(0, 10), np.arange(0, 100).reshape(10, 10)
print(Xtr)
print(Ytr)
Xtr, Ytr = next_batch(5, Xtr, Ytr)
print('\n5 random samples')
print(Xtr)
print(Ytr)
And a demo run:
[0 1 2 3 4 5 6 7 8 9]
[[ 0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47 48 49]
[50 51 52 53 54 55 56 57 58 59]
[60 61 62 63 64 65 66 67 68 69]
[70 71 72 73 74 75 76 77 78 79]
[80 81 82 83 84 85 86 87 88 89]
[90 91 92 93 94 95 96 97 98 99]]
5 random samples
[9 1 5 6 7]
[[90 91 92 93 94 95 96 97 98 99]
[10 11 12 13 14 15 16 17 18 19]
[50 51 52 53 54 55 56 57 58 59]
[60 61 62 63 64 65 66 67 68 69]
[70 71 72 73 74 75 76 77 78 79]]
In order to shuffle and sample each mini-batch, the state of whether a sample has been selected within the current epoch should also be considered. Here is an implementation that uses the data from the answer above.
import numpy as np

class Dataset:
    def __init__(self, data):
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]

    @property
    def data(self):
        return self._data

    def next_batch(self, batch_size, shuffle=True):
        start = self._index_in_epoch
        if start == 0 and self._epochs_completed == 0:
            idx = np.arange(0, self._num_examples)  # get all possible indexes
            np.random.shuffle(idx)                  # shuffle indexes
            self._data = self.data[idx]             # reorder the samples
        # go to the next batch
        if start + batch_size > self._num_examples:
            self._epochs_completed += 1
            rest_num_examples = self._num_examples - start
            data_rest_part = self.data[start:self._num_examples]
            idx0 = np.arange(0, self._num_examples)  # get all possible indexes
            np.random.shuffle(idx0)                  # shuffle indexes
            self._data = self.data[idx0]             # reorder the samples
            start = 0
            # avoid the case where #samples is not an integer multiple of batch_size
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            data_new_part = self._data[start:end]
            return np.concatenate((data_rest_part, data_new_part), axis=0)
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            return self._data[start:end]

dataset = Dataset(np.arange(0, 10))
for i in range(10):
    print(dataset.next_batch(5))
the output is:
[2 8 6 3 4]
[1 5 9 0 7]
[1 7 3 0 8]
[2 6 5 9 4]
[1 0 4 8 3]
[7 6 2 9 5]
[9 5 4 6 2]
[0 1 8 7 3]
[9 7 8 1 6]
[3 5 2 4 0]
The first and second (3rd and 4th, ...) mini-batches together correspond to one whole epoch.
I use Anaconda and Jupyter.
In Jupyter, if you run ?mnist you get:
File: c:\programdata\anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\base.py
Docstring: Datasets(train, validation, test)
In the folder datasets you will find mnist.py, which contains all the methods, including next_batch.
I tried the algorithm from the answer marked up above, but it didn't give me results, so I searched on Kaggle and found a really amazing algorithm that worked really well. Try this for the best result. In the algorithm below, the global variables take the input you declared above, in which you read your data set.
epochs_completed = 0
index_in_epoch = 0
num_examples = X_train.shape[0]

# for splitting out batches of data
def next_batch(batch_size):
    global X_train
    global y_train
    global index_in_epoch
    global epochs_completed

    start = index_in_epoch
    index_in_epoch += batch_size

    # when all training data has already been used, it is reordered randomly
    if index_in_epoch > num_examples:
        # finished epoch
        epochs_completed += 1
        # shuffle the data
        perm = np.arange(num_examples)
        np.random.shuffle(perm)
        X_train = X_train[perm]
        y_train = y_train[perm]
        # start next epoch
        start = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples
    end = index_in_epoch
    return X_train[start:end], y_train[start:end]
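A hypothetical usage sketch (assuming X_train and y_train are your numpy arrays, and x, y_ and train_step are your model's placeholders and training op; those names are placeholders, not from the answer above):
for step in range(1000):
    batch_x, batch_y = next_batch(100)              # the global-variable function above
    sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})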
If you don't want to get a shape mismatch error when you run your TensorFlow session, use the function below instead of the one provided in the first solution above (https://stackoverflow.com/a/40995666/7748451):
def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels.
    '''
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = data[idx]
    labels_shuffle = labels[idx]
    labels_shuffle = np.asarray(labels_shuffle.values.reshape(len(labels_shuffle), 1))
    return data_shuffle, labels_shuffle
Yet another implementation:
from typing import Tuple
import numpy as np

class BatchMaker(object):
    def __init__(self, feat: np.array, lab: np.array) -> None:
        if len(feat) != len(lab):
            raise ValueError("Expected feat and lab to have the same number of samples")
        self.feat = feat
        self.lab = lab
        self.indexes = np.arange(len(feat))
        np.random.shuffle(self.indexes)
        self.pos = 0

    # "BatchMaker, BatchMaker, make me a batch..."
    def next_batch(self, batch_size: int) -> Tuple[np.array, np.array]:
        if self.pos + batch_size > len(self.feat):
            np.random.shuffle(self.indexes)
            self.pos = 0
        batch_indexes = self.indexes[self.pos: self.pos + batch_size]
        self.pos += batch_size
        return self.feat[batch_indexes], self.lab[batch_indexes]
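A quick demo of BatchMaker on made-up toy arrays (not part of the original answer):
import numpy as np
feat = np.arange(10)       # ten fake samples
lab = np.arange(10) * 10   # matching fake labels
bm = BatchMaker(feat, lab)
for _ in range(4):
    f, l = bm.next_batch(4)  # reshuffles whenever a batch would overrun the data
    print(f, l)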

How do I create a confusion matrix of predicted and ground-truth labels with TensorFlow?

I have implemented a neural network model for classification using TensorFlow, but I don't know how I can draw a confusion matrix from the predicted scores (accuracy). I am not an expert in TensorFlow and am still in the learning phase. I pasted my code below; please tell me how I can write code to build a confusion matrix from it:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Set logs writer into folder /tmp/tensorflow_logs
    #summary_writer = tf.train.SummaryWriter('/tmp/tensorflow_logs', graph_def=sess.graph_def)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(X_train.shape[0]/batch_size)
        # Loop over total length of batches
        for i in range(total_batch):
            # picking up random batches from training set of specific size
            batch_xs, batch_ys = w2v_utils.nextBatch(X_train, y_train, batch_size)
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys})/total_batch
            # Write logs at every iteration
            #summary_str = sess.run(merged_summary_op, feed_dict={x: batch_xs, y: batch_ys})
            #summary_writer.add_summary(summary_str, epoch*total_batch + i)
        # append loss
        loss_history.append(avg_cost)
        # Display logs per epoch step
        if (epoch % display_step == 0):
            correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
            # Calculate training accuracy
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            trainAccuracy = accuracy.eval({x: X_train, y: y_train})
            train_acc_history.append(trainAccuracy)
            # Calculate validation accuracy
            valAccuracy = accuracy.eval({x: X_val, y: y_val})
            val_acc_history.append(valAccuracy)
            print "Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost), "train=", trainAccuracy, "val=", valAccuracy
    print "Optimization Finished!"
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print "Final Training Accuracy:", accuracy.eval({x: X_train, y: y_train})
    print "Final Test Accuracy:", accuracy.eval({x: X_test, y: y_test})
    print "Final Gold Accuracy:", accuracy.eval({x: X_gold, y: y_gold})
Up till now, I have been able to print the predicted scores but have failed to implement a confusion matrix. Please help.
Note: I am using one-hot vectors to represent my labels.
If you want to produce a confusion matrix, and then later precision and recall, you first need to get your counts of true positives, true negatives, false positives and false negatives. Here is how:
For better readability, I wrote the code very verbosely.
def evaluation(logits, labels):
    "Returns correct predictions, and 4 values needed for precision, recall and F1 score"
    # Step 1:
    # Let's create 2 vectors that will contain boolean values, and will describe our labels
    is_label_one = tf.cast(labels, dtype=tf.bool)
    is_label_zero = tf.logical_not(is_label_one)
    # Imagine that labels = [0,1]
    # Then
    # is_label_one = [False,True]
    # is_label_zero = [True,False]

    # Step 2:
    # get the prediction and false prediction vectors. correct_prediction is something that you choose within your model.
    correct_prediction = tf.nn.in_top_k(logits, labels, 1, name="correct_answers")
    false_prediction = tf.logical_not(correct_prediction)

    # Step 3:
    # get the 4 metrics by comparing boolean vectors
    # TRUE POSITIVES
    true_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, is_label_one)))
    # FALSE POSITIVES
    false_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, is_label_zero)))
    # TRUE NEGATIVES
    true_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, is_label_zero)))
    # FALSE NEGATIVES
    false_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, is_label_one)))
    return true_positives, false_positives, true_negatives, false_negatives

# Now you can do something like this in your session:
true_positives, \
false_positives, \
true_negatives, \
false_negatives = sess.run(evaluation(logits, labels), feed_dict=feed_dict)

# you can print the confusion matrix using the 4 values from above, or get precision and recall:
precision = float(true_positives) / float(true_positives + false_positives)
recall = float(true_positives) / float(true_positives + false_negatives)

# or F1 score:
F1_score = 2 * (precision * recall) / (precision + recall)
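Note that the TP/FP/TN/FN logic above assumes binary 0/1 labels. For the multi-class case, TensorFlow 1.x also ships tf.confusion_matrix, which builds the full matrix directly from class indices; a minimal sketch with made-up labels:
import tensorflow as tf
labels = tf.constant([0, 1, 2, 2, 1])       # true classes (toy data)
predictions = tf.constant([0, 2, 2, 2, 1])  # predicted classes (toy data)
cm = tf.confusion_matrix(labels, predictions, num_classes=3)
with tf.Session() as sess:
    print(sess.run(cm))
# [[1 0 0]
#  [0 1 1]
#  [0 0 2]]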
This code worked for me; I sorted it out myself. :)
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import classification_report
def print_confusion_matrix(plabels, tlabels):
    """
    Prints the confusion matrix for the different classes
    to find the errors...
    Input:
    -----------
    plabels: predicted labels for the classes...
    tlabels: true labels for the classes
    code from: http://stackoverflow.com/questions/2148543/how-to-write-a-confusion-matrix-in-python
    """
    import pandas as pd
    plabels = pd.Series(plabels)
    tlabels = pd.Series(tlabels)
    # draw a cross tabulation...
    df_confusion = pd.crosstab(tlabels, plabels, rownames=['Actual'], colnames=['Predicted'], margins=True)
    #print df_confusion
    return df_confusion

def confusionMatrix(text, Labels, y_pred, not_partial):
    y_actu = np.where(Labels[:] == 1)[1]
    df = print_confusion_matrix(y_pred, y_actu)
    print "\n", df
    #print plt.imshow(df.as_matrix())
    if not_partial:
        print "\n", classification_report(y_actu, y_pred)
        print "\n\t------------------------------------------------------\n"

def do_eval(message, sess, correct_prediction, accuracy, pred, X_, y_, x, y):
    predictions = sess.run([correct_prediction], feed_dict={x: X_, y: y_})
    prediction = tf.argmax(pred, 1)
    labels = prediction.eval(feed_dict={x: X_, y: y_}, session=sess)
    print message, accuracy.eval({x: X_, y: y_}), "\n"
    confusionMatrix("Partial Confusion matrix", y_, predictions[0], False)  # partial confusion matrix
    confusionMatrix("Complete Confusion matrix", y_, labels, True)          # complete confusion matrix

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    data = zip(X_train, y_train)
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int(len(data)/batch_size) + 1
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Shuffle the data at each epoch
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_data = data[shuffle_indices]
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            sample = zip(*shuffled_data[start_index:end_index])
            # picking up random batches from training set of specific size
            batch_xs, batch_ys = sample[0], sample[1]
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys})/num_batches_per_epoch
        # append loss
        loss_history.append(avg_cost)
        # Display logs per epoch step
        if (epoch % display_step == 0):
            correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
            # Calculate training accuracy
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            trainAccuracy = accuracy.eval({x: X_train, y: y_train})
            train_acc_history.append(trainAccuracy)
            # Calculate validation accuracy
            valAccuracy = accuracy.eval({x: X_val, y: y_val})
            val_acc_history.append(valAccuracy)
            print "Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost), "train=", trainAccuracy, "val=", valAccuracy
    print "Optimization Finished!\n"
    # Evaluation of model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    do_eval("Accuracy of Gold Test set Results: ", sess, correct_prediction, accuracy, pred, X_gold, y_gold, x, y)
and here is the sample output:
Accuracy of Gold Test set Results: 0.642608
Predicted False True All
Actual
0 20 46 66
1 3 1 4
2 21 1 22
3 8 4 12
4 16 7 23
5 54 259 313
6 41 14 55
7 11 2 13
8 48 94 142
9 29 4 33
10 17 4 21
11 39 116 155
All 307 552 859
Predicted 0 1 2 3 4 5 6 7 8 9 10 11 All
Actual
0 46 0 0 0 0 8 0 2 2 2 0 6 66
1 0 1 0 1 0 2 0 0 0 0 0 0 4
2 3 0 1 3 0 12 0 0 1 0 0 2 22
3 2 0 0 4 1 3 1 1 0 0 0 0 12
4 1 0 0 0 7 12 0 0 1 0 0 2 23
5 8 0 0 1 5 259 9 0 9 3 1 18 313
6 1 0 0 1 6 30 14 1 2 0 0 0 55
7 3 0 0 0 0 2 0 2 4 0 1 1 13
8 6 0 0 1 1 18 0 3 94 8 1 10 142
9 9 0 0 0 0 1 1 1 9 4 0 8 33
10 1 0 0 0 3 6 0 1 1 0 4 5 21
11 5 1 0 1 0 18 1 0 6 5 2 116 155
All 85 2 1 12 23 371 26 11 129 22 9 168 859
precision recall f1-score support
0 0.54 0.70 0.61 66
1 0.50 0.25 0.33 4
2 1.00 0.05 0.09 22
3 0.33 0.33 0.33 12
4 0.30 0.30 0.30 23
5 0.70 0.83 0.76 313
6 0.54 0.25 0.35 55
7 0.18 0.15 0.17 13
8 0.73 0.66 0.69 142
9 0.18 0.12 0.15 33
10 0.44 0.19 0.27 21
11 0.69 0.75 0.72 155
avg / total 0.64 0.64 0.62 859
For the moment, I use this solution to obtain the confusion matrix:
# load the data
(train_x, train_y), (dev_x, dev_y), (test_x, test_y) = dataLoader.load()
# build the classifier
classifier = tf.estimator.DNNClassifier(...)
# train the classifier
classifier.train(input_fn=lambda:train_input_fn(), steps=1000)
# evaluate and prediction on the test set
test_evaluate = classifier.evaluate(input_fn=lambda:eval_input_fn())
test_predict = classifier.predict(input_fn = lambda:eval_input_fn())
# parse the predictions to retrieve the predicted labels
predictions = []
for i in list(test_predict):
    predictions.append(i['class_ids'][0])
# build the confusion matrix
matrix = tf.confusion_matrix(test_y, predictions)
# display the confusion matrix
with tf.Session():
    print(str(tf.Tensor.eval(matrix)))
But I am not convinced by my loop to retrieve the predicted labels... there should be a better Python way to do this (or a TensorFlow way...)
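One simpler option (a sketch that produces the same list as the loop above) is a plain list comprehension over the prediction generator:
predictions = [p['class_ids'][0] for p in test_predict]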