I have the following problem.
I am trying to train a 3D CNN in TensorFlow. I have separated the data into three sets: train, validation and test.
The main problem is that when I evaluate the validation set after 5 epochs of training, the output of the model is nearly the same for the 5 images.
(This is the output of the last layer, without any softmax.)
2018-04-17 23:30:35.134318 Prediction: [[0.8185656 2.7571523 ]
[0.8200048 2.7590456 ]
[0.8185656 2.7571523 ]
[0.8200048 2.7590458 ]
[0.7751368 2.7532804 ]
[0.82061136 2.7588618 ]
[0.8130686 2.7821052 ]
[0.83537185 2.7514493 ]
[0.8200041 2.7590454 ]
[0.81701267 2.7519925 ]
[0.8424163 2.8674953 ]
[0.82000506 2.7590454 ]
[0.81999433 2.7590487 ]
[0.81701267 2.7519925 ]
However, if I do the same for the training set I get a conventional prediction.
I have fully checked both data sets and they are correct and in the same conditions.
This is the code I use to build the model and do the training:
class Cnn3DMRI(object):
def weight_variable(self, shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(self, shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv3d(self, x, W):
return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='SAME')
def maxpool3d(self, x):
# size of window movement of window
return tf.nn.max_pool3d(x, ksize=[1, 2, 2, 2, 1], strides=[1, 2, 2, 2, 1], padding='SAME')
def dense_to_one_hot(self, labels_dense, num_classes):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
def wrapper_image(self, full_image_set, full_label_set, last_batch=0, batch_size=5):
batch_img = full_image_set[last_batch:batch_size+last_batch, :, :, :]
batch_label = full_label_set[last_batch:batch_size+last_batch]
return batch_img, batch_label, batch_size+last_batch
def convolutional_neural_network(self, x, img_sz, n_slices):
weights = {
'W_conv1': self.weight_variable([3, 5, 5, 1, 32]),
'W_conv2': self.weight_variable([2, 5, 5, 32, 48]),
'W_fc': self.weight_variable(
[
int(
math.ceil(
n_slices / 8
) * math.ceil(
img_sz / 8
) * math.ceil(
img_sz / 8
) *48), 2048
]
),
'W_fc2': self.weight_variable([2048, 1024]),
'out': self.weight_variable([1024, 2])
}
biases = {
'b_conv1': self.bias_variable([32]),
'b_conv2': self.bias_variable([48]),
'b_fc': self.bias_variable([2048]),
'b_fc2': self.bias_variable([1024]),
'out': self.bias_variable([2])
}
self.x_im = tf.reshape(x, shape=[-1, n_slices, img_sz, img_sz, 1])
conv1 = tf.nn.relu(self.conv3d(self.x_im, weights['W_conv1']) + biases['b_conv1'])
conv1 = tf.Print(conv1,[conv1], 'The conv1: ')
conv1 =self.maxpool3d(conv1)
conv1 = tf.Print(conv1,[conv1], 'The max1: ')
conv2 = tf.nn.relu(self.conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv1 = tf.Print(conv2,[conv2], 'The conv2: ')
conv2 = tf.nn.max_pool3d(conv2, ksize=[1, 4, 4, 4, 1], strides=[1, 4, 4, 4, 1],
padding='SAME')
conv2 = tf.Print(conv2,[conv2], 'The max2: ')
fc = tf.reshape(conv2, [-1,int(math.ceil(n_slices/8)*math.ceil(img_sz/8)*math.ceil(
img_sz/8))*48])
fc = tf.Print(fc,[fc], 'The reshape: ')
fc2 = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc2 = tf.Print(fc2,[fc2], 'The fc: ')
dp1 = tf.nn.dropout(fc2, self.keep_prob)
fc3 = tf.nn.relu(tf.matmul(dp1, weights['W_fc2'])+biases['b_fc2'])
fc3 = tf.Print(fc3,[fc3], 'The fc2: ')
dp2 = tf.nn.dropout(fc3, self.keep_prob)
output = tf.matmul(dp2, weights['out'])+biases['out']
output = tf.Print(output,[output], 'The output: ')
return output
def test_validation_set(self, sess, data_validation, label_validation, valid_batch_size=60):
batch_img, batch_label, last_batch = self.wrapper_image(
data_validation, label_validation, self.last_valid_batch, valid_batch_size
)
batch_label = self.dense_to_one_hot(
np.array(batch_label, dtype=np.int),2
).astype(np.float32)
if last_batch+valid_batch_size < len(label_validation):
self.last_valid_batch = last_batch
else:
self.last_valid_batch = 0
pred, c, validation_accuracy = sess.run(
[self.prediction, self.cost, self.accuracy], feed_dict={
self.x: batch_img, self.y_: batch_label, self.keep_prob: 1.0
}
)
self.log("Prediction: "+str(pred))
self.log("Label: "+str(batch_label))
self.log("Validation accuracy: "+str(validation_accuracy))
self.log("Validation cost: "+str(c))
return validation_accuracy, c
def train_neural_network(self, data_img, labels, data_validation, label_validation,
batch_size, img_sz, n_slices, last_batch,
keep_rate, model_path):
self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_,
logits=self.prediction))
optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
hm_epochs = 1000
saver = tf.train.Saver(tf.trainable_variables())
epoch_loss = 0
epoch_loss_mean = []
n_epoch = 0
learning_rate = 1e-4
self.last_valid_batch = 0
min_valid_cost = 0
all_valid_cost = []
model_path_train = 'model_train/my_model.ckpt'
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
if model_path:
pass
#saver.restore(sess, model_path_train)
while n_epoch < hm_epochs:
if len(data_img)>last_batch+batch_size:
with tf.device('/cpu:0'):
#batch_img, batch_label, last_batch = self.get_image(
# data_img, labels, last_batch, batch_size, img_sz, n_slices
#)
batch_img, batch_label, last_batch = self.wrapper_image(data_img, labels, last_batch, batch_size)
print "Batch label images: "+str(batch_label)
batch_label = self.dense_to_one_hot(np.array(batch_label, dtype=np.int),
2).astype(np.float32)
else:
with tf.device('/cpu:0'):
restbatch = last_batch + batch_size - len(data_img)
batch_img = np.concatenate((
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[0],
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[0]
))
batch_label = np.concatenate((
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[1],
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[1]
))
batch_label = self.dense_to_one_hot(np.array(batch_label, dtype=np.int),
2).astype(
np.float32)
last_batch = restbatch
####### at the end of EACH EPOCH ###
epoch_loss_mean.append(epoch_loss)
print "epoch loss mean: "+str(epoch_loss_mean)
epoch_loss = 0
n_epoch += 1
print "n_epoch: "+str(n_epoch)
if model_path:
saver.save(sess, model_path_train)
if not n_epoch % 5:
valid_accuracy, valid_cost = self.test_validation_set(sess,data_validation,
label_validation, 60)
if valid_cost < min_valid_cost - 2:
min_valid_cost = valid_cost
if model_path:
saver.save(sess, model_path)
all_valid_cost.append(valid_cost)
print all_valid_cost
if self.last_valid_batch == 0:
self.shufle_data(data_validation, label_validation)
train_accuracy = self.accuracy.eval(
feed_dict={self.x: batch_img, self.y_: batch_label, self.keep_prob: 1.0})
print "trainning accuracy: " + str(train_accuracy)
self.shufle_data(data_img, labels)
_, c = sess.run(
[optimizer, self.cost], feed_dict={
self.x: batch_img, self.y_: batch_label, self.keep_prob: keep_rate,
self.learning_rate: learning_rate
}
)
print 'epoch_loss: '+str(c)
def main(self, data_dir, labels_dir, img_sz, n_slices, batch_size=5, last_batch=0, train=False,
model_path=None, keep_rate=0.5):
"""
Args:
data_dir (list): directories of the images to be tested
labels_dir (str): directory of the csv file where the images are labeled; the index
column is number 2 and the labels header is 'Diag'.
img_sz: the spatial size the images will be resized to, i.e. the size with which
the images will be trained. Width and height must be the same.
n_slices: the number of slices of the images to be trained
last_batch: the batch at which you want to start the training
train: boolean to set training (0) or testing (1)
model_path: the path where the model is saved; if there is no previous model you can
set a path here to start a new one.
keep_rate: the keep probability of firing a node by means of dropout
Returns:
"""
self.train = train
data_path_trainning, label_trainning, data_path_validation, label_validation, \
data_testing, label_testing = self.load_dataset(data_dir, labels_dir,)
data_trainning, label_trainning_final = self.load_image(data_path_trainning,
label_trainning, img_sz, n_slices
)
data_validation, label_validation_final = self.load_image(
data_path_validation, label_validation, img_sz, n_slices
)
self.x = tf.placeholder(tf.float32, shape=[None, n_slices, img_sz, img_sz]) #batch_size,
# image_Size
self.y_ = tf.placeholder(tf.float32, shape=[None, 3]) #batch_size, label_size
self.learning_rate = tf.placeholder(tf.float32)
self.keep_prob = tf.placeholder(tf.float32)
if train:
self.train_neural_network(data_trainning, label_trainning_final, data_validation,
label_validation_final, batch_size, img_sz, n_slices,
last_batch, keep_rate, model_path
)
I have already tried tf.set_random_seed(1), but it made no difference.
Does anyone have any idea about this, please?
Thanks so much.
EDITED 22/04/18:
The data to be classified are 3D images of 150x150x40 pixels in a two-class problem. I have a total of about 400 images, approximately half from each class. I have separated the dataset into train (75%), validation (10%) and test (15%).
Edit 2:
I have simplified my model a bit; see above.
Also note that we have only 2 classes.
I have tried another check: I trained my model with only 20 images, to see whether a cost of 0 is obtained.
Result after 125 epochs:
2018-04-24 23:58:24.992421 epoch loss mean: [4549.9554141853, 1854.6537470817566, 817.4076923541704, 686.8368729054928, 687.7348744268759, 704.946801304817, 483.6952783479355, 260.2293045549304, 272.66821688037817, 116.57515235748815, 97.86094704543848, 90.43152131629176, 132.54018089070996, 69.62595339218387, 57.412255316681694, 79.66184640157735, 70.99515068903565, 55.75798599421978, 44.14403077028692, 38.901107819750905, 49.75594720244408, 52.6321079954505, 37.70595762133598, 42.07099115010351, 29.01994925737381, 28.365123450756073, 31.93120799213648, 43.9855432882905, 33.242121398448944, 36.57513061538339, 28.828659534454346, 29.847569406032562, 24.078316539525986, 31.630925316363573, 30.5430103354156, 26.18060240149498, 32.86780231446028, 25.42889341711998, 29.355055704712868, 26.269534677267075, 24.921810917556286, 27.15281054377556, 27.343381822109222, 24.293660208582878, 28.212179094552994, 25.07626649737358, 21.650991335511208, 25.7527906447649, 23.42476052045822, 28.350880563259125, 22.57907184958458, 21.601420983672142, 25.28128480911255, 25.550641894340515, 22.444457232952118, 27.660063683986664, 21.863914296031, 25.722180172801018, 24.00674758851528, 21.46472266316414, 26.599679857492447, 23.52132275700569, 26.1786640137434, 24.842691332101822, 25.263965144753456, 22.730938494205475, 22.787407517433167, 23.58866274356842, 25.351682364940643, 23.85272353887558, 23.884423837065697, 24.685379207134247, 22.55106496810913, 25.993630707263947, 21.967322662472725, 22.651918083429337, 21.91003155708313, 23.782021015882492, 21.567724645137787, 22.130879193544388, 21.33636975288391, 25.624440014362335, 23.26347705721855, 22.370914071798325, 22.614411562681198, 24.962509214878082, 22.121410965919495, 20.644148647785187, 24.472172617912292, 21.622991144657135, 21.719978988170624, 21.72349101305008, 21.729621797800064, 22.090826153755188, 21.44688707590103, 22.34817299246788, 22.93226248025894, 22.63547444343567, 22.1306095123291, 22.16277289390564, 22.83771103620529, 24.171751350164413, 22.025538682937622, 21.339059710502625, 22.169043481349945, 24.614955246448517, 22.83159503340721, 21.43451902270317, 21.54544973373413, 22.889380514621735, 24.168621599674225, 21.947510302066803, 22.30243694782257, 22.381454586982727, 22.50485634803772, 22.61657750606537, 22.288170099258423, 21.30070123076439, 22.489792048931122, 21.885000944137573, 21.343613982200623, 23.04211688041687, 24.00969059765339, 21.8588485121727, 22.199619591236115]
2018-04-24 23:58:24.992694 n_epoch: 125
the print output of each layer:
The conv1: [[[[[0.0981627107 0.100793235 0.0934509188]]]]...]
The max1: [[[[[0.102978 0.107030481 0.0977560952]]]]...]
The max2: [[[[[0 0 0.00116439909]]]]...]
The reshape: [[0 0 0.00116439909]...]
The fc: [[0.01167579 0.182256863 0.107154548]...]
The fc2: [[0.773868561 0.364259362 0]...]
The output: [[0.16590938 -0.255491495][0.16590938]...]
The conv1: [[[[[0.0981602222 0.100800745 0.0934513509]]]]...]
The max1: [[[[[0.102975294 0.107038349 0.0977560282]]]]...]
The max2: [[[[[0 0 0.000874094665]]]]...]
The reshape: [[0 0 0.000874094665]...]
The fc: [[0.0117974132 0.182980478 0.106876813]...]
The fc2: [[0.774896204 0.36372292 0]...]
The output: [[0.129838273 -0.210624188][0.129838273]...]
Shouldn't 125 epochs be enough to overfit 60 samples?
Any idea what is happening?
This is more of a comment that did not fit into the comment limit.
As I said before, I can't see anything obviously wrong. You might have to do some debugging. If the pre-softmax outputs are exactly the same, it is probably a bug somewhere, and you can find it by locating the exact place where your presumably different inputs lead to the same layer outputs.
If the pre-softmax outputs are close but not exactly the same, you most likely have a classic case of over-fitting. You mentioned that you have just 300 training examples - that is extremely few to train a whole net on (without using some pre-trained weights). Your net has just "memorized" the 300 training examples and does not generalize to the validation set at all.
EDIT 04/23/18:
So, the issue is not just in validation? I interpreted your "if I do the same for the training set I get a conventional prediction" to mean that training images are classified just fine. If you get the same prediction for your training images as well, then most likely the data, the loss, or the prediction calculation is wrong. I did not spot anything, so I guess you will need to debug. You might find "eager execution" useful for this - https://www.tensorflow.org/get_started/eager. If you organize your model as in the examples (https://github.com/tensorflow/tensorflow/tree/3f4662e7ca8724f760db4a5ea6e241c99e66e588/tensorflow/contrib/eager/python/examples), you should be able to use the same code with regular TensorFlow graph execution.
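For what it's worth, a minimal sketch of that debugging idea could look like the following (this assumes TensorFlow >= 1.7, where tf.enable_eager_execution is available, and the random arrays are only stand-ins for two of your real validation volumes):
# Sketch only: compare first-layer activations for two supposedly different inputs.
# If the difference is ~0 for inputs that should differ, the bug is upstream (data loading).
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

# stand-ins for two 150x150x40 validation volumes, shaped (batch, n_slices, img_sz, img_sz, 1)
img_a = np.random.rand(1, 40, 150, 150, 1).astype(np.float32)
img_b = np.random.rand(1, 40, 150, 150, 1).astype(np.float32)

w1 = tf.random_normal([3, 5, 5, 1, 32], stddev=0.1)  # same shape as W_conv1
conv_a = tf.nn.conv3d(img_a, w1, strides=[1, 1, 1, 1, 1], padding='SAME')
conv_b = tf.nn.conv3d(img_b, w1, strides=[1, 1, 1, 1, 1], padding='SAME')

print(np.abs(conv_a.numpy() - conv_b.numpy()).max())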
Related
I'm learning to build a basic CNN model using TensorFlow. After training my model, I want to load it and use it to predict hand-written digit images (from a CSV file).
Here is my CNN model:
import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)
class CNNLogisticClassification:
def __init__(self, shape_picture, n_labels,
learning_rate=0.5, dropout_ratio=0.5, alpha=0.0):
self.shape_picture = shape_picture
self.n_labels = n_labels
self.weights = None
self.biases = None
self.graph = tf.Graph() # initialize new graph
self.build(learning_rate, dropout_ratio, alpha) # building graph
self.sess = tf.Session(graph=self.graph) # create session by the graph
def build(self, learning_rate, dropout_ratio, alpha):
with self.graph.as_default():
### Input
self.train_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.train_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
### Optimization
# build neural network structure and get its predictions and loss
self.y_, self.original_loss = self.structure(pictures=self.train_pictures,
labels=self.train_labels,
dropout_ratio=dropout_ratio,
train=True, )
# regularization loss
self.regularization = \
tf.reduce_sum([tf.nn.l2_loss(w) for w in self.weights.values()]) \
/ tf.reduce_sum([tf.size(w, out_type=tf.float32) for w in self.weights.values()])
# total loss
self.loss = self.original_loss + alpha * self.regularization
# define training operation
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self.train_op = optimizer.minimize(self.loss)
### Prediction
self.new_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.new_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
self.new_y_, self.new_original_loss = self.structure(pictures=self.new_pictures,
labels=self.new_labels)
self.new_loss = self.new_original_loss + alpha * self.regularization
### Initialization
self.init_op = tf.global_variables_initializer()
### save model
self.saver=tf.train.Saver()
def structure(self, pictures, labels, dropout_ratio=None, train=False):
### Variable
## LeNet5 Architecture(http://yann.lecun.com/exdb/lenet/)
# input:(batch,28,28,1) => conv1[5x5,6] => (batch,24,24,6)
# pool2 => (batch,12,12,6) => conv2[5x5,16] => (batch,8,8,16)
# pool4 => fatten5 => (batch,4x4x16) => fc6 => (batch,120)
# (batch,120) => fc7 => (batch,84)
# (batch,84) => fc8 => (batch,10) => softmax
if (not self.weights) and (not self.biases):
self.weights = {
'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6),
stddev=0.1)),
'conv3': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16),
stddev=0.1)),
'fc6': tf.Variable(tf.truncated_normal(shape=(4*4*16, 120),
stddev=0.1)),
'fc7': tf.Variable(tf.truncated_normal(shape=(120, 84),
stddev=0.1)),
'fc8': tf.Variable(tf.truncated_normal(shape=(84, self.n_labels),
stddev=0.1)),
}
self.biases = {
'conv1': tf.Variable(tf.zeros(shape=(6))),
'conv3': tf.Variable(tf.zeros(shape=(16))),
'fc6': tf.Variable(tf.zeros(shape=(120))),
'fc7': tf.Variable(tf.zeros(shape=(84))),
'fc8': tf.Variable(tf.zeros(shape=(self.n_labels))),
}
### Structure
conv1 = self.get_conv_2d_layer(pictures,
self.weights['conv1'], self.biases['conv1'],
activation=tf.nn.relu)
pool2 = tf.nn.max_pool(conv1,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
conv3 = self.get_conv_2d_layer(pool2,
self.weights['conv3'], self.biases['conv3'],
activation=tf.nn.relu)
pool4 = tf.nn.max_pool(conv3,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
fatten5 = self.get_flatten_layer(pool4)
if train:
fatten5 = tf.nn.dropout(fatten5, keep_prob=1-dropout_ratio[0])
fc6 = self.get_dense_layer(fatten5,
self.weights['fc6'], self.biases['fc6'],
activation=tf.nn.relu)
if train:
fc6 = tf.nn.dropout(fc6, keep_prob=1-dropout_ratio[1])
fc7 = self.get_dense_layer(fc6,
self.weights['fc7'], self.biases['fc7'],
activation=tf.nn.relu)
logits = self.get_dense_layer(fc7, self.weights['fc8'], self.biases['fc8'])
y_ = tf.nn.softmax(logits)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=labels,
logits=logits))
return (y_, loss)
def get_dense_layer(self, input_layer, weight, bias, activation=None):
x = tf.add(tf.matmul(input_layer, weight), bias)
if activation:
x = activation(x)
return x
def get_conv_2d_layer(self, input_layer,
weight, bias,
strides=(1, 1), padding='VALID', activation=None):
x = tf.add(
tf.nn.conv2d(input_layer,
weight,
[1, strides[0], strides[1], 1],
padding=padding), bias)
if activation:
x = activation(x)
return x
def get_flatten_layer(self, input_layer):
shape = input_layer.get_shape().as_list()
n = 1
for s in shape[1:]:
n *= s
x = tf.reshape(input_layer, [-1, n])
return x
def fit(self, X, y, epochs=10,
validation_data=None, test_data=None, batch_size=None):
X = self._check_array(X)
y = self._check_array(y)
N = X.shape[0]
random.seed(9000)
if not batch_size:
batch_size = N
self.sess.run(self.init_op)
for epoch in range(epochs):
print('Epoch %2d/%2d: ' % (epoch+1, epochs))
# mini-batch gradient descent
index = [i for i in range(N)]
random.shuffle(index)
while len(index) > 0:
index_size = len(index)
batch_index = [index.pop() for _ in range(min(batch_size, index_size))]
feed_dict = {
self.train_pictures: X[batch_index, :],
self.train_labels: y[batch_index],
}
_, loss = self.sess.run([self.train_op, self.loss],
feed_dict=feed_dict)
print('[%d/%d] loss = %.4f ' % (N-len(index), N, loss), end='\r')
# evaluate at the end of this epoch
y_ = self.predict(X)
train_loss = self.evaluate(X, y)
train_acc = self.accuracy(y_, y)
msg = '[%d/%d] loss = %8.4f, acc = %3.2f%%' % (N, N, train_loss, train_acc*100)
if validation_data:
val_loss = self.evaluate(validation_data[0], validation_data[1])
val_acc = self.accuracy(self.predict(validation_data[0]), validation_data[1])
msg += ', val_loss = %8.4f, val_acc = %3.2f%%' % (val_loss, val_acc*100)
print(msg)
if test_data:
test_acc = self.accuracy(self.predict(test_data[0]), test_data[1])
print('test_acc = %3.2f%%' % (test_acc*100))
def accuracy(self, predictions, labels):
return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/predictions.shape[0])
def predict(self, X):
X = self._check_array(X)
return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})
def evaluate(self, X, y):
X = self._check_array(X)
y = self._check_array(y)
return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X,
self.new_labels: y})
def _check_array(self, ndarray):
ndarray = np.array(ndarray)
if len(ndarray.shape) == 1:
ndarray = np.reshape(ndarray, (1, ndarray.shape[0]))
return ndarray
if __name__ == '__main__':
print('Extract MNIST Dataset ...')
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
train_data = mnist.train
valid_data = mnist.validation
test_data = mnist.test
train_img = np.reshape(train_data.images, [-1, 28, 28, 1])
valid_img = np.reshape(valid_data.images, [-1, 28, 28, 1])
test_img = np.reshape(test_data.images, [-1, 28, 28, 1])
model = CNNLogisticClassification(
shape_picture=[28, 28, 1],
n_labels=10,
learning_rate=0.07,
dropout_ratio=[0.2, 0.1],
alpha=0.1,
)
model.fit(
X=train_img,
y=train_data.labels,
epochs=10,
validation_data=(valid_img, valid_data.labels),
test_data=(test_img, test_data.labels),
batch_size=32,
)
saver = model.saver.save(model.sess, "test_model")
print("Model saved in path: %s" % saver)
And I created another .py file to load my model:
import tensorflow as tf
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
sess.run(tf.global_variables_initializer())
saver.predict('D:\python\number_data\3.csv')
This is the error I'm getting:
AttributeError: 'Saver' object has no attribute 'predict'
How do I fix it and let the trained model predict my CSV file?
Thanks in advance for your help!
Edit:
I changed my second .py file as below:
import numpy as np
import tensorflow as tf
import pandas as pd
X=pd.read_csv('D:/PYTHON/cnn_data/7.csv', index_col=None, header=None).values
X1=X/255
X3=tf.convert_to_tensor(
X1,
dtype=None,
dtype_hint=None,
name=None
)
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
graph=tf.get_default_graph()
xs0=graph.get_tensor_by_name("Input:0")
prediction=graph.get_tensor_by_name("Output:0")
sess.run(prediction,feed_dict={xs0:X3})
print(prediction)
I am only trying to predict one digit image (a CSV file with one row). I convert it to a tensor and name my two placeholders "Input" and "Output", but I get another error:
TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor("Const:0", shape=(1, 784), dtype=float64) which was passed to the feed with key Tensor("Input:0", shape=(?, 28, 28, 1), dtype=float32).
First of all, the obvious error here is that you are trying to call a function that doesn't exist: the Saver object does not have a predict function.
Second, if you want TensorFlow to make predictions, you need to provide it with input TensorFlow can consume, and sadly, CSV files are not one of them.
All you need to do is transform your CSV input into tensors, with a function like this, for instance:
filename = 'D:\python\number_data\3.csv'
def csv_to_tensor(filename):
...
return tensors
I cannot tell you exactly how to implement the function since I don't know the exact format of your data, but I assume that each row in your file is one input. So you most likely just need to loop through the lines in your file and convert each line to a tensor, which can then be used by a TensorFlow model.
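As a rough sketch of that idea (assuming each CSV row is one flattened 28x28 digit with pixel values in 0-255, like your training data; the helper name and the normalization are my assumptions), the conversion could look like this. Note that feed_dict values must be plain numpy arrays rather than tf.Tensor objects, which is exactly what the TypeError in your edit complains about:
import numpy as np
import pandas as pd

def csv_to_input_array(filename):
    # hypothetical helper: each row is assumed to be a flattened 28x28 grayscale image in 0-255
    rows = pd.read_csv(filename, header=None).values.astype(np.float32)  # shape (n_rows, 784)
    rows /= 255.0                        # same scaling as used during training
    return rows.reshape(-1, 28, 28, 1)   # the shape the "Input" placeholder expects

# usage sketch, inside the restored session:
# X = csv_to_input_array('D:/PYTHON/cnn_data/7.csv')
# result = sess.run(prediction_tensor, feed_dict={input_tensor: X})
Here input_tensor and prediction_tensor stand for whatever tensors you fetch from the restored graph with get_tensor_by_name.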
I am training my own model using TensorFlow. However, I ran into trouble when I changed my activation function from ReLU to SELU.
This is what happened: the learning curve drops suddenly and I have no idea what is going on.
My learning curve looks like this.
From what I know, SELU can prevent overfitting, so I tried to use it in my model. Are there any tips, or any conditions to be aware of, when using SELU?
This is my code:
This is the place where I change my activation function:
-----
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
conv_layer = tf.layers.conv2d(x_tensor, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.selu, name = layer_name)
conv_layer = tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)
return conv_layer
-----
graph
tf.reset_default_graph()
#### placeholder ####
input_img = tf.placeholder(dtype=tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_class))
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
lr_in = tf.placeholder(dtype = tf.float32, name = 'learning_rate')
conv_ksize = (3,3)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
n_filters_1 = 32
n_filters_2 = 64
n_filters_3 = 128
n_filters_4 = 256
onebyone_ksize = (1,1)
#CNN
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv1")
# conv_1 = tf.layers.conv2d(conv_1, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.relu)
# conv_1_norm = tf.layers.batch_normalization(conv_1, name = "batch_norm1")
# conv_1_dropout = tf.layers.dropout(conv_1_norm, rate = keep_prob, training = True, name = "dropout1")
conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv2")
# conv_2_norm = tf.layers.batch_normalization(conv_2)
conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv3")
# conv_3_norm = tf.layers.batch_normalization(conv_3, name = "batch_norm3")
# conv_3_dropout = tf.layers.dropout(conv_3_norm, rate = keep_prob, training = True, name = "dropout3")
conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv4")
flatten = tf.layers.flatten(conv_4)
fc1 = tf.layers.dense(flatten, 256, activation = tf.nn.relu)
out = tf.layers.dense(fc1, 6, activation=None, name= "logits") #logit
predict = tf.nn.softmax(out)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = y_true))
optimizer = tf.train.AdamOptimizer(lr).minimize(cost)
##accuracy
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Training
#history/record
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
update_per_epoch = int(np.floor(X_train.shape[0] / batch_size))
## early stopping and learning rate congig
es_patience = 10
es_n = 0
lr_patience = 3
lr_n = 0
save_model_path = './save'
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Initializing the variables
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size = 32)
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size = len(X_valid))
for i in range(epoch):
epoch_loss = 0
epoch_acc = 0
for j in range(update_per_epoch):
image, label = next(batch_gen)
_, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
input_img : image,
y_true : label,
lr_in: lr,
keep_prob : keep_probability
})
epoch_loss += this_loss
epoch_acc += this_acc
## end of epoch
epoch_loss /= update_per_epoch
epoch_acc /= update_per_epoch
train_loss.append(epoch_loss)
train_acc.append(epoch_acc)
print('Epoch {:>2} Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))
valid_image, valid_label = next(val_batch_gen)
valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict = {
input_img: valid_image,
y_true: valid_label,
lr_in: lr,
keep_prob: 1.
})
valid_loss.append(valid_this_loss)
valid_acc.append(valid_this_acc)
print('Epoch {:>2} Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1,valid_this_loss, valid_this_acc))
# early stop
if valid_this_loss > np.min(valid_loss):
es_n += 1
lr_n += 1
else:
es_n = 0
lr_n = 0
saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))
# early stop
if es_n >= es_patience:
print("-----------early stopping-------------")
break
# adaptive learning rate
if lr_n >= lr_patience:
lr *= lr_decay_rate
lr_n = 0
print("-----------adjust learning rate------------")
# Save Model
save_path = saver.save(sess, save_model_path)
print('-----model save ------')
----------- 18/09/07 -----------
I can always reproduce the same result.
And this is my code; I wrote it in Jupyter. Sorry, but I can't upload the training data:
https://drive.google.com/open?id=1uUE32KrNmWnhLbV8z-fyHSMu6zGCCG_e
The project I am running is C3D-tensorflow from GitHub. When I train the network with train_c3d_ucf101.py, I get this error:
ValueError: Cannot feed value of shape (10, 0) for Tensor u'Placeholder:0', which has shape '(10, 16, 112, 112, 3)'
Here is my environment: Ubuntu 16.04, TensorFlow 1.4, Python 2.7. I am new to TensorFlow, so can you tell me how to locate Placeholder:0 and what I can do to solve this problem? Thank you in advance!
The train_c3d_ucf101.py:
"""Trains and Evaluates the MNIST network using a feed dictionary."""
# pylint: disable=missing-docstring
import os
import time
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import input_data
import c3d_model
import math
import numpy as np
# Basic model parameters as external flags.
flags = tf.app.flags
gpu_num = 1
#flags.DEFINE_float('learning_rate', 0.0, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 5000, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size.')
FLAGS = flags.FLAGS
MOVING_AVERAGE_DECAY = 0.9999
model_save_dir = './models'
def placeholder_inputs(batch_size):
"""Generate placeholder variables to represent the input tensors.
These placeholders are used as inputs by the rest of the model building
code and will be fed from the downloaded data in the .run() loop, below.
Args:
batch_size: The batch size will be baked into both placeholders.
Returns:
images_placeholder: Images placeholder.
labels_placeholder: Labels placeholder.
"""
# Note that the shapes of the placeholders match the shapes of the full
# image and label tensors, except the first dimension is now batch_size
# rather than the full size of the train or test data sets.
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
c3d_model.NUM_FRAMES_PER_CLIP,
c3d_model.CROP_SIZE,
c3d_model.CROP_SIZE,
c3d_model.CHANNELS))
labels_placeholder = tf.placeholder(tf.int64, shape=(batch_size))
return images_placeholder, labels_placeholder
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g, _ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def tower_loss(name_scope, logit, labels):
cross_entropy_mean = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logit)
)
tf.summary.scalar(
name_scope + '_cross_entropy',
cross_entropy_mean
)
weight_decay_loss = tf.get_collection('weightdecay_losses')
tf.summary.scalar(name_scope + '_weight_decay_loss', tf.reduce_mean(weight_decay_loss) )
# Calculate the total loss for the current tower.
total_loss = cross_entropy_mean + weight_decay_loss
tf.summary.scalar(name_scope + '_total_loss', tf.reduce_mean(total_loss) )
return total_loss
def tower_acc(logit, labels):
correct_pred = tf.equal(tf.argmax(logit, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return accuracy
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
var = tf.get_variable(name, shape, initializer=initializer)
return var
def _variable_with_weight_decay(name, shape, wd):
var = _variable_on_cpu(name, shape, tf.contrib.layers.xavier_initializer())
if wd is not None:
weight_decay = tf.nn.l2_loss(var)*wd
tf.add_to_collection('weightdecay_losses', weight_decay)
return var
def run_training():
# Get the sets of images and labels for training, validation, and
# Tell TensorFlow that the model will be built into the default Graph.
# Create model directory
if not os.path.exists(model_save_dir):
os.makedirs(model_save_dir)
use_pretrained_model = True
model_filename = "./sports1m_finetuning_ucf101.model"
with tf.Graph().as_default():
global_step = tf.get_variable(
'global_step',
[],
initializer=tf.constant_initializer(0),
trainable=False
)
images_placeholder, labels_placeholder = placeholder_inputs(
FLAGS.batch_size * gpu_num
)
tower_grads1 = []
tower_grads2 = []
logits = []
opt_stable = tf.train.AdamOptimizer(1e-4)
opt_finetuning = tf.train.AdamOptimizer(1e-3)
with tf.variable_scope('var_name') as var_scope:
weights = {
'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.0005),
'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.0005),
'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.0005),
'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.0005),
'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.0005),
'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.0005),
'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.0005),
'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.0005),
'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.0005)
}
biases = {
'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
}
for gpu_index in range(0, gpu_num):
with tf.device('/gpu:%d' % gpu_index):
varlist2 = [ weights['out'],biases['out'] ]
varlist1 = list( set(weights.values() + biases.values()) - set(varlist2) )
logit = c3d_model.inference_c3d(
images_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size,:,:,:,:],
0.5,
FLAGS.batch_size,
weights,
biases
)
loss_name_scope = ('gpud_%d_loss' % gpu_index)
loss = tower_loss(
loss_name_scope,
logit,
labels_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size]
)
grads1 = opt_stable.compute_gradients(loss, varlist1)
grads2 = opt_finetuning.compute_gradients(loss, varlist2)
tower_grads1.append(grads1)
tower_grads2.append(grads2)
logits.append(logit)
logits = tf.concat(logits,0)
accuracy = tower_acc(logits, labels_placeholder)
tf.summary.scalar('accuracy', accuracy)
grads1 = average_gradients(tower_grads1)
grads2 = average_gradients(tower_grads2)
apply_gradient_op1 = opt_stable.apply_gradients(grads1)
apply_gradient_op2 = opt_finetuning.apply_gradients(grads2, global_step=global_step)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
train_op = tf.group(apply_gradient_op1, apply_gradient_op2, variables_averages_op)
null_op = tf.no_op()
# Create a saver for writing training checkpoints.
saver = tf.train.Saver(weights.values() + biases.values())
init = tf.global_variables_initializer()
# Create a session for running Ops on the Graph.
sess = tf.Session(
config=tf.ConfigProto(allow_soft_placement=True)
)
sess.run(init)
if os.path.isfile(model_filename) and use_pretrained_model:
saver.restore(sess, model_filename)
# Create summary writer
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('./visual_logs/train', sess.graph)
test_writer = tf.summary.FileWriter('./visual_logs/test', sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/train.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
sess.run(train_op, feed_dict={
images_placeholder: train_images,
labels_placeholder: train_labels
})
duration = time.time() - start_time
print('Step %d: %.3f sec' % (step, duration))
# Save a checkpoint and evaluate the model periodically.
if (step) % 10 == 0 or (step + 1) == FLAGS.max_steps:
saver.save(sess, os.path.join(model_save_dir, 'c3d_ucf_model'), global_step=step)
print('Training Data Eval:')
summary, acc = sess.run(
[merged, accuracy],
feed_dict={images_placeholder: train_images,
labels_placeholder: train_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
train_writer.add_summary(summary, step)
print('Validation Data Eval:')
val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/test.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
summary, acc = sess.run(
[merged, accuracy],
feed_dict={
images_placeholder: val_images,
labels_placeholder: val_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
test_writer.add_summary(summary, step)
print("done")
def main(_):
run_training()
if __name__ == '__main__':
tf.app.run()
If you want to check other files, please tell me and I will show you.
This is my Traceback:
I started learning TensorFlow one week ago and I am struggling a bit with restoring a model.
I am building a class for a CNN (based on the Stanford course code) where I want to have a run function and a test function (the first one to train the model, the second one to make a prediction on another set of data).
The run function works correctly, but I have a hard time restoring the model to make the prediction in the test function (I get a shape problem, and I guess it comes from the way I get the operations).
import os
import tensorflow as tf
from time import time
class Cnn:
def __init__(self, batch_size=128, skip_step=10, epochs=1, dropout_ratio=0.75):
self.batch_size = batch_size
self.skip_step = skip_step
self.epochs = epochs
self.dropout_ratio = dropout_ratio
self.x = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
self.images = tf.reshape(self.x, shape=[-1, 28, 28, 1])
self.y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")
self.dropout = tf.placeholder(tf.float32, name="dropout")
self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
self.layers = [self.images]
self.n_layers = 0
def convolution(self, kernel_size, n_output, strides, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name) as scope:
kernel_shape = [kernel_size[0], kernel_size[1], previous_layer.get_shape()[3], n_output]
kernel = tf.get_variable("kernels", kernel_shape,
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
convolution_2d = tf.nn.conv2d(previous_layer, kernel, strides=strides,
padding="SAME")
convolution = tf.nn.relu(convolution_2d + biases, name=scope.name)
self.layers.append(convolution)
self.n_layers += 1
return convolution
def pool(self, size, strides, scope_name, padding="SAME"):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
pool = tf.nn.max_pool(previous_layer, ksize=size, strides=strides,
padding=padding)
self.layers.append(pool)
self.n_layers += 1
return pool
def fully_connected(self, n_input, n_output, scope_name, relu=False, dropout=True):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
weights = tf.get_variable("weights", [n_input, n_output],
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
pool_reshaped = tf.reshape(previous_layer, [-1, n_input])
logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
fc = logits
if relu:
fc = tf.nn.relu(fc, name="relu")
if dropout:
fc = tf.nn.dropout(fc, self.dropout, name="dropout")
self.layers.append(fc)
self.n_layers += 1
if relu is False and dropout is False:
self.logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
self.predictions = tf.nn.softmax(logits, name="predictions")
return fc
def set_loss(self, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.name_scope(scope_name):
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=previous_layer, labels=self.y)
loss = tf.reduce_mean(entropy, name='loss')
self.layers.append(loss)
self.n_layers += 1
self.loss = loss
def set_optimizer(self):
previous_layer = self.layers[self.n_layers]
optimizer = tf.train.AdamOptimizer(0.001).minimize(previous_layer, global_step=self.global_step)
self.layers.append(optimizer)
self.n_layers += 1
self.optimizer = optimizer
def run(self, train_x, train_y):
sess = tf.Session()
with sess.as_default():
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
writer = tf.summary.FileWriter('./my_graph/mnist', sess.graph)
ckpt = tf.train.get_checkpoint_state(os.path.dirname('results/checkpoint'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
initial_step = self.global_step.eval()
start_time = time()
n_batches = int(train_x.shape[0] / self.batch_size)
total_correct_preds = 0
total_loss = 0.0
current_position = 0
for index in range(initial_step, int(n_batches * self.epochs)): # train the model n_epochs times
x_batch = train_x[current_position: current_position + self.batch_size, :]
y_batch = train_y[current_position: current_position + self.batch_size, :]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
_, loss_batch, logits_batch = sess.run([self.optimizer, self.loss, self.logits],
feed_dict=feed_dict)
print(logits_batch.shape)
total_loss += loss_batch
preds = tf.nn.softmax(logits_batch)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
if (index + 1) % self.skip_step == 0:
print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / self.skip_step))
total_loss = 0.0
saver.save(sess, 'results/mnist-convnet', index)
current_position += self.batch_size
print("Optimization Finished!") # should be around 0.35 after 25 epochs
print("Total time: {0} seconds".format(time() - start_time))
print("Accuracy {0}".format(total_correct_preds / train_x.shape[0]))
def test(self, val_x, val_y):
checkpoint_file = tf.train.latest_checkpoint("results/")
graph = tf.Graph()
with graph.as_default():
sess = tf.Session()
with sess.as_default():
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
self.x = graph.get_operation_by_name("X_placeholder").outputs[0]
self.y = graph.get_operation_by_name("Y_placeholder").outputs[0]
self.dropout = graph.get_operation_by_name("dropout").outputs[0]
self.global_step = graph.get_operation_by_name("global_step").outputs[0]
self.logits = graph.get_operation_by_name("logits").outputs[0]
current_position = 0
n_batches = int(val_x.shape[0] / self.batch_size)
total_correct_preds = 0
for i in range(n_batches):
x_batch = val_x[current_position: current_position + self.batch_size]
y_batch = val_y[current_position: current_position + self.batch_size]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
logits_batch = sess.run([self.logits], feed_dict=feed_dict)
preds = tf.nn.softmax(logits_batch)
# correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
"""
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
current_position += self.batch_size
print("Accuracy {0}".format(total_correct_preds / val_x.shape[0]))
"""
and I test it like this (sorry if there is a lot of code):
from cnn import Cnn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)
N_CLASSES = 10
# load datas
mnist = input_data.read_data_sets("mnist", one_hot=True)
train_x, train_y = mnist.train.images, mnist.train.labels
val_x, val_y = mnist.test.images, mnist.test.labels
model = Cnn(batch_size=128, skip_step=10, epochs=0.25, dropout_ratio=0.75)
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=32, scope_name="conv1")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool1")
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=64, scope_name="conv2")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool2")
input_features = 7 * 7 * 64
model.fully_connected(n_input=input_features, n_output=1024, scope_name="fc", relu=True, dropout=True)
model.fully_connected(n_input=1024, n_output=N_CLASSES, scope_name="softmax_linear", dropout=False)
model.set_loss(scope_name="loss")
model.set_optimizer()
model.run(train_x=train_x, train_y=train_y)
model.test(val_x, val_y)
I would also appreciate any comments regarding a better way to build a class like this (in particular the handling of the placeholders and the sessions).
Thanks!
Nicolas
I'm new to TensorFlow. I have the following graph. The test accuracy that I get is 90%. I'd like to reuse the model. One way that I figured out is to initialize my variables from the learned weights (see REUSE MODEL below). But when I run the test dataset through the model, I now get an accuracy of 2.0%.
What is the problem with the way I'm doing it, and what is the best way to do it?
GRAPH BUILD AND RUN
graph = tf.Graph()
with graph.as_default():
# input data
tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_test_dataset = tf.constant(test_dataset)
layer1_weights = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, num_channels, num_kernels]))
layer1_biases = tf.Variable(tf.zeros([num_kernels]))
layer2_weights = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, num_kernels, num_kernels]))
layer2_biases = tf.Variable(tf.constant(1.0, shape=[num_kernels]))
layer3_weights = tf.Variable(tf.truncated_normal([image_size // 4 * image_size // 4 * num_kernels, num_hidden], stddev=0.1))
layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
# model
def model(data):
conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
hidden = tf.nn.relu(conv + layer1_biases)
conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
hidden = tf.nn.relu(conv + layer2_biases)
shape = hidden.get_shape().as_list()
# reshape is of size batch_size X features_vector. We flatten the output of the layer2 to a features vector
reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
return tf.matmul(hidden, layer4_weights) + layer4_biases
# training computation
logits = model(tf_train_dataset)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
# predictions
train_prediction = tf.nn.softmax(logits)
test_prediction = tf.nn.softmax(model(tf_test_dataset))
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
num_steps = 1001
num_epochs = 100
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print('Initialized')
for epoch in range(num_epochs):
for step in range(num_steps):
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
batch_labels = train_labels[offset:(offset + batch_size), :]
feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
_, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 50 == 0):
print('Minibatch loss at step %d: %f' % (step, l))
print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
REUSE MODEL
with graph.as_default():
tf_test_dataset2 = tf.constant(test_dataset)
layer1_weights2 = tf.Variable(layer1_weights.initialized_value())
layer1_biases2 = tf.Variable(layer1_biases.initialized_value())
layer2_weights2 = tf.Variable(layer2_weights.initialized_value())
layer2_biases2 = tf.Variable(layer2_biases.initialized_value())
layer3_weights2 = tf.Variable(layer3_weights.initialized_value())
layer3_biases2 = tf.Variable(layer3_biases.initialized_value())
layer4_weights2 = tf.Variable(layer4_weights.initialized_value())
layer4_biases2 = tf.Variable(layer4_biases.initialized_value())
# model
def model(data):
conv = tf.nn.conv2d(data, layer1_weights2, [1, 2, 2, 1], padding='SAME')
hidden = tf.nn.relu(conv + layer1_biases2)
conv = tf.nn.conv2d(hidden, layer2_weights2, [1, 2, 2, 1], padding='SAME')
hidden = tf.nn.relu(conv + layer2_biases2)
shape = hidden.get_shape().as_list()
# reshape is of size batch_size X features_vector. We flatten the output of the layer2 to a features vector
reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights2) + layer3_biases2)
return tf.matmul(hidden, layer4_weights2) + layer4_biases2
test_prediction2 = tf.nn.softmax(model(tf_test_dataset2))
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
session.run(test_prediction2)
print('Test accuracy: %.1f%%' % accuracy(test_prediction2.eval(), test_labels))
I think the correct way is to save and restore the metagraph. This is the official documentation on it:
https://www.tensorflow.org/api_docs/python/state_ops/exporting_and_importing_meta_graphs
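A minimal sketch of that save/restore flow in TF 1.x might look like this (the checkpoint path and the tensor names "input" and "test_prediction" are assumptions here; you would give your own tensors explicit names when building the graph):
import tensorflow as tf

# --- in the training script: name the tensors you will need later, then save ---
# tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name="input")
# test_prediction = tf.nn.softmax(model(tf_train_dataset), name="test_prediction")
# saver = tf.train.Saver()
# saver.save(session, "./checkpoints/model")   # also writes ./checkpoints/model.meta

# --- in a separate script: rebuild the graph from the .meta file and restore the weights ---
with tf.Session() as session:
    saver = tf.train.import_meta_graph("./checkpoints/model.meta")
    saver.restore(session, tf.train.latest_checkpoint("./checkpoints"))
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name("input:0")
    pred = graph.get_tensor_by_name("test_prediction:0")
    # note: do not run tf.global_variables_initializer() after restoring,
    # or the trained weights would be overwritten with fresh random values
    # print(session.run(pred, feed_dict={x: test_dataset}))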