I'm learning basic CNN models using TensorFlow. After training my model, I want to load it and use it to predict a handwritten digit image (a CSV file).
Here is my CNN model:
import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)
class CNNLogisticClassification:
def __init__(self, shape_picture, n_labels,
learning_rate=0.5, dropout_ratio=0.5, alpha=0.0):
self.shape_picture = shape_picture
self.n_labels = n_labels
self.weights = None
self.biases = None
self.graph = tf.Graph() # initialize new graph
self.build(learning_rate, dropout_ratio, alpha) # building graph
self.sess = tf.Session(graph=self.graph) # create session by the graph
def build(self, learning_rate, dropout_ratio, alpha):
with self.graph.as_default():
### Input
self.train_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.train_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
### Optimization
# build neural network structure and get its predictions and loss
self.y_, self.original_loss = self.structure(pictures=self.train_pictures,
labels=self.train_labels,
dropout_ratio=dropout_ratio,
train=True, )
# regularization loss
self.regularization = \
tf.reduce_sum([tf.nn.l2_loss(w) for w in self.weights.values()]) \
/ tf.reduce_sum([tf.size(w, out_type=tf.float32) for w in self.weights.values()])
# total loss
self.loss = self.original_loss + alpha * self.regularization
# define training operation
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self.train_op = optimizer.minimize(self.loss)
### Prediction
self.new_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.new_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
self.new_y_, self.new_original_loss = self.structure(pictures=self.new_pictures,
labels=self.new_labels)
self.new_loss = self.new_original_loss + alpha * self.regularization
### Initialization
self.init_op = tf.global_variables_initializer()
### save model
self.saver=tf.train.Saver()
def structure(self, pictures, labels, dropout_ratio=None, train=False):
### Variable
## LeNet5 Architecture(http://yann.lecun.com/exdb/lenet/)
# input:(batch,28,28,1) => conv1[5x5,6] => (batch,24,24,6)
# pool2 => (batch,12,12,6) => conv2[5x5,16] => (batch,8,8,16)
# pool4 => fatten5 => (batch,4x4x16) => fc6 => (batch,120)
# (batch,120) => fc7 => (batch,84)
# (batch,84) => fc8 => (batch,10) => softmax
if (not self.weights) and (not self.biases):
self.weights = {
'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6),
stddev=0.1)),
'conv3': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16),
stddev=0.1)),
'fc6': tf.Variable(tf.truncated_normal(shape=(4*4*16, 120),
stddev=0.1)),
'fc7': tf.Variable(tf.truncated_normal(shape=(120, 84),
stddev=0.1)),
'fc8': tf.Variable(tf.truncated_normal(shape=(84, self.n_labels),
stddev=0.1)),
}
self.biases = {
'conv1': tf.Variable(tf.zeros(shape=(6))),
'conv3': tf.Variable(tf.zeros(shape=(16))),
'fc6': tf.Variable(tf.zeros(shape=(120))),
'fc7': tf.Variable(tf.zeros(shape=(84))),
'fc8': tf.Variable(tf.zeros(shape=(self.n_labels))),
}
### Structure
conv1 = self.get_conv_2d_layer(pictures,
self.weights['conv1'], self.biases['conv1'],
activation=tf.nn.relu)
pool2 = tf.nn.max_pool(conv1,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
conv3 = self.get_conv_2d_layer(pool2,
self.weights['conv3'], self.biases['conv3'],
activation=tf.nn.relu)
pool4 = tf.nn.max_pool(conv3,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
fatten5 = self.get_flatten_layer(pool4)
if train:
fatten5 = tf.nn.dropout(fatten5, keep_prob=1-dropout_ratio[0])
fc6 = self.get_dense_layer(fatten5,
self.weights['fc6'], self.biases['fc6'],
activation=tf.nn.relu)
if train:
fc6 = tf.nn.dropout(fc6, keep_prob=1-dropout_ratio[1])
fc7 = self.get_dense_layer(fc6,
self.weights['fc7'], self.biases['fc7'],
activation=tf.nn.relu)
logits = self.get_dense_layer(fc7, self.weights['fc8'], self.biases['fc8'])
y_ = tf.nn.softmax(logits)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=labels,
logits=logits))
return (y_, loss)
def get_dense_layer(self, input_layer, weight, bias, activation=None):
x = tf.add(tf.matmul(input_layer, weight), bias)
if activation:
x = activation(x)
return x
def get_conv_2d_layer(self, input_layer,
weight, bias,
strides=(1, 1), padding='VALID', activation=None):
x = tf.add(
tf.nn.conv2d(input_layer,
weight,
[1, strides[0], strides[1], 1],
padding=padding), bias)
if activation:
x = activation(x)
return x
def get_flatten_layer(self, input_layer):
shape = input_layer.get_shape().as_list()
n = 1
for s in shape[1:]:
n *= s
x = tf.reshape(input_layer, [-1, n])
return x
def fit(self, X, y, epochs=10,
validation_data=None, test_data=None, batch_size=None):
X = self._check_array(X)
y = self._check_array(y)
N = X.shape[0]
random.seed(9000)
if not batch_size:
batch_size = N
self.sess.run(self.init_op)
for epoch in range(epochs):
print('Epoch %2d/%2d: ' % (epoch+1, epochs))
# mini-batch gradient descent
index = [i for i in range(N)]
random.shuffle(index)
while len(index) > 0:
index_size = len(index)
batch_index = [index.pop() for _ in range(min(batch_size, index_size))]
feed_dict = {
self.train_pictures: X[batch_index, :],
self.train_labels: y[batch_index],
}
_, loss = self.sess.run([self.train_op, self.loss],
feed_dict=feed_dict)
print('[%d/%d] loss = %.4f ' % (N-len(index), N, loss), end='\r')
# evaluate at the end of this epoch
y_ = self.predict(X)
train_loss = self.evaluate(X, y)
train_acc = self.accuracy(y_, y)
msg = '[%d/%d] loss = %8.4f, acc = %3.2f%%' % (N, N, train_loss, train_acc*100)
if validation_data:
val_loss = self.evaluate(validation_data[0], validation_data[1])
val_acc = self.accuracy(self.predict(validation_data[0]), validation_data[1])
msg += ', val_loss = %8.4f, val_acc = %3.2f%%' % (val_loss, val_acc*100)
print(msg)
if test_data:
test_acc = self.accuracy(self.predict(test_data[0]), test_data[1])
print('test_acc = %3.2f%%' % (test_acc*100))
def accuracy(self, predictions, labels):
return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/predictions.shape[0])
def predict(self, X):
X = self._check_array(X)
return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})
def evaluate(self, X, y):
X = self._check_array(X)
y = self._check_array(y)
return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X,
self.new_labels: y})
def _check_array(self, ndarray):
ndarray = np.array(ndarray)
if len(ndarray.shape) == 1:
ndarray = np.reshape(ndarray, (1, ndarray.shape[0]))
return ndarray
if __name__ == '__main__':
print('Extract MNIST Dataset ...')
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
train_data = mnist.train
valid_data = mnist.validation
test_data = mnist.test
train_img = np.reshape(train_data.images, [-1, 28, 28, 1])
valid_img = np.reshape(valid_data.images, [-1, 28, 28, 1])
test_img = np.reshape(test_data.images, [-1, 28, 28, 1])
model = CNNLogisticClassification(
shape_picture=[28, 28, 1],
n_labels=10,
learning_rate=0.07,
dropout_ratio=[0.2, 0.1],
alpha=0.1,
)
model.fit(
X=train_img,
y=train_data.labels,
epochs=10,
validation_data=(valid_img, valid_data.labels),
test_data=(test_img, test_data.labels),
batch_size=32,
)
save_path = model.saver.save(model.sess, "test_model")
print("Model saved in path: %s" % save_path)
And I created another .py file to load my model:
import tensorflow as tf
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
sess.run(tf.global_variables_initializer())
saver.predict('D:\python\number_data\3.csv')
This is the error I'm getting:
AttributeError: 'Saver' object has no attribute 'predict'
How do I fix it and let the trained model predict my CSV file?
Thanks in advance for your help!
Edit:
I changed my second .py file as below:
import numpy as np
import tensorflow as tf
import pandas as pd
X=pd.read_csv('D:/PYTHON/cnn_data/7.csv', index_col=None, header=None).values
X1=X/255
X3=tf.convert_to_tensor(
X1,
dtype=None,
dtype_hint=None,
name=None
)
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
graph=tf.get_default_graph()
xs0=graph.get_tensor_by_name("Input:0")
prediction=graph.get_tensor_by_name("Output:0")
sess.run(prediction,feed_dict={xs0:X3})
print(prediction)
I'm only trying to predict one digit image (a CSV file with one row). I converted it into a tensor and named my two placeholders "Input" and "Output", but I get another error:
TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor("Const:0", shape=(1, 784), dtype=float64) which was passed to
the feed with key Tensor("Input:0", shape=(?, 28, 28, 1), dtype=float32).
First of all, the obvious error here is that you are trying to call a function that doesn't exist: the Saver object has no predict function.
Second, if you want TensorFlow to make predictions, you need to provide it with input in a form it accepts, and sadly, a CSV file is not one of them.
All you need to do is transform your CSV inputs into tensors, with a function like this for instance:
filename = 'D:\python\number_data\3.csv'
def csv_to_tensor(filename):
...
return tensors
I cannot tell you exactly how to implement the function since I don't know the exact format of your data, but I am assuming that each row in your file is one input. So you most likely just need to loop through the lines in your file and convert each line to a tensor, which can then be used by a TensorFlow model.
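For the concrete case in the edit above — one 784-pixel row per CSV line — here is a minimal load-and-predict sketch. Two caveats: sess.run expects a plain numpy array (feeding a tf.Tensor is exactly what the TypeError complains about), and because the training script gave both placeholder pairs the same names ("Input", "Output"), TensorFlow deduplicated them, so the prediction-branch tensors are presumably 'Input_1:0' and 'Softmax_1:0' — check graph.get_operations() to confirm:
import numpy as np
import pandas as pd
import tensorflow as tf

def csv_to_array(filename):
    # One 784-pixel row per line, pixel values in [0, 255] (assumed).
    X = pd.read_csv(filename, index_col=None, header=None).values
    X = X.astype(np.float32) / 255.0
    return X.reshape(-1, 28, 28, 1)  # match the placeholder shape (?, 28, 28, 1)

X = csv_to_array('D:/PYTHON/cnn_data/7.csv')

saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
    # Do NOT run tf.global_variables_initializer() here -- it would overwrite
    # the restored weights with fresh random values.
    graph = tf.get_default_graph()
    xs = graph.get_tensor_by_name('Input_1:0')    # prediction-branch input (assumed name)
    y_ = graph.get_tensor_by_name('Softmax_1:0')  # prediction-branch output (assumed name)
    pred = sess.run(y_, feed_dict={xs: X})        # feed the numpy array directly
    print(np.argmax(pred, axis=1))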
Related
I am relatively new to transformers and thought programming one from scratch with PyTorch would be a good exercise. I already tested the model and it worked. However, when implementing training for English-French translation tasks, I get the said error when computing the loss.
The code of the training function is as follows:
def train_(self, x, y, lr, steps, path=None):
self.train()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(self.parameters(), lr=lr)
for epoch in range(steps):
for batch_id, (batch_x, batch_y) in enumerate(zip(x, y)):
if torch.cuda.is_available() and self.is_cuda:
batch_x = batch_x.cuda()
batch_y = batch_y.cuda()
out = self(batch_x, batch_y)
print(out.shape, batch_y.shape)
# Embed batch_y so result is comparable
batch_y = self.decoder.word_embedding(batch_y)
print(batch_y.shape)
loss = criterion(out, batch_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Training: epoch {epoch} batch {batch_id} loss {loss}")
The prints of the shapes give the following output:
torch.Size([2, 8, 256]) torch.Size([2, 8])
torch.Size([2, 8, 256])
In terms of dimensionality, I'm using an embedding size of 256.
If needed, I can also provide the entire code.
Thank you.
Edit:
Here's the whole code
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Hyper-parameters
lr = 0.001
steps = 1000
# Attention head
class AttentionHead(nn.Module):
def __init__(self, embed_dim, head_dim):
super(AttentionHead, self).__init__()
self.embed_dim = embed_dim
self.values_layer = nn.Linear(head_dim, head_dim, bias=False)
self.keys_layer = nn.Linear(head_dim, head_dim, bias=False)
self.queries_layer = nn.Linear(head_dim, head_dim, bias=False)
def forward(self, values, keys, queries, mask=None):
# Send them through the linear layers
values = self.values_layer(values)
keys = self.keys_layer(keys)
queries = self.queries_layer(queries)
# Multiply queries and keys to score matrix
scores = torch.einsum("nah,nbh->nab", queries, keys)
# Keys shape: (n, m, head_dim)
# Queries shape: (n, m, heads_dim)
# Score shape: (n, m, m)
# If needed, then mask the score matrix
if mask is not None:
scores = scores.masked_fill(mask == 0, float("-1e20"))
# Scale the (masked) score matrix
scaled = scores / (self.embed_dim ** (1 / 2))
# Normalize the scaled score matrix
attention = torch.softmax(scaled, dim=1)
# Multiply scores and values to output
out = torch.einsum("nab,nbh->nah", attention, values)
# attention shape: (n, m, m)
# values shape: (n, m, head_dim)
# out shape: (n, m, head_dim)
return out
# Multi head attention mechanism
class MultiHeadAttentionBlock(nn.Module):
def __init__(self, embed_dim, head_num):
super(MultiHeadAttentionBlock, self).__init__()
self.embed_dim = embed_dim
self.head_num = head_num
self.head_dim = embed_dim // head_num
assert (head_num * self.head_dim == embed_dim), \
"Embed size is required to be divisible by heads."
self.heads = nn.ModuleList(
[AttentionHead(embed_dim, self.head_dim)
for _ in range(head_num)]
)
self.out_layer = nn.Linear(embed_dim, embed_dim)
def forward(self, values, keys, queries, mask=None):
n = values.shape[0] # Number of examples / batch size
v_dim = values.shape[1] # Quantity of embeddings
k_dim = keys.shape[1]
q_dim = queries.shape[1]
# Split up the values, keys and queries
values = values.reshape(n, v_dim, self.head_num, self.head_dim)
keys = keys.reshape(n, k_dim, self.head_num, self.head_dim)
queries = queries.reshape(n, q_dim, self.head_num, self.head_dim)
# Iterate through heads
for i, head in enumerate(self.heads):
globals()[f"out{i}"] = head(values[:, :, i, :], keys[:, :, i, :], queries[:, :, i, :], mask)
# out shape: (n, m, head_dim)
# Concatenate the output of each head
out = globals()[f"out{0}"]
for i in range(self.head_num - 1):
out = torch.cat((out, globals()[f"out{i + 1}"]), dim=2)
# Out shape: (n, m, head_num * head_dim / embed_dim)
# Send output through a last linear layer and return the outcome
out = self.out_layer(out)
return out
# Transformer block
class TransformerBlock(nn.Module):
def __init__(self, embed_dim, head_num, dropout, forward_expansion):
super(TransformerBlock, self).__init__()
self.attention = MultiHeadAttentionBlock(embed_dim, head_num)
self.norm1 = nn.LayerNorm(embed_dim)
self.norm2 = nn.LayerNorm(embed_dim)
self.feed_forward = nn.Sequential(
nn.Linear(embed_dim, forward_expansion * embed_dim),
nn.ReLU(),
nn.Linear(forward_expansion * embed_dim, embed_dim)
)
self.dropout = nn.Dropout(dropout)
def forward(self, values, keys, queries, mask=None):
attention = self.attention(values, keys, queries, mask)
x = self.dropout(self.norm1(attention + queries))
forward = self.feed_forward(x)
x = self.dropout(self.norm2(forward + x))
return x
# Encoder
class Encoder(nn.Module):
def __init__(self, src_vocab_dim, embed_dim, head_num, block_num, dropout, forward_expansion, max_length, device):
super(Encoder, self).__init__()
self.device = device
self.embed_dim = embed_dim
self.word_embedding = nn.Embedding(src_vocab_dim, embed_dim)
self.position_embedding = nn.Embedding(max_length, embed_dim) # max_length: max word length of all data
self.blocks = nn.ModuleList(
[TransformerBlock(embed_dim, head_num, dropout, forward_expansion)
for _ in range(block_num)]
)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
n, seq_length = x.shape # (batch size, max word length of that batch)
positions = torch.arange(0, seq_length).expand(n, seq_length).to(self.device) # 0 - seq_length along dim 1
x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
for block in self.blocks:
x = block(x, x, x)
return x
# Decoder block
class DecoderBlock(nn.Module):
def __init__(self, embed_dim, head_num, dropout, forward_expansion):
super(DecoderBlock, self).__init__()
self.attention = MultiHeadAttentionBlock(embed_dim, head_num)
self.norm = nn.LayerNorm(embed_dim)
self.transformer_block = TransformerBlock(embed_dim, head_num, dropout, forward_expansion)
self.dropout = nn.Dropout(dropout)
def forward(self, x, values, keys, mask):
attention = self.attention(x, x, x, mask)
# As the outputs of the decoder's first self-attention block are the queries, the encoder's
# output can be of a different size. Only keys and values have to be identical in size.
queries = self.dropout(self.norm(attention + x))
x = self.transformer_block(values, keys, queries)
return x
# Decoder
class Decoder(nn.Module):
def __init__(self, trg_vocab_dim, embed_dim, head_num, block_num, dropout, forward_expansion, max_length, device):
super(Decoder, self).__init__()
self.device = device
self.embed_dim = embed_dim
self.word_embedding = nn.Embedding(trg_vocab_dim, embed_dim)
self.position_embedding = nn.Embedding(max_length, embed_dim)
self.blocks = nn.ModuleList(
[DecoderBlock(embed_dim, head_num, dropout, forward_expansion)
for _ in range(block_num)]
)
self.dropout = nn.Dropout(dropout)
self.out_layer = nn.Linear(embed_dim, embed_dim) # changed embed_dim (second time in bracket) from trg_vocab_dim
def forward(self, x, enc_out, mask):
n, seq_length = x.shape
positions = torch.arange(0, seq_length).expand(n, seq_length).to(self.device) # 0 - seq_length along dim 1
x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
for block in self.blocks:
x = block(x, enc_out, enc_out, mask)
x = self.out_layer(x)
return x
# Transformer
class Transformer(nn.Module):
def __init__(self, src_vocab_dim, trg_vocab_dim, embed_dim, head_num, block_num_enc, block_num_dec,
dropout, forward_expansion, max_length, device):
super(Transformer, self).__init__()
self.device = device
self.encoder = Encoder(src_vocab_dim, embed_dim, head_num, block_num_enc, dropout, forward_expansion, max_length, device)
self.decoder = Decoder(trg_vocab_dim, embed_dim, head_num, block_num_dec, dropout, forward_expansion, max_length, device)
def make_mask(self, y):
n, m = y.shape
mask = torch.tril(torch.ones((m, m))).expand(n, m, m)
return mask.to(self.device)
def forward(self, x, y):
mask = self.make_mask(y)
out_enc = self.encoder(x)
out_dec = self.decoder(y, out_enc, mask)
return out_dec
def train_(self, x, y, lr, steps, path=None):
self.train()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(self.parameters(), lr=lr)
for epoch in range(steps):
for batch_id, (batch_x, batch_y) in enumerate(zip(x, y)):
if torch.cuda.is_available() and self.is_cuda:
batch_x = batch_x.cuda()
batch_y = batch_y.cuda()
out = self(batch_x, batch_y)
print(out.shape, batch_y.shape)
# Embed batch_y so result is comparable
batch_y = self.decoder.word_embedding(batch_y)
print(batch_y.shape)
loss = criterion(out, batch_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Training: epoch {epoch} batch {batch_id} loss {loss}")
if path is not None:
torch.save(self, path)
# Run
if __name__ == "__main__":
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# use for normal run
# x = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]).to(device) # input
# y = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]).to(device) # target
# added one bracket for training so this is one batch
x = torch.tensor([[[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]]).to(device) # input
y = torch.tensor([[[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]]).to(device) # target
src_vocab_dim = 10
trg_vocab_dim = 10
model = Transformer(src_vocab_dim, trg_vocab_dim, embed_dim=256, head_num=8, block_num_enc=6, block_num_dec=6,
dropout=0, forward_expansion=4, max_length=100, device=device)
model.train_(x, y, lr, steps)
Here's the whole error trace:
Traceback (most recent call last):
File "C:/Users/user/PycharmProjects/Transformer/Code.py", line 310, in <module>
model.train_(x, y, lr, steps)
File "C:/Users/user/PycharmProjects/Transformer/Code.py", line 279, in train_
loss = criterion(out, batch_y)
File "C:\Users\user\Anaconda3\envs\Transformer\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\user\Anaconda3\envs\Transformer\lib\site-packages\torch\nn\modules\loss.py", line 948, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File "C:\Users\user\Anaconda3\envs\Transformer\lib\site-packages\torch\nn\functional.py", line 2422, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "C:\Users\user\Anaconda3\envs\Transformer\lib\site-packages\torch\nn\functional.py", line 2228, in nll_loss
out_size, target.size()))
ValueError: Expected target size (2, 256), got torch.Size([2, 8, 256])
Concerning your question:
My thoughts were that the loss function cannot compare the output with the target if the target is not embedded as well. With the embedding missing, this gives the following shapes:
torch.Size([2, 8]) # target
torch.Size([2, 8, 256]) # output
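For reference (an addition, not from the original thread): nn.CrossEntropyLoss expects raw logits of shape (N, C) and integer class targets of shape (N,), so the usual pattern for sequence models is to flatten the batch and sequence dimensions and keep the target as token indices instead of embedding it. Note also the comment in the Decoder above, where out_layer was changed to map to embed_dim instead of trg_vocab_dim; for this loss, C should be the target vocabulary size. A minimal sketch under those assumptions:
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

batch, seq_len, vocab = 2, 8, 10                     # vocab = trg_vocab_dim, not embed_dim
out = torch.randn(batch, seq_len, vocab)             # raw decoder logits
batch_y = torch.randint(0, vocab, (batch, seq_len))  # token indices, NOT embedded

# CrossEntropyLoss wants logits (N, C) and targets (N,):
loss = criterion(out.reshape(-1, vocab), batch_y.reshape(-1))
print(loss.item())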
I am getting a ResourceExhaustedError when trying to train on MNIST.
I found out that I can change the batch size in order to avoid the problem, but unfortunately I don't know where to do that in my code.
Traceback:
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[238305,32,28,28] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer, Variable/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[Node: add_3/_39 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_51_add_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
My CNN model:
from __future__ import print_function
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# number 1 to 10 data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
import mnist2.mnist as mn
mnist = mn.read_data_sets('MNIST/', one_hot=True, num_classes=9)
def compute_accuracy(v_xs, v_ys):
global prediction
y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
return result
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784, 1]) # 28x28
ys = tf.placeholder(tf.float32, [None, 9])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])
## conv1 layer ##
W_conv1 = weight_variable([5, 5, 1, 32]) #patch 5x5, in channel size 1, out size 32
## pool1 layer ##
b_conv1 = bias_variable([32])
#Combine
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) #output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1) #output size 14x14x32
## conv2 layer ##
W_conv2 = weight_variable([5, 5, 32, 64]) #patch 5x5, in channel size 32, out size 64
## pool2 layer ##
b_conv2 = bias_variable([64])
#Combine
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) #output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2) #output size 7x7x64
## fc1 layer ##
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) #[n_samples, 7,7,64] => [n_samples, 7*7*64]
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
## output layer ##
W_fc2 = weight_variable([1024, 9])
b_fc2 = bias_variable([9])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in range(10001):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob:0.5})
if i % 50 == 0:
print(compute_accuracy(
mnist.test.images, mnist.test.labels))
My MNIST code:
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
# CVDF mirror of http://yann.lecun.com/exdb/mnist/
SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32).newbyteorder('>')
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def extract_images(f, channels=1):
"""Extract the images into a 4D uint8 numpy array [index, y, x, depth].
Args:
f: A file object that can be passed into a gzip reader.
Returns:
data: A 4D uint8 numpy array [index, y, x, depth].
Raises:
ValueError: If the bytestream does not start with 2051.
"""
print('Extracting', f.name)
with gzip.GzipFile(fileobj=f) as bytestream:
magic = _read32(bytestream)
if magic != 2051:
raise ValueError('Invalid magic number %d in MNIST image file: %s' %
(magic, f.name))
num_images = _read32(bytestream)
rows = _read32(bytestream)
cols = _read32(bytestream)
buf = bytestream.read(rows * cols * num_images * channels)
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_images, rows, cols, channels)
return data
def dense_to_one_hot(labels_dense, num_classes):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = numpy.arange(num_labels) * num_classes
labels_one_hot = numpy.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
def extract_labels(f, one_hot=False, num_classes=10):
"""Extract the labels into a 1D uint8 numpy array [index].
Args:
f: A file object that can be passed into a gzip reader.
one_hot: Does one hot encoding for the result.
num_classes: Number of classes for the one hot encoding.
Returns:
labels: a 1D uint8 numpy array.
Raises:
ValueError: If the bytestream doesn't start with 2049.
"""
print('Extracting', f.name)
with gzip.GzipFile(fileobj=f) as bytestream:
magic = _read32(bytestream)
if magic != 2049:
raise ValueError('Invalid magic number %d in MNIST label file: %s' %
(magic, f.name))
num_items = _read32(bytestream)
buf = bytestream.read(num_items)
labels = numpy.frombuffer(buf, dtype=numpy.uint8)
if one_hot:
return dense_to_one_hot(labels, num_classes)
return labels
class DataSet(object):
def __init__(self,
images,
labels,
fake_data=False,
one_hot=False,
dtype=dtypes.float32,
reshape=True,
seed=None):
"""Construct a DataSet.
one_hot arg is used only if fake_data is true. `dtype` can be either
`uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
`[0, 1]`. Seed arg provides for convenient deterministic testing.
"""
seed1, seed2 = random_seed.get_seed(seed)
# If op level seed is not set, use whatever graph level seed is returned
numpy.random.seed(seed1 if seed is None else seed2)
dtype = dtypes.as_dtype(dtype).base_dtype
if dtype not in (dtypes.uint8, dtypes.float32):
raise TypeError('Invalid image dtype %r, expected uint8 or float32' %
dtype)
if fake_data:
self._num_examples = 10000
self.one_hot = one_hot
else:
assert images.shape[0] == labels.shape[0], (
'images.shape: %s labels.shape: %s' % (images.shape, labels.shape))
self._num_examples = images.shape[0]
# Convert shape from [num examples, rows, columns, depth]
# to [num examples, rows*columns] (assuming depth == 1)
if reshape:
# assert images.shape[3] == 1
images = images.reshape(images.shape[0],
images.shape[1] * images.shape[2], images.shape[3])
if dtype == dtypes.float32:
# Convert from [0, 255] -> [0.0, 1.0].
num, dim, channels = images.shape
count = num * dim * channels
images.setflags(write=1)
index = 1000
images = images.astype(numpy.float32)
for i in range(0,count,index):
if not (count - i) < index:
images[i:i+index] = numpy.multiply(images[i:i+index], 1.0 / 255.0)
else:
images[i:count] = numpy.multiply(images[i:count], 1.0 / 255.0)
#images = images.astype(numpy.float32)
#images = numpy.multiply(images, 1.0 / 255.0)
self._images = images
self._labels = labels
self._epochs_completed = 0
self._index_in_epoch = 0
@property
def images(self):
return self._images
@property
def labels(self):
return self._labels
@property
def num_examples(self):
return self._num_examples
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, fake_data=False, shuffle=True):
"""Return the next `batch_size` examples from this data set."""
if fake_data:
fake_image = [1] * 784
if self.one_hot:
fake_label = [1] + [0] * 9
else:
fake_label = 0
return [fake_image for _ in xrange(batch_size)], [
fake_label for _ in xrange(batch_size)
]
start = self._index_in_epoch
# Shuffle for the first epoch
if self._epochs_completed == 0 and start == 0 and shuffle:
perm0 = numpy.arange(self._num_examples)
numpy.random.shuffle(perm0)
self._images = self.images[perm0]
self._labels = self.labels[perm0]
# Go to the next epoch
if start + batch_size > self._num_examples:
# Finished epoch
self._epochs_completed += 1
# Get the rest examples in this epoch
rest_num_examples = self._num_examples - start
images_rest_part = self._images[start:self._num_examples]
labels_rest_part = self._labels[start:self._num_examples]
# Shuffle the data
if shuffle:
perm = numpy.arange(self._num_examples)
numpy.random.shuffle(perm)
self._images = self.images[perm]
self._labels = self.labels[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size - rest_num_examples
end = self._index_in_epoch
images_new_part = self._images[start:end]
labels_new_part = self._labels[start:end]
return numpy.concatenate((images_rest_part, images_new_part), axis=0) , numpy.concatenate((labels_rest_part, labels_new_part), axis=0)
else:
self._index_in_epoch += batch_size
end = self._index_in_epoch
return self._images[start:end], self._labels[start:end]
def read_data_sets(train_dir,
fake_data=False,
one_hot=False,
dtype=dtypes.float32,
reshape=True,
validation_size=24,
seed=None,
num_classes=10,
channels=1):
if fake_data:
def fake():
return DataSet(
[], [], fake_data=True, one_hot=one_hot, dtype=dtype, seed=seed)
train = fake()
validation = fake()
test = fake()
return base.Datasets(train=train, validation=validation, test=test)
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
SOURCE_URL + TRAIN_IMAGES)
with open(local_file, 'rb') as f:
train_images = extract_images(f, channels)
local_file = base.maybe_download(TRAIN_LABELS, train_dir,
SOURCE_URL + TRAIN_LABELS)
with open(local_file, 'rb') as f:
train_labels = extract_labels(f, one_hot=one_hot, num_classes=num_classes)
local_file = base.maybe_download(TEST_IMAGES, train_dir,
SOURCE_URL + TEST_IMAGES)
with open(local_file, 'rb') as f:
test_images = extract_images(f, channels)
local_file = base.maybe_download(TEST_LABELS, train_dir,
SOURCE_URL + TEST_LABELS)
with open(local_file, 'rb') as f:
test_labels = extract_labels(f, one_hot=one_hot, num_classes=num_classes)
if not 0 <= validation_size <= len(train_images):
raise ValueError(
'Validation size should be between 0 and {}. Received: {}.'
.format(len(train_images), validation_size))
validation_images = train_images[:validation_size]
validation_labels = train_labels[:validation_size]
train_images = train_images[validation_size:]
train_labels = train_labels[validation_size:]
options = dict(dtype=dtype, reshape=reshape, seed=seed)
train = DataSet(train_images, train_labels, **options)
validation = DataSet(validation_images, validation_labels, **options)
test = DataSet(test_images, test_labels, **options)
return base.Datasets(train=train, validation=validation, test=test)
def load_mnist(train_dir='MNIST-data'):
return read_data_sets(train_dir)
You have to change the 100 in the following line:
for i in range(10001):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob:0.5})
So the right code would be:
size_batch=50
for i in range(10001):
batch_xs, batch_ys = mnist.train.next_batch(size_batch)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob:0.5})
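Also worth noting: the OOM tensor shape [238305, 32, 28, 28] suggests the allocation happens in compute_accuracy, which feeds the entire test set in a single run. If shrinking the training batch is not enough, evaluating in chunks may help — a minimal sketch, assuming the same sess, prediction, xs and keep_prob defined above (it uses numpy for the comparison, so no new graph ops are created per call):
import numpy as np

def compute_accuracy_batched(v_xs, v_ys, eval_batch=1000):
    # Run the forward pass in chunks so the whole test set is never
    # materialized as one giant activation tensor on the GPU.
    correct = 0
    for i in range(0, len(v_xs), eval_batch):
        y_pre = sess.run(prediction, feed_dict={xs: v_xs[i:i + eval_batch],
                                                keep_prob: 1})
        correct += np.sum(np.argmax(y_pre, 1) == np.argmax(v_ys[i:i + eval_batch], 1))
    return float(correct) / len(v_xs)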
I have the following problem.
I am trying to train a 3D CNN in TensorFlow. I have separated the data into three data sets: train, validation, and test.
The main problem is that when I test the validation set after 5 epochs of training, the output of the model is nearly the same for the 5 images.
(This is the output of the last layer, without any softmax.)
2018-04-17 23:30:35.134318 Prediction: [[0.8185656 2.7571523 ]
[0.8200048 2.7590456 ]
[0.8185656 2.7571523 ]
[0.8200048 2.7590458 ]
[0.7751368 2.7532804 ]
[0.82061136 2.7588618 ]
[0.8130686 2.7821052 ]
[0.83537185 2.7514493 ]
[0.8200041 2.7590454 ]
[0.81701267 2.7519925 ]
[0.8424163 2.8674953 ]
[0.82000506 2.7590454 ]
[0.81999433 2.7590487 ]
[0.81701267 2.7519925 ]
However, if I do the same for the training set, I get a conventional prediction.
I have fully checked the data sets; both are correct and in the same conditions.
This is my code used to build the model and do the training:
class Cnn3DMRI(object):
def weight_variable(self, shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(self, shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv3d(self, x, W):
return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='SAME')
def maxpool3d(self, x):
# size of window movement of window
return tf.nn.max_pool3d(x, ksize=[1, 2, 2, 2, 1], strides=[1, 2, 2, 2, 1], padding='SAME')
def dense_to_one_hot(self, labels_dense, num_classes):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = np.arange(num_labels) * num_classes
labels_one_hot = np.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
def wrapper_image(self, full_image_set, full_label_set, last_batch=0, batch_size=5):
batch_img = full_image_set[last_batch:batch_size+last_batch, :, :, :]
batch_label = full_label_set[last_batch:batch_size+last_batch]
return batch_img, batch_label, batch_size+last_batch
def convolutional_neural_network(self, x, img_sz, n_slices):
weights = {
'W_conv1': self.weight_variable([3, 5, 5, 1, 32]),
'W_conv2': self.weight_variable([2, 5, 5, 32, 48]),
'W_fc': self.weight_variable(
[
int(
math.ceil(
n_slices / 8
) * math.ceil(
img_sz / 8
) * math.ceil(
img_sz / 8
) *48), 2048
]
),
'W_fc2': self.weight_variable([2048, 1024]),
'out': self.weight_variable([1024, 2])
}
biases = {
'b_conv1': self.bias_variable([32]),
'b_conv2': self.bias_variable([48]),
'b_fc': self.bias_variable([2048]),
'b_fc2': self.bias_variable([1024]),
'out': self.bias_variable([2])
}
self.x_im = tf.reshape(x, shape=[-1, n_slices, img_sz, img_sz, 1])
conv1 = tf.nn.relu(self.conv3d(self.x_im, weights['W_conv1']) + biases['b_conv1'])
conv1 = tf.Print(conv1,[conv1], 'The conv1: ')
conv1 =self.maxpool3d(conv1)
conv1 = tf.Print(conv1,[conv1], 'The max1: ')
conv2 = tf.nn.relu(self.conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv1 = tf.Print(conv2,[conv2], 'The conv2: ')
conv2 = tf.nn.max_pool3d(conv2, ksize=[1, 4, 4, 4, 1], strides=[1, 4, 4, 4, 1],
padding='SAME')
conv2 = tf.Print(conv2,[conv2], 'The max2: ')
fc = tf.reshape(conv2, [-1,int(math.ceil(n_slices/8)*math.ceil(img_sz/8)*math.ceil(
img_sz/8))*48])
fc = tf.Print(fc,[fc], 'The reshape: ')
fc2 = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc2 = tf.Print(fc2,[fc2], 'The fc: ')
dp1 = tf.nn.dropout(fc2, self.keep_prob)
fc3 = tf.nn.relu(tf.matmul(dp1, weights['W_fc2'])+biases['b_fc2'])
fc3 = tf.Print(fc3,[fc3], 'The fc2: ')
dp2 = tf.nn.dropout(fc3, self.keep_prob)
output = tf.matmul(dp2, weights['out'])+biases['out']
output = tf.Print(output,[output], 'The output: ')
return output
def test_validation_set(self, sess, data_validation, label_validation, valid_batch_size=60):
batch_img, batch_label, last_batch = self.wrapper_image(
data_validation, label_validation, self.last_valid_batch, valid_batch_size
)
batch_label = self.dense_to_one_hot(
np.array(batch_label, dtype=np.int),2
).astype(np.float32)
if last_batch+valid_batch_size < len(label_validation):
self.last_valid_batch = last_batch
else:
self.last_valid_batch = 0
pred, c, validation_accuracy = sess.run(
[self.prediction, self.cost, self.accuracy], feed_dict={
self.x: batch_img, self.y_: batch_label, self.keep_prob: 1.0
}
)
self.log("Prediction: "+str(pred))
self.log("Label: "+str(batch_label))
self.log("Validation accuracy: "+str(validation_accuracy))
self.log("Validation cost: "+str(c))
return validation_accuracy, c
def train_neural_network(self, data_img, labels, data_validation, label_validation,
batch_size, img_sz, n_slices, last_batch,
keep_rate, model_path):
self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y_,
logits=self.prediction))
optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
hm_epochs = 1000
saver = tf.train.Saver(tf.trainable_variables())
epoch_loss = 0
epoch_loss_mean = []
n_epoch = 0
learning_rate = 1e-4
self.last_valid_batch = 0
min_valid_cost = 0
all_valid_cost = []
model_path_train = 'model_train/my_model.ckpt'
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
if model_path:
pass
#saver.restore(sess, model_path_train)
while n_epoch < hm_epochs:
if len(data_img)>last_batch+batch_size:
with tf.device('/cpu:0'):
#batch_img, batch_label, last_batch = self.get_image(
# data_img, labels, last_batch, batch_size, img_sz, n_slices
#)
batch_img, batch_label, last_batch = self.wrapper_image(data_img, labels, last_batch, batch_size)
print "Batch label images: "+str(batch_label)
batch_label = self.dense_to_one_hot(np.array(batch_label, dtype=np.int),
2).astype(np.float32)
else:
with tf.device('/cpu:0'):
restbatch = last_batch + batch_size - len(data_img)
batch_img = np.concatenate((
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[0],
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[0]
))
batch_label = np.concatenate((
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[1],
self.wrapper_image(data_img, labels, last_batch, len(data_img) -
last_batch)[1]
))
batch_label = self.dense_to_one_hot(np.array(batch_label, dtype=np.int),
2).astype(
np.float32)
last_batch = restbatch
####### at the end of EACH EPOCH ###
epoch_loss_mean.append(epoch_loss)
print "epoch loss mean: "+str(epoch_loss_mean)
epoch_loss = 0
n_epoch += 1
print "n_epoch: "+str(n_epoch)
if model_path:
saver.save(sess, model_path_train)
if not n_epoch % 5:
valid_accuracy, valid_cost = self.test_validation_set(sess,data_validation,
label_validation, 60)
if valid_cost < min_valid_cost - 2:
min_valid_cost = valid_cost
if model_path:
saver.save(sess, model_path)
all_valid_cost.append(valid_cost)
print all_valid_cost
if self.last_valid_batch == 0:
self.shufle_data(data_validation, label_validation)
train_accuracy = self.accuracy.eval(
feed_dict={self.x: batch_img, self.y_: batch_label, self.keep_prob: 1.0})
print "trainning accuracy: " + str(train_accuracy)
self.shufle_data(data_img, labels)
_, c = sess.run(
[optimizer, self.cost], feed_dict={
self.x: batch_img, self.y_: batch_label, self.keep_prob: keep_rate,
self.learning_rate: learning_rate
}
)
print 'epoch_loss: '+str(c)
def main(self, data_dir, labels_dir, img_sz, n_slices, batch_size=5, last_batch=0, train=False,
model_path=None, keep_rate=0.5):
"""
Args:
data_dir (list): directories of the images to be tested
labels_dir (str): directory of the CSV file where the images are labeled; the index
column is number 2 and the labels header is 'Diag'.
img_sz: the spatial size the images will be transformed to, i.e. the size with
which the images will be trained. Width and height must be the same.
n_slices: the number of slices for the image to be trained
last_batch: the batch at which you want to start the training
train: boolean to set training (1) or testing (0)
model_path: the path where the model is saved; if there is no previous model, you can
set a path here to start a new one.
keep_rate: the keep probability of firing a node, by means of dropout
Returns:
"""
self.train = train
data_path_trainning, label_trainning, data_path_validation, label_validation, \
data_testing, label_testing = self.load_dataset(data_dir, labels_dir,)
data_trainning, label_trainning_final = self.load_image(data_path_trainning,
label_trainning, img_sz, n_slices
)
data_validation, label_validation_final = self.load_image(
data_path_validation, label_validation, img_sz, n_slices
)
self.x = tf.placeholder(tf.float32, shape=[None, n_slices, img_sz, img_sz]) #batch_size,
# image_Size
self.y_ = tf.placeholder(tf.float32, shape=[None, 3]) #batch_size, label_size
self.learning_rate = tf.placeholder(tf.float32)
self.keep_prob = tf.placeholder(tf.float32)
if train:
self.train_neural_network(data_trainning, label_trainning_final, data_validation,
label_validation_final, batch_size, img_sz, n_slices,
last_batch, keep_rate, model_path
)
I have already tried tf.set_random_seed(1), but no improvement is seen.
Does anyone have any idea, please?
Thanks so much.
EDITED 22/04/18:
The data to be classified are 3D images of 150x150x40 pixels in a two-class problem. I have a total of 400 images, approximately half of each class. I have separated the dataset into train (75%), validation (10%), and test (15%).
Edit 2:
I have simplified my model a bit; see above.
Also note that we have only 2 classes.
I have tried another check: I trained my model with only 20 images, to see if a cost of 0 is obtained.
result after 125 epochs:
2018-04-24 23:58:24.992421 epoch loss mean: [4549.9554141853, 1854.6537470817566, 817.4076923541704, 686.8368729054928, 687.7348744268759, 704.946801304817, 483.6952783479355, 260.2293045549304, 272.66821688037817, 116.57515235748815, 97.86094704543848, 90.43152131629176, 132.54018089070996, 69.62595339218387, 57.412255316681694, 79.66184640157735, 70.99515068903565, 55.75798599421978, 44.14403077028692, 38.901107819750905, 49.75594720244408, 52.6321079954505, 37.70595762133598, 42.07099115010351, 29.01994925737381, 28.365123450756073, 31.93120799213648, 43.9855432882905, 33.242121398448944, 36.57513061538339, 28.828659534454346, 29.847569406032562, 24.078316539525986, 31.630925316363573, 30.5430103354156, 26.18060240149498, 32.86780231446028, 25.42889341711998, 29.355055704712868, 26.269534677267075, 24.921810917556286, 27.15281054377556, 27.343381822109222, 24.293660208582878, 28.212179094552994, 25.07626649737358, 21.650991335511208, 25.7527906447649, 23.42476052045822, 28.350880563259125, 22.57907184958458, 21.601420983672142, 25.28128480911255, 25.550641894340515, 22.444457232952118, 27.660063683986664, 21.863914296031, 25.722180172801018, 24.00674758851528, 21.46472266316414, 26.599679857492447, 23.52132275700569, 26.1786640137434, 24.842691332101822, 25.263965144753456, 22.730938494205475, 22.787407517433167, 23.58866274356842, 25.351682364940643, 23.85272353887558, 23.884423837065697, 24.685379207134247, 22.55106496810913, 25.993630707263947, 21.967322662472725, 22.651918083429337, 21.91003155708313, 23.782021015882492, 21.567724645137787, 22.130879193544388, 21.33636975288391, 25.624440014362335, 23.26347705721855, 22.370914071798325, 22.614411562681198, 24.962509214878082, 22.121410965919495, 20.644148647785187, 24.472172617912292, 21.622991144657135, 21.719978988170624, 21.72349101305008, 21.729621797800064, 22.090826153755188, 21.44688707590103, 22.34817299246788, 22.93226248025894, 22.63547444343567, 22.1306095123291, 22.16277289390564, 22.83771103620529, 24.171751350164413, 22.025538682937622, 21.339059710502625, 22.169043481349945, 24.614955246448517, 22.83159503340721, 21.43451902270317, 21.54544973373413, 22.889380514621735, 24.168621599674225, 21.947510302066803, 22.30243694782257, 22.381454586982727, 22.50485634803772, 22.61657750606537, 22.288170099258423, 21.30070123076439, 22.489792048931122, 21.885000944137573, 21.343613982200623, 23.04211688041687, 24.00969059765339, 21.8588485121727, 22.199619591236115]
2018-04-24 23:58:24.992694 n_epoch: 125
The print output of each layer:
The conv1: [[[[[0.0981627107 0.100793235 0.0934509188]]]]...]
The max1: [[[[[0.102978 0.107030481 0.0977560952]]]]...]
The max2: [[[[[0 0 0.00116439909]]]]...]
The reshape: [[0 0 0.00116439909]...]
The fc: [[0.01167579 0.182256863 0.107154548]...]
The fc2: [[0.773868561 0.364259362 0]...]
The output: [[0.16590938 -0.255491495][0.16590938]...]
The conv1: [[[[[0.0981602222 0.100800745 0.0934513509]]]]...]
The max1: [[[[[0.102975294 0.107038349 0.0977560282]]]]...]
The max2: [[[[[0 0 0.000874094665]]]]...]
The reshape: [[0 0 0.000874094665]...]
The fc: [[0.0117974132 0.182980478 0.106876813]...]
The fc2: [[0.774896204 0.36372292 0]...]
The output: [[0.129838273 -0.210624188][0.129838273]...]
Shouldn't 125 epochs be enough to overfit 60 samples?
Any idea what is happening?
This is more of a comment that did not fit into the comment limit.
As I said before, I can't see anything obviously wrong. You might have to do some debugging. If the pre-softmax outputs are exactly the same, it is probably a bug somewhere and you can find it by finding the exact place where your presumably different inputs lead to the same layer outputs.
If pre-softmax outputs are close, but not exactly the same, most likely you have a classic case of over-fitting. You mentioned that you have just 300 training examples - that is extremely few to train the whole net on (without using some pre-trained weights). Your net just "memorized" the 300 training examples and does not generalize to the validation set at all.
EDIT 04/23/18:
So, the issue is not just in validation? I interpreted your "if i do the same for trainning set I get a conventional prediction." to mean that training images are classified just fine. If you get the same prediction for your training images, most likely the data or loss or prediction calculation is wrong. I did not spot anything and guess you will need to debug. You might find "eager execution" useful for this - https://www.tensorflow.org/get_started/eager. If you organize your model as in examples (https://github.com/tensorflow/tensorflow/tree/3f4662e7ca8724f760db4a5ea6e241c99e66e588/tensorflow/contrib/eager/python/examples), you should be able to use the same code with regular tensorflow graph execution.
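As a concrete starting point (assuming TensorFlow 1.7+, where tf.enable_eager_execution is available), eager mode is a one-liner that must run before any graph ops are built; after that, intermediate tensors can be inspected with ordinary prints instead of tf.Print:
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()  # call once, before building any ops (TF 1.7+)

# Ops now run immediately, so shapes and values can be printed directly.
x = tf.constant(np.random.rand(2, 40, 150, 150, 1), dtype=tf.float32)
w = tf.random_normal([3, 5, 5, 1, 32], stddev=0.1)
conv = tf.nn.conv3d(x, w, strides=[1, 1, 1, 1, 1], padding='SAME')
print(conv.shape)  # (2, 40, 150, 150, 32)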
The project I am running is C3D-tensorflow from GitHub. When I trained the network with train_c3d_ucf101.py, an error shows that:
ValueError: Cannot feed value of shape (10, 0) for Tensor u'Placeholder:0', which has shape '(10, 16, 112, 112, 3)'
Here is my environment: Ubuntu 16.04, TensorFlow 1.4, Python 2.7. I'm new to TensorFlow, so can you tell me how to locate Placeholder:0 and what I can do to solve this problem? Thank you in advance!
The train_c3d_ucf101.py:
"""Trains and Evaluates the MNIST network using a feed dictionary."""
# pylint: disable=missing-docstring
import os
import time
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import input_data
import c3d_model
import math
import numpy as np
# Basic model parameters as external flags.
flags = tf.app.flags
gpu_num = 1
#flags.DEFINE_float('learning_rate', 0.0, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 5000, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size.')
FLAGS = flags.FLAGS
MOVING_AVERAGE_DECAY = 0.9999
model_save_dir = './models'
def placeholder_inputs(batch_size):
"""Generate placeholder variables to represent the input tensors.
These placeholders are used as inputs by the rest of the model building
code and will be fed from the downloaded data in the .run() loop, below.
Args:
batch_size: The batch size will be baked into both placeholders.
Returns:
images_placeholder: Images placeholder.
labels_placeholder: Labels placeholder.
"""
# Note that the shapes of the placeholders match the shapes of the full
# image and label tensors, except the first dimension is now batch_size
# rather than the full size of the train or test data sets.
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
c3d_model.NUM_FRAMES_PER_CLIP,
c3d_model.CROP_SIZE,
c3d_model.CROP_SIZE,
c3d_model.CHANNELS))
labels_placeholder = tf.placeholder(tf.int64, shape=(batch_size))
return images_placeholder, labels_placeholder
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g, _ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def tower_loss(name_scope, logit, labels):
cross_entropy_mean = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=logit)
)
tf.summary.scalar(
name_scope + '_cross_entropy',
cross_entropy_mean
)
weight_decay_loss = tf.get_collection('weightdecay_losses')
tf.summary.scalar(name_scope + '_weight_decay_loss', tf.reduce_mean(weight_decay_loss) )
# Calculate the total loss for the current tower.
total_loss = cross_entropy_mean + weight_decay_loss
tf.summary.scalar(name_scope + '_total_loss', tf.reduce_mean(total_loss) )
return total_loss
def tower_acc(logit, labels):
correct_pred = tf.equal(tf.argmax(logit, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return accuracy
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
var = tf.get_variable(name, shape, initializer=initializer)
return var
def _variable_with_weight_decay(name, shape, wd):
var = _variable_on_cpu(name, shape, tf.contrib.layers.xavier_initializer())
if wd is not None:
weight_decay = tf.nn.l2_loss(var)*wd
tf.add_to_collection('weightdecay_losses', weight_decay)
return var
def run_training():
# Get the sets of images and labels for training, validation, and
# Tell TensorFlow that the model will be built into the default Graph.
# Create model directory
if not os.path.exists(model_save_dir):
os.makedirs(model_save_dir)
use_pretrained_model = True
model_filename = "./sports1m_finetuning_ucf101.model"
with tf.Graph().as_default():
global_step = tf.get_variable(
'global_step',
[],
initializer=tf.constant_initializer(0),
trainable=False
)
images_placeholder, labels_placeholder = placeholder_inputs(
FLAGS.batch_size * gpu_num
)
tower_grads1 = []
tower_grads2 = []
logits = []
opt_stable = tf.train.AdamOptimizer(1e-4)
opt_finetuning = tf.train.AdamOptimizer(1e-3)
with tf.variable_scope('var_name') as var_scope:
weights = {
'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.0005),
'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.0005),
'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.0005),
'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.0005),
'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.0005),
'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.0005),
'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.0005),
'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.0005),
'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.0005)
}
biases = {
'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
}
for gpu_index in range(0, gpu_num):
with tf.device('/gpu:%d' % gpu_index):
varlist2 = [ weights['out'],biases['out'] ]
varlist1 = list( set(weights.values() + biases.values()) - set(varlist2) )
logit = c3d_model.inference_c3d(
images_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size,:,:,:,:],
0.5,
FLAGS.batch_size,
weights,
biases
)
loss_name_scope = ('gpud_%d_loss' % gpu_index)
loss = tower_loss(
loss_name_scope,
logit,
labels_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size]
)
grads1 = opt_stable.compute_gradients(loss, varlist1)
grads2 = opt_finetuning.compute_gradients(loss, varlist2)
tower_grads1.append(grads1)
tower_grads2.append(grads2)
logits.append(logit)
logits = tf.concat(logits,0)
accuracy = tower_acc(logits, labels_placeholder)
tf.summary.scalar('accuracy', accuracy)
grads1 = average_gradients(tower_grads1)
grads2 = average_gradients(tower_grads2)
apply_gradient_op1 = opt_stable.apply_gradients(grads1)
apply_gradient_op2 = opt_finetuning.apply_gradients(grads2, global_step=global_step)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
train_op = tf.group(apply_gradient_op1, apply_gradient_op2, variables_averages_op)
null_op = tf.no_op()
# Create a saver for writing training checkpoints.
saver = tf.train.Saver(weights.values() + biases.values())
init = tf.global_variables_initializer()
# Create a session for running Ops on the Graph.
sess = tf.Session(
config=tf.ConfigProto(allow_soft_placement=True)
)
sess.run(init)
if os.path.isfile(model_filename) and use_pretrained_model:
saver.restore(sess, model_filename)
# Create summary writter
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('./visual_logs/train', sess.graph)
test_writer = tf.summary.FileWriter('./visual_logs/test', sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/train.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
sess.run(train_op, feed_dict={
images_placeholder: train_images,
labels_placeholder: train_labels
})
duration = time.time() - start_time
print('Step %d: %.3f sec' % (step, duration))
# Save a checkpoint and evaluate the model periodically.
if (step) % 10 == 0 or (step + 1) == FLAGS.max_steps:
saver.save(sess, os.path.join(model_save_dir, 'c3d_ucf_model'), global_step=step)
print('Training Data Eval:')
summary, acc = sess.run(
[merged, accuracy],
feed_dict={images_placeholder: train_images,
labels_placeholder: train_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
train_writer.add_summary(summary, step)
print('Validation Data Eval:')
val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
filename='list/test.list',
batch_size=FLAGS.batch_size * gpu_num,
num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
crop_size=c3d_model.CROP_SIZE,
shuffle=True
)
summary, acc = sess.run(
[merged, accuracy],
feed_dict={
images_placeholder: val_images,
labels_placeholder: val_labels
})
print ("accuracy: " + "{:.5f}".format(acc))
test_writer.add_summary(summary, step)
print("done")
def main(_):
run_training()
if __name__ == '__main__':
tf.app.run()
If you need to see any of the other files, please tell me and I will post them.
This is my Traceback:
I started learning TensorFlow a week ago and I am struggling a bit with restoring a saved model.
I am building a class for a CNN (based on Stanford course code) that is supposed to have a run and a test function (the first one trains the model, the second one makes predictions on another data set).
The run function works correctly, but I have a hard time restoring the model to make predictions in the test function (I get a shape problem there, and I guess it comes from the way I retrieve the operations).
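From the tutorials I have read, the usual restore pattern seems to look roughly like this (a minimal sketch of my understanding, not my actual code; the tensor names match the class below, including the variable_scope prefix on the logits):
import numpy as np
import tensorflow as tf

checkpoint = tf.train.latest_checkpoint("results/")  # assumed checkpoint directory
with tf.Session() as sess:
    # import_meta_graph rebuilds the graph structure; restore() loads the weights
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint))
    saver.restore(sess, checkpoint)
    graph = tf.get_default_graph()
    # tensors are fetched by their full name, including any variable_scope prefix
    x = graph.get_tensor_by_name("X_placeholder:0")
    dropout = graph.get_tensor_by_name("dropout:0")
    logits = graph.get_tensor_by_name("softmax_linear/logits:0")
    dummy = np.zeros((1, 784), dtype=np.float32)  # stand-in input batch
    print(sess.run(logits, feed_dict={x: dummy, dropout: 1.0}))
Here is my class: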
import os
import tensorflow as tf
from time import time
class Cnn:
def __init__(self, batch_size=128, skip_step=10, epochs=1, dropout_ratio=0.75):
self.batch_size = batch_size
self.skip_step = skip_step
self.epochs = epochs
self.dropout_ratio = dropout_ratio
self.x = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
self.images = tf.reshape(self.x, shape=[-1, 28, 28, 1])
self.y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")
self.dropout = tf.placeholder(tf.float32, name="dropout")
self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
self.layers = [self.images]
self.n_layers = 0
def convolution(self, kernel_size, n_output, strides, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name) as scope:
kernel_shape = [kernel_size[0], kernel_size[1], previous_layer.get_shape()[3], n_output]
kernel = tf.get_variable("kernels", kernel_shape,
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
convolution_2d = tf.nn.conv2d(previous_layer, kernel, strides=strides,
padding="SAME")
convolution = tf.nn.relu(convolution_2d + biases, name=scope.name)
self.layers.append(convolution)
self.n_layers += 1
return convolution
def pool(self, size, strides, scope_name, padding="SAME"):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
pool = tf.nn.max_pool(previous_layer, ksize=size, strides=strides,
padding=padding)
self.layers.append(pool)
self.n_layers += 1
return pool
def fully_connected(self, n_input, n_output, scope_name, relu=False, dropout=True):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
weights = tf.get_variable("weights", [n_input, n_output],
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
pool_reshaped = tf.reshape(previous_layer, [-1, n_input])
logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
fc = logits
if relu:
fc = tf.nn.relu(fc, name="relu")
if dropout:
fc = tf.nn.dropout(fc, self.dropout, name="dropout")
self.layers.append(fc)
self.n_layers += 1
if relu is False and dropout is False:
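# reuse the logits op created above instead of building a second op named "logits_1"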
self.logits = logits
self.predictions = tf.nn.softmax(logits, name="predictions")
return fc
def set_loss(self, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.name_scope(scope_name):
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=previous_layer, labels=self.y)
loss = tf.reduce_mean(entropy, name='loss')
self.layers.append(loss)
self.n_layers += 1
self.loss = loss
def set_optimizer(self):
previous_layer = self.layers[self.n_layers]
optimizer = tf.train.AdamOptimizer(0.001).minimize(previous_layer, global_step=self.global_step)
self.layers.append(optimizer)
self.n_layers += 1
self.optimizer = optimizer
def run(self, train_x, train_y):
sess = tf.Session()
with sess.as_default():
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
writer = tf.summary.FileWriter('./my_graph/mnist', sess.graph)
ckpt = tf.train.get_checkpoint_state(os.path.dirname('results/checkpoint'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
initial_step = self.global_step.eval()
start_time = time()
n_batches = int(train_x.shape[0] / self.batch_size)
total_correct_preds = 0
total_loss = 0.0
current_position = 0
for index in range(initial_step, int(n_batches * self.epochs)): # train the model n_epochs times
x_batch = train_x[current_position: current_position + self.batch_size, :]
y_batch = train_y[current_position: current_position + self.batch_size, :]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
_, loss_batch, logits_batch = sess.run([self.optimizer, self.loss, self.logits],
feed_dict=feed_dict)
print(logits_batch.shape)
total_loss += loss_batch
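# note: tf.nn.softmax / tf.equal / tf.reduce_sum below add new nodes to the
# graph on every iteration; computing the accuracy in numpy instead would
# keep the graph size constant during training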
preds = tf.nn.softmax(logits_batch)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
if (index + 1) % self.skip_step == 0:
print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / self.skip_step))
total_loss = 0.0
saver.save(sess, 'results/mnist-convnet', index)
current_position += self.batch_size
print("Optimization Finished!") # should be around 0.35 after 25 epochs
print("Total time: {0} seconds".format(time() - start_time))
print("Accuracy {0}".format(total_correct_preds / train_x.shape[0]))
def test(self, val_x, val_y):
checkpoint_file = tf.train.latest_checkpoint("results/")
graph = tf.Graph()
with graph.as_default():
sess = tf.Session()
with sess.as_default():
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
self.x = graph.get_operation_by_name("X_placeholder").outputs[0]
self.y = graph.get_operation_by_name("Y_placeholder").outputs[0]
self.dropout = graph.get_operation_by_name("dropout").outputs[0]
self.global_step = graph.get_operation_by_name("global_step").outputs[0]
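# ops created inside tf.variable_scope("softmax_linear") carry the scope
# prefix, so a plain "logits" is not found in the restored graph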
self.logits = graph.get_operation_by_name("softmax_linear/logits").outputs[0]
current_position = 0
n_batches = int(val_x.shape[0] / self.batch_size)
total_correct_preds = 0
for i in range(n_batches):
x_batch = val_x[current_position: current_position + self.batch_size]
y_batch = val_y[current_position: current_position + self.batch_size]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
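# run a single tensor rather than a one-element list, so logits_batch is the
# (batch_size, 10) array itself instead of a list wrapping it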
logits_batch = sess.run(self.logits, feed_dict=feed_dict)
preds = tf.nn.softmax(logits_batch)
# correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
"""
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
current_position += self.batch_size
print("Accuracy {0}".format(total_correct_preds / val_x.shape[0]))
"""
and I am testing it like this (sorry for the amount of code):
from cnn import Cnn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)
N_CLASSES = 10
# load datas
mnist = input_data.read_data_sets("mnist", one_hot=True)
train_x, train_y = mnist.train.images, mnist.train.labels
val_x, val_y = mnist.test.images, mnist.test.labels
model = Cnn(batch_size=128, skip_step=10, epochs=0.25, dropout_ratio=0.75)
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=32, scope_name="conv1")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool1")
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=64, scope_name="conv2")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool2")
input_features = 7 * 7 * 64
model.fully_connected(n_input=input_features, n_output=1024, scope_name="fc", relu=True, dropout=True)
model.fully_connected(n_input=1024, n_output=N_CLASSES, scope_name="softmax_linear", dropout=False)
model.set_loss(scope_name="loss")
model.set_optimizer()
model.run(train_x=train_x, train_y=train_y)
model.test(val_x, val_y)
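For the commented-out accuracy computation in test, I suspect a plain-numpy helper along these lines would work (an untested sketch; batch_accuracy_count is a hypothetical name, and it assumes one-hot labels and the (batch_size, 10) logits array returned by sess.run):
import numpy as np

def batch_accuracy_count(logits_batch, y_batch):
    # logits_batch: (batch_size, n_classes) array from sess.run(self.logits, ...)
    # y_batch: one-hot labels with the same shape
    predicted = np.argmax(logits_batch, axis=1)  # predicted class per example
    expected = np.argmax(y_batch, axis=1)        # one-hot labels -> class indices
    return np.sum(predicted == expected)         # number of correct predictions
Accumulating the returned counts over the batches and dividing by val_x.shape[0] at the end would give the overall accuracy, without adding any new ops to the restored graph.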
I would also appreciate any comments on a better way to structure a class like this (in particular the handling of the placeholders and the sessions).
Thanks!
Nicolas