ResourceExhaustedError when trying to train with MNIST

ResourceExhaustedError when trying to train with MNIST - python

I am getting a ResourceExhaustedError when trying to train on MNIST.
I found out that I can change the batch size to avoid the problem, but unfortunately I don't know where to do that in my code.
Traceback:
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[238305,32,28,28] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer, Variable/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[Node: add_3/_39 = _Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_51_add_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
My CNN model:
# Script header: imports, TensorFlow log-level setting, and dataset loading.
from __future__ import print_function
import tensorflow as tf
import os
# NOTE(review): presumably meant to silence TensorFlow's C++ INFO/WARNING logs;
# it is set after `import tensorflow` here - confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# number 1 to 10 data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# The stock MNIST reader above is replaced by the customised local copy below.
import mnist2.mnist as mn
# Labels are one-hot encoded with 9 classes, matching the [None, 9] label
# placeholder defined later in this script.
mnist = mn.read_data_sets('MNIST/', one_hot=True, num_classes=9)
def compute_accuracy(v_xs, v_ys, batch_size=500):
    """Return the classification accuracy of the network on (v_xs, v_ys).

    The examples are pushed through the network `batch_size` at a time, so
    the size of the activations allocated on the device is bounded by the
    batch size rather than by the size of the whole evaluation set.  The
    comparison with the labels is done in NumPy, so no new ops are added
    to the TensorFlow graph on each call.

    Args:
      v_xs: Indexable collection of input examples (sliced along axis 0).
      v_ys: One-hot labels aligned with `v_xs`.
      batch_size: Number of examples fed per `sess.run` call.

    Returns:
      Fraction of examples whose arg-max prediction equals the arg-max of
      the label, as a Python float (0.0 for an empty input).
    """
    # Local import: this script does not import numpy at module level.
    import numpy as np

    num_examples = len(v_xs)
    if num_examples == 0:
        return 0.0

    num_correct = 0
    for start in range(0, num_examples, batch_size):
        stop = start + batch_size
        # keep_prob=1 disables dropout while evaluating.
        y_pre = sess.run(prediction,
                         feed_dict={xs: v_xs[start:stop], keep_prob: 1})
        hits = np.argmax(y_pre, 1) == np.argmax(v_ys[start:stop], 1)
        num_correct += int(np.sum(hits))
    # float() keeps true division on Python 2 as well.
    return num_correct / float(num_examples)
def weight_variable(shape):
    """Create a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and stride 2, using SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784, 1])  # 28x28 pixels, 1 channel
ys = tf.placeholder(tf.float32, [None, 9])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])

## conv1 + pool1 layer ##
W_conv1 = weight_variable([5, 5, 1, 32])  # patch 5x5, in channel size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1)  # output size 14x14x32

## conv2 + pool2 layer ##
W_conv2 = weight_variable([5, 5, 32, 64])  # patch 5x5, in channel size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)  # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2)  # output size 7x7x64

## fc1 layer ##
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])  # [n_samples, 7,7,64] => [n_samples, 7*7*64]
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## output layer ##
W_fc2 = weight_variable([1024, 9])
b_fc2 = bias_variable([9])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
prediction = tf.nn.softmax(logits)

# Same cross-entropy as -sum(ys * log(softmax(logits))), but computed from the
# logits so that a saturated softmax output cannot produce log(0) = -inf/NaN.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

sess = tf.Session()
# tf.initialize_all_variables() is the deprecated spelling of this call.
sess.run(tf.global_variables_initializer())

for i in range(10001):
    # 100 is the training batch size.
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    if i % 50 == 0:
        # The complete test set is handed to compute_accuracy every 50 steps.
        print(compute_accuracy(
            mnist.test.images, mnist.test.labels))
My MNIST code:
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
# CVDF mirror of http://yann.lecun.com/exdb/mnist/
SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/'
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32).newbyteorder('>')
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def extract_images(f, channels=1):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth].

    Args:
      f: A file object that can be passed into a gzip reader.
      channels: Number of values stored per pixel; becomes the size of the
        last (depth) axis of the result.

    Returns:
      data: A 4D uint8 numpy array [index, y, x, depth].

    Raises:
      ValueError: If the bytestream does not start with 2051.
    """
    print('Extracting', f.name)
    with gzip.GzipFile(fileobj=f) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError('Invalid magic number %d in MNIST image file: %s' %
                             (magic, f.name))
        # _read32 returns numpy uint32 scalars; convert them to Python ints so
        # the byte-count product below cannot wrap around at 2**32.
        num_images = int(_read32(bytestream))
        rows = int(_read32(bytestream))
        cols = int(_read32(bytestream))
        buf = bytestream.read(rows * cols * num_images * channels)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        data = data.reshape(num_images, rows, cols, channels)
        return data
def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors.

    Args:
      labels_dense: Array-like of integer class ids; it is flattened, and
        each element yields one output row.
      num_classes: Width of each one-hot row.

    Returns:
      A float array of shape [num_labels, num_classes] with a single 1 per row.

    Raises:
      ValueError: If any label lies outside [0, num_classes).  Without this
        check an out-of-range label would set a cell belonging to a
        different row instead of failing.
    """
    labels_flat = numpy.asarray(labels_dense).ravel()
    num_labels = labels_flat.shape[0]
    if num_labels and (labels_flat.min() < 0 or labels_flat.max() >= num_classes):
        raise ValueError(
            'Labels must be in [0, %d); found values from %d to %d' %
            (num_classes, labels_flat.min(), labels_flat.max()))
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    # Row i gets its 1 in column labels_flat[i].
    labels_one_hot[numpy.arange(num_labels), labels_flat] = 1
    return labels_one_hot
def extract_labels(f, one_hot=False, num_classes=10):
    """Extract the labels into a 1D uint8 numpy array [index].

    Args:
      f: A file object that can be passed into a gzip reader.
      one_hot: When true, return the one-hot encoding produced by
        dense_to_one_hot instead of the raw labels.
      num_classes: Number of classes for the one hot encoding.

    Returns:
      labels: a 1D uint8 numpy array (or the one-hot array if `one_hot`).

    Raises:
      ValueError: If the bytestream doesn't start with 2049.
    """
    print('Extracting', f.name)
    with gzip.GzipFile(fileobj=f) as stream:
        header_magic = _read32(stream)
        if header_magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (header_magic, f.name))
        item_count = _read32(stream)
        raw_labels = stream.read(item_count)
        dense = numpy.frombuffer(raw_labels, dtype=numpy.uint8)
    # Everything needed has been read; encode (if requested) after closing.
    return dense_to_one_hot(dense, num_classes) if one_hot else dense
class DataSet(object):
    """Holds images and labels in memory and serves them in mini-batches."""

    def __init__(self,
                 images,
                 labels,
                 fake_data=False,
                 one_hot=False,
                 dtype=dtypes.float32,
                 reshape=True,
                 seed=None):
        """Construct a DataSet.

        one_hot arg is used only if fake_data is true.  `dtype` can be either
        `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
        `[0, 1]`.  Seed arg provides for convenient deterministic testing.

        Args:
          images: Array indexed [num examples, rows, columns, depth].
          labels: Array whose first dimension matches `images`.
          fake_data: When true, `images`/`labels` are stored untouched and
            the set reports 10000 examples.
          one_hot: Stored for use by `next_batch(fake_data=True)`.
          dtype: `uint8` or `float32` (see above).
          reshape: When true, flatten each image to [rows*columns, depth].
          seed: Seed forwarded to `random_seed.get_seed`.

        Raises:
          TypeError: If `dtype` is neither uint8 nor float32.
        """
        seed1, seed2 = random_seed.get_seed(seed)
        # If op level seed is not set, use whatever graph level seed is returned
        numpy.random.seed(seed1 if seed is None else seed2)
        dtype = dtypes.as_dtype(dtype).base_dtype
        if dtype not in (dtypes.uint8, dtypes.float32):
            raise TypeError('Invalid image dtype %r, expected uint8 or float32' %
                            dtype)
        if fake_data:
            self._num_examples = 10000
            self.one_hot = one_hot
        else:
            assert images.shape[0] == labels.shape[0], (
                'images.shape: %s labels.shape: %s' % (images.shape, labels.shape))
            self._num_examples = images.shape[0]
            # Convert shape from [num examples, rows, columns, depth]
            # to [num examples, rows*columns, depth]; depth is kept so that
            # multi-channel images are supported.
            if reshape:
                images = images.reshape(images.shape[0],
                                        images.shape[1] * images.shape[2],
                                        images.shape[3])
            if dtype == dtypes.float32:
                # Convert from [0, 255] -> [0.0, 1.0].  astype() returns a
                # fresh writable copy, so the read-only buffer produced by
                # numpy.frombuffer never has to be made writable, and the
                # in-place multiply avoids a second full-size temporary.
                images = images.astype(numpy.float32)
                numpy.multiply(images, 1.0 / 255.0, out=images)
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        """The stored images array."""
        return self._images

    @property
    def labels(self):
        """The stored labels array."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples in the set."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """Number of full passes over the data served so far."""
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False, shuffle=True):
        """Return the next `batch_size` examples from this data set.

        When the current epoch has fewer than `batch_size` examples left, the
        remainder is returned together with the first examples of the next
        (optionally reshuffled) epoch.

        Returns:
          A pair (images, labels) for the batch.
        """
        if fake_data:
            fake_image = [1] * 784
            if self.one_hot:
                fake_label = [1] + [0] * 9
            else:
                fake_label = 0
            return [fake_image for _ in xrange(batch_size)], [
                fake_label for _ in xrange(batch_size)
            ]
        start = self._index_in_epoch
        # Shuffle for the first epoch
        if self._epochs_completed == 0 and start == 0 and shuffle:
            perm0 = numpy.arange(self._num_examples)
            numpy.random.shuffle(perm0)
            self._images = self.images[perm0]
            self._labels = self.labels[perm0]
        # Go to the next epoch
        if start + batch_size > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Get the rest examples in this epoch
            rest_num_examples = self._num_examples - start
            images_rest_part = self._images[start:self._num_examples]
            labels_rest_part = self._labels[start:self._num_examples]
            # Shuffle the data
            if shuffle:
                perm = numpy.arange(self._num_examples)
                numpy.random.shuffle(perm)
                self._images = self.images[perm]
                self._labels = self.labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            images_new_part = self._images[start:end]
            labels_new_part = self._labels[start:end]
            return (numpy.concatenate((images_rest_part, images_new_part), axis=0),
                    numpy.concatenate((labels_rest_part, labels_new_part), axis=0))
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            return self._images[start:end], self._labels[start:end]
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=24,
                   seed=None,
                   num_classes=10,
                   channels=1):
    """Download (if needed) and load the train/validation/test DataSets.

    Args:
      train_dir: Directory handed to `base.maybe_download` for the archives.
      fake_data: When true, return three fake DataSets without reading files.
      one_hot: Forwarded to `extract_labels` (and to the fake DataSets).
      dtype, reshape, seed: Forwarded to every `DataSet`.
      validation_size: Number of leading training examples split off as the
        validation set.
      num_classes: Forwarded to `extract_labels`.
      channels: Forwarded to `extract_images`.

    Returns:
      A `base.Datasets` with `train`, `validation` and `test` members.

    Raises:
      ValueError: If `validation_size` is negative or exceeds the number of
        training images.
    """
    if fake_data:
        fake_train, fake_validation, fake_test = [
            DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype,
                    seed=seed)
            for _ in range(3)
        ]
        return base.Datasets(
            train=fake_train, validation=fake_validation, test=fake_test)

    def _load_images(archive_name):
        # Fetch one image archive and decode it.
        path = base.maybe_download(archive_name, train_dir,
                                   SOURCE_URL + archive_name)
        with open(path, 'rb') as handle:
            return extract_images(handle, channels)

    def _load_labels(archive_name):
        # Fetch one label archive and decode it.
        path = base.maybe_download(archive_name, train_dir,
                                   SOURCE_URL + archive_name)
        with open(path, 'rb') as handle:
            return extract_labels(handle, one_hot=one_hot,
                                  num_classes=num_classes)

    train_images = _load_images('train-images-idx3-ubyte.gz')
    train_labels = _load_labels('train-labels-idx1-ubyte.gz')
    test_images = _load_images('t10k-images-idx3-ubyte.gz')
    test_labels = _load_labels('t10k-labels-idx1-ubyte.gz')

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'
            .format(len(train_images), validation_size))

    # The first `validation_size` training examples become the validation set.
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    shared_kwargs = {'dtype': dtype, 'reshape': reshape, 'seed': seed}
    train = DataSet(train_images, train_labels, **shared_kwargs)
    validation = DataSet(validation_images, validation_labels, **shared_kwargs)
    test = DataSet(test_images, test_labels, **shared_kwargs)
    return base.Datasets(train=train, validation=validation, test=test)
def load_mnist(train_dir='MNIST-data'):
    """Load the data sets from `train_dir` with read_data_sets' default options."""
    datasets = read_data_sets(train_dir)
    return datasets

The batch size is the argument passed to next_batch, so you have to change the 100 in the following lines:
# Training loop quoted from the question; 100 is the batch size requested per step.
for i in range(10001):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob:0.5})
So the corrected code would be:
# Number of training examples requested from next_batch on every step.
size_batch = 50
for i in range(10001):
    batch_xs, batch_ys = mnist.train.next_batch(size_batch)
    train_feed = {xs: batch_xs, ys: batch_ys, keep_prob: 0.5}
    sess.run(train_step, feed_dict=train_feed)

Related

Loading the CNN model and predict the CSV file

I'm learning the basic CNN model using TensorFlow. After training my model, I want to load it and use it to predict a handwritten digit image (stored as a CSV file).
Here is my CNN model:
import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)
class CNNLogisticClassification:
    """LeNet-5 style CNN classifier that owns its own TensorFlow graph and session.

    The graph contains two copies of the network that share the same
    variables: a training copy (with dropout) and an inference copy
    (without dropout) used by `predict` and `evaluate`.
    """

    def __init__(self, shape_picture, n_labels,
                 learning_rate=0.5, dropout_ratio=0.5, alpha=0.0):
        """Build the graph and open a session on it.

        Args:
          shape_picture: Shape of one input picture, e.g. [28, 28, 1].
          n_labels: Number of output classes.
          learning_rate: Step size for gradient descent.
          dropout_ratio: Drop probability used while training; either one
            number (applied to both dropout layers) or a pair
            (after-flatten, after-fc6).
          alpha: Weight of the L2 regularisation term in the loss.
        """
        self.shape_picture = shape_picture
        self.n_labels = n_labels
        self.weights = None
        self.biases = None
        self.graph = tf.Graph()  # initialize new graph
        self.build(learning_rate, dropout_ratio, alpha)  # building graph
        self.sess = tf.Session(graph=self.graph)  # create session by the graph

    def build(self, learning_rate, dropout_ratio, alpha):
        """Create placeholders, both network copies, the loss, train op and saver."""
        # list() lets shape_picture be given as a tuple as well as a list.
        picture_shape = [None] + list(self.shape_picture)
        with self.graph.as_default():
            ### Input
            self.train_pictures = tf.placeholder(tf.float32,
                                                 shape=picture_shape, name="Input")
            self.train_labels = tf.placeholder(tf.int32,
                                               shape=(None, self.n_labels), name="Output")
            ### Optimization
            # build neural network structure and get their predictions and loss
            self.y_, self.original_loss = self.structure(pictures=self.train_pictures,
                                                         labels=self.train_labels,
                                                         dropout_ratio=dropout_ratio,
                                                         train=True, )
            # regularization loss
            self.regularization = \
                tf.reduce_sum([tf.nn.l2_loss(w) for w in self.weights.values()]) \
                / tf.reduce_sum([tf.size(w, out_type=tf.float32) for w in self.weights.values()])
            # total loss
            self.loss = self.original_loss + alpha * self.regularization
            # define training operation
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            self.train_op = optimizer.minimize(self.loss)
            ### Prediction
            # NOTE: "Input"/"Output" are already taken by the training
            # placeholders above, so TensorFlow uniquifies these two op names
            # (to "Input_1"/"Output_1").  The names are left as they were so
            # that existing checkpoints keep matching.
            self.new_pictures = tf.placeholder(tf.float32,
                                               shape=picture_shape, name="Input")
            self.new_labels = tf.placeholder(tf.int32,
                                             shape=(None, self.n_labels), name="Output")
            self.new_y_, self.new_original_loss = self.structure(pictures=self.new_pictures,
                                                                 labels=self.new_labels)
            self.new_loss = self.new_original_loss + alpha * self.regularization
            # Explicitly named alias of the inference-time softmax output, so
            # a graph restored from the saved meta file can look the
            # prediction tensor up by a stable name ("Prediction:0").
            self.prediction = tf.identity(self.new_y_, name="Prediction")
            ### Initialization
            self.init_op = tf.global_variables_initializer()
            ### save model
            self.saver = tf.train.Saver()

    @staticmethod
    def _dropout_pair(dropout_ratio):
        """Return (after-flatten ratio, after-fc6 ratio) from a scalar or a pair."""
        if np.ndim(dropout_ratio) == 0:
            # A single number applies to both dropout layers; without this the
            # constructor's scalar default would fail on dropout_ratio[0].
            return dropout_ratio, dropout_ratio
        return dropout_ratio[0], dropout_ratio[1]

    def structure(self, pictures, labels, dropout_ratio=None, train=False):
        """Wire one copy of the network and return (softmax output, mean loss).

        The variables are created on the first call and reused afterwards.
        Dropout is only inserted when `train` is true.
        """
        ### Variable
        ## LeNet5 Architecture(http://yann.lecun.com/exdb/lenet/)
        # input:(batch,28,28,1) => conv1[5x5,6] => (batch,24,24,6)
        # pool2 => (batch,12,12,6) => conv3[5x5,16] => (batch,8,8,16)
        # pool4 => flatten5 => (batch,4x4x16) => fc6 => (batch,120)
        # (batch,120) => fc7 => (batch,84)
        # (batch,84) => fc8 => (batch,10) => softmax
        if (not self.weights) and (not self.biases):
            self.weights = {
                'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6),
                                                         stddev=0.1)),
                'conv3': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16),
                                                         stddev=0.1)),
                'fc6': tf.Variable(tf.truncated_normal(shape=(4*4*16, 120),
                                                       stddev=0.1)),
                'fc7': tf.Variable(tf.truncated_normal(shape=(120, 84),
                                                       stddev=0.1)),
                'fc8': tf.Variable(tf.truncated_normal(shape=(84, self.n_labels),
                                                       stddev=0.1)),
            }
            self.biases = {
                'conv1': tf.Variable(tf.zeros(shape=(6))),
                'conv3': tf.Variable(tf.zeros(shape=(16))),
                'fc6': tf.Variable(tf.zeros(shape=(120))),
                'fc7': tf.Variable(tf.zeros(shape=(84))),
                'fc8': tf.Variable(tf.zeros(shape=(self.n_labels))),
            }
        if train:
            drop_flatten, drop_fc6 = self._dropout_pair(dropout_ratio)
        ### Structure
        conv1 = self.get_conv_2d_layer(pictures,
                                       self.weights['conv1'], self.biases['conv1'],
                                       activation=tf.nn.relu)
        pool2 = tf.nn.max_pool(conv1,
                               ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        conv3 = self.get_conv_2d_layer(pool2,
                                       self.weights['conv3'], self.biases['conv3'],
                                       activation=tf.nn.relu)
        pool4 = tf.nn.max_pool(conv3,
                               ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        flatten5 = self.get_flatten_layer(pool4)
        if train:
            flatten5 = tf.nn.dropout(flatten5, keep_prob=1-drop_flatten)
        fc6 = self.get_dense_layer(flatten5,
                                   self.weights['fc6'], self.biases['fc6'],
                                   activation=tf.nn.relu)
        if train:
            fc6 = tf.nn.dropout(fc6, keep_prob=1-drop_fc6)
        fc7 = self.get_dense_layer(fc6,
                                   self.weights['fc7'], self.biases['fc7'],
                                   activation=tf.nn.relu)
        logits = self.get_dense_layer(fc7, self.weights['fc8'], self.biases['fc8'])
        y_ = tf.nn.softmax(logits)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                    logits=logits))
        return (y_, loss)

    def get_dense_layer(self, input_layer, weight, bias, activation=None):
        """Return activation(input_layer @ weight + bias); no activation if None."""
        x = tf.add(tf.matmul(input_layer, weight), bias)
        if activation:
            x = activation(x)
        return x

    def get_conv_2d_layer(self, input_layer,
                          weight, bias,
                          strides=(1, 1), padding='VALID', activation=None):
        """Return activation(conv2d(input_layer, weight) + bias); no activation if None."""
        x = tf.add(
            tf.nn.conv2d(input_layer,
                         weight,
                         [1, strides[0], strides[1], 1],
                         padding=padding), bias)
        if activation:
            x = activation(x)
        return x

    def get_flatten_layer(self, input_layer):
        """Reshape `input_layer` to [-1, product of its non-batch dimensions]."""
        shape = input_layer.get_shape().as_list()
        n = 1
        for s in shape[1:]:
            n *= s
        x = tf.reshape(input_layer, [-1, n])
        return x

    def fit(self, X, y, epochs=10,
            validation_data=None, test_data=None, batch_size=None):
        """Train with mini-batch gradient descent and print per-epoch metrics.

        Args:
          X, y: Training pictures and one-hot labels.
          epochs: Number of passes over the training data.
          validation_data: Optional (X, y) pair evaluated after every epoch.
          test_data: Optional (X, y) pair evaluated once after training.
          batch_size: Mini-batch size; the whole training set when falsy.
        """
        X = self._check_array(X)
        y = self._check_array(y)
        N = X.shape[0]
        random.seed(9000)
        if not batch_size:
            batch_size = N
        self.sess.run(self.init_op)
        for epoch in range(epochs):
            print('Epoch %2d/%2d: ' % (epoch+1, epochs))
            # mini-batch gradient descent
            index = [i for i in range(N)]
            random.shuffle(index)
            while len(index) > 0:
                index_size = len(index)
                batch_index = [index.pop() for _ in range(min(batch_size, index_size))]
                feed_dict = {
                    self.train_pictures: X[batch_index, :],
                    self.train_labels: y[batch_index],
                }
                _, loss = self.sess.run([self.train_op, self.loss],
                                        feed_dict=feed_dict)
                print('[%d/%d] loss = %.4f ' % (N-len(index), N, loss), end='\r')
            # evaluate at the end of this epoch
            y_ = self.predict(X)
            train_loss = self.evaluate(X, y)
            train_acc = self.accuracy(y_, y)
            msg = '[%d/%d] loss = %8.4f, acc = %3.2f%%' % (N, N, train_loss, train_acc*100)
            if validation_data:
                val_loss = self.evaluate(validation_data[0], validation_data[1])
                val_acc = self.accuracy(self.predict(validation_data[0]), validation_data[1])
                msg += ', val_loss = %8.4f, val_acc = %3.2f%%' % (val_loss, val_acc*100)
            print(msg)
        if test_data:
            test_acc = self.accuracy(self.predict(test_data[0]), test_data[1])
            print('test_acc = %3.2f%%' % (test_acc*100))

    def accuracy(self, predictions, labels):
        """Return the fraction of rows whose arg-max matches between the two arrays."""
        return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/predictions.shape[0])

    def predict(self, X):
        """Run the inference copy of the network on `X` and return its softmax output."""
        X = self._check_array(X)
        return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})

    def evaluate(self, X, y):
        """Return the inference-copy loss (including regularisation) on (X, y)."""
        X = self._check_array(X)
        y = self._check_array(y)
        return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X,
                                                       self.new_labels: y})

    def _check_array(self, ndarray):
        """Convert to a numpy array, promoting a 1-D input to a single-row 2-D array."""
        ndarray = np.array(ndarray)
        if len(ndarray.shape) == 1:
            ndarray = np.reshape(ndarray, (1, ndarray.shape[0]))
        return ndarray
if __name__ == '__main__':
    # Load MNIST, train the CNN and write the trained model to disk.
    print('Extract MNIST Dataset ...')
    mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
    train_data, valid_data, test_data = mnist.train, mnist.validation, mnist.test

    # Every split is reshaped to [batch, 28, 28, 1] before it is fed in.
    picture_batch_shape = [-1, 28, 28, 1]
    train_img = np.reshape(train_data.images, picture_batch_shape)
    valid_img = np.reshape(valid_data.images, picture_batch_shape)
    test_img = np.reshape(test_data.images, picture_batch_shape)

    model_options = dict(
        shape_picture=[28, 28, 1],
        n_labels=10,
        learning_rate=0.07,
        dropout_ratio=[0.2, 0.1],
        alpha=0.1,
    )
    model = CNNLogisticClassification(**model_options)

    fit_options = dict(
        X=train_img,
        y=train_data.labels,
        epochs=10,
        validation_data=(valid_img, valid_data.labels),
        test_data=(test_img, test_data.labels),
        batch_size=32,
    )
    model.fit(**fit_options)

    # Despite its name, `saver` holds the value returned by Saver.save(),
    # which is printed below as the save path.
    saver = model.saver.save(model.sess, "test_model")
    print("Model saved in path: %s" % saver)
And I create another py file to load my model:
# First attempt at reloading the trained model; this is the version that
# produces the AttributeError quoted below.
import tensorflow as tf
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
# The same meta graph is imported a second time here.
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
# NOTE(review): running the variable initializer after restore() looks like it
# would overwrite the values that were just restored - confirm and drop it.
sess.run(tf.global_variables_initializer())
# `saver` is the object returned by tf.train.import_meta_graph above; the
# reported error says it has no predict attribute.
# NOTE(review): in this non-raw string literal '\n' and '\3' are escape
# sequences, so the value is not the intended Windows path.
saver.predict('D:\python\number_data\3.csv')
This is the error I'm getting:
AttributeError: 'Saver' object has no attribute 'predict'
How do I fix it and let the trained model predict my CSV file?
Thanks in advance for your help!
Edit:
I changed my second .py file as follows:
# Second attempt: read one CSV row, scale it, and feed it to the restored graph.
# This is the version that produces the TypeError quoted below.
import numpy as np
import tensorflow as tf
import pandas as pd
# .values gives the CSV contents as a numpy array.
X=pd.read_csv('D:/PYTHON/cnn_data/7.csv', index_col=None, header=None).values
# Scale the pixel values by 1/255.
X1=X/255
# X3 is a tf.Tensor; it is later used as a feed value, which the reported
# TypeError rejects (numpy arrays such as X1 are listed as acceptable).
X3=tf.convert_to_tensor(
X1,
dtype=None,
dtype_hint=None,
name=None
)
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
# The same meta graph is imported a second time here.
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
graph=tf.get_default_graph()
# Per the error message, "Input:0" has shape (?, 28, 28, 1) while the fed
# value has shape (1, 784).
xs0=graph.get_tensor_by_name("Input:0")
# NOTE(review): in the training script "Output" is the name given to the
# label placeholders, not to the softmax output - confirm this is the
# tensor that should be fetched.
prediction=graph.get_tensor_by_name("Output:0")
# The value returned by sess.run is discarded ...
sess.run(prediction,feed_dict={xs0:X3})
# ... so this prints the tensor object itself, not a computed prediction.
print(prediction)
I am only trying to predict a single digit image (a CSV file with one row). I converted it to a tensor and named my two placeholders "Input" and "Output", but I get another error:
TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor("Const:0", shape=(1, 784), dtype=float64) which was passed to
the feed with key Tensor("Input:0", shape=(?, 28, 28, 1), dtype=float32).
>

First of all, the obvious error here is that you are calling a method that doesn't exist: the Saver object has no predict method.
Second, if you want TensorFlow to make predictions, you need to provide it with input in a form TensorFlow accepts, and a CSV file is not one of them.
All you need to do is transform your CSV inputs into tensors, with a function like this, for instance:
# Raw string: in a normal literal '\n' would become a newline and '\3' an
# octal escape, so the value would not be the intended Windows path.
filename = r'D:\python\number_data\3.csv'


def csv_to_tensor(filename):
    """Sketch only: turn the rows of the CSV file at `filename` into model inputs.

    The body is deliberately left unimplemented because the conversion
    depends on the layout of the CSV data.
    """
    ...
    return tensors
I cannot tell you how to implement the function exactly since I don't know the exact format of your data, but I am assuming that each row in your file is an input. So you most likely just need to loop through the lines in your file and convert each line to a tensor, which can then be used by a Tensorflow model.

C3D-TF: ValueError:Cannot feed value of shape (10,0) for Tensor u 'Placeholder:0', which has shape '(10,16,112,112,3)'

The project I am running is C3D-tensorflow from GitHub. When I trained the network with train_c3d_ucf101.py, I got this error:
ValueError:Cannot feed value of shape (10,0) for Tensor u
'Placeholder:0', which has shape '(10,16,112,112,3)'
Here is my environment: Ubuntu 16.04, TensorFlow 1.4, Python 2.7. I'm new to TensorFlow, so can you tell me how to locate Placeholder:0 and what I can do to solve this problem? Thank you in advance!
The train_c3d_ucf101.py:
"""Trains and Evaluates the MNIST network using a feed dictionary."""
# pylint: disable=missing-docstring
import os
import time
import numpy
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import input_data
import c3d_model
import math
import numpy as np
# Basic model parameters as external flags.
flags = tf.app.flags
# Number of GPU towers built by run_training; also multiplies the batch size
# used for the placeholders and for each read_clip_and_label call.
gpu_num = 1
#flags.DEFINE_float('learning_rate', 0.0, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 5000, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size.')
FLAGS = flags.FLAGS
# Decay passed to tf.train.ExponentialMovingAverage in run_training.
MOVING_AVERAGE_DECAY = 0.9999
# Directory in which run_training writes its checkpoints.
model_save_dir = './models'
def placeholder_inputs(batch_size):
    """Create the image and label placeholders that the training loop feeds.

    Both placeholders have `batch_size` baked in as their first dimension.

    Args:
      batch_size: Number of clips per fed batch.

    Returns:
      images_placeholder: float32 placeholder of shape (batch_size,
        NUM_FRAMES_PER_CLIP, CROP_SIZE, CROP_SIZE, CHANNELS), with the
        sizes taken from c3d_model.
      labels_placeholder: int64 placeholder with one entry per clip.
    """
    clip_shape = (
        batch_size,
        c3d_model.NUM_FRAMES_PER_CLIP,
        c3d_model.CROP_SIZE,
        c3d_model.CROP_SIZE,
        c3d_model.CHANNELS,
    )
    images_placeholder = tf.placeholder(tf.float32, shape=clip_shape)
    # One label per clip: a 1-D shape of length batch_size.
    labels_placeholder = tf.placeholder(tf.int64, shape=(batch_size,))
    return images_placeholder, labels_placeholder
def average_gradients(tower_grads):
    """Average each variable's gradient over all towers.

    Args:
      tower_grads: One list of (gradient, variable) pairs per tower, with
        the variables in the same order in every list.

    Returns:
      A list of (mean gradient, variable) pairs; the variable is taken from
      the first tower.
    """
    averaged = []
    for per_variable in zip(*tower_grads):
        # Give every tower's gradient a leading axis, join them, then average
        # over that axis.
        stacked = tf.concat(
            [tf.expand_dims(tower_grad, 0) for tower_grad, _ in per_variable], 0)
        mean_grad = tf.reduce_mean(stacked, 0)
        shared_var = per_variable[0][1]
        averaged.append((mean_grad, shared_var))
    return averaged
def tower_loss(name_scope, logit, labels):
    """Build one tower's loss and register its scalar summaries.

    Args:
      name_scope: Prefix for the names of the three summaries.
      logit: Logits produced by the tower.
      labels: Integer class labels for the same examples.

    Returns:
      The mean cross-entropy plus the collected weight-decay terms.
    """
    per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logit)
    cross_entropy_mean = tf.reduce_mean(per_example_xent)
    tf.summary.scalar(name_scope + '_cross_entropy', cross_entropy_mean)

    decay_terms = tf.get_collection('weightdecay_losses')
    tf.summary.scalar(name_scope + '_weight_decay_loss',
                      tf.reduce_mean(decay_terms))

    # Calculate the total loss for the current tower.
    # NOTE(review): decay_terms is the list returned by tf.get_collection, so
    # this addition presumably broadcasts the scalar cross-entropy across one
    # element per collected term rather than producing a single scalar -
    # confirm that this is intended.
    total_loss = cross_entropy_mean + decay_terms
    tf.summary.scalar(name_scope + '_total_loss', tf.reduce_mean(total_loss))
    return total_loss
def tower_acc(logit, labels):
    """Return the fraction of examples whose arg-max logit equals the label."""
    predicted_class = tf.argmax(logit, 1)
    is_hit = tf.cast(tf.equal(predicted_class, labels), tf.float32)
    return tf.reduce_mean(is_hit)
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch) the variable `name` under the '/cpu:0' device scope."""
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer)
def _variable_with_weight_decay(name, shape, wd):
    """Create a Xavier-initialised CPU variable and register its L2 penalty.

    Args:
      name: Variable name.
      shape: Variable shape.
      wd: Multiplier for the variable's L2 loss; when it is None no penalty
        is added to the 'weightdecay_losses' collection.

    Returns:
      The created variable.
    """
    var = _variable_on_cpu(name, shape, tf.contrib.layers.xavier_initializer())
    if wd is None:
        return var
    tf.add_to_collection('weightdecay_losses', tf.nn.l2_loss(var)*wd)
    return var
def run_training():
    """Build the C3D training graph and run the training loop.

    The function creates `model_save_dir` if it is missing, builds one
    tower per GPU (`gpu_num`), restores the weights from `model_filename`
    when that file exists, and then runs `FLAGS.max_steps` training steps.
    Every 10 steps (and on the last step) it saves a checkpoint and writes
    accuracy summaries for the current training batch and for one batch
    read from 'list/test.list'.
    """
    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    use_pretrained_model = True
    model_filename = "./sports1m_finetuning_ucf101.model"

    # Tell TensorFlow that the model will be built into a fresh default Graph.
    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step',
            [],
            initializer=tf.constant_initializer(0),
            trainable=False
        )
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size * gpu_num
        )
        tower_grads1 = []
        tower_grads2 = []
        logits = []
        # Two optimizers: a lower rate for every layer except the output
        # layer, a higher one for the output layer (see varlist1/varlist2).
        opt_stable = tf.train.AdamOptimizer(1e-4)
        opt_finetuning = tf.train.AdamOptimizer(1e-3)
        with tf.variable_scope('var_name') as var_scope:
            weights = {
                'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.0005),
                'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.0005),
                'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.0005),
                'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.0005),
                'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.0005),
                'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.0005),
                'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.0005),
                'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.0005),
                'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
                'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
                'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.0005)
            }
            biases = {
                'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
                'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
                'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
                'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
                'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
                'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
                'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
                'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
                'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
                'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
                'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
            }
        # dict.values() returns a list on Python 2 but a view on Python 3, and
        # views cannot be concatenated with "+"; list() works on both.
        model_variables = list(weights.values()) + list(biases.values())
        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                # varlist2: output layer; varlist1: every other variable.
                varlist2 = [weights['out'], biases['out']]
                varlist1 = list(set(model_variables) - set(varlist2))
                logit = c3d_model.inference_c3d(
                    images_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size, :, :, :, :],
                    0.5,
                    FLAGS.batch_size,
                    weights,
                    biases
                )
                loss_name_scope = ('gpud_%d_loss' % gpu_index)
                loss = tower_loss(
                    loss_name_scope,
                    logit,
                    labels_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size]
                )
                grads1 = opt_stable.compute_gradients(loss, varlist1)
                grads2 = opt_finetuning.compute_gradients(loss, varlist2)
                tower_grads1.append(grads1)
                tower_grads2.append(grads2)
                logits.append(logit)
        logits = tf.concat(logits, 0)
        accuracy = tower_acc(logits, labels_placeholder)
        tf.summary.scalar('accuracy', accuracy)
        grads1 = average_gradients(tower_grads1)
        grads2 = average_gradients(tower_grads2)
        apply_gradient_op1 = opt_stable.apply_gradients(grads1)
        # Only this op advances global_step.
        apply_gradient_op2 = opt_finetuning.apply_gradients(grads2, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op1, apply_gradient_op2, variables_averages_op)
        null_op = tf.no_op()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(model_variables)
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True)
        )
        sess.run(init)
        if os.path.isfile(model_filename) and use_pretrained_model:
            saver.restore(sess, model_filename)

        # Create summary writer
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter('./visual_logs/train', sess.graph)
        test_writer = tf.summary.FileWriter('./visual_logs/test', sess.graph)
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
                filename='list/train.list',
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                crop_size=c3d_model.CROP_SIZE,
                shuffle=True
            )
            sess.run(train_op, feed_dict={
                images_placeholder: train_images,
                labels_placeholder: train_labels
            })
            duration = time.time() - start_time
            print('Step %d: %.3f sec' % (step, duration))

            # Save a checkpoint and evaluate the model periodically.
            if (step) % 10 == 0 or (step + 1) == FLAGS.max_steps:
                saver.save(sess, os.path.join(model_save_dir, 'c3d_ucf_model'), global_step=step)
                print('Training Data Eval:')
                summary, acc = sess.run(
                    [merged, accuracy],
                    feed_dict={images_placeholder: train_images,
                               labels_placeholder: train_labels
                               })
                print("accuracy: " + "{:.5f}".format(acc))
                train_writer.add_summary(summary, step)
                print('Validation Data Eval:')
                val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
                    filename='list/test.list',
                    batch_size=FLAGS.batch_size * gpu_num,
                    num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                    crop_size=c3d_model.CROP_SIZE,
                    shuffle=True
                )
                summary, acc = sess.run(
                    [merged, accuracy],
                    feed_dict={
                        images_placeholder: val_images,
                        labels_placeholder: val_labels
                    })
                print("accuracy: " + "{:.5f}".format(acc))
                test_writer.add_summary(summary, step)
    print("done")
def main(_):
    """Entry point handed to ``tf.app.run``; the argument is ignored."""
    run_training()


if __name__ == '__main__':
    # Let TensorFlow's app runner invoke main().
    tf.app.run()
If you want to check other files, please tell me and I will show you.
This is my Traceback:

I use a CNN to classify MNIST with TensorFlow, reading the dataset from TFRecords, but I only get 10% accuracy

I want to classify the MNIST dataset in my own way with TensorFlow.
First, I convert the dataset into a TFRecords file.
Then, I read this file with tf.TFRecordReader.
Finally, I train on the dataset.
Existing problem: the code doesn't have any syntax errors, but I only get 10% accuracy on the test data.
train.py:
import tensorflow as tf
import Net
import os
import numpy as np
import datetime
import time
import tfrecords as rd
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 10000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "Model/"
MODEL_NAME = "model"
def train():
    """Build the training graph and run the queue-fed training loop.

    Batches are dequeued from ``./tfrecords/train.tfrecords`` and fed back in
    through the ``x`` / ``y_`` placeholders.  Every 1000 steps the batch loss
    is printed and a checkpoint is written under MODEL_SAVE_PATH.
    """
    input_shape = [BATCH_SIZE, Net.IMAGE_SIZE, Net.IMAGE_SIZE, Net.NUM_CHANNELS]
    x = tf.placeholder(tf.float32, input_shape, name='x-input')
    y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = Net.inference(x, True, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Shadow (moving-average) copies of every trainable variable.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Labels arrive one-hot; the sparse loss needs class indices.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        55000 / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # One op that triggers both the gradient step and the averages update.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    img, one_hot = rd.read_and_decode("./tfrecords/train.tfrecords")
    img_batch, label_batch = tf.train.shuffle_batch(
        tensors=[img, one_hot],
        batch_size=BATCH_SIZE,
        capacity=10000 + 3 * 100,
        min_after_dequeue=10000)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        try:
            for steps in range(1, TRAINING_STEPS + 1):
                if coord.should_stop():
                    break
                xs, ys = sess.run([img_batch, label_batch])
                _, loss_value, step = sess.run(
                    [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
                if steps % 1000 == 0:
                    print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                    saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
        except tf.errors.OutOfRangeError:
            print("Done training after reading all data")
        finally:
            coord.request_stop()
        coord.join(threads)
        sess.close()
def main(argv=None):
    """Write the training TFRecords file, train the model and print timestamps.

    Args:
        argv: Unused; accepted so the function can be used as an app entry point.
    """
    # Each label and its value are printed with one formatted call.  The
    # previous `print("..."),` trailing-comma form only suppresses the newline
    # under Python 2; under Python 3 it splits the output over two lines.
    start_time = datetime.datetime.now()
    print("start_time = %s" % start_time)
    rd.create_record("train")
    train_start_time = datetime.datetime.now()
    print("train_start_time = %s" % train_start_time)
    train()
    end_time = datetime.datetime.now()
    print("end_time = %s" % end_time)


if __name__ == '__main__':
    main()
Net.py:
import tensorflow as tf
INPUT_NODE = 784
OUTPUT_NODE = 10
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10
CONV1_DEEP = 32
CONV1_SIZE = 5
CONV2_DEEP = 64
CONV2_SIZE = 5
FC_SIZE = 512
def inference(input_tensor, train, regularizer):
    """Build the conv-pool-conv-pool-fc-fc network and return its logits.

    Args:
        input_tensor: Image batch; the conv kernels expect NUM_CHANNELS in the
            last dimension.  The batch dimension may be unknown (None).
        train: When truthy, dropout (keep probability 0.5) is applied after
            the first fully connected layer.
        regularizer: Callable applied to the fully connected weights, with the
            result added to the 'losses' collection; pass None to skip.

    Returns:
        Tensor of unnormalised class scores with NUM_LABELS columns.
    """
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        pool_shape = pool2.get_shape().as_list()
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # -1 lets TensorFlow infer the batch size, so the network also works
        # when the input placeholder's batch dimension is None.
        reshaped = tf.reshape(pool2, [-1, nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
tfrecords.py:
import os
import tensorflow as tf
from PIL import Image
import Net
def create_record(op_type):
    """Convert the images listed in ``./<op_type>.txt`` into a TFRecords file.

    Each non-blank line of the list file is ``<relative image path> <label>``.
    Every image is resized to Net.IMAGE_SIZE x Net.IMAGE_SIZE and stored with
    its integer label in ``./tfrecords/<op_type>.tfrecords``.

    Args:
        op_type: Base name shared by the list file and the output file,
            e.g. "train" or "test".
    """
    writer = tf.python_io.TFRecordWriter("./tfrecords/" + op_type + ".tfrecords")
    try:
        # The context manager closes the list file (it used to be leaked).
        with open("./" + op_type + ".txt", 'r') as f:
            img_num = 0
            for line in f:
                str_split = line.split()
                if not str_split:
                    # Skip blank lines (e.g. a trailing newline at end of file).
                    continue
                img_num += 1
                if img_num % 2000 == 0:
                    print("already read in %d images." % (img_num))
                img_path = "./" + str_split[0]
                index = int(str_split[1])
                # NOTE(review): read_and_decode reshapes the bytes to
                # [IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS]; this presumably
                # relies on the files being single-channel -- confirm.
                img = Image.open(img_path)
                img = img.resize((Net.IMAGE_SIZE, Net.IMAGE_SIZE))
                img_raw = img.tobytes()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                    'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
                }))
                writer.write(example.SerializeToString())
    finally:
        # Always close the writer, even if an image fails to load.
        writer.close()
def read_and_decode(filename):
    """Create graph ops that read one (image, one-hot label) pair from a TFRecords file.

    Args:
        filename: Path of the TFRecords file to read.

    Returns:
        (img, one_hot): a float32 image tensor of shape
        [IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS] holding ``1 - pixel / 255``,
        and a flat float32 one-hot label of depth 10.
    """
    queue = tf.train.string_input_producer([filename], shuffle=True)
    _, serialized = tf.TFRecordReader().read(queue)

    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # Raw bytes -> uint8 pixels -> image-shaped float tensor, inverted.
    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    pixels = tf.reshape(pixels, [Net.IMAGE_SIZE, Net.IMAGE_SIZE, Net.NUM_CHANNELS])
    img = 1 - tf.cast(pixels, tf.float32) * (1. / 255)

    class_index = tf.cast(parsed['label'], tf.int32)
    one_hot = tf.reshape(tf.one_hot(class_index, 10, dtype=tf.float32), [-1])
    return img, one_hot
Eval.py:
import time
import math
import tensorflow as tf
import numpy as np
import Net
import Train
import tfrecords as rd
def evaluate():
    """Restore the latest checkpoint and print accuracy on one 10000-image test batch.

    The moving-average shadow values saved during training are loaded into
    the model variables.  Prints 'No checkpoint file found' and returns when
    no checkpoint is available under Train.MODEL_SAVE_PATH.
    """
    with tf.Graph().as_default():
        input_shape = [10000, Net.IMAGE_SIZE, Net.IMAGE_SIZE, Net.NUM_CHANNELS]
        x = tf.placeholder(tf.float32, input_shape, name='x-input')
        y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')
        #validate_feed = {x: mnist.test.images, y_: mnist.test.labels}
        global_step = tf.Variable(0, trainable=False)
        regularizer = tf.contrib.layers.l2_regularizer(Train.REGULARIZATION_RATE)
        y = Net.inference(x, False, regularizer)

        is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        # Map each variable to its moving-average entry in the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(Train.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        img, one_hot = rd.read_and_decode("./tfrecords/test.tfrecords")
        img_batch, label_batch = tf.train.shuffle_batch(
            tensors=[img, one_hot],
            batch_size=10000,
            capacity=10000 + 3 * 100,
            min_after_dequeue=10000)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(Train.MODEL_SAVE_PATH)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return

            saver.restore(sess, ckpt.model_checkpoint_path)
            # The step count is the numeric suffix of the checkpoint file name.
            step_label = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            #the reason of this error!!!
            # Re-initialising here would overwrite the restored weights:
            #tf.global_variables_initializer().run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            try:
                xs, ys = sess.run([img_batch, label_batch])
                accuracy_score = sess.run(accuracy, feed_dict={x: xs, y_: ys})
                print("After %s training step(s), test accuracy = %g" % (step_label, accuracy_score))
            except tf.errors.OutOfRangeError:
                print("Done testting after reading all data")
            finally:
                coord.request_stop()
            coord.join(threads)
            sess.close()
def main(argv=None):
    """Write the test TFRecords file, then evaluate the saved model on it."""
    rd.create_record("test")
    evaluate()


if __name__ == '__main__':
    main()
Now, I have solved this problem.
My code and other files: https://github.com/xmy7216/MNIST_classification.git
Linux: Red Hat Enterprise Linux Server release 7.2 (Maipo)
GPU: Tesla P4
TensorFlow: 1.3
Python:2.7.5

I found the reason: after loading the model with saver.restore, I initialized the variables again with tf.global_variables_initializer().run(), which overwrote the restored weights.
How stupid I am!

Tensorflow Memory Leak could not allocate pinned host memory of size

I have built a convolutional model, only to find that there is a massive memory leak in my code or in TensorFlow's code. Can anyone spot the issue and give some insight into where it lies?
Below is a minimal reproducible example and some of its outputs:
Process.py:
import os
import sys
import tensorflow as tf
import Input
import os, re
FLAGS = tf.app.flags.FLAGS
TOWER_NAME = 'tower'
tf.app.flags.DEFINE_integer('batch_size', 1, "hello")
tf.app.flags.DEFINE_string('data_dir', '/home/zan/Desktop/Neural-Network-Prostate', "hello")
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = Input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = Input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
NUM_EPOCHS_PER_DECAY = 3
MOVING_AVERAGE_DECAY = 0.9999
NUM_EPOCHS_PER_DECAY = 30
LEARNING_RATE_DECAY_FACTOR = 0.1
INITIAL_LEARNING_RATE = 0.1
def _activation_summary(x):
    """Attach a histogram and a sparsity (zero-fraction) summary to tensor ``x``.

    Any ``<TOWER_NAME>_<digits>/`` prefix is stripped from the op name so the
    summary tags do not carry the tower prefix.
    """
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    tag_base = re.sub(tower_prefix, '', x.op.name)
    tf.histogram_summary(tag_base + '/activations', x)
    tf.scalar_summary(tag_base + '/sparsity', tf.nn.zero_fraction(x))
def inputs():
    """Return the (images, labels) training batch from ``Input.inputs``.

    Raises:
        ValueError: If ``FLAGS.data_dir`` is empty.
    """
    data_dir = FLAGS.data_dir
    if not data_dir:
        raise ValueError('Source Data Missing')
    batch = Input.inputs(data_dir=data_dir, batch_size=FLAGS.batch_size)
    images, labels = batch
    return images, labels
def eval_inputs():
    """Return the (images, labels) evaluation batch (batch size 1) from ``Input.eval_inputs``."""
    batch = Input.eval_inputs(data_dir=FLAGS.data_dir, batch_size=1)
    images, labels = batch
    return images, labels
def weight_variable(shape):
    """Create a variable of ``shape`` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a variable of ``shape`` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(images, W):
    """Convolve ``images`` with kernel ``W`` using stride 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(images, W, strides=unit_strides, padding='SAME')
def max_pool_5x5(images):
    """Max-pool ``images`` over non-overlapping 5x5 windows (SAME padding)."""
    window = [1, 5, 5, 1]
    return tf.nn.max_pool(images, ksize=window, strides=window, padding='SAME')
def forward_propagation(images):
    """Run the three conv/pool stages and two dense layers; return softmax scores.

    The input is reshaped to [-1, 1750, 1750, 3].  Each 5x5 pooling stage
    divides the spatial size by 5 (1750 -> 350 -> 70 -> 14), which is why the
    first dense layer takes 14 * 14 * 128 inputs.  The output has 4 columns.
    """
    with tf.variable_scope('conv1'):
        kernel1 = weight_variable([5, 5, 3, 32])
        offset1 = bias_variable([32])
        image_matrix = tf.reshape(images, [-1, 1750, 1750, 3])
        act1 = tf.nn.sigmoid(conv2d(image_matrix, kernel1) + offset1)
        _activation_summary(act1)
        pooled1 = max_pool_5x5(act1)

    with tf.variable_scope('conv2'):
        kernel2 = weight_variable([5, 5, 32, 64])
        offset2 = bias_variable([64])
        act2 = tf.nn.sigmoid(conv2d(pooled1, kernel2) + offset2)
        _activation_summary(act2)
        pooled2 = max_pool_5x5(act2)

    with tf.variable_scope('conv3'):
        kernel3 = weight_variable([5, 5, 64, 128])
        offset3 = bias_variable([128])
        act3 = tf.nn.sigmoid(conv2d(pooled2, kernel3) + offset3)
        _activation_summary(act3)
        pooled3 = max_pool_5x5(act3)

    with tf.variable_scope('local3'):
        dense_w = weight_variable([14 * 14 * 128, 256])
        dense_b = bias_variable([256])
        flattened = tf.reshape(pooled3, [-1, 14 * 14 * 128])
        dense_out = tf.nn.sigmoid(tf.matmul(flattened, dense_w) + dense_b)
        _activation_summary(dense_out)

        # Created but never consumed below; kept so the variable set is unchanged.
        keep_prob = tf.Variable(1.0)

        out_w = weight_variable([256, 4])
        out_b = bias_variable([4])
        y_conv = tf.nn.softmax(tf.matmul(dense_out, out_w) + out_b)
        _activation_summary(y_conv)

    return y_conv
def error(forward_propagation_results, labels):
    """Return the mean squared error between predictions and one-hot labels.

    Args:
        forward_propagation_results: Network output with 4 columns per example.
        labels: Integer class indices; expanded to one-hot vectors of depth 4.

    Returns:
        Scalar cost tensor.  It is also added to the 'losses' collection and
        recorded as the 'LOSS' scalar summary.
    """
    # The former bare `tf.transpose(labels)` discarded its result and only
    # added a dead op to the graph, so it has been removed.
    labels = tf.cast(tf.one_hot(labels, 4), tf.float32)
    mean_squared_error = tf.square(tf.sub(labels, forward_propagation_results))
    cost = tf.reduce_mean(mean_squared_error)
    #train_loss = tf.train.GradientDescentOptimizer(learning_rate = 0.05).minimize(cost)
    # The tag says 'accuracy', but the values are per-element squared errors.
    tf.histogram_summary('accuracy', mean_squared_error)
    tf.add_to_collection('losses', cost)
    tf.scalar_summary('LOSS', cost)
    return cost
def _add_loss_summaries(cost):
    """Track moving averages of the losses and add raw/averaged scalar summaries.

    Covers every tensor in the 'LOSS' collection plus ``cost`` itself.

    Returns:
        The op that updates the moving averages.
    """
    # NOTE(review): `error` registers its cost under 'losses', not 'LOSS', so
    # this collection lookup is probably empty -- confirm which key is intended.
    tracked = tf.get_collection('LOSS') + [cost]
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply(tracked)
    for loss in tracked:
        tag = loss.op.name
        tf.scalar_summary(tag +' (raw)', loss)
        tf.scalar_summary(tag, loss_averages.average(loss))
    return loss_averages_op
def train(cost, global_step):
    """Create the op that performs one SGD step and updates variable averages.

    Args:
        cost: Scalar loss tensor to minimise.
        global_step: Variable counting training steps; drives the
            learning-rate decay and is incremented by the gradient update.

    Returns:
        A no-op named 'train' that depends on the gradient update and the
        moving-average update of all trainable variables.
    """
    batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    lr = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.scalar_summary('learning_rate', lr)

    # Gradients are computed only after the loss averages have been updated.
    loss_averages_op = _add_loss_summaries(cost)
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(cost)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for variable in tf.trainable_variables():
        tf.histogram_summary(variable.op.name, variable)
    for gradient, variable in grads:
        if gradient is None:
            continue
        tf.histogram_summary(variable.op.name + '/gradients', gradient)

    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
main.py:
import Process
import time
import numpy as np
import os
import tensorflow as tf
from datetime import datetime
FLAGS = tf.app.flags.FLAGS
def train():
    """Build the model graph and run the training loop for 650 steps.

    Each step prints its throughput and writes a summary.  A checkpoint is
    written every 20 steps and after the final step.
    """
    max_steps = 650
    checkpoint_every = 20
    train_dir = "/home/zan/nn-data"

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = Process.inputs()
        forward_propgation_results = Process.forward_propagation(images)
        cost = Process.error(forward_propgation_results, labels)
        train_op = Process.train(cost, global_step)
        tf.image_summary(images.name, images, max_images = 2)
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        # A single saver is enough; a second, identical one used to be built.
        saver = tf.train.Saver(tf.all_variables())

        sess = tf.InteractiveSession()
        sess.run(init)
        tf.train.start_queue_runners(sess = sess)
        summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)
        checkpoint_path = os.path.join(train_dir, 'model.ckpt')

        for step in xrange(max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, cost])
            duration = time.time() - start_time
            assert not np.isnan(loss_value)

            num_examples_per_step = FLAGS.batch_size
            examples_per_sec = num_examples_per_step / duration
            sec_per_batch = float(duration)
            format_str = ('%s: step %d, (%.1f examples/sec; %.3f ''sec/batch)')
            print (format_str % (datetime.now(), step, examples_per_sec, sec_per_batch))
            summary_str = sess.run(summary_op)
            summary_writer.add_summary(summary_str, step)

            # The old test `step % 20 or (step + 1) == 20` was inverted: it
            # saved on every step that is NOT a multiple of 20, and compared
            # the final-step check against 20 instead of the step count.
            if step % checkpoint_every == 0 or (step + 1) == max_steps:
                saver.save(sess, checkpoint_path, global_step=step)
def main(argv = None):
    """Entry point handed to ``tf.app.run``; ``argv`` is unused."""
    train()


if __name__ == '__main__':
    # Let TensorFlow's app runner invoke main().
    tf.app.run()
Luckily I was able to kill the process in time and below are the outputs the program recorded.
Output:
I tensorflow/core/common_runtime/gpu/gpu_device.cc:838] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 960, pci bus id: 0000:03:00.0)
E tensorflow/stream_executor/cuda/cuda_driver.cc:997] failed to alloc 8589934592 bytes on host: CUDA_ERROR_OUT_OF_MEMORY
W ./tensorflow/core/common_runtime/gpu/pool_allocator.h:195] could not allocate pinned host memory of size: 8589934592
E tensorflow/stream_executor/cuda/cuda_driver.cc:997] failed to alloc 7730940928 bytes on host: CUDA_ERROR_OUT_OF_MEMORY
W ./tensorflow/core/common_runtime/gpu/pool_allocator.h:195] could not allocate pinned host memory of size: 7730940928

The out-of-memory error you're seeing comes from running out of CPU (host) memory, not GPU memory; the "pinned host memory" the log refers to is page-locked RAM on the CPU side.

failed sess.run error “Cannot feed value of shape (50, 2352) for Tensor 'Placeholder:0', which has shape '(?, 784)'”

Please Help me...
I am learning TensorFlow with my own data, based on the expert tutorial.
Here is my code:
#datasets define
NUM_CLASSES = 65535
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*1
#read datasets
# Each line of the list file is "<image path>,<class index>"; at most the
# first 500 lines are loaded.
train_image = []
train_label = []
with open(FLAGS.train, 'r') as f: # train.txt
    num = 0
    for line in f:
        if num == 500:
            break
        line = line.rstrip()
        l = line.split(',')
        print(l[0])
        # Read as a single-channel image.  The default imread mode returns
        # 3 colour channels, so flatten() yields 28*28*3 = 2352 values
        # instead of IMAGE_PIXELS (784) and cannot be fed to the placeholder.
        img = cv2.imread(l[0], cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals an unreadable or missing file by returning None.
            raise IOError("could not read image: %s" % l[0])
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        train_image.append(img.flatten().astype(np.float32)/255.0)
        # One-hot label vector of length NUM_CLASSES.
        tmp = np.zeros(NUM_CLASSES)
        tmp[int(l[1])] = 1
        train_label.append(tmp)
        num += 1
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
train_len = len(train_image)
def inference(images_placeholder, keep_prob):
    """Build the two-conv, two-dense classifier and return its softmax output.

    Args:
        images_placeholder: Flat image batch; reshaped to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 1] before the first convolution.
        keep_prob: Keep probability for dropout after the first dense layer.

    Returns:
        Softmax probabilities with NUM_CLASSES columns.
    """
    def weight_variable(shape):
        # Truncated-normal initial weights, stddev 0.1.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def bias_variable(shape):
        # Constant initial bias of 0.1.
        return tf.Variable(tf.constant(0.1, shape=shape))

    def conv2d(x, W):
        # Stride-1 convolution, SAME padding.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 max pooling with stride 2.
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])

    with tf.name_scope('conv1'):
        kernel1 = weight_variable([5, 5, 1, 32])
        offset1 = bias_variable([32])
        act1 = tf.nn.relu(conv2d(x_images, kernel1) + offset1)
    with tf.name_scope('pool1'):
        pooled1 = max_pool_2x2(act1)

    with tf.name_scope('conv2'):
        kernel2 = weight_variable([5, 5, 32, 64])
        offset2 = bias_variable([64])
        act2 = tf.nn.relu(conv2d(pooled1, kernel2) + offset2)
    with tf.name_scope('pool2'):
        pooled2 = max_pool_2x2(act2)

    with tf.name_scope('fc1'):
        dense_w = weight_variable([7*7*64, 1024])
        dense_b = bias_variable([1024])
        flattened = tf.reshape(pooled2, [-1, 7*7*64])
        dense_out = tf.nn.relu(tf.matmul(flattened, dense_w) + dense_b)
        dropped = tf.nn.dropout(dense_out, keep_prob)

    with tf.name_scope('fc2'):
        out_w = weight_variable([1024, NUM_CLASSES])
        out_b = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax'):
        y_conv = tf.nn.softmax(tf.matmul(dropped, out_w) + out_b)

    return y_conv
#learn
# Build the graph, train for FLAGS.max_steps passes over the training set,
# report training accuracy every 10 passes, then evaluate on the test set
# and save the model.
with tf.Graph().as_default():
    images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder("float")
    logits = inference(images_placeholder, keep_prob)
    loss_value = loss(logits, labels_placeholder)
    train_op = training(loss_value, FLAGS.learning_rate)
    print("train_op =", train_op)
    acc = accuracy(logits, labels_placeholder)
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

    def _num_batches(count):
        """Return how many mini-batches cover `count` samples (integer ceiling)."""
        return (count + FLAGS.batch_size - 1) // FLAGS.batch_size

    def _mean_accuracy(images, labels, count):
        """Return the accuracy over the first `count` samples, batch by batch.

        Each batch result is weighted by its size so a short final batch does
        not skew the mean.  The previous running `/= 2.0` halved the
        accumulated value on every batch, which is not an average.
        Assumes `acc` yields the mean accuracy of the fed batch -- TODO confirm.
        """
        if count == 0:
            return 0.0
        weighted_sum = 0.0
        for start in range(0, count, FLAGS.batch_size):
            stop = min(start + FLAGS.batch_size, count)
            batch_acc = sess.run(acc, feed_dict={
                images_placeholder: images[start:stop],
                labels_placeholder: labels[start:stop],
                keep_prob: 1.0})
            weighted_sum += batch_acc * (stop - start)
        return weighted_sum / count

    # Integer batch count: `/` gives a float under Python 3, which range()
    # rejects, and `x is 0` tested identity rather than equality.
    train_batch = _num_batches(train_len)
    print("train_batch = %d" % train_batch)
    for step in range(FLAGS.max_steps):
        for i in range(train_batch):
            batch = FLAGS.batch_size*i
            batch_plus = FLAGS.batch_size*(i+1)
            print("batch_plus = %d" % batch_plus)
            if batch_plus > train_len: batch_plus = train_len
            sess.run(train_op, feed_dict={
                images_placeholder: train_image[batch:batch_plus],
                labels_placeholder: train_label[batch:batch_plus],
                keep_prob: 0.5})
        if step % 10 == 0:
            train_accuracy = _mean_accuracy(train_image, train_label, train_len)
            #summary_str = sess.run(summary_op, feed_dict={
            #    images_placeholder: train_image,
            #    labels_placeholder: train_label,
            #    keep_prob: 1.0})
            #summary_writer.add_summary(summary_str, step)
            # Format with %; passing the tuple as a second print argument
            # printed the raw format string instead.
            print("step %d, training accuracy %g" % (step, train_accuracy))

    test_batch = _num_batches(test_len)
    print("test_batch = %d" % test_batch)
    # The test pass is bounded by test_len (it used to clamp with train_len).
    test_accuracy = _mean_accuracy(test_image, test_label, test_len)
    print("test accuracy %g" % test_accuracy)
    save_path = saver.save(sess, FLAGS.save_model)
But when I try to run it, it gives me an error:
ValueError:Cannot feed value of shape (50, 2352) for Tensor 'Placeholder:0', which has shape '(?, 784)'
I feel like i'm overlooking something small but I don't see it.

EDIT: Sorry if you read my previous analysis, it was wrong.
2352 / 3 = 784, I think you have kept the 3 rgb color channels instead of a single pixel intensity (or the resize function has 3 color channels in output by default).
Quite a lot of the complexity of the mnist example is hidden in the data loading, see tensorflow/tensorflow/contrib/learn/python/learn/datasets/mnist.py to understand it better and make something similar for your case. They load images in a 4D tensor [index, x, y, depth] and that's how they can look at a minibatch taking a subset of the indexes.
Good luck !

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

ResourceExhaustedError when trying to train with MNIST - python

Related

Loading the CNN model and predict the CSV file

C3D-TF: ValueError:Cannot feed value of shape (10,0) for Tensor u 'Placeholder:0', which has shape '(10,16,112,112,3)'

I use CNN to classification MNIST with TensorFlow. But I want to read the dataset with TFRecordrs. However I only get 10% accuracy

Tensorflow Memory Leak could not allocate pinned host memory of size

failed sess.run error “Cannot feed value of shape (50, 2352) for Tensor 'Placeholder:0', which has shape '(?, 784)'”

Categories

Resources