How would I go about changing the MNIST tutorial to use TFRecords instead of the odd format the tutorial downloads from the web?
I used build_image_data.py from the inception model to create my TFRecords containing 200x200 RGB images and intend to train this on a 1080Ti, but I can't find any good examples on how to load TFRecords and feed them into a convolutional neural network.
I did a similar thing as you intend doing. I also took the same script to build image data. My code for reading the data and training it is
import tensorflow as tf
height = 28
width = 28
tfrecords_train_filename = 'train-00000-of-00001'
tfrecords_test_filename = 'test-00000-of-00001'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/colorspace': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/channels': tf.FixedLenFeature([], tf.int64),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/format': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
})
image_buffer = features['image/encoded']
image_label = tf.cast(features['image/class/label'], tf.int32)
# Decode the jpeg
with tf.name_scope('decode_jpeg', [image_buffer], None):
# decode
image = tf.image.decode_jpeg(image_buffer, channels=3)
# and convert to single precision data type
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.image.rgb_to_grayscale(image)
image_shape = tf.stack([height, width, 1])
image = tf.reshape(image, image_shape)
return image, image_label
def inputs(filename, batch_size, num_epochs):
if not num_epochs: num_epochs = None
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer([filename], num_epochs=None)
image, label = read_and_decode(filename_queue)
# Shuffle the examples and collect them into batch_size batches.
images, sparse_labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=2,
capacity=1000 + 3 * batch_size,
min_after_dequeue=1000)
return images, sparse_labels
image, label = inputs(filename=tfrecords_train_filename, batch_size=200, num_epochs=None)
image = tf.reshape(image, [-1, 784])
label = tf.one_hot(label - 1, 10)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
for i in range(1000):
img, lbl = sess.run([image, label])
sess.run(train_step, feed_dict={x: img, y_: lbl})
img, lbl = sess.run([image, label])
print(sess.run(accuracy, feed_dict={x: img, y_: lbl}))
coord.request_stop()
coord.join(threads)
This is a super easy model for classifying mnist. However I think it is also an extensible answer for how to train with TFRecord files. It does not yet take into account the evaluation data, since this needs more coordination to be done.
Related
I'd like to make TFRecords into TFRecordsDataset. And unpack the data while feeding it. This is my function for parsing.
def read_tfrecord(tfrecord, epochs, batch_size):
dataset = tf.data.TFRecordDataset(tfrecord)
def parse(record):
features = {
"x_img": tf.io.FixedLenFeature([], tf.string),
"x_img_shape": tf.io.FixedLenFeature([], tf.int64),
"x_spectrogram": tf.io.FixedLenFeature([], tf.string),
"x_spec_shape": tf.io.FixedLenFeature([], tf.int64),
"x_wave": tf.io.FixedLenFeature([], tf.string),
"x_wave_shape": tf.io.FixedLenFeature([], tf.int64),
"y":tf.io.FixedLenFeature([], tf.string),
"y_shape": tf.io.FixedLenFeature([], tf.int64)}
example = tf.io.parse_single_example(record, features)
x_image = tf.io.decode_image(example["x_img"])
x_spec = tf.io.decode_image(example["x_spectrogram"])
x_wave = tf.cast(example["x_wave"], tf.string)
y = tf.io.decode_image(example["y"])
return x_image, x_spec, x_wave, y
dataset = dataset.map(parse)
dataset = dataset.shuffle(buffer_size=5)
dataset = dataset.prefetch(buffer_size=batch_size) #
dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.repeat(epochs)
return dataset
I made an iterator to do the task:
train_files = path + "train.tfrecords"
EPOCHS = 100
BATCH_SIZE = 5
train_dataset = read_tfrecord(tfrecord=train_files,
epochs=EPOCHS,
batch_size=BATCH_SIZE)
iterator = iter(train_dataset)
x_img, x_spec, x_wave, y = next(iterator)
But I got this error:
InvalidArgumentError: Key: x_img_shape. Can't parse serialized Example.
[[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
The code that serializes the data:
with tf.io.TFRecordWriter(tfrcpath) as file_writer:
for video in sorted(video_list):
print(video)
x_img = frame_extraction(raw_loc, video, interval)
x_spec, x_wave = audio_extraction(raw_loc, video, interval)
y = ground_truth_extraction(raw_loc, video, ground_truth, interval)
record_bytes = tf.train.Example(
features= tf.train.Features(
feature={
"x_img": tf.train.Feature(float_list=tf.train.FloatList(value= x_img.flatten())),
"x_img_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_img.shape)),
"x_spec": tf.train.Feature(float_list=tf.train.FloatList(value= x_spec.flatten())),
"x_spec_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_spec.shape)),
"x_wave": tf.train.Feature(float_list=tf.train.FloatList(value= x_wave.flatten())),
"x_wave_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_wave.shape)),
"y": tf.train.Feature(float_list=tf.train.FloatList(value= y.flatten())),
"y_shape": tf.train.Feature(float_list=tf.train.FloatList(value= y.shape)),
})).SerializeToString()
file_writer.write(record_bytes)
What I have tried:
Solution in link suggests adding the shape of the data. So I tried update it as "x_img_shape": tf.io.FixedLenFeature([3], tf.int64), but didn't solve my issue.
My questions are:
What is the error meaning?
Why is the error only shows at x_img_shape but not x_img? x_img_shape is only used to record the shape of the images
Since I did flatten the images when writing theam as TFRecords, do I need to reshape the training images back? If so, how do I do that?
I'm basically trying to read images as a batch using tfrecords, but I'm running into this error. It seems to be resizing the images correctly according to the print statement, but I'm getting the error at the end of print statement.
Part of the traceback error looks like this:
next(self.gen)
File "C:\Users\Moondra\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 40400 values, but the requested shape has 30300
[[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](DecodeRaw, stack)]]
print statements before the traceback error (batch shape look correct):
(5, 200, 200, 3)
(5, 200, 200, 3)
(5, 200, 200, 3)
my code
#tfrecords and batch images
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imread
import os
import tensorflow as tf
IMAGE_HEIGHT = 200
IMAGE_WIDTH = 200
tfrecords_filename = r'C:\Users\Moondra\Desktop\Transfer Learning Tutorials\testing.tfrecords'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
})
image = tf.decode_raw(features['image_raw'], tf.uint8)
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
image_shape = tf.stack([height, width, 3])
image = tf.reshape(image, image_shape)
#image_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)
resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
target_height=200,
target_width=200)
images = tf.train.shuffle_batch([resized_image], batch_size = 5,
num_threads = 3, capacity=30,
min_after_dequeue=10)
return images
filename_queue = tf.train.string_input_producer(
[tfrecords_filename], num_epochs=10)
image = read_and_decode(filename_queue)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord =coord)
for i in range(3):
img = sess.run(image)
print(img.shape)
coord.request_stop()
corrd.join(threads)
I use TFrecord as input.
And now I need triple batch input. The image_batch and label_batch is ok. But second posimage_batch, poslabel_batch is error.
I have read many posts about RandomShuffleQueue error question.
The answer tf.local_variables_initializer() doesn't solve my error
Because I search only one batch_data and batch_label as input. So I have no idea about triple input.
I am searching for a long time on net. But no use. Please help or try to give some ideas how to achieve this.
def real_read_and_decode(filename):
filename_queue = tf.train.string_input_producer([filename])
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw' : tf.FixedLenFeature([], tf.string),
})
img = tf.decode_raw(features['img_raw'], tf.uint8)
img = tf.reshape(img, [WIDTH,HEIGHT, 3])
label = tf.cast(features['label'], tf.int32)
labels = tf.one_hot(label, NUM_CLASSES)
return img, labels
def main():
image, label = read_and_decode("sketch_train.tfrecords")
posimage, poslabel = real_read_and_decode("pos_train.tfrecords")
negimage, neglabel = real_read_and_decode("neg_train.tfrecords")
image_batch, label_batch =tf.train.shuffle_batch([image, label],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
posimage_batch, poslabel_batch = tf.train.shuffle_batch([posimage, poslabel],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
negimage_batch, neglabel_batch = tf.train.shuffle_batch([negimage, neglabel],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
with tf.Session(config=config) as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
for i in range(ITERATION):
if coord.should_stop():
print('corrd break!!!!!!')
break
#sess.run(tf.local_variables_initializer())
example_train, l_train = sess.run([image_batch, label_batch])
example_train2, l_train2= sess.run([posimage_batch, poslabel_batch])
example_train3, l_train3 = sess.run([negimage_batch, neglabel_batch])
_, loss_v = sess.run([train_step, loss],feed_dict={x1: example_train,y1: l_train,x2: example_train2, y2: l_train2,x3: example_train3, y3: l_train3})
This is my log
Because I am a new user, and my english is not good.
Hope you don't mind.
You probably just need to add some handling of the OutOfRangeError exception which is expected to happen sooner or later:
try:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
for i in range(ITERATION):
#....
except tf.errors.OutOfRangeError:
print('Done training -- limit reached')
finally:
coord.request_stop()
I just want to read image and text in your tfrecords file: fsns/train/train-00511-of-00512Hi in FSNS datasets.
But when I do the work follow the guide in Tfrecords Guideļ¼ link, it shows error message following:
InvalidArgumentError (see above for traceback): Name: <unknown>, Feature: encoded (data type: string) is required but could not be found.
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=4, Nsparse=0, Tdense=[DT_STRING, DT_INT64, DT_STRING, DT_INT64], dense_shapes=[[], [], [], []], sparse_types=[], _device="/job:localhost/replica:0/task:0/cpu:0"](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/Const, ParseSingleExample/ParseExample/Const_1, ParseSingleExample/ParseExample/Const_2, ParseSingleExample/ParseExample/Const_3)]]
It seems that the key name is wrong? My code is attached, could author or any other check my code and help me to fix the bug?
import tensorflow as tf
import skimage.io as io
IMAGE_HEIGHT = 384
IMAGE_WIDTH = 384
tfrecords_filename = '/home/wangjianbo_i/google_model/MyCode/models/attention_ocr/python/datasets/data/fsns/train/train-00511-of-00512'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'encoded': tf.FixedLenFeature([], tf.string),
'text':tf.FixedLenFeature([], tf.string)
})
image = tf.decode_raw(features['encoded'], tf.uint8)
text = tf.decode_raw(features['text'], tf.uint8)
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
image_shape = tf.stack([height, width, 3])
image = tf.reshape(image, image_shape)
image_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)
resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
target_height=IMAGE_HEIGHT,
target_width=IMAGE_WIDTH)
images = tf.train.shuffle_batch( [resized_image],
batch_size=2,
capacity=30,
num_threads=2,
min_after_dequeue=10)
return images,text
filename_queue = tf.train.string_input_producer(
[tfrecords_filename], num_epochs=10)
image,text = read_and_decode(filename_queue)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# Let's read off 3 batches just for example
for i in xrange(3):
img,text= sess.run([image,text])
print img,text
print(img[0, :, :, :].shape)
print('current batch')
io.imshow(img[0, :, :, :])
io.show()
io.imshow(img[1, :, :, :])
io.show()
coord.request_stop()
coord.join(threads)
To read the FSNS dataset you can use https://github.com/tensorflow/models/blob/master/attention_ocr/python/datasets/fsns.py directly or as a reference.
The feature keys are incorrect in the code snippet you provided - missing the 'image/' prefix. It should be 'image/encoded' instead of just 'encoded', 'image/width' instead of 'image' and so on. Refer to the Table 4 in the paper.
I used the a script similar to the one here to convert my dataset to sharded tfrecords. But when I attempt to read it using script below tensorflow freezes and I have to kill the process using kill. (Note: Right now I am working in CPU mode)
def parse_example_proto(example_serialized):
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
features = tf.parse_single_example(example_serialized, feature_map)
init_image = tf.image.decode_jpeg(features['image/encoded'], channels = 3)
init_image.set_shape([800,480,3])
image = tf.reshape(init_image,tf.pack([800, 480, 3]))
float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
return float_image , label, features['image/class/text']
def batch_inputs(batch_size, train,sess, num_preprocess_threads=4,
num_readers=1):
with tf.name_scope('batch_processing'):
tf_record_pattern = os.path.join('/home/raarora/', '%s-*' % 'train')
data_files = tf.gfile.Glob(tf_record_pattern)
if data_files is None:
raise ValueError('No data files found for this dataset')
# print data_files
# Create filename_queue
if train:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=True,
capacity=8)
else:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=False,
capacity=1)
reader =tf.TFRecordReader()
_, example_serialized = reader.read(filename_queue)
image, label, _ = parse_example_proto(example_serialized)
examples_per_shard = 201
min_queue_examples = examples_per_shard * 2
images, labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=4,
capacity=min_queue_examples + 3 * batch_size,
min_after_dequeue=min_queue_examples)
print images.eval(session=sess)
return s,images,labels
if __name__ == '__main__':
sess = tf.Session()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
s,_,_ = batch_inputs(2,1,sess)
Was able to resolve this. I thought TFRecord is sort of dictionary and you need to give only the keys required, but upon giving the entire feature map along with small changes to how image was processed later, it worked.
Another mistake I made was that queue_runner should be started after calling tf.train.shuffle_batch(). I dont know if it's a bug or a gap in my understanding
Here's the working code for reading the data
def getImage(filename):
# convert filenames to a queue for an input pipeline.
filenameQ = tf.train.string_input_producer([filename],num_epochs=None)
# object to read records
recordReader = tf.TFRecordReader()
# read the full set of features for a single example
key, fullExample = recordReader.read(filenameQ)
# parse the full example into its' component features.
features = tf.parse_single_example(
fullExample,
features={
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/colorspace': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/channels': tf.FixedLenFeature([], tf.int64),
'image/class/label': tf.FixedLenFeature([],tf.int64),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/format': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/filename': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
})
# now we are going to manipulate the label and image features
label = features['image/class/label']
image_buffer = features['image/encoded']
# Decode the jpeg
with tf.name_scope('decode_jpeg',[image_buffer], None):
# decode
image = tf.image.decode_jpeg(image_buffer, channels=3)
# and convert to single precision data type
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
# cast image into a single array, where each element corresponds to the greyscale
# value of a single pixel.
# the "1-.." part inverts the image, so that the background is black.
# re-define label as a "one-hot" vector
# it will be [0,1] or [1,0] here.
# This approach can easily be extended to more classes.
image=tf.reshape(image,[height,width,3])
label=tf.pack(tf.one_hot(label-1, nClass))
return label, image
label, image = getImage("train-00000-of-00001")
imageBatch, labelBatch = tf.train.shuffle_batch(
[image, label], batch_size=2,
capacity=20,
min_after_dequeue=10)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
# start the threads used for reading files
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
batch_xs, batch_ys = sess.run([imageBatch, labelBatch])
print batch_xs
coord.request_stop()
coord.join(threads)
Note: I wasnt clear about sharded records so I used just one shard.
Credits to https://agray3.github.io/2016/11/29/Demystifying-Data-Input-to-TensorFlow-for-Deep-Learning.html