I used the a script similar to the one here to convert my dataset to sharded tfrecords. But when I attempt to read it using script below tensorflow freezes and I have to kill the process using kill. (Note: Right now I am working in CPU mode)
def parse_example_proto(example_serialized):
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
features = tf.parse_single_example(example_serialized, feature_map)
init_image = tf.image.decode_jpeg(features['image/encoded'], channels = 3)
init_image.set_shape([800,480,3])
image = tf.reshape(init_image,tf.pack([800, 480, 3]))
float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
return float_image , label, features['image/class/text']
def batch_inputs(batch_size, train,sess, num_preprocess_threads=4,
num_readers=1):
with tf.name_scope('batch_processing'):
tf_record_pattern = os.path.join('/home/raarora/', '%s-*' % 'train')
data_files = tf.gfile.Glob(tf_record_pattern)
if data_files is None:
raise ValueError('No data files found for this dataset')
# print data_files
# Create filename_queue
if train:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=True,
capacity=8)
else:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=False,
capacity=1)
reader =tf.TFRecordReader()
_, example_serialized = reader.read(filename_queue)
image, label, _ = parse_example_proto(example_serialized)
examples_per_shard = 201
min_queue_examples = examples_per_shard * 2
images, labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=4,
capacity=min_queue_examples + 3 * batch_size,
min_after_dequeue=min_queue_examples)
print images.eval(session=sess)
return s,images,labels
if __name__ == '__main__':
sess = tf.Session()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
s,_,_ = batch_inputs(2,1,sess)
Was able to resolve this. I thought TFRecord is sort of dictionary and you need to give only the keys required, but upon giving the entire feature map along with small changes to how image was processed later, it worked.
Another mistake I made was that queue_runner should be started after calling tf.train.shuffle_batch(). I dont know if it's a bug or a gap in my understanding
Here's the working code for reading the data
def getImage(filename):
# convert filenames to a queue for an input pipeline.
filenameQ = tf.train.string_input_producer([filename],num_epochs=None)
# object to read records
recordReader = tf.TFRecordReader()
# read the full set of features for a single example
key, fullExample = recordReader.read(filenameQ)
# parse the full example into its' component features.
features = tf.parse_single_example(
fullExample,
features={
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/colorspace': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/channels': tf.FixedLenFeature([], tf.int64),
'image/class/label': tf.FixedLenFeature([],tf.int64),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/format': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/filename': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
})
# now we are going to manipulate the label and image features
label = features['image/class/label']
image_buffer = features['image/encoded']
# Decode the jpeg
with tf.name_scope('decode_jpeg',[image_buffer], None):
# decode
image = tf.image.decode_jpeg(image_buffer, channels=3)
# and convert to single precision data type
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
# cast image into a single array, where each element corresponds to the greyscale
# value of a single pixel.
# the "1-.." part inverts the image, so that the background is black.
# re-define label as a "one-hot" vector
# it will be [0,1] or [1,0] here.
# This approach can easily be extended to more classes.
image=tf.reshape(image,[height,width,3])
label=tf.pack(tf.one_hot(label-1, nClass))
return label, image
label, image = getImage("train-00000-of-00001")
imageBatch, labelBatch = tf.train.shuffle_batch(
[image, label], batch_size=2,
capacity=20,
min_after_dequeue=10)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
# start the threads used for reading files
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
batch_xs, batch_ys = sess.run([imageBatch, labelBatch])
print batch_xs
coord.request_stop()
coord.join(threads)
Note: I wasnt clear about sharded records so I used just one shard.
Credits to https://agray3.github.io/2016/11/29/Demystifying-Data-Input-to-TensorFlow-for-Deep-Learning.html
Related
I'd like to make TFRecords into TFRecordsDataset. And unpack the data while feeding it. This is my function for parsing.
def read_tfrecord(tfrecord, epochs, batch_size):
dataset = tf.data.TFRecordDataset(tfrecord)
def parse(record):
features = {
"x_img": tf.io.FixedLenFeature([], tf.string),
"x_img_shape": tf.io.FixedLenFeature([], tf.int64),
"x_spectrogram": tf.io.FixedLenFeature([], tf.string),
"x_spec_shape": tf.io.FixedLenFeature([], tf.int64),
"x_wave": tf.io.FixedLenFeature([], tf.string),
"x_wave_shape": tf.io.FixedLenFeature([], tf.int64),
"y":tf.io.FixedLenFeature([], tf.string),
"y_shape": tf.io.FixedLenFeature([], tf.int64)}
example = tf.io.parse_single_example(record, features)
x_image = tf.io.decode_image(example["x_img"])
x_spec = tf.io.decode_image(example["x_spectrogram"])
x_wave = tf.cast(example["x_wave"], tf.string)
y = tf.io.decode_image(example["y"])
return x_image, x_spec, x_wave, y
dataset = dataset.map(parse)
dataset = dataset.shuffle(buffer_size=5)
dataset = dataset.prefetch(buffer_size=batch_size) #
dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.repeat(epochs)
return dataset
I made an iterator to do the task:
train_files = path + "train.tfrecords"
EPOCHS = 100
BATCH_SIZE = 5
train_dataset = read_tfrecord(tfrecord=train_files,
epochs=EPOCHS,
batch_size=BATCH_SIZE)
iterator = iter(train_dataset)
x_img, x_spec, x_wave, y = next(iterator)
But I got this error:
InvalidArgumentError: Key: x_img_shape. Can't parse serialized Example.
[[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
The code that serializes the data:
with tf.io.TFRecordWriter(tfrcpath) as file_writer:
for video in sorted(video_list):
print(video)
x_img = frame_extraction(raw_loc, video, interval)
x_spec, x_wave = audio_extraction(raw_loc, video, interval)
y = ground_truth_extraction(raw_loc, video, ground_truth, interval)
record_bytes = tf.train.Example(
features= tf.train.Features(
feature={
"x_img": tf.train.Feature(float_list=tf.train.FloatList(value= x_img.flatten())),
"x_img_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_img.shape)),
"x_spec": tf.train.Feature(float_list=tf.train.FloatList(value= x_spec.flatten())),
"x_spec_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_spec.shape)),
"x_wave": tf.train.Feature(float_list=tf.train.FloatList(value= x_wave.flatten())),
"x_wave_shape": tf.train.Feature(int64_list=tf.train.Int64List(value= x_wave.shape)),
"y": tf.train.Feature(float_list=tf.train.FloatList(value= y.flatten())),
"y_shape": tf.train.Feature(float_list=tf.train.FloatList(value= y.shape)),
})).SerializeToString()
file_writer.write(record_bytes)
What I have tried:
Solution in link suggests adding the shape of the data. So I tried update it as "x_img_shape": tf.io.FixedLenFeature([3], tf.int64), but didn't solve my issue.
My questions are:
What is the error meaning?
Why is the error only shows at x_img_shape but not x_img? x_img_shape is only used to record the shape of the images
Since I did flatten the images when writing theam as TFRecords, do I need to reshape the training images back? If so, how do I do that?
I created the images.tfrecoreds file using following code
from PIL import Image
import numpy as np
import tensorflow as tf
import glob
images = glob.glob('E:\Projects/FYPT/vehicle/bus/*.jpg')
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
tfrecords_filename = 'E:\Projects/FYPT/vehicle/images.tfrecords'
writer = tf.python_io.TFRecordWriter(tfrecords_filename)
original_images = []
for img_path in images:
img = np.array(Image.open(img_path))
height = img.shape[0]
width = img.shape[1]
# Put in the original images into array
# Just for future check for correctness
original_images.append((img))
img_raw = img.tostring()
example = tf.train.Example(features=tf.train.Features(feature={
'height': _int64_feature(height),
'width': _int64_feature(width),
'image_raw': _bytes_feature(img_raw)
}))
writer.write(example.SerializeToString())
writer.close()
Then I tried to Inspect the output of the tf.TFRecordReader() by printing the output of the "serialized_example"
import tensorflow as tf
import skimage.io as io
reader = tf.TFRecordReader()
tfrecords_filename = 'E:\Projects/FYPT/vehicle/images.tfrecords'
filename_queue = tf.train.string_input_producer([tfrecords_filename],num_epochs=10)
_,serialized_example = reader.read(filename_queue)
sess= tf.Session()
print(sess.run(serialized_example))
but it gives me following warning and not give any output of the "serialized_example" this is the screen shot of the command line
What is the mistake have I done and how should I print the output of the "serialized_example"
You get that warning, because you are using tf.train.string_input_producer() which returns a queue, but input pipelines based on QueueRunner API are deprecated and not supported in future versions.
Queue-based solution - not recommended!
serialized_example is just a string object (the same that was written with the tf.python_io.TFRecordWriter to images.tfrecords file for each example).
You need to parse each single example to get its features. In your case:
features = tf.parse_single_example(serialized_example,
features={"image_raw": tf.FixedLenFeature([], tf.string),
"height": tf.FixedLenFeature([], tf.int64) }
img_raw = tf.image.decode_jpeg(features["image_raw"])
img_height = features["height"]
# initialize global and local variables
init_op = tf.group(tf.local_variables_initializer(),
tf.global_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
# start a number of threads
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
while not coord.should_stop():
img_raw_value, img_height_value = sess.run([img_raw, img_height])
print(img_raw_value.shape)
print(img_height_value)
except tf.errors.OutOfRangeError:
print("End of data")
finally:
coord.request_stop()
# wait for all threads to terminate
coord.join(threads)
sess.close()
Dataset API - highly recommended!
A detailed description how to build a input pipeline can be found here: TensorFlow API.
In your case you should define a _parse_function like this:
def _parse_function(example_proto):
features={"imgage_raw": tf.FixedLenFeature([], tf.string),
"height": tf.FixedLenFeature([], tf.int64),
"width": tf.FixedLenFeature([], tf.int64)}
parsed_features = tf.parse_single_example(example_proto, features)
img_raw = tf.image.decode_jpeg(parsed_features["img_raw"])
height = parsed_features["height"]
width = parsed_features["width"]
return img_raw, height, width
Than create a dataset that reads all of the examples from TFRecord file, and extract the features:
dataset = tf.data.TFRecordDataset([tfrecords_filename])
dataset = dataset.map(_parse_function)
# here you could batch and shuffle
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
with tf.Session() as sess:
while True:
try:
val = sess.run(next_element)
print("img_raw:", val[0].shape)
print("height:", val[1])
print("width:", val[2])
except tf.errors.OutOfRangeError:
print("End of dataset")
break
I hope this helps.
I use TFrecord as input.
And now I need triple batch input. The image_batch and label_batch is ok. But second posimage_batch, poslabel_batch is error.
I have read many posts about RandomShuffleQueue error question.
The answer tf.local_variables_initializer() doesn't solve my error
Because I search only one batch_data and batch_label as input. So I have no idea about triple input.
I am searching for a long time on net. But no use. Please help or try to give some ideas how to achieve this.
def real_read_and_decode(filename):
filename_queue = tf.train.string_input_producer([filename])
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw' : tf.FixedLenFeature([], tf.string),
})
img = tf.decode_raw(features['img_raw'], tf.uint8)
img = tf.reshape(img, [WIDTH,HEIGHT, 3])
label = tf.cast(features['label'], tf.int32)
labels = tf.one_hot(label, NUM_CLASSES)
return img, labels
def main():
image, label = read_and_decode("sketch_train.tfrecords")
posimage, poslabel = real_read_and_decode("pos_train.tfrecords")
negimage, neglabel = real_read_and_decode("neg_train.tfrecords")
image_batch, label_batch =tf.train.shuffle_batch([image, label],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
posimage_batch, poslabel_batch = tf.train.shuffle_batch([posimage, poslabel],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
negimage_batch, neglabel_batch = tf.train.shuffle_batch([negimage, neglabel],batch_size=BATCH_SIZE,capacity=1500, min_after_dequeue=1000)
with tf.Session(config=config) as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
for i in range(ITERATION):
if coord.should_stop():
print('corrd break!!!!!!')
break
#sess.run(tf.local_variables_initializer())
example_train, l_train = sess.run([image_batch, label_batch])
example_train2, l_train2= sess.run([posimage_batch, poslabel_batch])
example_train3, l_train3 = sess.run([negimage_batch, neglabel_batch])
_, loss_v = sess.run([train_step, loss],feed_dict={x1: example_train,y1: l_train,x2: example_train2, y2: l_train2,x3: example_train3, y3: l_train3})
This is my log
Because I am a new user, and my english is not good.
Hope you don't mind.
You probably just need to add some handling of the OutOfRangeError exception which is expected to happen sooner or later:
try:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
for i in range(ITERATION):
#....
except tf.errors.OutOfRangeError:
print('Done training -- limit reached')
finally:
coord.request_stop()
How would I go about changing the MNIST tutorial to use TFRecords instead of the odd format the tutorial downloads from the web?
I used build_image_data.py from the inception model to create my TFRecords containing 200x200 RGB images and intend to train this on a 1080Ti, but I can't find any good examples on how to load TFRecords and feed them into a convolutional neural network.
I did a similar thing as you intend doing. I also took the same script to build image data. My code for reading the data and training it is
import tensorflow as tf
height = 28
width = 28
tfrecords_train_filename = 'train-00000-of-00001'
tfrecords_test_filename = 'test-00000-of-00001'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/colorspace': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/channels': tf.FixedLenFeature([], tf.int64),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/format': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
})
image_buffer = features['image/encoded']
image_label = tf.cast(features['image/class/label'], tf.int32)
# Decode the jpeg
with tf.name_scope('decode_jpeg', [image_buffer], None):
# decode
image = tf.image.decode_jpeg(image_buffer, channels=3)
# and convert to single precision data type
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.image.rgb_to_grayscale(image)
image_shape = tf.stack([height, width, 1])
image = tf.reshape(image, image_shape)
return image, image_label
def inputs(filename, batch_size, num_epochs):
if not num_epochs: num_epochs = None
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer([filename], num_epochs=None)
image, label = read_and_decode(filename_queue)
# Shuffle the examples and collect them into batch_size batches.
images, sparse_labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=2,
capacity=1000 + 3 * batch_size,
min_after_dequeue=1000)
return images, sparse_labels
image, label = inputs(filename=tfrecords_train_filename, batch_size=200, num_epochs=None)
image = tf.reshape(image, [-1, 784])
label = tf.one_hot(label - 1, 10)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
for i in range(1000):
img, lbl = sess.run([image, label])
sess.run(train_step, feed_dict={x: img, y_: lbl})
img, lbl = sess.run([image, label])
print(sess.run(accuracy, feed_dict={x: img, y_: lbl}))
coord.request_stop()
coord.join(threads)
This is a super easy model for classifying mnist. However I think it is also an extensible answer for how to train with TFRecord files. It does not yet take into account the evaluation data, since this needs more coordination to be done.
I just want to read image and text in your tfrecords file: fsns/train/train-00511-of-00512Hi in FSNS datasets.
But when I do the work follow the guide in Tfrecords Guideļ¼ link, it shows error message following:
InvalidArgumentError (see above for traceback): Name: <unknown>, Feature: encoded (data type: string) is required but could not be found.
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=4, Nsparse=0, Tdense=[DT_STRING, DT_INT64, DT_STRING, DT_INT64], dense_shapes=[[], [], [], []], sparse_types=[], _device="/job:localhost/replica:0/task:0/cpu:0"](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/Const, ParseSingleExample/ParseExample/Const_1, ParseSingleExample/ParseExample/Const_2, ParseSingleExample/ParseExample/Const_3)]]
It seems that the key name is wrong? My code is attached, could author or any other check my code and help me to fix the bug?
import tensorflow as tf
import skimage.io as io
IMAGE_HEIGHT = 384
IMAGE_WIDTH = 384
tfrecords_filename = '/home/wangjianbo_i/google_model/MyCode/models/attention_ocr/python/datasets/data/fsns/train/train-00511-of-00512'
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'encoded': tf.FixedLenFeature([], tf.string),
'text':tf.FixedLenFeature([], tf.string)
})
image = tf.decode_raw(features['encoded'], tf.uint8)
text = tf.decode_raw(features['text'], tf.uint8)
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)
image_shape = tf.stack([height, width, 3])
image = tf.reshape(image, image_shape)
image_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3), dtype=tf.int32)
resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
target_height=IMAGE_HEIGHT,
target_width=IMAGE_WIDTH)
images = tf.train.shuffle_batch( [resized_image],
batch_size=2,
capacity=30,
num_threads=2,
min_after_dequeue=10)
return images,text
filename_queue = tf.train.string_input_producer(
[tfrecords_filename], num_epochs=10)
image,text = read_and_decode(filename_queue)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# Let's read off 3 batches just for example
for i in xrange(3):
img,text= sess.run([image,text])
print img,text
print(img[0, :, :, :].shape)
print('current batch')
io.imshow(img[0, :, :, :])
io.show()
io.imshow(img[1, :, :, :])
io.show()
coord.request_stop()
coord.join(threads)
To read the FSNS dataset you can use https://github.com/tensorflow/models/blob/master/attention_ocr/python/datasets/fsns.py directly or as a reference.
The feature keys are incorrect in the code snippet you provided - missing the 'image/' prefix. It should be 'image/encoded' instead of just 'encoded', 'image/width' instead of 'image' and so on. Refer to the Table 4 in the paper.