How to read images and text in FSNS datasets?

How to read images and text in FSNS datasets? - python

I just want to read the images and text from the TFRecords file fsns/train/train-00511-of-00512 in the FSNS dataset.
But when I follow the TFRecords guide (link), I get the following error message:
InvalidArgumentError (see above for traceback): Name: <unknown>, Feature: encoded (data type: string) is required but could not be found.
[[Node: ParseSingleExample/ParseExample/ParseExample = ParseExample[Ndense=4, Nsparse=0, Tdense=[DT_STRING, DT_INT64, DT_STRING, DT_INT64], dense_shapes=[[], [], [], []], sparse_types=[], _device="/job:localhost/replica:0/task:0/cpu:0"](ParseSingleExample/ExpandDims, ParseSingleExample/ParseExample/ParseExample/names, ParseSingleExample/ParseExample/ParseExample/dense_keys_0, ParseSingleExample/ParseExample/ParseExample/dense_keys_1, ParseSingleExample/ParseExample/ParseExample/dense_keys_2, ParseSingleExample/ParseExample/ParseExample/dense_keys_3, ParseSingleExample/ParseExample/Const, ParseSingleExample/ParseExample/Const_1, ParseSingleExample/ParseExample/Const_2, ParseSingleExample/ParseExample/Const_3)]]
It seems that the key name is wrong? My code is attached; could the author or anyone else check my code and help me fix the bug?
import tensorflow as tf
import skimage.io as io
IMAGE_HEIGHT = 384
IMAGE_WIDTH = 384
tfrecords_filename = '/home/wangjianbo_i/google_model/MyCode/models/attention_ocr/python/datasets/data/fsns/train/train-00511-of-00512'
def read_and_decode(filename_queue):
    """Read one serialized example from the queue and batch its image.

    Args:
        filename_queue: queue of TFRecord file names, e.g. the result of
            ``tf.train.string_input_producer``.

    Returns:
        A ``(images, text)`` pair. ``images`` is a shuffled batch of 2 images
        cropped/padded to IMAGE_HEIGHT x IMAGE_WIDTH; ``text`` is the decoded
        ``text`` feature of a single record (it is not batched).
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # No defaults are given, so every key listed here is required to exist
    # in the record; parsing fails otherwise.
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'encoded': tf.FixedLenFeature([], tf.string),
        'text': tf.FixedLenFeature([], tf.string),
    }
    features = tf.parse_single_example(serialized_example,
                                       features=feature_spec)

    # Interpret both byte strings as flat uint8 vectors.
    image = tf.decode_raw(features['encoded'], tf.uint8)
    text = tf.decode_raw(features['text'], tf.uint8)

    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)

    # Restore the (height, width, 3) layout from the flat vector.
    image = tf.reshape(image, tf.stack([height, width, 3]))

    image_size_const = tf.constant((IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                                   dtype=tf.int32)

    # shuffle_batch needs a static shape, so crop/pad to a fixed size first.
    resized_image = tf.image.resize_image_with_crop_or_pad(
        image=image,
        target_height=IMAGE_HEIGHT,
        target_width=IMAGE_WIDTH)

    images = tf.train.shuffle_batch([resized_image],
                                    batch_size=2,
                                    capacity=30,
                                    num_threads=2,
                                    min_after_dequeue=10)
    return images, text
filename_queue = tf.train.string_input_producer(
[tfrecords_filename], num_epochs=10)
image,text = read_and_decode(filename_queue)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# Let's read off 3 batches just for example
for i in xrange(3):
img,text= sess.run([image,text])
print img,text
print(img[0, :, :, :].shape)
print('current batch')
io.imshow(img[0, :, :, :])
io.show()
io.imshow(img[1, :, :, :])
io.show()
coord.request_stop()
coord.join(threads)

To read the FSNS dataset you can use https://github.com/tensorflow/models/blob/master/attention_ocr/python/datasets/fsns.py directly or as a reference.
The feature keys in the code snippet you provided are incorrect: they are missing the 'image/' prefix. It should be 'image/encoded' instead of just 'encoded', 'image/width' instead of 'width', and so on. Refer to Table 4 in the paper.

Related

How to convert MapDataset variable into np.array?

Environment
Ubuntu 18.04
Python 3.6.8
Tensorflow 1.12.0
The problem
I want to convert the data with MapDataset class into numpy.array to check the contents.
Data detail
I have data as TFRecord. This data contains images (150x150x3) and labels (1 or 0). This TFRecord was created from the following code.
def int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` holding an Int64List."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
def bytes_feature(value):
    """Wrap a single byte string in a ``tf.train.Feature`` holding a BytesList."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)
# Fixed seed so the shuffled train/test split is reproducible.
np.random.seed(42)
rnd_index = np.random.permutation(len(image_paths))

# The last 1000 shuffled samples are held out for testing.
X_train, y_train = image_paths[rnd_index[:-1000]], labels[rnd_index[:-1000]]
X_test, y_test = image_paths[rnd_index[-1000:]], labels[rnd_index[-1000:]]

writer = tf.python_io.TFRecordWriter('training.tfrecord')
try:
    for image_path, label in zip(X_train, y_train):
        image = cv2.imread(image_path)
        # Dividing by the Python float 255.0 promotes the resized image to a
        # float64 array, so the raw bytes stored below are float64 values.
        image = cv2.resize(image, (150, 150)) / 255.0
        img_raw = image.tostring()
        ex = tf.train.Example(features=tf.train.Features(feature={
            'image': bytes_feature(img_raw),
            'label': int64_feature(label)
        }))
        writer.write(ex.SerializeToString())
finally:
    # Close the writer even if reading or encoding an image fails, so the
    # records written so far are flushed and the file handle is released.
    writer.close()
Parsing data
I parsed the data by following code.
def parse(example_proto):
    """Decode one serialized ``tf.train.Example`` into an (image, label) pair.

    Args:
        example_proto: scalar string tensor holding a serialized example with
            an ``image`` bytes feature and a ``label`` int64 feature.

    Returns:
        ``(image, label)`` where ``image`` is a float32 tensor of shape
        (150, 150, 3) and ``label`` is an int32 scalar tensor.
    """
    features = {
        'label': tf.FixedLenFeature((), tf.int64),
        'image': tf.FixedLenFeature((), tf.string)
    }
    parsed_features = tf.parse_single_example(example_proto, features)

    # The records were written from `cv2.resize(...) / 255.0`, which is a
    # float64 array. Decoding those bytes as float32 would yield twice as
    # many values as 150*150*3 and break the reshape, so decode as float64
    # and cast afterwards to keep the float32 output type.
    image = tf.decode_raw(parsed_features['image'], tf.float64)
    image = tf.reshape(image, [150, 150, 3])
    image = tf.cast(image, tf.float32)

    label = tf.cast(parsed_features['label'], tf.int32)
    return image, label
with tf.Session() as sess:
    # Read the serialized records and decode each one with parse().
    dataset = tf.data.TFRecordDataset('training.tfrecord').map(parse)
I want to get the image from 'dataset' variable, but I don't know how to do.
I tried running the following code on the jupyter notebook.
with tf.Session() as sess:
    # Read the serialized records and decode each one with parse().
    dataset = tf.data.TFRecordDataset('training.tfrecord').map(parse)

    # An initializable iterator must have its initializer run before use.
    iterator = dataset.make_initializable_iterator()
    sess.run(iterator.initializer)

    # next_element is the (image, label) pair returned by parse();
    # evaluate only the image of the first record.
    next_element = iterator.get_next()
    elem = next_element[0].eval()

# Bare expression: displays the dataset object in the notebook.
dataset
But I got the error message.
InvalidArgumentError: Feature: image (data type: string) is required but could not be found.
[[{{node ParseSingleExample/ParseSingleExample}} = ParseSingleExample[Tdense=[DT_STRING, DT_INT64], dense_keys=["image", "label"], dense_shapes=[[], []], num_sparse=0, sparse_keys=[], sparse_types=[]](arg0, ParseSingleExample/Const, ParseSingleExample/Const_1)]]
[[node IteratorGetNext (defined at <ipython-input-3-350cc5050691>:19) = IteratorGetNext[output_shapes=[[150,150,3], []], output_types=[DT_FLOAT, DT_INT32], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]
I'm a beginner of Tensorflow, so I cannot understand what this message means, and how to deal with it.

Reshape.error tensorflow -- [[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32]....,

I'm basically trying to read images as a batch using tfrecords, but I'm running into this error. It seems to be resizing the images correctly according to the print statement, but I'm getting the error at the end of print statement.
Part of the traceback error looks like this:
next(self.gen)
File "C:\Users\Moondra\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 40400 values, but the requested shape has 30300
[[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](DecodeRaw, stack)]]
print statements before the traceback error (batch shape look correct):
(5, 200, 200, 3)
(5, 200, 200, 3)
(5, 200, 200, 3)
my code
#tfrecords and batch images
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imread
import os
import tensorflow as tf
IMAGE_HEIGHT = 200
IMAGE_WIDTH = 200
tfrecords_filename = r'C:\Users\Moondra\Desktop\Transfer Learning Tutorials\testing.tfrecords'
def read_and_decode(filename_queue):
    """Read one serialized example from the queue and batch its image.

    Args:
        filename_queue: queue of TFRecord file names, e.g. the result of
            ``tf.train.string_input_producer``.

    Returns:
        A shuffled batch of 5 images, each cropped/padded to 200 x 200.
    """
    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # No defaults are given, so every key listed here is required.
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    }
    features = tf.parse_single_example(serialized_example,
                                       features=feature_spec)

    # Flat uint8 vector of the stored pixel bytes.
    image = tf.decode_raw(features['image_raw'], tf.uint8)

    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)

    # Restore the (height, width, 3) layout; this requires the stored byte
    # count to equal height * width * 3.
    image = tf.reshape(image, tf.stack([height, width, 3]))

    # shuffle_batch needs a static shape, so crop/pad to a fixed size first.
    resized_image = tf.image.resize_image_with_crop_or_pad(
        image=image,
        target_height=200,
        target_width=200)

    images = tf.train.shuffle_batch([resized_image],
                                    batch_size=5,
                                    num_threads=3,
                                    capacity=30,
                                    min_after_dequeue=10)
    return images
# Queue that yields the TFRecord file name for 10 epochs.
filename_queue = tf.train.string_input_producer(
    [tfrecords_filename], num_epochs=10)

image = read_and_decode(filename_queue)

# num_epochs is backed by a local variable, so the local initializer has to
# run in addition to the global one.
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # Pull three batches and report their shapes.
    for i in range(3):
        img = sess.run(image)
        print(img.shape)
    coord.request_stop()
    # Was `corrd.join(threads)`, a NameError that fired after the loop.
    coord.join(threads)

image and its name mismatch after read images from TFRecords file

I write two methods using TensorFlow:
convert_imgs_to_TFRecords, converts all images in ./dataset to the TFRecords file img.tfrecords
read_imgs_from_TFRecords, reads img.tfrecords and gets the images and their information, including height, width, channel and name.
But the images mismatch their names.
For example, A image named 001.jpg and B image named 002.jpg are converted to img.tfrecords, but A image get name 002.jpg and B image get 001.jpg after read_imgs_from_TFRecords.
The two methods as follows:
def convert_imgs_to_TFRecords(imgs_dir='./dataset', tfrecords_name='img.tfrecords'):
    """Write every ``.jpg`` file found in a directory to one TFRecords file.

    Each record stores the file name, the image height, width and channel
    count, and the raw pixel bytes as returned by ``cv2.imread``.

    Args:
        imgs_dir: directory that is listed for image files.
        tfrecords_name: path of the TFRecords file to create.
    """
    img_filenames_list = os.listdir(imgs_dir)
    writer = tf.python_io.TFRecordWriter(tfrecords_name)
    try:
        for item in img_filenames_list:
            file_extension = item.split('.')[-1]
            if file_extension != 'jpg':
                continue
            # Build the path from imgs_dir rather than a hard-coded
            # './dataset', so a non-default directory is actually read.
            img_filename = os.path.join(imgs_dir, item)
            print("writing {0}".format(item))
            img = cv2.imread(img_filename)  # uint8 dtype
            rows = img.shape[0]
            cols = img.shape[1]
            channels = img.shape[2]
            example = tf.train.Example(features=tf.train.Features(feature={
                'name': _bytes_feature(item.encode('utf-8')),  # str to bytes
                'height': _int64_feature(rows),
                'width': _int64_feature(cols),
                'channel': _int64_feature(channels),
                'img': _bytes_feature(img.tostring())
            }))
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even if an image fails to load or encode.
        writer.close()
and
def read_imgs_from_TFRecords(tfrecords_file='./img.tfrecords'):
    """Read 22 records from a TFRecords file and write each image to disk.

    Every record is expected to carry the ``name``, ``height``, ``width``,
    ``channel`` and ``img`` features. The image is written with
    ``cv2.imwrite`` under the name read for it.

    Args:
        tfrecords_file: path of the TFRecords file to read.
    """
    filename_queue = tf.train.string_input_producer(
        string_tensor=[tfrecords_file],
        num_epochs=None,
        shuffle=False,
        seed=None,
        capacity=32,
        shared_name=None,
        name=None,
        cancel_op=None)

    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    feature_spec = {
        'name': tf.FixedLenFeature([], tf.string),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channel': tf.FixedLenFeature([], tf.int64),
        'img': tf.FixedLenFeature([], tf.string),
    }
    features = tf.parse_single_example(serialized_example,
                                       features=feature_spec)

    # Flat uint8 vector of the stored pixel bytes.
    image = tf.decode_raw(features['img'], tf.uint8)
    height = features['height']
    width = features['width']
    channel = features['channel']
    name = features['name']

    print("ready to run session")
    init_op = tf.group(tf.local_variables_initializer(),
                       tf.global_variables_initializer())

    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for _ in range(22):
            # NOTE(review): every eval() below is a separate session run, so
            # each one presumably pulls its own record from the reader and
            # the values may not belong to the same example - confirm.
            img = image.eval()
            h = height.eval()
            w = width.eval()
            c = channel.eval()
            title = name.eval().decode()  # bytes to str
            img = img.reshape([h, w, c])
            print('showing ' + title)
            cv2.imwrite(title, img)

        coord.request_stop()
        coord.join(threads)

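As Gphilo and Jie.Zhou said in the comments, all the pieces of a single example should be fetched in a single sess.run call; each separate eval() call advances the reader to the next record, so the pieces end up coming from different examples.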
So, I correct
img = image.eval()
h, w, c = [height.eval(), width.eval(), channel.eval()]
title = name.eval()
to
img, h, w, c, title = sess.run([image, height, width, channel, name])
The two methods are just trying tf.TFRecord, and one should better use Datasets API in your project.

Tensorflow MNIST TFRecord

How would I go about changing the MNIST tutorial to use TFRecords instead of the odd format the tutorial downloads from the web?
I used build_image_data.py from the inception model to create my TFRecords containing 200x200 RGB images and intend to train this on a 1080Ti, but I can't find any good examples on how to load TFRecords and feed them into a convolutional neural network.

I did a similar thing as you intend doing. I also took the same script to build image data. My code for reading the data and training it is
import tensorflow as tf
height = 28
width = 28
tfrecords_train_filename = 'train-00000-of-00001'
tfrecords_test_filename = 'test-00000-of-00001'
def read_and_decode(filename_queue):
    """Read one example from the queue and decode it to a greyscale image.

    Args:
        filename_queue: queue of TFRecord file names, e.g. the result of
            ``tf.train.string_input_producer``.

    Returns:
        ``(image, image_label)``: a float32 image tensor reshaped to
        (height, width, 1) using the module-level ``height`` and ``width``,
        and the int32 class label.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/height': tf.FixedLenFeature([], tf.int64),
            'image/width': tf.FixedLenFeature([], tf.int64),
            'image/colorspace': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/channels': tf.FixedLenFeature([], tf.int64),
            'image/class/label': tf.FixedLenFeature([], tf.int64),
            'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/format': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
            'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
        })
    image_buffer = features['image/encoded']
    image_label = tf.cast(features['image/class/label'], tf.int32)

    # Decode the jpeg. The input tensor is passed as `values`; given
    # positionally as the second argument it would be taken as
    # `default_name` instead.
    with tf.name_scope('decode_jpeg', values=[image_buffer]):
        # decode
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # and convert to single precision data type
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    image = tf.image.rgb_to_grayscale(image)

    # Fix the static shape using the module-level height and width.
    image_shape = tf.stack([height, width, 1])
    image = tf.reshape(image, image_shape)
    return image, image_label
def inputs(filename, batch_size, num_epochs):
    """Build a shuffled batch of (image, label) pairs from one TFRecord file.

    Args:
        filename: path of the TFRecord file to read.
        batch_size: number of examples per returned batch.
        num_epochs: number of passes over the file; any falsy value (``None``
            or 0) means the file is cycled without limit. When a limit is
            given, the producer keeps its epoch counter in a local variable,
            so ``tf.local_variables_initializer()`` must be run as well.

    Returns:
        ``(images, sparse_labels)``: the batched image tensor and the batched
        integer class labels.
    """
    if not num_epochs:
        num_epochs = None
    with tf.name_scope('input'):
        # Pass the requested epoch limit through; it used to be hard-coded
        # to None, which silently ignored the argument.
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)
        image, label = read_and_decode(filename_queue)
        # Shuffle the examples and collect them into batch_size batches.
        images, sparse_labels = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, num_threads=2,
            capacity=1000 + 3 * batch_size,
            min_after_dequeue=1000)
        return images, sparse_labels
# Input pipeline: batches of 200 flattened images with one-hot labels.
image, label = inputs(filename=tfrecords_train_filename, batch_size=200, num_epochs=None)
image = tf.reshape(image, [-1, 784])
# Labels are shifted down by one before one-hot encoding into 10 classes.
label = tf.one_hot(label - 1, 10)

# Create the model
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
y_ = tf.placeholder(tf.float32, [None, 10])

# Loss and optimizer.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
cross_entropy = tf.reduce_mean(per_example_loss)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Fetch a batch from the queue, then feed it to the model's placeholders.
    for i in range(1000):
        img, lbl = sess.run([image, label])
        sess.run(train_step, feed_dict={x: img, y_: lbl})

    # Accuracy is reported on one more batch from the same input pipeline.
    img, lbl = sess.run([image, label])
    print(sess.run(accuracy, feed_dict={x: img, y_: lbl}))

    coord.request_stop()
    coord.join(threads)
This is a super easy model for classifying mnist. However I think it is also an extensible answer for how to train with TFRecord files. It does not yet take into account the evaluation data, since this needs more coordination to be done.

Tensorflow: Unable to visualize jpeg images

I used a script similar to the one here to convert my dataset to sharded TFRecords. But when I attempt to read it using the script below, TensorFlow freezes and I have to kill the process using kill. (Note: right now I am working in CPU mode.)
def parse_example_proto(example_serialized):
    """Parse one serialized example into image, label and class text.

    Args:
        example_serialized: scalar string tensor holding a serialized example.

    Returns:
        ``(float_image, label, text)``: the decoded JPEG as a float32 tensor
        of shape (800, 480, 3), the int32 class label and the
        ``image/class/text`` feature.
    """
    # Each key has a default, so a missing feature does not fail parsing.
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
        'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
                                                default_value=-1),
        'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
                                               default_value=''),
    }
    features = tf.parse_single_example(example_serialized, feature_map)

    encoded_jpeg = features['image/encoded']
    init_image = tf.image.decode_jpeg(encoded_jpeg, channels = 3)
    # Declare the static shape, then reshape to that same shape.
    init_image.set_shape([800,480,3])
    target_shape = tf.pack([800, 480, 3])
    image = tf.reshape(init_image, target_shape)

    float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    label = tf.cast(features['image/class/label'], dtype=tf.int32)
    return float_image , label, features['image/class/text']
def batch_inputs(batch_size, train,sess, num_preprocess_threads=4,
num_readers=1):
with tf.name_scope('batch_processing'):
tf_record_pattern = os.path.join('/home/raarora/', '%s-*' % 'train')
data_files = tf.gfile.Glob(tf_record_pattern)
if data_files is None:
raise ValueError('No data files found for this dataset')
# print data_files
# Create filename_queue
if train:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=True,
capacity=8)
else:
filename_queue = tf.train.string_input_producer(data_files,
shuffle=False,
capacity=1)
reader =tf.TFRecordReader()
_, example_serialized = reader.read(filename_queue)
image, label, _ = parse_example_proto(example_serialized)
examples_per_shard = 201
min_queue_examples = examples_per_shard * 2
images, labels = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, num_threads=4,
capacity=min_queue_examples + 3 * batch_size,
min_after_dequeue=min_queue_examples)
print images.eval(session=sess)
return s,images,labels
if __name__ == '__main__':
    sess = tf.Session()
    coord = tf.train.Coordinator()
    # Queue runners are started here, before batch_inputs() builds its queues.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    s, _, _ = batch_inputs(2, 1, sess)

Was able to resolve this. I thought a TFRecord was a sort of dictionary where you only need to give the keys you require, but after giving the entire feature map, along with small changes to how the image was processed later, it worked.
Another mistake I made was that the queue runners should be started after calling tf.train.shuffle_batch(). I don't know if it's a bug or a gap in my understanding.
Here's the working code for reading the data
def getImage(filename):
    """Read one example from a TFRecord file and decode its image and label.

    Relies on module-level ``height``, ``width`` and ``nClass`` being defined
    (they are not set inside this function).

    Args:
        filename: path of the TFRecord file to read.

    Returns:
        ``(label, image)``: the one-hot encoded label with ``nClass`` entries
        and the float32 image reshaped to (height, width, 3).
    """
    # convert filenames to a queue for an input pipeline.
    filenameQ = tf.train.string_input_producer([filename],num_epochs=None)
    # object to read records
    recordReader = tf.TFRecordReader()
    # read the full set of features for a single example
    key, fullExample = recordReader.read(filenameQ)
    # parse the full example into its component features.
    features = tf.parse_single_example(
        fullExample,
        features={
            'image/height': tf.FixedLenFeature([], tf.int64),
            'image/width': tf.FixedLenFeature([], tf.int64),
            'image/colorspace': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
            'image/channels': tf.FixedLenFeature([], tf.int64),
            'image/class/label': tf.FixedLenFeature([],tf.int64),
            'image/class/text': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
            'image/format': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
            'image/filename': tf.FixedLenFeature([], dtype=tf.string,default_value=''),
            'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
        })
    # now we are going to manipulate the label and image features
    label = features['image/class/label']
    image_buffer = features['image/encoded']
    # Decode the jpeg. The input tensor is passed as `values`; given
    # positionally as the second argument it would be taken as
    # `default_name` instead.
    with tf.name_scope('decode_jpeg', values=[image_buffer]):
        # decode
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # and convert to single precision data type
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Fix the static shape using the module-level height and width.
    image=tf.reshape(image,[height,width,3])
    # Re-define label as a "one-hot" vector; labels are shifted down by one
    # first. The former tf.pack() wrapper around a single tensor was a no-op
    # and tf.pack no longer exists in TensorFlow 1.x, so it is dropped.
    label=tf.one_hot(label-1, nClass)
    return label, image
label, image = getImage("train-00000-of-00001")
imageBatch, labelBatch = tf.train.shuffle_batch(
[image, label], batch_size=2,
capacity=20,
min_after_dequeue=10)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
# start the threads used for reading files
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
batch_xs, batch_ys = sess.run([imageBatch, labelBatch])
print batch_xs
coord.request_stop()
coord.join(threads)
Note: I wasnt clear about sharded records so I used just one shard.
Credits to https://agray3.github.io/2016/11/29/Demystifying-Data-Input-to-TensorFlow-for-Deep-Learning.html

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to read images and text in FSNS datasets? - python

Related

How to convert MapDataset variable into np.array?

Reshape.error tensorflow -- [[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32]....,

image and its name mismatch after read images from TFRecords file

Tensorflow MNIST TFRecord

Tensorflow: Unable to visualize jpeg images

Categories

Resources