Tensorflow mixes up images and labels when making batch - python

So I've been stuck on this problem for weeks. I want to make an image batch from a list of image filenames. I insert the filename list into a queue and use a reader to get the file. The reader then returns the filename and the read image file.
My problem is that when I make a batch using the decoded jpg and the labels from the reader, tf.train.shuffle_batch() mixes up the images and the filenames so that now the labels are in the wrong order for the image files. Is there something I am doing wrong with the queue/shuffle_batch and how can I fix it such that the batch comes out with the right labels for the right files?
Much thanks!
import tensorflow as tf
from tensorflow.python.framework import ops
def preprocess_image_tensor(image_tf):
    image = tf.image.convert_image_dtype(image_tf, dtype=tf.float32)
    image = tf.image.resize_image_with_crop_or_pad(image, 300, 300)
    image = tf.image.per_image_standardization(image)
    return image
# original image names and labels
image_paths = ["image_0.jpg", "image_1.jpg", "image_2.jpg", "image_3.jpg", "image_4.jpg", "image_5.jpg", "image_6.jpg", "image_7.jpg", "image_8.jpg"]
labels = [0, 1, 2, 3, 4, 5, 6, 7, 8]
# converting arrays to tensors
image_paths_tf = ops.convert_to_tensor(image_paths, dtype=tf.string, name="image_paths_tf")
labels_tf = ops.convert_to_tensor(labels, dtype=tf.int32, name="labels_tf")
# getting tensor slices
image_path_tf, label_tf = tf.train.slice_input_producer([image_paths_tf, labels_tf], shuffle=False)
# getting image tensors from jpeg and performing preprocessing
image_buffer_tf = tf.read_file(image_path_tf, name="image_buffer")
image_tf = tf.image.decode_jpeg(image_buffer_tf, channels=3, name="image")
image_tf = preprocess_image_tensor(image_tf)
# creating a batch of images and labels
batch_size = 5
num_threads = 4
images_batch_tf, labels_batch_tf = tf.train.batch([image_tf, label_tf], batch_size=batch_size, num_threads=num_threads)
# running testing session to check order of images and labels
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print(image_path_tf.eval())
    print(label_tf.eval())
    coord.request_stop()
    coord.join(threads)

Wait... isn't your tf usage a little weird?
You are basically running the graph twice by calling:
print(image_path_tf.eval())
print(label_tf.eval())
Each eval() triggers its own graph execution and dequeues a fresh element from the slice_input_producer queue, so the printed path and the printed label come from two different examples. And since you are only asking for image_path_tf and label_tf, none of the ops downstream of this line (the file read, the decode, the batch) are even run:
image_path_tf, label_tf = tf.train.slice_input_producer([image_paths_tf, labels_tf], shuffle=False)
Maybe try fetching both batched tensors in a single run call instead:
image_paths, labels = sess.run([images_batch_tf, labels_batch_tf])
print(image_paths)
print(labels)
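And if you want to sanity-check the pairing before batching, the same rule applies to the un-batched tensors: fetch them in one run call so they come from the same dequeued slice. A quick sketch:
# fetch path and label together so they belong to the same example
path, label = sess.run([image_path_tf, label_tf])
print(path, label)  # e.g. b'image_0.jpg' 0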

From your code I'm unsure how your labels are encoded/extracted from the jpeg images. I used to encode everything in the same file, but have since found a much more elegant solution. Assuming you can get a list of filenames, image_paths and a numpy array of labels labels, you can bind them together and operate on individual examples with tf.train.slice_input_producer then batch them together using tf.train.batch.
import tensorflow as tf
from tensorflow.python.framework import ops
shuffle = True
batch_size = 128
num_threads = 8
def get_data():
    """
    Return image_paths, labels such that label[i] corresponds to image_paths[i].
    image_paths: list of strings
    labels: list/np array of labels
    """
    raise NotImplementedError()

def preprocess_image_tensor(image_tf):
    """Preprocess a single image."""
    image = tf.image.convert_image_dtype(image_tf, dtype=tf.float32)
    image = tf.image.resize_image_with_crop_or_pad(image, 300, 300)
    image = tf.image.per_image_standardization(image)
    return image
image_paths, labels = get_data()
image_paths_tf = ops.convert_to_tensor(image_paths, dtype=tf.string, name='image_paths')
labels_tf = ops.convert_to_tensor(labels, dtype=tf.int32, name='labels')
image_path_tf, label_tf = tf.train.slice_input_producer([image_paths_tf, labels_tf], shuffle=shuffle)
# preprocess single image paths
image_buffer_tf = tf.read_file(image_path_tf, name='image_buffer')
image_tf = tf.image.decode_jpeg(image_buffer_tf, channels=3, name='image')
image_tf = preprocess_image_tensor(image_tf)
# batch the results
image_batch_tf, labels_batch_tf = tf.train.batch([image_tf, label_tf], batch_size=batch_size, num_threads=num_threads)
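To actually pull matched batches out, you still need the usual coordinator/queue-runner boilerplate around sess.run. A minimal sketch, reusing the graph above:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # a single run call fetches images and labels that belong together
    images, batch_labels = sess.run([image_batch_tf, labels_batch_tf])
    coord.request_stop()
    coord.join(threads)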

Related

ValueError: Cannot reshape a tensor with 150528 elements to shape [224,150528]

I am new to TensorFlow and still learning, so I apologize if I am missing something obvious. Basically, my problem is that I am trying to set up a simple image classifier with TensorFlow in Python. I figured out how to create my datasets, but when I try to get the training process going, I get this error:
ValueError: Cannot reshape a tensor with 150528 elements to shape [224,150528] (33718272 elements) for 'dnn/input_from_feature_columns/input_layer/Image/Reshape' (op: 'Reshape') with input shapes: [224,224,3], [2] and with input tensors computed as partial shapes: input[1] = [224,150528].
I looked at some other posts here on Stack Overflow about this problem, and they said the asker was reshaping the data incorrectly; from what I can tell from my error, I have the same problem. The thing is, I am using a premade estimator and am not calling any reshaping functions myself, so I assume the estimator is doing the reshaping for me. How can I go about resolving this error? Thanks for any help; here is my code.
Training Code:
import tensorflow as tf
import cv2
import numpy as np
from DatasetCreator import DatasetCreator
trainingImagesPath = "TrainingImages"
#Loads the image data
def load_image(addr):
    imageDimensions = (224, 224)
    image = cv2.imread(addr)
    # Resize the image to the size we need
    image = cv2.resize(image, imageDimensions, interpolation=cv2.INTER_CUBIC)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB color space
    image = image.astype(np.float32)
    print(image.shape)
    return image
#Load in the datasets
with tf.Graph().as_default() as graph:
    dataCreator = DatasetCreator()
    trainingDataset = dataCreator.generateDataset(trainingImagesPath)
    # Create a training iterator for getting info from the dataset
    trainingIterator = trainingDataset.make_one_shot_iterator()
    next_element = trainingIterator.get_next()

def train_input_fn():
    with tf.Session(graph=graph) as sess:
        features = sess.run(next_element)
        # Get the image path
        imagePath = str(features["ImagePath"])
        imagePath = imagePath[2:len(imagePath)-1]
        # Get the label
        label = features["ImageLabel"]
        image = load_image(imagePath)  # Get image data
        return {"Image": image, "Label": label}

featureColumns = [tf.feature_column.numeric_column("Image", [224, 224, 3]),
                  tf.feature_column.numeric_column("Label")]

estimator = tf.estimator.DNNClassifier(
    model_dir="Model",
    feature_columns=featureColumns,
    hidden_units=[1024, 512, 256],
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.01)
)

print("Began training")
estimator.train(input_fn=train_input_fn, steps=1000)
Here is my code that creates my training dataset:
import tensorflow as tf
import cv2
from random import shuffle
import glob
import numpy as np
class DatasetCreator:
    def __init__(self):
        pass

    def generateDataset(self, trainingImagesPath):
        addrs = glob.glob(trainingImagesPath + "/*.jpg")  # Get list of paths of all the images in folder
        # Label each address
        labels = [0 if "car" in image else 1 for image in addrs]  # Compile list of labels
        # Divide the data into 60% train, 20% validation, and 20% test
        train_addrs = addrs[0:int(0.6*len(addrs))]
        train_labels = labels[0:int(0.6*len(labels))]
        val_addrs = addrs[int(0.6*len(addrs)):int(0.8*len(addrs))]
        val_labels = labels[int(0.6*len(addrs)):int(0.8*len(addrs))]
        test_addrs = addrs[int(0.8*len(addrs)):]
        test_labels = labels[int(0.8*len(labels)):]
        # Create dataset
        dataset = tf.data.Dataset.from_tensor_slices(
            {"ImagePath": train_addrs,
             "ImageLabel": train_labels})
        return dataset

TensorFlow read and decode BATCH of images

Using tf.train.string_input_producer and tf.image.decode_jpeg I manage to read from disk and decode a single image.
This is the code:
# -------- Graph
filename_queue = tf.train.string_input_producer(
    [img_path, img_path])
image_reader = tf.WholeFileReader()
key, image_file = image_reader.read(filename_queue)
image = tf.image.decode_jpeg(image_file, channels=3)
# Run my network
logits = network.get_logits(image)
# -------- Session
sess = tf.Session()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
logits_output = sess.run(logits)
The thing is that when I look at the shape of logits_output, I only get output for 1 image, even though the queue is 2 images long.
How can I read and decode the entire queue?
tf.WholeFileReader(), along with tf.train.string_input_producer(), works as an iterator and thus does not provide an easy way to evaluate the size of the complete dataset it is handling.
To obtain batches of N samples out of it, you could instead use image_reader.read_up_to(filename_queue, N).
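A rough sketch of how that could look (N is whatever batch size you want; since tf.image.decode_jpeg() works on one image string at a time, one option is to map it over the returned batch, assuming all images share the same dimensions so the results can be stacked):
keys, image_files = image_reader.read_up_to(filename_queue, N)
images = tf.map_fn(lambda f: tf.image.decode_jpeg(f, channels=3),
                   image_files, dtype=tf.uint8)
logits = network.get_logits(images)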
Note: you can achieve the same using the newer tf.data pipeline:
def _parse_function(filename):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_image(image_string)
    return image_decoded
# A vector of filenames.
filenames = tf.constant([img_path, img_path])
dataset = tf.data.Dataset.from_tensor_slices((filenames))
dataset = dataset.map(_parse_function).batch(N)
iterator = dataset.make_one_shot_iterator()
next_image_batch = iterator.get_next()
logits = network.get_logits(next_image_batch)
# ...
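One practical difference worth noting: with the tf.data version there are no queue runners or coordinator to manage, so evaluating a whole batch is a single plain run call:
with tf.Session() as sess:
    logits_output = sess.run(logits)  # logits for all N images in the batch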

Saving predicted tensor to image in TensorFlow - Graph finalized

I was able to train a model in TensorFlow with my own data; the input and output of the model are images. I then tried to get the output of the predictions and save it to a PNG image file to see what's going on, but unfortunately I am getting an error when running the function I created to test predictions. My goal is to save the prediction, which is also an image, so I can open it with a normal image viewer.
Some more on the code: in my main I am creating an estimator.
def predict_element(my_model, features):
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=features,
        num_epochs=1,
        shuffle=False)
    eval_results = my_model.predict(input_fn=eval_input_fn)
    predictions = eval_results.next()  # this returns a dict with my tensors
    prediction_tensor = predictions["y"]  # get the tensor from the dict
    image_tensor = tf.reshape(prediction_tensor, [IMG_WIDTH, -1])  # reshape to a matrix because my returned tensor is a flat 1D one
    decoded_image = tf.image.encode_png(image_tensor)
    write_image = tf.write_file("output/my_output_image.png", decoded_image)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(write_image))
def get_input():
    filename_dataset = tf.data.Dataset.list_files("features/*.png")
    label_dataset = tf.data.Dataset.list_files("labels/*.png")
    # Make a Dataset of image tensors by reading and decoding the files.
    image_dataset = filename_dataset.map(lambda x: tf.cast(tf.image.decode_png(tf.read_file(x), channels=1), tf.float32))
    l_dataset = label_dataset.map(lambda x: tf.cast(tf.image.decode_png(tf.read_file(x), channels=1), tf.float32))
    image_reshape = image_dataset.map(lambda x: tf.reshape(x, [IM_WIDTH * IM_HEIGHT]))
    label_reshape = l_dataset.map(lambda x: tf.reshape(x, [IM_WIDTH * IM_HEIGHT]))
    iterator = image_reshape.make_one_shot_iterator()
    iterator2 = label_reshape.make_one_shot_iterator()
    next_img = iterator.get_next()
    next_lbl = iterator2.get_next()
    features = []
    labels = []
    # read all 10 images and labels and put them in the arrays
    # so we can pass them to the estimator
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(10):
            t1, t2 = sess.run([next_img, next_lbl])
            features.append(t1)
            labels.append(t2)
    return {"x": np.array(features)}, np.array(labels)
def main(unused_argv):
    features, labels = get_input()  # creating the features dict {"x": }
    my_estimator = tf.estimator.Estimator(model_fn=my_cnn_model, model_dir="/tmp/my_model")
    predict_element(my_estimator, features)
The error is
Graph is finalized and cannot be modified
With some easy print() statements I could see that retrieving the dict with
eval_results = my_model.predict(input_fn=eval_input_fn)
is probably the one that finalizes the graph.
I absolutely don't know what to do or where to look for a solution here. How could I save the output?
I tried this in my model_fn:
# the last layer of my network is dropout
predictions = {
    "y": dropout
}
if mode == tf.estimator.ModeKeys.PREDICT:
    reshape1 = tf.reshape(dropout, [-1, IM_WIDTH, IM_HEIGHT])
    sliced = tf.slice(reshape1, [0, 0, 0], [1, IM_WIDTH, IM_HEIGHT])
    encoded = tf.image.encode_png(tf.cast(sliced, dtype=tf.uint8))
    outputfile = tf.write_file(params["output_path"], encoded)
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
My problem here is that I can't pass back the "outputfile" node so I can work with it.
Well, your graph is finalized and cannot be modified. You can either add these TensorFlow operations to your model (before running it) or simply write some Python code which saves the images separately (without using TensorFlow). Maybe I'll find some old code of mine as an example.
You could also create a second graph, then you can use tensorflow without changing the existing model graph.
You have to distinguish between graph nodes and evaluated objects. tf.reshape doesn't take an array as input but a graph node.
https://www.tensorflow.org/programmers_guide/graphs
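In the meantime, here is a minimal sketch of the no-TensorFlow route, assuming preds is the flat numpy array pulled out of the predict() generator and that it is already scaled to 0-255 grayscale (both assumptions, not guaranteed by your model):
import numpy as np
from PIL import Image

pred_dict = next(eval_results)  # the predict() generator yields numpy arrays
preds = pred_dict["y"]
img = preds.reshape(IM_HEIGHT, IM_WIDTH).astype(np.uint8)
# assumes the output/ directory already exists
Image.fromarray(img, mode="L").save("output/my_output_image.png")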
For everyone with the same problem, here is my solution. I don't know if this is the proper way, but it works.
In my predict function I created a second graph for the reshaping, slicing, encoding and saving, like:
pred_dict = eval_results.next()  # generator the predict function returns
preds = pred_dict["y"]  # get the predictions from the dict
# create the second graph
g = tf.Graph()
with g.as_default():
    inp = tf.Variable(preds)
    reshape1 = tf.reshape(inp, [IM_WIDTH, IM_HEIGHT, -1])
    sliced = tf.slice(reshape1, [0, 0, 0], [IM_WIDTH, IM_HEIGHT, 1])
    reshaped = tf.reshape(sliced, [IM_HEIGHT, IM_WIDTH, 1])
    encoded = tf.image.encode_png(tf.image.convert_image_dtype(reshaped, tf.uint16))
    outputfile = tf.write_file("/tmp/pred_output/prediction_img.png", encoded)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(outputfile)

Trying to expand working code to read multiple JPGs instead of just one into a tensor

I am trying to read in a whole directory into one tensor, with each file in the directory a 28x28 image that is flattened into one row, with each new row representing another image (images are titled 0001.jpg, etc). I have been able to successfully do this for one image but have had no luck creating any loop mechanism to load the files.
I realize this is probably a simple solution, but I have no clue how to go about it. If anyone has an example to point me to, or any help to offer, it would be incredibly appreciated. Thank you.
import numpy as np
import tensorflow as tf
filenames = tf.train.match_filenames_once("C:/train_data/*.jpg")
filename_queue = tf.train.string_input_producer(filenames)
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image_orig = tf.image.decode_jpeg(image_file)
images = tf.image.decode_jpeg(image_file)
images = tf.reshape(images, [-1, 784])
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    image_tensor = sess.run([images])
    coord.request_stop()
    coord.join(threads)
To load multiple images into the same tensor, you can use tf.train.batch(). For example, to combine 32 images (which must all have the same dimensions) into a single tensor, you can modify your program as follows:
filenames = tf.train.match_filenames_once("C:/train_data/*.jpg")
filename_queue = tf.train.string_input_producer(filenames)
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image_orig = tf.image.decode_jpeg(image_file)
images = tf.image.decode_jpeg(image_file)
# `image_batch` contains 32 consecutive images, packed into a single tensor.
image_batch, = tf.train.batch((images,), 32)
If your images might have different sizes, you can use an image-processing function like tf.image.resize_image_with_crop_or_pad() to convert each image to the same shape before passing it to tf.train.batch().
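For the 28x28 images in the question, that could look roughly like this (a sketch; crop/pad is just one of several resize options, and channels=1 assumes grayscale images):
images = tf.image.decode_jpeg(image_file, channels=1)
images = tf.image.resize_image_with_crop_or_pad(images, 28, 28)
images = tf.reshape(images, [28 * 28])        # flatten each image to one row
image_batch, = tf.train.batch((images,), 32)  # shape: (32, 784)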

Cannot read image successfully in tensorflow

I want to read the jpeg images into batches for image recognition. The images are in the /Image_p/ folder and the image names are listed in the label.csv file, presented like 14634_right.
My question is how to fix my code to read the images into a batch successfully?
To be more specific, I don't know whether I should write a for loop and where to implement it.
For the original code, I got this error message on the tf.train.shuffle_batch() function:
ValueError: All shapes must be fully defined: [TensorShape([Dimension(None), Dimension(None), Dimension(3)]), TensorShape([])]
My origin code:
# filepath
csv_filepath = r'C:\Users\Jeffy\OneDrive\Course\NMDA\retinaProject\label.csv'
# image parameter
pic_num = 100
pic_height = 64
pic_width = 64
batch_size = 10
# =============================================================================
# import library
import tensorflow as tf
import numpy as np
# =============================================================================
# read csv data
csv = np.loadtxt(open(csv_filepath, "rb"), delimiter=",", dtype='str')
pic_filename = ["" for x in range(pic_num)]
for i in range(pic_num):
    pic_filename[i] = eval(csv[i,0]).decode("utf-8") + '.jpeg'

# read the data into batch
for i in range(pic_num):
    # read and decode the image
    image_contents = tf.read_file('Image_p/' + eval(csv[i,0]).decode("utf-8") + '.jpeg')
    image = tf.image.decode_jpeg(image_contents, channels=3)
    image = tf.to_float(image)
    # Generate batch
    batch = tf.train.shuffle_batch([image, float(eval(csv[i,1]))],
                                   batch_size=batch_size,
                                   num_threads=1,
                                   capacity=batch_size * 100,
                                   min_after_dequeue=batch_size * 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    image_tensor = sess.run([batch])
    print(batch)
    coord.request_stop()
    coord.join(threads)
Plus, I have also written a new file which can read an image successfully (thanks for the help from martianwars).
My test code:
import tensorflow as tf
# read and decode the image
image_contents = tf.read_file('Image_p/11247_left.jpeg')
image = tf.image.decode_jpeg(image_contents, channels=3)
with tf.Session() as sess:
    img = sess.run(image)
    print(img)
image will have a (?, ?, 3) shape, since it hasn't yet been read; only the channels dimension is known, because you specified it in the decode_jpeg() function. Try printing this instead:
with tf.Session() as sess:
    img = sess.run(image)
    print(img)
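As an aside, that (?, ?, 3) shape is also exactly what the ValueError from tf.train.shuffle_batch() complains about: the batching functions need fully defined shapes. A minimal sketch of one way to pin the shape down before batching, reusing pic_height and pic_width from your parameters:
image = tf.image.decode_jpeg(image_contents, channels=3)
image = tf.image.resize_image_with_crop_or_pad(image, pic_height, pic_width)
image = tf.to_float(image)  # shape is now fully defined: (64, 64, 3)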
