I am just starting with TensorFlow, and I thought a good first step would be to adapt the CIFAR10 model for my own use. My data are not images but signals, and the whole database has a shape of [16400, 3000, 1, 1] (dimension-wise: number of samples, height, width, and a number-of-channels dimension added on purpose). I am already working on this problem with the MatConvNet toolbox, so this question is strictly about the TensorFlow mechanism. The database is a ready numpy tensor of the size above; the code below is my attempt to prepare the data to be readable for the training script.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import numpy as np
IMAGE_SIZE = 3000
data = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/data.npy')
labels = np.load('/home/tensorflow-master/tensorflow/models/image/cifar10/konsensop/labels.npy')
labels = labels-1
labels = labels.astype(int)
data = tf.cast(data,tf.float32)
labels = tf.cast(labels,tf.int64)
NUM_CLASSES = 2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 6400
def _generate_image_and_label_batch(data_sample, label, min_queue_examples,
batch_size, shuffle):
num_preprocess_threads = 16
if shuffle:
data, label_batch = tf.train.shuffle_batch(
[data_sample, label],
batch_size=batch_size,
num_threads=num_preprocess_threads,
capacity=min_queue_examples + batch_size,
min_after_dequeue=min_queue_examples)
else:
data, label_batch = tf.train.batch(
[data_sample, label],
batch_size=batch_size,
num_threads=num_preprocess_threads,
capacity=min_queue_examples + batch_size)
return data, tf.reshape(label_batch, [batch_size])
def inputs(data,labels, batch_size):
for i in xrange(0, data.shape[0] // batch_size):
data_sample = data[i,:,:,:]
label = labels[i,0]
height = 3000
width = 1
min_fraction_of_examples_in_queue = 0.4
min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN*
min_fraction_of_examples_in_queue)
print('Filling queue with %d data before starting to train' % min_queue_examples)
return _generate_image_and_label_batch(data_sample, label,
min_queue_examples, batch_size,
shuffle=True)
I'm trying to load the data I already have and generate batches the way the CIFAR10 model did, but when running the trainer code I get an error in data, labels = konsensop_input.inputs(data, labels, batch_size): UnboundLocalError: local variable 'data' referenced before assignment
data = konsensop_input.data
labels = konsensop_input.labels
def train():
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable = False)
data, labels = konsensop_input.inputs(data, labels, batch_size)
logits = konsensop_train.inference(data)
# calculate loss
loss = konsensop.loss(logits, labels)
train_op = konsensop.train(loss, global_step)
# create a saver
saver = tf.train.Saver(tf.all_variables()) #saves all variables in a graph
# build the summary operation based on the TF collection of summaries
summary_op = tf.merge_all_summaries()
# build an initialization operation to run below
init = tf.initialize_all_variables()
# start running operations on the graph
sess = tf.Session(config = tf.ConfigProto(log_device_placement=False))
sess.run(init)
# start the queue runners
tf.train.start_queue_runners(sess = sess) # what is this and what is it for?
summary_writer = tf.train.SummaryWriter( FLAGS.train_dir, sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
print ( format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step = step)
def main(argv=None):
train()
if __name__=='__main__':
tf.app.run()
I would like to figure out how to implement a reasonable data feeding technique here
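A note on the UnboundLocalError first: inside train(), the statement data, labels = konsensop_input.inputs(data, labels, batch_size) assigns to data, so Python treats data as a local name for the entire function, and the read on the right-hand side happens before that local has been bound. Renaming the outputs avoids the clash, e.g.:
batch_data, batch_labels = konsensop_input.inputs(data, labels, batch_size)
logits = konsensop_train.inference(batch_data)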
For the relatively small data set you want to work with, you might consider just loading it into a big numpy array, then iterating over it in mini-batches, which you feed to the computation graph via tf.placeholders and the feed_dict mechanism.
The mini-batch iteration could look something like this (you should probably add random shuffling after each epoch):
def iterate_batches(X, y, batch_size, num_epochs):
N = np.size(X, 0)
batches_per_epoch = N // batch_size
for i in range(num_epochs):
for j in range(batches_per_epoch):
start, stop = j*batch_size, (j+1)*batch_size
yield X[start:stop, :], y[start:stop]
(If you are not familiar with Python's yield mechanism, google for Python generators. There are lots of good introductions on the web.)
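For example, a variant of the generator with the per-epoch shuffling mentioned above added (a sketch; it assumes X and y are numpy arrays indexable along their first axis):
def iterate_batches(X, y, batch_size, num_epochs):
    N = np.size(X, 0)
    batches_per_epoch = N // batch_size
    for i in range(num_epochs):
        perm = np.random.permutation(N)  # new random order each epoch
        for j in range(batches_per_epoch):
            idx = perm[j * batch_size:(j + 1) * batch_size]
            yield X[idx], y[idx]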
Given that you have a mechanism to load the whole data set into numpy arrays X_train, y_train, you can then write your training loop like this:
train_op = ...
for X, y in iterate_batches(X_train, y_train, your_batch_size, your_num_epochs):
    sess.run([train_op], feed_dict={X_tensor: X, y_tensor: y})
Here, X_tensor and y_tensor are tf.placeholders for the data, which you have to define in your network architecture.
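For the signal database described in the question, they might look like this (a sketch; the names are illustrative, with shapes following the [16400, 3000, 1, 1] data and integer labels from above):
X_tensor = tf.placeholder(tf.float32, shape=[None, 3000, 1, 1], name='signals')
y_tensor = tf.placeholder(tf.int64, shape=[None], name='labels')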
I'm trying to create a neural network model for a Kaggle competition using the MNIST dataset. Currently, my code looks like this, since I am trying to capture certain metrics. However, I can't seem to figure out how to turn this into an output to submit.
Current:
import time
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import numpy as np
from tensorflow.python.framework import ops
ops.reset_default_graph()
#requests.packages.urllib3.disable_warnings()
import ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
else:
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context
# Load training and testing data directly from TensorFlow
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# Initialize metrics
metrics = {}
# Initialize metric names
names = ['Number of Hidden Layers', 'Nodes per Layer', 'Time in Seconds',
'Training Set Accuracy', 'Test Set Accuracy']
# Set fixed parameters
n_epochs = 20
batch_size = 50
learning_rate = 0.01
# Function that creates batch generator used in training
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
yield X_batch, y_batch
# Start timer
start = time.process_time()
n_hidden = 300
# Reset the session
tf.reset_default_graph()
#ops.reset_default_graph()
tf.set_random_seed(2141)
#tf.random.set_seed(2141)
np.random.seed(9347)
# Set X and y placeholders
X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
activation=tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, n_hidden, name="hidden2",
activation=tf.nn.relu)
logits = tf.layers.dense(hidden2, 10, name="outputs")
y_proba = tf.nn.softmax(logits)
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.Session() as sess:
tf.global_variables_initializer().run()
for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
# Record the clock time it takes
duration = time.process_time() - start
metrics['Model 1'] = [2, n_hidden, duration, acc_train, acc_test]
# Convert metrics dictionary to dataframe for display
results_summary = pd.DataFrame.from_dict(metrics, orient='index')
results_summary.columns = names
# Sort by model number
results_summary.reset_index(inplace=True)
results_summary.sort_values(by=['index'], axis=0, inplace=True)
results_summary.set_index(['index'], inplace=True)
results_summary.index.name = None
# Export to csv
results_summary.to_csv('results_summary.csv')
results_summary
I need to create an output that looks something like this in a CSV file:
ImageId Label
0 1 2
1 2 0
2 3 9
3 4 0
4 5 3
Would I have to recreate the whole thing in order to actually create "y_pred" when doing something like model.predict(X_test), or can I just reshape the existing code in some way to do this? Ideally, I would like to capture the predicted values and compare them to the true values using a confusion matrix.
Since you're using the old TensorFlow style, predict in the old TensorFlow style at the end:
feed_dict = {X: X_test}
classification = sess.run(y_proba, feed_dict)
label = np.argmax(classification, axis=-1)
Run this inside the existing with tf.Session() as sess: block, before the session closes, since the trained variable values live in the session and are lost once it ends.
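A minimal sketch of the submission output, assuming it runs inside that same session block and that the competition expects a 1-based ImageId column (pandas and numpy are already imported above as pd and np):
preds = np.argmax(sess.run(y_proba, feed_dict={X: X_test}), axis=-1)
submission = pd.DataFrame({'ImageId': np.arange(1, len(preds) + 1), 'Label': preds})
submission.to_csv('submission.csv', index=False)
# Compare predictions with the true labels via a confusion matrix
confusion = pd.crosstab(pd.Series(y_test, name='True'), pd.Series(preds, name='Predicted'))
print(confusion)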
I would like to be able to plot the training loss per batch and the average validation loss for the validation set on the same plot in Tensorboard. I ran into this issue when my validation set was too large to fit into memory, so it required batching and the use of tf.metrics update ops.
This question could apply to any Tensorflow metrics you wanted to appear on the same graph in Tensorboard.
I am able to
plot these two graphs separately (see here)
plot the validation-loss-per-validation-batch on the same graph as the training-loss-per-training-batch (this was OK when the validation set could be a single batch and I could reuse the training summary op train_summ below)
In the example code below, my issue stems from the fact that my validation summary tf.summary.scalar with name=loss gets renamed to loss_1 and is thus moved to a separate graph in Tensorboard. From what I can work out, Tensorboard plots summaries with the same tag on the same graph, regardless of which folder they are written to. This is frustrating, as train_summ (name=loss) is only ever written to the train folder and valid_summ (name=loss) is only ever written to the valid folder - but the latter is still renamed to loss_1.
The example code:
# View graphs with (Linux): $ tensorboard --logdir=/tmp/my_tf_model
import tensorflow as tf
import numpy as np
import os
import tempfile
def train_data_gen():
yield np.random.normal(size=[3]), np.array([0.5, 0.5, 0.5])
def valid_data_gen():
yield np.random.normal(size=[3]), np.array([0.8, 0.8, 0.8])
batch_size = 25
n_training_batches = 4
n_valid_batches = 2
n_epochs = 5
summary_loc = os.path.join(tempfile.gettempdir(), 'my_tf_model')
print("Summaries written to" + summary_loc)
# Dummy data
train_data = tf.data.Dataset.from_generator(train_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
valid_data = tf.data.Dataset.from_generator(valid_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle,
train_data.output_types, train_data.output_shapes)
batch_x, batch_y = iterator.get_next()
train_iter = train_data.make_initializable_iterator()
valid_iter = valid_data.make_initializable_iterator()
# Some ops on the data
loss = tf.losses.mean_squared_error(batch_x, batch_y)
valid_loss, valid_loss_update = tf.metrics.mean(loss)
# Write to summaries
train_summ = tf.summary.scalar('loss', loss)
valid_summ = tf.summary.scalar('loss', valid_loss) # <- will be renamed to "loss_1"
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
train_handle, valid_handle = sess.run([train_iter.string_handle(), valid_iter.string_handle()])
sess.run([train_iter.initializer, valid_iter.initializer])
# Summary writers
writer_train = tf.summary.FileWriter(os.path.join(summary_loc, 'train'), sess.graph)
writer_valid = tf.summary.FileWriter(os.path.join(summary_loc, 'valid'), sess.graph)
global_step = 0 # implicit as no actual training
for i in range(n_epochs):
# "Training"
for j in range(n_training_batches):
global_step += 1
summ = sess.run(train_summ, feed_dict={handle: train_handle})
writer_train.add_summary(summary=summ, global_step=global_step)
# "Validation"
sess.run(tf.local_variables_initializer())
for j in range(n_valid_batches):
_, batch_summ = sess.run([valid_loss_update, train_summ], feed_dict={handle: valid_handle})
# The following will plot the batch loss for the validation set on the loss plot with the training data:
# writer_valid.add_summary(summary=batch_summ, global_step=global_step + j + 1)
summ = sess.run(valid_summ)
writer_valid.add_summary(summary=summ, global_step=global_step) # <- I want this on the training loss graph
What I have tried
Separate tf.summary.FileWriter objects (one for training, one for validation), as recommended by this issue and this question (I think what I'm after is alluded to in the comments on that question)
The use of tf.summary.merge to merge all my training and validation/test metrics into overall summary ops; does useful book-keeping but doesn't plot what I want on the same graph
Use of the tf.summary.scalar family attribute (loss still gets renamed to loss_1)
(Complete hack solution) Use valid_loss, valid_loss_update = tf.metrics.mean(loss) on the training data and then run tf.local_variables_initializer() every training batch. This does give you the same summary op and thus puts things on the same graph but is surely not how you're meant to do this? It also doesn't generalise to other metrics.
Context
Tensorflow 1.9.0
Tensorboard 1.9.0
Python 3.5.2
The Tensorboard custom_scalar plugin is the way to solve this problem.
Here's the same example again with a custom_scalar to plot the two losses (per training batch + averaged over all validation batches) on the same plot:
# View graphs with (Linux): $ tensorboard --logdir=/tmp/my_tf_model
import os
import tempfile
import tensorflow as tf
import numpy as np
from tensorboard import summary as summary_lib
from tensorboard.plugins.custom_scalar import layout_pb2
def train_data_gen():
yield np.random.normal(size=[3]), np.array([0.5, 0.5, 0.5])
def valid_data_gen():
yield np.random.normal(size=[3]), np.array([0.8, 0.8, 0.8])
batch_size = 25
n_training_batches = 4
n_valid_batches = 2
n_epochs = 5
summary_loc = os.path.join(tempfile.gettempdir(), 'my_tf_model')
print("Summaries written to " + summary_loc)
# Dummy data
train_data = tf.data.Dataset.from_generator(
train_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
valid_data = tf.data.Dataset.from_generator(
valid_data_gen, (tf.float32, tf.float32)).repeat().batch(batch_size)
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, train_data.output_types,
train_data.output_shapes)
batch_x, batch_y = iterator.get_next()
train_iter = train_data.make_initializable_iterator()
valid_iter = valid_data.make_initializable_iterator()
# Some ops on the data
loss = tf.losses.mean_squared_error(batch_x, batch_y)
valid_loss, valid_loss_update = tf.metrics.mean(loss)
with tf.name_scope('loss'):
train_summ = summary_lib.scalar('training', loss)
valid_summ = summary_lib.scalar('valid', valid_loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
train_handle, valid_handle = sess.run([train_iter.string_handle(), valid_iter.string_handle()])
sess.run([train_iter.initializer, valid_iter.initializer])
writer_train = tf.summary.FileWriter(os.path.join(summary_loc, 'train'), sess.graph)
writer_valid = tf.summary.FileWriter(os.path.join(summary_loc, 'valid'), sess.graph)
layout_summary = summary_lib.custom_scalar_pb(
layout_pb2.Layout(category=[
layout_pb2.Category(
title='losses',
chart=[
layout_pb2.Chart(
title='losses',
multiline=layout_pb2.MultilineChartContent(tag=[
'loss/training', 'loss/valid'
]))
])
]))
writer_train.add_summary(layout_summary)
global_step = 0
for i in range(n_epochs):
for j in range(n_training_batches): # "Training"
global_step += 1
summ = sess.run(train_summ, feed_dict={handle: train_handle})
writer_train.add_summary(summary=summ, global_step=global_step)
sess.run(tf.local_variables_initializer())
for j in range(n_valid_batches): # "Validation"
_, batch_summ = sess.run([valid_loss_update, train_summ], feed_dict={handle: valid_handle})
summ = sess.run(valid_summ)
writer_valid.add_summary(summary=summ, global_step=global_step)
Here's the resulting output in Tensorboard.
#!/usr/bin/python
#-*- coding:utf-8 -*-
"""
Created on Tue Jan 2 16:31:45 2018
@author: houlinjie
"""
import tensorflow as tf
from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging
import inception_preprocessing
from inception_resnet_v2 import inception_resnet_v2, inception_resnet_v2_arg_scope
import os
import time
slim = tf.contrib.slim
import sys
import matplotlib.pyplot as plt
import numpy as np
#================ DATASET INFORMATION ======================
#State dataset directory where the tfrecord files are located
dataset_dir = '.'
# Set the amount of GPU memory to use
#config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.7
#State where your log file is at. If it doesn't exist, create it.
log_dir = './log'
#State where your checkpoint file is
checkpoint_file = './inception_resnet_v2_2016_08_30.ckpt'
#State the image size you're resizing your images to. We will use the default inception size of 299.
img_height = 600
img_width = 800
#State the number of classes to predict:
num_classes = 6
#State the labels file and read it
labels_file = './labels.txt'
labels = open(labels_file, 'r')
#Create a dictionary to refer each label to their string name
labels_to_name = {}
for line in labels:
label, string_name = line.split(':')
string_name = string_name[:-1] #Remove newline
labels_to_name[int(label)] = string_name
#Create the file pattern of your TFRecord files so that it could be recognized later on
file_pattern = 'estate_%s_*.tfrecord'
#Create a dictionary that will help people understand your dataset better. This is required by the Dataset class later.
items_to_descriptions = {
'image': 'A 3-channel RGB coloured real estate image that is either bathroom, bedroom, floorplan, kitchen, or livingroom, other.',
'label': 'A label that is as such -- 0:bathroom, 1:bedroom, 2:floorplan, 3:kitchen, 4:livingroom, 5:other'
}
#================= TRAINING INFORMATION ==================
#State the number of epochs to train
num_epochs = 10
#State your batch size
batch_size = 4
#Learning rate information and configuration (Up to you to experiment)
initial_learning_rate = 0.0002
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 2
#============== DATASET LOADING ======================
#We now create a function that creates a Dataset class which will give us many TFRecord files to feed in the examples into a queue in parallel.
def get_split(split_name, dataset_dir, file_pattern=file_pattern, file_pattern_for_counting='estate'):
'''
Obtains the split - training or validation - to create a Dataset class for feeding the examples into a queue later on. This function will
set up the decoder and dataset information all into one Dataset class so that you can avoid the brute work later on.
Your file_pattern is very important in locating the files later.
INPUTS:
- split_name(str): 'train' or 'validation'. Used to get the correct data split of tfrecord files
- dataset_dir(str): the dataset directory where the tfrecord files are located
- file_pattern(str): the file name structure of the tfrecord files in order to get the correct data
- file_pattern_for_counting(str): the string name to identify your tfrecord files for counting
OUTPUTS:
- dataset (Dataset): A Dataset class object where we can read its various components for easier batch creation later.
'''
#First check whether the split_name is train or validation
if split_name not in ['train', 'validation']:
raise ValueError('The split_name %s is not recognized. Please input either train or validation as the split_name' % (split_name))
#Create the full path for a general file_pattern to locate the tfrecord_files
file_pattern_path = os.path.join(dataset_dir, file_pattern % (split_name))
#Count the total number of examples in all of these shard
num_samples = 0
file_pattern_for_counting = file_pattern_for_counting + '_' + split_name
tfrecords_to_count = [os.path.join(dataset_dir, file) for file in os.listdir(dataset_dir) if file.startswith(file_pattern_for_counting)]
for tfrecord_file in tfrecords_to_count:
for record in tf.python_io.tf_record_iterator(tfrecord_file):
num_samples += 1
#Create a reader, which must be a TFRecord reader in this case
reader = tf.TFRecordReader
#Create the keys_to_features dictionary for the decoder
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
'image/class/label': tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}
#Create the items_to_handlers dictionary for the decoder.
items_to_handlers = {
'image': slim.tfexample_decoder.Image(),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
#Start to create the decoder
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
#Create the labels_to_name file
labels_to_name_dict = labels_to_name
#Actually create the dataset
# The dataset object defines metadata about the dataset, such as file locations and the decoding method
dataset = slim.dataset.Dataset(
data_sources = file_pattern_path,
decoder = decoder,
reader = reader,
num_readers = 4,
num_samples = num_samples,
num_classes = num_classes,
labels_to_name = labels_to_name_dict,
items_to_descriptions = items_to_descriptions)
return dataset
def load_batch(dataset, batch_size, height=img_height, width=img_width, is_training=True):
'''
Loads a batch for training.
INPUTS:
- dataset(Dataset): a Dataset class object that is created from the get_split function
- batch_size(int): determines how big of a batch to train
- height(int): the height of the image to resize to during preprocessing
- width(int): the width of the image to resize to during preprocessing
- is_training(bool): to determine whether to perform a training or evaluation preprocessing
OUTPUTS:
- images(Tensor): a Tensor of the shape (batch_size, height, width, channels) that contain one batch of images
- labels(Tensor): the batch's labels with the shape (batch_size,) (requires one_hot_encoding).
'''
#First create the data_provider object
data_provider = slim.dataset_data_provider.DatasetDataProvider(
dataset,
common_queue_capacity = 24 + 3 * batch_size,
common_queue_min = 24)
#Obtain the raw image using the get method
raw_image, label = data_provider.get(['image', 'label'])
#Perform the correct preprocessing for this image depending if it is training or evaluating
image = inception_preprocessing.preprocess_image(raw_image, height, width, is_training)
#As for the raw images, we just do a simple reshape to batch it up
raw_image = tf.expand_dims(raw_image, 0)
raw_image = tf.image.resize_nearest_neighbor(raw_image, [height, width])
raw_image = tf.squeeze(raw_image)
#Batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
images, raw_images, labels = tf.train.batch(
[image, raw_image, label],
batch_size = batch_size,
num_threads = 4,
capacity = 4 * batch_size,
allow_smaller_final_batch = True)
print("images tensor data type:", tf.shape(images))
return images, raw_images, labels
def train():
#Create the log directory here. Must be done here otherwise import will activate this unneededly.
if not os.path.exists(log_dir):
os.mkdir(log_dir)
#======================= TRAINING PROCESS =========================
#Now we start to construct the graph and build our model
#with tf.Graph().as_default() as graph:
tf.logging.set_verbosity(tf.logging.INFO) #Set the verbosity to INFO level
#First create the dataset and load one batch
dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
images, raw_images, labels = load_batch(dataset, batch_size=batch_size)
print('num_samples:', dataset.num_samples)
#Know the number steps to take before decaying the learning rate and batches per epoch
num_batches_per_epoch = int(dataset.num_samples / batch_size)
num_steps_per_epoch = num_batches_per_epoch #Because one step is one batch processed
decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
x = tf.placeholder(tf.float32, shape=[None, img_height, img_width, 3], name='x')
y_true = tf.placeholder(tf.int32, shape=[None], name='y_true')
#Create the model inference
with slim.arg_scope(inception_resnet_v2_arg_scope()):
logits, end_points = inception_resnet_v2(x, num_classes = dataset.num_classes, is_training = True)
#Define the scopes that you want to exclude for restoration
exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
variables_to_restore = slim.get_variables_to_restore(exclude = exclude)
#Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
one_hot_labels = slim.one_hot_encoding(y_true, dataset.num_classes)
#Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
loss = tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
total_loss = tf.losses.get_total_loss() #obtain the regularization losses as well
#Create the global step for monitoring the learning_rate and training.
global_step = get_or_create_global_step()
#Define your exponentially decaying learning rate
lr = tf.train.exponential_decay(
learning_rate = initial_learning_rate,
global_step = global_step,
decay_steps = decay_steps,
decay_rate = learning_rate_decay_factor,
staircase = True)
#Now we can define the optimizer that takes on the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate = lr)
#Create the train_op.
train_op = slim.learning.create_train_op(total_loss, optimizer)
#State the metrics that you want to predict. We get a predictions that is not one_hot_encoded.
y_pred = tf.nn.softmax(logits, name='y_pred')
predictions = tf.argmax(end_points['Predictions'], 1)
probabilities = end_points['Predictions']
accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions,y_true)
precision, precision_update = tf.contrib.metrics.streaming_precision(predictions, y_true)
recall, recall_update = tf.contrib.metrics.streaming_recall(predictions, y_true)
# tf.group returns an op
metrics_op = tf.group(accuracy_update, probabilities, precision_update, recall_update)
#Now finally create all the summaries you need to monitor and group them into one summary op.
tf.summary.scalar('losses/Total_Loss', total_loss)
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('learning_rate', lr)
tf.summary.scalar('precision',precision)
tf.summary.scalar('recall',recall)
my_summary_op = tf.summary.merge_all()
#my_summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
#Now we need to create a training step function that runs both the train_op, metrics_op and updates the global_step concurrently.
def train_step(sess, train_op, global_step, img, lab):
'''
Simply runs a session for the three arguments provided and gives a logging on the time elapsed for each global step
'''
#Check the time for each sess run
start_time = time.time()
total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op],feed_dict={x: img.eval(session=sess), y_true: lab.eval(session=sess)})
#total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
time_elapsed = time.time() - start_time
#Run the logging to print some results
#if global_step_count % 10 == 0:
logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
return total_loss, global_step_count
#Now we create a saver function that actually restores the variables from a checkpoint file in a sess
saver = tf.train.Saver(variables_to_restore)
def restore_fn(sess):
return saver.restore(sess, checkpoint_file)
#Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
#logdir used for save checkpoint and summary
"""
What the Supervisor does:
1. Automatically restores from a checkpoint, or initializes the variables itself
2. Has its own Saver, which can be used to save checkpoints
3. Has summary_computed for saving summaries
So we do not need to:
1. Manually initialize variables or load them from a checkpoint
2. Create a Saver; the one inside sv is enough
3. Create a summary writer
"""
sv = tf.train.Supervisor(logdir = log_dir, summary_op = None, init_fn = restore_fn)
#Run the managed session
# It automatically looks for a checkpoint in logdir; if there is none, it runs the initialization
with sv.managed_session() as sess:
for step in xrange(num_steps_per_epoch * num_epochs):
#At the start of every epoch, show the vital information
if step % num_batches_per_epoch == 0:
logging.info('Epoch %s/%s', step/num_batches_per_epoch + 1, num_epochs)
learning_rate_value, accuracy_value = sess.run([lr, accuracy])
logging.info('Current Learning Rate: %s', learning_rate_value)
logging.info('Current Streaming Accuracy: %s', accuracy_value)
# optionally, print your logits and predictions for a sanity check that things are going fine.
logits_value, probabilities_value, predictions_value, labels_value = sess.run([logits, probabilities, predictions, labels], feed_dict={x: images.eval(session=sess), y_true: labels.eval(session=sess)})
print 'logits: \n', logits_value
print 'Probabilities: \n', probabilities_value
print 'predictions: \n', predictions_value
print 'Labels:\n:', labels_value
#Log the summaries every 10 step.
if step % 10 == 0:
loss, _ = train_step(sess, train_op, sv.global_step, images, labels)
summaries = sess.run(my_summary_op, feed_dict={x: images.eval(session=sess), y_true: labels.eval(session=sess)})
sv.summary_computed(sess, summaries)
#If not, simply run the training step
else:
loss, _ = train_step(sess, train_op, sv.global_step, images, labels)
#raw_images, labels, predictions = sess.run([raw_images, labels, predictions], feed_dict={x: images.eval(session=sess), y_true: labels.eval(session=sess)})
#We log the final training loss and accuracy
logging.info('Final Loss: %s', loss)
logging.info('Final Accuracy: %s', sess.run(accuracy))
#Once all the training has been done, save the log files and checkpoint model
logging.info('Finished training! Saving model to disk now.')
#saver.save(sess, "./log/estate_model.ckpt")
sv.saver.save(sess, sv.save_path, global_step = sv.global_step)
if __name__ == '__main__':
train()
The code runs, but the accuracy is very low. I modified the code from the batch-loaded dataset to the placeholder method: I convert the tensors returned by the load_batch() function to numpy arrays using the tensor.eval() method and feed them with feed_dict={x: img.eval(session=sess), y_true: lab.eval(session=sess)}. I suspect the code snippet below, but I can't find the issue:
dataset = get_split('train', dataset_dir, file_pattern=file_pattern)
images, raw_images, labels = load_batch(dataset, batch_size=batch_size)
print('num_samples:', dataset.num_samples)
#Know the number steps to take before decaying the learning rate and batches per epoch
num_batches_per_epoch = int(dataset.num_samples / batch_size)
num_steps_per_epoch = num_batches_per_epoch #Because one step is one batch processed
decay_steps = int(num_epochs_before_decay * num_steps_per_epoch)
x = tf.placeholder(tf.float32, shape=[None, img_height, img_width, 3], name='x')
y_true = tf.placeholder(tf.int32, shape=[None], name='y_true')
#Create the model inference
with slim.arg_scope(inception_resnet_v2_arg_scope()):
logits, end_points = inception_resnet_v2(x, num_classes = dataset.num_classes, is_training = True)
#Define the scopes that you want to exclude for restoration
exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
variables_to_restore = slim.get_variables_to_restore(exclude = exclude)
#Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
one_hot_labels = slim.one_hot_encoding(y_true, dataset.num_classes)
#Performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
loss = tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
total_loss = tf.losses.get_total_loss() #obtain the regularization losses as well
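In particular, x and y_true are fed with img.eval(session=sess) and lab.eval(session=sess); each eval call runs the input pipeline separately, so the images and the labels used in one training step may come from two different dequeued batches, breaking the image-label pairing. Fetching both in a single sess.run keeps them paired (a sketch of that pattern):
# one run of the input pipeline, so the batch stays paired
img_np, lab_np = sess.run([images, labels])
total_loss_value, global_step_count, _ = sess.run(
    [train_op, global_step, metrics_op],
    feed_dict={x: img_np, y_true: lab_np})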
I would like to perform image classification on our own large image library (millions of labeled images) with TensorFlow. I'm new to Stack Overflow, Python, and TensorFlow; I worked through a few tutorials (MNIST etc.) and got to the point where I was able to prepare a TensorFlow Dataset from a dictionary containing the absolute paths to the images and the corresponding labels. However, I'm stuck at the point of using the dataset in a TensorFlow session. Here is my (example) code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
import mymodule # I build my module to read the images and labels
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow.contrib.data import Iterator
beginTime = time.time()
batch_size = 100
learning_rate = 0.005
max_steps = 2
NUM_CLASSES = 25
def input_parser(img_path, label):
one_hot = tf.one_hot(label, NUM_CLASSES)
img_file = tf.read_file(img_path)
img_decoded = tf.image.decode_jpeg(img_file, channels = 3)
return img_decoded, one_hot
#Import Training data (returns the dictionary with paths and labels)
train_dict = mymodule.getFileMap(labelList, imageList)
#Import Test data
test_dict = mymodule.getFileMap(labelList, imageList)
#Get train data
train_file_list, train_label_list = get_file_label_list(train_dict)
train_images_tensor = ops.convert_to_tensor(train_file_list, dtype=dtypes.string)
train_labels_tensor = ops.convert_to_tensor(train_label_list, dtype=dtypes.int64)
#Get test data
test_file_list, test_label_list = get_file_label_list(test_dict)
test_images_tensor = ops.convert_to_tensor(test_file_list, dtype=dtypes.string)
test_labels_tensor = ops.convert_to_tensor(test_label_list, dtype=dtypes.int64)
#Create TensorFlow Datset object
train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor))
test_data = tf.data.Dataset.from_tensor_slices((test_images_tensor, test_labels_tensor))
# Transform the datset so that it contains decoded images
# and one-hot vector labels
train_data = train_data.map(input_parser)
test_data = test_data.map(input_parser)
# Batching --> How to do it right?
#train_data = train_data.batch(batch_size = 100)
#test_data = train_data.batch(batch_size = 100)
#Define input placeholders
image_size = 990*990*3
images_placeholder = tf.placeholder(tf.float32, shape=[None, image_size])
labels_placeholder = tf.placeholder(tf.int64, shape=[None])
# Define variables (these are the values we want to optimize)
weights = tf.Variable(tf.zeros([image_size, NUM_CLASSES]))
biases = tf.Variable(tf.zeros([NUM_CLASSES]))
# Define the classifier's result
logits = tf.matmul(images_placeholder, weights) + biases
# Define the loss function
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = labels_placeholder))
# Define the training operation
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
# Operation comparing prediction with true label
correct_prediction = tf.equal(tf.argmax(logits, 1), labels_placeholder)
# Operation calculating the accuracy of our predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#Create TensorFlow Iterator object
iterator = Iterator.from_structure(train_data.output_types,
train_data.output_shapes)
next_element = iterator.get_next()
#Create two initialization ops to switch between the datasets
train_init_op = iterator.make_initializer(train_data)
test_init_op = iterator.make_initializer(test_data)
with tf.Session() as sess:
#Initialize variables
sess.run(tf.global_variables_initializer())
sess.run(train_init_op)
for _ in range(10):
try:
elem = sess.run(next_element)
print(elem)
except tf.errors.OutOfRangeError:
print("End of training datset.")
break
Following this and this tutorial, I could not solve the problem of how to use the (image and label) dataset in a TensorFlow session for training. I was able to print out the dataset by iterating through it, but wasn't able to use it for learning.
I don't understand how to access the images and labels separately after they have been merged in the train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor)) operation, as required by the 2nd tutorial. Also, I don't know how to implement batching correctly.
What I want to do in the session is basically this (from the 2nd tutorial):
# Generate input data batch
indices = np.random.choice(data_sets['images_train'].shape[0], batch_size)
images_batch = data_sets['images_train'][indices]
labels_batch = data_sets['labels_train'][indices]
# Periodically print out the model's current accuracy
if i % 100 == 0:
train_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: images_batch, labels_placeholder: labels_batch})
print('Step {:5d}: training accuracy {:g}'.format(i, train_accuracy))
# Perform a single training step
sess.run(train_step, feed_dict={images_placeholder: images_batch,
labels_placeholder: labels_batch})
# After finishing the training, evaluate on the test set
test_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: data_sets['images_test'],
labels_placeholder: data_sets['labels_test']})
print('Test accuracy {:g}'.format(test_accuracy))
endTime = time.time()
print('Total time: {:5.2f}s'.format(endTime - beginTime))
If anyone can tell me how to access the images and labels in the dataset separately and use them for training, I would be really thankful. Also, a tip on where and how to do the batching would be appreciated.
Thank you.
In your code, next_element is a tuple of two tensors, matching the structure of your datasets: i.e. it is a tuple whose first element is an image, and second element is a label. To access the individual tensors, you can do the following:
next_element = iterator.get_next()
next_image = next_element[0]
next_label = next_element[1]
# Or, in a single line:
next_image, next_label = iterator.get_next()
To batch a tf.data.Dataset, you can use the Dataset.batch() transformation. Your commented-out code almost works; note that the second line should batch test_data rather than train_data:
train_data = train_data.batch(batch_size=100)
test_data = test_data.batch(batch_size=100)
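A minimal sketch of how the batched dataset could then drive the training ops defined above (assumptions: all decoded images are 990x990x3, matching image_size; and since input_parser produces one-hot labels while labels_placeholder and the sparse softmax loss expect integer class ids, the labels are converted back with argmax):
sess.run(train_init_op)
while True:
    try:
        images_batch, labels_batch = sess.run(next_element)
        # flatten images to match the (None, image_size) placeholder,
        # and recover integer labels from the one-hot vectors
        feed = {images_placeholder: images_batch.reshape(-1, image_size),
                labels_placeholder: np.argmax(labels_batch, axis=1)}
        sess.run(train_step, feed_dict=feed)
    except tf.errors.OutOfRangeError:
        break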
I understand that there are advantages (especially as I expand the scope of the models I build and the size of the datasets they work on) to using TensorFlow's new Dataset as the idiom for my data-feeding pipeline. However, I'm having trouble mapping my existing feed_dict-based code to this new model.
One problem I face is that I can't sort out how batching and epochs interact, or how these interleave with the logging and validation that I often do.
For example, how does something like the following map to using Dataset?
# Load and process data into tensors of dimension (N, C_i) for input and (N, C_o) for output
# where N is the number of examples and C_ is the number of channels, and the values are activations
train_x, train_y, valid_x, valid_y = load_data(file, [segments], ...)
train_size = len(train_x)
train_stats_feed = {input_activation: train_x, correct_output: train_y, is_train: False}
valid_stats_feed = {input_activation: valid_x, correct_output: valid_y, is_train: False}
with tf.Session(config=tf.ConfigProto(...)) as sess:
sess.run(tf.initialize_all_variables())
# Some analysis; not always done but the code needs to support it
train_writer.add_summary(sess.run(merged, feed_dict=train_stats_feed), 0)
test_writer.add_summary(sess.run(merged, feed_dict=valid_stats_feed), 0)
test_writer.add_summary(sess.run(gs_summary), 0)
print(log_fmt.format(0, float(sess.run(accuracy, feed_dict=valid_stats_feed)),
float(sess.run(loss, feed_dict=valid_stats_feed))))
for ep in range(epochs):
# Slice the training data into random batches
batch_indices = np.array_split(np.random.permutation(train_size), int(train_size/mb_size))
for mini_batch_indices in batch_indices:
sess.run(train_step, feed_dict={input_activation: train_x[mini_batch_indices],
correct_output: train_y[mini_batch_indices], is_train: True})
gs = int(sess.run(global_step))
if gs % log_steps == 0:
test_writer.add_summary(sess.run(merged, feed_dict=valid_stats_feed), gs)
train_writer.add_summary(sess.run(merged, feed_dict=train_stats_feed), gs)
acc = float(sess.run(accuracy, feed_dict=valid_stats_feed))
sess.run(validation_accuracy.assign(acc))
print(log_fmt.format(gs, acc, float(sess.run(loss, feed_dict=valid_stats_feed))))
print(ep_fmt.format(ep + 2))
test_writer.add_summary(sess.run(gs_summary), ep + 1)
Some of the less obvious definitions for the above, if needed:
# Preliminaries
# Some basic preliminaries, the details of which are not important to the question
# Mostly pretty standard; obvious things omitted from MWE for brevity
global_step = tf.Variable(0, trainable=False, name='global_step')
validation_accuracy = tf.Variable(0.0, trainable=False, name='validation_accuracy', dtype=tf.float32)
is_train = tf.placeholder(tf.bool, [], name='is_train')
input_activation = tf.placeholder(tf.float32, shape=[None, in_nodes], name='inputs')
correct_output = tf.placeholder(tf.float32, shape=[None, out_nodes], name='correct_outputs')
network_output = tf.identity(out_activations)
correct_predictions = correct_fn(correct_output, network_output)
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
error = cost_fn(correct_output, network_output)
loss = error + FLAGS.regularization_weight * sum(tf.nn.l2_loss(w) for w in layer_weights)
train_step = tf.train.MomentumOptimizer(learning_rate, momentum=momentum).minimize(loss, global_step=global_step)
# Logging
train_writer = tf.summary.FileWriter(trainlogfile, tf.get_default_graph())
test_writer = tf.summary.FileWriter(testlogfile, tf.get_default_graph())
gs_summary = tf.summary.scalar('global_step_at_epoch', global_step)
merged = tf.summary.merge_all()
Here are a few lines for training to get you started. The same logic applies for validation.
# Define placeholder for inputs data and labels
inputs_placeholder = tf.placeholder(train_x.dtype, train_x.shape)
labels_placeholder = tf.placeholder(train_y.dtype, train_y.shape)
# Define a Dataset object using the above placeholders
dataset = tf.contrib.data.Dataset.from_tensor_slices((inputs_placeholder, labels_placeholder))
# Define batch_size
batch_size = 128
dataset = dataset.batch(batch_size)
# Define iterator
iterator = dataset.make_initializable_iterator()
# Get one batch
next_example, next_label = iterator.get_next()
# calculate loss from the model function you are using
loss = some_model(next_example, next_label)
# Set number of Epochs here
num_epochs = 100
for _ in range(num_epochs):
sess.run(iterator.initializer, feed_dict={inputs_placeholder: train_x, labels_placeholder: train_y})
while True:
try:
_loss = sess.run(loss)
except tf.errors.OutOfRangeError:
break
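To mirror the per-epoch random shuffling from your original feed_dict loop, a shuffle transformation can be inserted before batching (a sketch; the buffer is made as large as the dataset so each epoch is a full reshuffle, and the reshuffling happens each time the iterator is re-initialized):
dataset = tf.contrib.data.Dataset.from_tensor_slices((inputs_placeholder, labels_placeholder))
dataset = dataset.shuffle(buffer_size=len(train_x))
dataset = dataset.batch(batch_size)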