I'm new to TensorFlow and machine learning (and to Python as well).
As a first step towards building an image recognition program, I hit a wall of confusion around preparing the data to feed into the model. Can someone please help me with this?
I looked into this tutorial, but its data preparation step is hidden from view.
I don't expect to get a complete, perfect program from this question; instead, I would love to hear how TensorFlow works with feed_dict. My current mental model is: "It works like a [for] loop: it goes through imageHolder, takes the 2352 bytes of one image and passes them to the training op, which makes a prediction based on the current model, compares it with the entry at the same index in labelHolder, and then corrects the model." So I expected to be able to feed in a single set of 2352 bytes (another image of the same size) and get a prediction. I will also put the code here, in case my idea is correct and the error comes from a bad implementation.
Say I have a data set of 5 classes, with 3670 images in total.
When loading the data into feed_dict for training, I converted every image to 28x28 pixels with 3 channels. This gives me a tensor of shape (3670, 2352) for the image holder in the feed_dict. After that, I prepared a tensor of shape (3670,) for the label holder in the feed_dict.
The training code is look like this:
for step in xrange(FLAGS.max_steps):
    # The whole training set is fed on every step (full-batch gradient descent).
    feed_dict = {
        imageHolder: imageTrain,
        labelHolder: labelTrain,
    }
    # Run one optimisation step and fetch the loss computed on the same feed.
    _, loss_rate = sess.run([train_op, loss_op], feed_dict=feed_dict)
Then i have my code to predict a new image with the model above:
# Collect the paths of the images to classify.
testing_dataset = do_get_file_list(FLAGS.guess_dir)
# NOTE(review): this creates a brand-new placeholder. `logits` was built from
# `imageHolder` (see do_init_param), not from `x`, so feeding `x` never
# supplies the input that `logits` actually depends on.
x = tf.placeholder(tf.float32, shape=(IMAGE_PIXELS))
for data in testing_dataset:
# Load the image and resize it to the size used for training.
image = Image.open(data)
image = image.resize((IMAGE_SIZE, IMAGE_SIZE))
# Flatten to a rank-1 vector of IMAGE_PIXELS values (no batch dimension).
image = np.array(image).reshape(IMAGE_PIXELS)
# Ask for the index of the largest logit along axis 1.
prediction = session.run(tf.argmax(logits, 1), feed_dict={x: image})
But the problem is that the prediction line always raises an error of "Can not feed value of shape...." no matter what shape my testing data has, (2352,) or (1, 2352). (It asks for shape (3670, 2352), but there is no way I can provide that for a single image.)
This is some flag i have used
The training op and loss computing:
def do_get_op_compute_loss(logits, labels):
    """Return the mean sparse softmax cross-entropy between logits and labels.

    Args:
        logits: tensor of unnormalised class scores.
        labels: tensor of integer class ids; cast to int64 before use.

    Returns:
        The op named 'xentropy_mean' that averages the per-example loss.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.to_int64(labels), logits=logits, name='xentropy')
    return tf.reduce_mean(per_example, name='xentropy_mean')
def do_get_op_training(loss_op, training_rate):
    """Return an op that performs one gradient-descent step on ``loss_op``.

    Args:
        loss_op: scalar loss tensor to minimise.
        training_rate: learning rate for the optimizer. The previous version
            ignored this argument and always read ``FLAGS.learning_rate``.

    Returns:
        The training op; running it also increments a 'global_step' variable.
    """
    optimizer = tf.train.GradientDescentOptimizer(training_rate)
    # Non-trainable counter incremented by minimize() on every step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    return optimizer.minimize(loss_op, global_step=global_step)
# Input placeholders. Both shapes pin the first dimension to data_count, so
# every value fed to them must have exactly data_count rows.
imageHolder = tf.placeholder(tf.float32, [data_count, IMAGE_PIXELS])
labelHolder = tf.placeholder(tf.int32, [data_count])
For complete program:
import os
import math
import tensorflow as tf
from PIL import Image
import numpy as np
from six.moves import xrange
flags = tf.app.flags
# Parsed flag values; read as FLAGS.<name> throughout the program.
FLAGS = flags.FLAGS

# Every image is resized to IMAGE_SIZE x IMAGE_SIZE pixels with
# IMAGE_CHANNELS colour channels and then flattened: 28 * 28 * 3 = 2352.
IMAGE_SIZE = 28
IMAGE_CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * IMAGE_CHANNELS

flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 200, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 4, 'Batch size.  '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', 'data', 'Directory to put the training data.')
# The two help strings below used to be copies of the train_dir description.
flags.DEFINE_string('save_file', '.\\data\\model.ckpt', 'Path prefix of the checkpoint files to write.')
flags.DEFINE_string('guess_dir', 'work', 'Directory containing the images to classify.')
def do_inference(images, hidden1_units, hidden2_units, class_count):
    """Build the feed-forward classifier and return its logits.

    The network is two ReLU hidden layers followed by a linear output layer.
    Weights are drawn from a truncated normal whose standard deviation is
    1/sqrt(fan_in); biases start at zero.

    Args:
        images: float tensor whose rows are flattened images of IMAGE_PIXELS
            values each.
        hidden1_units: number of units in the first hidden layer.
        hidden2_units: number of units in the second hidden layer.
        class_count: number of output classes.

    Returns:
        Logits tensor with one row per input row and ``class_count`` columns.
    """
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # This scope used to be called 'hidden1' as well, which made TensorFlow
    # silently rename it and produced confusing variable names.
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, class_count],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([class_count]), name='biases')
        # No activation here: the loss op applies the softmax itself.
        logits = tf.matmul(hidden2, weights) + biases
    return logits
def do_get_op_compute_loss(logits, labels):
    """Return the mean sparse softmax cross-entropy between logits and labels.

    Args:
        logits: tensor of unnormalised class scores.
        labels: tensor of integer class ids; cast to int64 before use.

    Returns:
        The op named 'xentropy_mean' that averages the per-example loss.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.to_int64(labels), logits=logits, name='xentropy')
    return tf.reduce_mean(per_example, name='xentropy_mean')
def do_get_op_training(loss_op, training_rate):
    """Return an op that performs one gradient-descent step on ``loss_op``.

    Args:
        loss_op: scalar loss tensor to minimise.
        training_rate: learning rate for the optimizer. The previous version
            ignored this argument and always read ``FLAGS.learning_rate``.

    Returns:
        The training op; running it also increments a 'global_step' variable.
    """
    optimizer = tf.train.GradientDescentOptimizer(training_rate)
    # Non-trainable counter incremented by minimize() on every step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    return optimizer.minimize(loss_op, global_step=global_step)
def do_get_op_evaluate(logits, labels):
    """Return an op that counts how many rows are classified correctly.

    A row counts as correct when its label is the top-1 entry of its logits.

    Args:
        logits: tensor of class scores, one row per example.
        labels: tensor of integer class ids, one per example.

    Returns:
        Scalar int32 tensor holding the number of correct predictions.
    """
    hits = tf.nn.in_top_k(logits, labels, 1)
    hit_count = tf.reduce_sum(tf.cast(hits, tf.int32))
    return hit_count
def do_evaluate(session, eval_correct_op, imageset_holder, labelset_holder, train_images, train_labels):
    """Evaluate the model once on the given data and report its precision.

    The previous version fed the same full data set ``FLAGS.batch_size`` times
    and divided by ``batch_size ** 2``, so the ratio was unrelated to the
    number of examples, and the result was neither printed nor returned.

    Args:
        session: session in which the model variables live.
        eval_correct_op: op returning the number of correct predictions.
        imageset_holder: placeholder that receives ``train_images``.
        labelset_holder: placeholder that receives ``train_labels``.
        train_images: images to evaluate, one row per example.
        train_labels: integer labels, one per row of ``train_images``.

    Returns:
        Fraction of correctly classified examples (0.0 for an empty data set).
    """
    num_examples = len(train_labels)
    if num_examples == 0:
        # Nothing to evaluate; avoid a division by zero below.
        return 0.0
    feed_dict = {imageset_holder: train_images, labelset_holder: train_labels}
    true_count = session.run(eval_correct_op, feed_dict=feed_dict)
    # float() keeps this a true division on Python 2 as well.
    precision = float(true_count) / num_examples
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
    return precision
def do_init_param(data_count, class_count):
    """Build the graph, create a session and initialise all variables.

    Two defects of the previous version are fixed here:
    * the placeholders pinned their first dimension to ``data_count``, so the
      model rejected any feed that was not exactly the training-set size
      (for instance a single image at prediction time);
    * the variable initializer op was created but never run.

    Args:
        data_count: number of training examples. Kept for backward
            compatibility; the batch dimension is now left unspecified.
        class_count: number of output classes.

    Returns:
        Tuple ``(sess, train_op, loss_op, evaluate_op, imageHolder,
        labelHolder, logits)``.
    """
    # None lets the same graph accept any number of rows per feed.
    imageHolder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
    labelHolder = tf.placeholder(tf.int32, shape=(None,))
    # Build a graph for prediction from the inference model.
    logits = do_inference(imageHolder, FLAGS.hidden1, FLAGS.hidden2, class_count)
    # Add the loss, training and evaluation ops.
    loss_op = do_get_op_compute_loss(logits, labelHolder)
    train_op = do_get_op_training(loss_op, FLAGS.learning_rate)
    evaluate_op = do_get_op_evaluate(logits, labelHolder)
    # Create the session, then initialise every variable. The initializer is
    # created last so that it also covers the optimizer's global_step.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    return sess, train_op, loss_op, evaluate_op, imageHolder, labelHolder, logits
def do_get_class_list():
    """List the class folders found directly under ``FLAGS.train_dir``.

    Returns:
        A list of ``{'name': <folder name>, 'path': <folder path>}`` dicts,
        one per sub-directory, in ``os.listdir`` order.
    """
    classes = []
    for entry in os.listdir(FLAGS.train_dir):
        entry_path = os.path.join(FLAGS.train_dir, entry)
        if os.path.isdir(entry_path):
            classes.append({'name': entry, 'path': entry_path})
    return classes
def do_get_file_list(folderName):
    """Return the paths of all non-directory entries directly in a folder.

    Args:
        folderName: directory to list (not searched recursively).

    Returns:
        A list of ``folderName``-joined paths, in ``os.listdir`` order.
    """
    files = []
    for entry in os.listdir(folderName):
        candidate = os.path.join(folderName, entry)
        if not os.path.isdir(candidate):
            files.append(candidate)
    return files
def do_init_data_list():
    """Load every training image and its label into NumPy arrays.

    Each sub-directory of the training folder is one class; every file in it
    is one example of that class.

    The previous version opened and resized each image but never stored it,
    so the returned image array was always empty.

    Returns:
        Tuple ``(imageTrain, labelTrain, id_map)`` where ``imageTrain`` has
        shape ``(image_count, IMAGE_PIXELS)``, ``labelTrain`` holds one
        integer class id per image and ``id_map`` maps class id to class name.
    """
    file_list = []
    for classItem in do_get_class_list():
        for dataItem in do_get_file_list(classItem['path']):
            file_list.append({'name': classItem['name'], 'path': dataItem})

    imageTrainList, labelTrainList = do_seperate_data(file_list)

    imageTrain = []
    for imagePath in imageTrainList:
        # The context manager closes the file handle as soon as the pixels
        # have been read by convert()/resize().
        with Image.open(imagePath) as image:
            # Force three channels so that every image flattens to
            # IMAGE_PIXELS values, whatever its original mode was.
            resized = image.convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
        imageTrain.append(np.array(resized, dtype=np.float32))

    imageCount = len(imageTrain)
    imageTrain = np.array(imageTrain, dtype=np.float32).reshape(imageCount, IMAGE_PIXELS)

    id_list, id_map = do_generate_id_label(labelTrainList)
    labelTrain = np.array(id_list)
    return imageTrain, labelTrain, id_map
def do_init():
    """Load the training data, then build the graph and session for it.

    Returns:
        Tuple ``(sess, train_op, loss_op, evaluate_op, imageHolder,
        labelHolder, imageTrain, labelTrain, id_map, logits)``.
    """
    images, labels, class_names = do_init_data_list()
    graph_parts = do_init_param(len(images), len(class_names))
    session, train, loss, evaluate, image_input, label_input, scores = graph_parts
    return (session, train, loss, evaluate, image_input, label_input,
            images, labels, class_names, scores)
def do_seperate_data(data):
    """Split a list of ``{'name', 'path'}`` records into two parallel lists.

    Args:
        data: iterable of dicts with a 'path' and a 'name' key.

    Returns:
        Tuple ``(paths, names)``; both lists keep the order of ``data``.
    """
    paths = []
    names = []
    for record in data:
        paths.append(record['path'])
        names.append(record['name'])
    return paths, names
def do_generate_id_label(label_list):
    """Map class names to integer ids and encode ``label_list`` with them.

    Ids are assigned in sorted order of the distinct labels. The previous
    version used the iteration order of a ``set``, which can differ between
    runs, so ids stored with a checkpoint could not be matched to names again.

    Args:
        label_list: sequence of class names, one per example.

    Returns:
        Tuple ``(id_list, id_map)``: the id of every entry of ``label_list``
        and a dict mapping id to class name.
    """
    unique_labels = sorted(set(label_list))
    id_map = dict(enumerate(unique_labels))
    # The reverse lookup gives O(1) encoding instead of repeated list.index().
    label_to_id = {label: label_id for label_id, label in id_map.items()}
    id_list = [label_to_id[label] for label in label_list]
    return id_list, id_map
def do_training(sess, train_op, loss_op, evaluate_op, imageHolder, labelHolder, imageTrain, labelTrain):
    """Run the training loop, saving a checkpoint and evaluating periodically.

    Args:
        sess: session holding the initialised model variables.
        train_op: op that performs one optimisation step.
        loss_op: op that returns the current loss.
        evaluate_op: op that returns the number of correct predictions.
        imageHolder: placeholder fed with ``imageTrain``.
        labelHolder: placeholder fed with ``labelTrain``.
        imageTrain: training images, one flattened image per row.
        labelTrain: integer class id of every training image.
    """
    # Training state checkpoint saver.
    saver = tf.train.Saver()
    # The whole training set is fed on every step, so build the feed once.
    feed_dict = {
        imageHolder: imageTrain,
        labelHolder: labelTrain,
    }
    for step in xrange(FLAGS.max_steps):
        _, loss_rate = sess.run([train_op, loss_op], feed_dict=feed_dict)
        if step % 100 == 0:
            print('Step {0}: loss = {1}'.format(step, loss_rate))
        # Checkpoint and evaluate every 1000 steps and after the final step.
        if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess, FLAGS.save_file, global_step=step)
            print('Evaluate training data')
            do_evaluate(sess, evaluate_op, imageHolder, labelHolder, imageTrain, labelTrain)
def _find_image_placeholder(logits):
    """Return the single float32 placeholder of the graph that owns ``logits``."""
    candidates = [
        op.outputs[0]
        for op in logits.graph.get_operations()
        if op.type == 'Placeholder' and op.outputs[0].dtype == tf.float32
    ]
    if len(candidates) != 1:
        raise ValueError(
            'Expected exactly one float32 placeholder in the graph, found {0}; '
            'pass image_holder explicitly.'.format(len(candidates)))
    return candidates[0]


def do_predict(session, logits, image_holder=None):
    """Print the class scores of every image in ``FLAGS.guess_dir``.

    The previous version fed a freshly created placeholder that ``logits``
    does not depend on, and fed it a rank-1 vector. The image must instead be
    fed, as a batch, to the placeholder the model was built from.

    Args:
        session: session holding the trained variables.
        logits: output tensor of the model.
        image_holder: input placeholder of the model. When omitted it is
            looked up in the graph of ``logits``.

    Raises:
        ValueError: if ``image_holder`` is omitted and cannot be identified.
    """
    if image_holder is None:
        image_holder = _find_image_placeholder(logits)
    # Static batch size of the placeholder; None when it accepts any size.
    fixed_batch = image_holder.shape.as_list()[0]

    print('Perform predict')
    for data in do_get_file_list(FLAGS.guess_dir):
        with Image.open(data) as image:
            # Force three channels so the image flattens to IMAGE_PIXELS values.
            resized = image.convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
        batch = np.array(resized, dtype=np.float32).reshape(1, IMAGE_PIXELS)
        if fixed_batch is not None and fixed_batch > 1:
            # The placeholder insists on a fixed number of rows: pad with
            # zeros. Rows are classified independently, so row 0 is unaffected.
            padding = np.zeros((fixed_batch - 1, IMAGE_PIXELS), dtype=np.float32)
            batch = np.concatenate([batch, padding], axis=0)
        prediction = session.run(logits, feed_dict={image_holder: batch})[0]
        print('{0}: {1}'.format(data, prediction))
def main(_):
    """Build the model, train it and classify the images in the guess folder.

    Args:
        _: unused positional argument passed by ``tf.app.run``.
    """
    # Build everything in a fresh graph instead of the process-wide default.
    with tf.Graph().as_default():
        (sess, train_op, loss_op, evaluate_op, imageHolder, labelHolder,
         imageTrain, labelTrain, id_map, logits) = do_init()
        print("done init")
        do_training(sess, train_op, loss_op, evaluate_op, imageHolder, labelHolder, imageTrain, labelTrain)
        print("done training")
        do_predict(sess, logits)


if __name__ == '__main__':
    # Parses the command-line flags, then calls main(). The guard previously
    # had no body at all.
    tf.app.run()
It's important to understand the error, you say
But the problem is the predict line always raise an error of "Can not
feed value of shape...." no matter what shape my testing data is
(2352,), (1, 2352) (it's ask for (3670, 2352) shape, but no way)
Oh yes way, my friend, yes way. The error says there is a problem with the shape you feed, so that is what you need to inspect. It asks for 3670 rows. Why?
Because your model accepts inputs with shape (data_count, IMAGE_PIXELS), which you declare in the below:
def do_init_param(data_count, class_count):
# Generate placeholder
imageHolder = tf.placeholder(tf.float32, shape=(data_count, IMAGE_PIXELS))
labelHolder = tf.placeholder(tf.int32, shape=(data_count))
This function is called here:
sess, train_op, loss_op, evaluate_op, imageHolder, labelHolder, logits = do_init_param(len(imageTrain), len(id_map))
len(imageTrain) is the length of your dataset, probably 3670 images.
Then you have your prediction function:
def do_predict(session, logits):
# xentropy
testing_dataset = do_get_file_list(FLAGS.guess_dir)
x = tf.placeholder(tf.float32, shape=(IMAGE_PIXELS))
prediction = session.run(logits, {x: image})
Note that x here is useless: your model (logits) was built from imageHolder, not from x. The image you feed does not have the shape the model expects, which is the original placeholder shape of (3670, 2352), because that's what you declared.
The solution is to declare the model's input placeholder with an unspecified first dimension, and to feed that same placeholder (instead of a new x) when predicting:
imageHolder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
When you predict the label of your image, you can have a single image or multiple images (a mini-batch), but always must be of shape [number_images, IMAGE_PIXELS].
Makes sense?
I am trying to save a fine-tuned BERT model. I have run the code successfully. It works fine, and in the IPython console I am able to call getPrediction and have it return the result.
I have my weight files saved (the highest being model.ckpt-333.data-00000-of-00001).
I have no idea how I would go about saving the model so that it is reusable.
I am using bert-tensorflow.
import json
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
from sklearn.model_selection import train_test_split
import os
print("tensorflow version : ", tf.__version__)
print("tensorflow_hub version : ", hub.__version__)
#Importing BERT modules
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
#set output directory of the model
OUTPUT_DIR = 'model'
##markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False ##param {type:"boolean"}
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
### Load the data
data = pd.read_csv("data/bbc-text.csv")
data.columns = ['category', 'text']
print('*****Data Loaded: {} *****'.format(data.head()))
#check to see if any null values are present.
print('*****Empty Data: {} *****'.format(data[data.isnull().any(axis=1)]))
#encode category variable into numeric
data.category = pd.Categorical(data.category)
data['code'] = data.category.cat.codes
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, test_size=0.2, random_state=200)
## 2 -- Data Visualisation
import matplotlib.pyplot as plt
train['code'].value_counts().plot(kind = 'bar')
DATA_COLUMN = 'text'
label_list = [0, 1, 2, 3, 4]
## 2 -- Data Preprocessing
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None,
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module.

    Returns:
        A ``bert.tokenization.FullTokenizer`` configured with the vocabulary
        file and lower-casing setting reported by the module at
        ``BERT_MODEL_HUB``.
    """
    # Use a throw-away graph so the module loaded here does not end up in the
    # graph that is used for training.
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.compat.v1.Session() as sess:
            # Both values are fetched in one run. The fetch list was previously
            # cut off after its first element.
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])

    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
tokenizer = create_tokenizer_from_hub_module()
# We'll set sequences to be at most 128 tokens long.
# Convert our train and validation features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
#Example on first observation in the training set
print("Example of train[0] as a training set")
print("Sentence : ", train_InputExamples.iloc[0].text_a)
print("Tokens : ", tokenizer.tokenize(train_InputExamples.iloc[0].text_a))
print("Input IDs : ", train_features[0].input_ids)
print("Input Masks : ", train_features[0].input_mask)
print("Segment IDs : ", train_features[0].segment_ids)
## 3. Creating a Multiclass Classifier
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
bert_module = hub.Module(
bert_inputs = dict(
bert_outputs = bert_module(
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
output_layer = bert_outputs["pooled_output"]
hidden_size = output_layer.shape[-1].value
# Create our own layer to tune for politeness data.
output_weights = tf.compat.v1.get_variable(
"output_weights", [num_labels, hidden_size],
output_bias = tf.compat.v1.get_variable(
"output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.compat.v1.variable_scope("loss"):
# Dropout helps prevent overfitting
output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
log_probs = tf.nn.log_softmax(logits, axis=-1)
# Convert labels into one-hot encoding
one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
# If we're predicting, we want predicted labels and the probabiltiies.
if is_predicting:
return (predicted_labels, log_probs)
# If we're train/eval, compute loss between predicted and actual label
per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
return (loss, predicted_labels, log_probs)
#A function that adapts our model to work for training, evaluation, and prediction.
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
"""Returns `model_fn` closure for TPUEstimator."""
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
"""The `model_fn` for TPUEstimator."""
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
if not is_predicting:
(loss, predicted_labels, log_probs) = create_model(
is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
train_op = bert.optimization.create_optimizer(
loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
# Calculate evaluation metrics.
def metric_fn(label_ids, predicted_labels):
accuracy = tf.compat.v1.metrics.accuracy(label_ids, predicted_labels)
true_pos = tf.compat.v1.metrics.true_positives(
true_neg = tf.compat.v1.metrics.true_negatives(
false_pos = tf.compat.v1.metrics.false_positives(
false_neg = tf.compat.v1.metrics.false_negatives(
return {
"eval_accuracy": accuracy,
"true_positives": true_pos,
"true_negatives": true_neg,
"false_positives": false_pos,
"false_negatives": false_neg
eval_metrics = metric_fn(label_ids, predicted_labels)
if mode == tf.estimator.ModeKeys.TRAIN:
return tf.estimator.EstimatorSpec(mode=mode,
return tf.estimator.EstimatorSpec(mode=mode,
(predicted_labels, log_probs) = create_model(
is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
predictions = {
'probabilities': log_probs,
'labels': predicted_labels
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
# Return the actual model function in the closure
return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
# Warmup is a period of time where the learning rate is small and gradually increases--usually helps training.
# Model configs
# Compute train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
#Initializing the model and the estimator
model_fn = model_fn_builder(
estimator = tf.estimator.Estimator(
params={"batch_size": BATCH_SIZE})
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
# Create an input function for validating. drop_remainder = True for using TPUs.
test_input_fn = run_classifier.input_fn_builder(
# #Training the model
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)
#Evaluating the model with Validation set
accuracy = estimator.evaluate(input_fn=test_input_fn, steps=None)
# A method to get predictions
def getPrediction(in_sentences):
    """Classify sentences with the trained estimator.

    Args:
        in_sentences: list of raw text strings to classify.

    Returns:
        One tuple per sentence: ``(sentence, probabilities, label_id,
        label_name)``. ``probabilities`` is whatever the estimator reports
        under its 'probabilities' key (the model function stores its
        ``log_probs`` there).
    """
    # Names of the classes, indexed by predicted label id.
    class_names = ["business", "entertainment", "politics", "sports", "tech"]

    # Wrap every sentence in the example type BERT expects; the label is a
    # dummy value because it is not used for prediction.
    examples = []
    for sentence in in_sentences:
        examples.append(
            run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0))

    features = run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    input_fn = run_classifier.input_fn_builder(
        features=features, seq_length=MAX_SEQ_LENGTH,
        is_training=False, drop_remainder=False)

    rows = []
    for sentence, prediction in zip(in_sentences, estimator.predict(input_fn)):
        label_id = prediction['labels']
        rows.append((sentence, prediction['probabilities'], label_id, class_names[label_id]))
    return rows
pred_sentences = list(test['text'])
predictions = getPrediction(pred_sentences)
enc_labels = []
act_labels = []
for i in range(len(predictions)):
pd.DataFrame(enc_labels, columns = ['category']).to_excel('data/submission_bert.xlsx', index = False)
## Random tester
#Classifying random sentences
tests = getPrediction(['Mr.Modi is the Indian Prime Minister',
'Gaming machines are powered by efficient micro processores and GPUs',
'That HBO TV series is really good',
'A trillion dollar economy '
As the question clearly says to save the model, here is how it works:
import torch
torch.save(model, 'path/to/model')
saved_model = torch.load('path/to/model')
I think you can just rename your model.ckpt-333.data-00000-of-00001 to bert_model.ckpt and then use it in the same way you would use a non-finetuned model. For example, run
python run_classifier.py \
--task_name=MRPC \
--do_predict=true \
--data_dir=$GLUE_DIR/MRPC \
--vocab_file=$BERT_BASE_DIR/vocab.txt \
--bert_config_file=$BERT_BASE_DIR/bert_config.json \
with --init_checkpoint pointing to your model's dir, or run bert-as-service
bert-serving-start -model_dir $TRAINED_CLASSIFIER
with the right -model_dir.
You can use this method:
model = MyModel(num_classes).to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-2)
output_model = './models/nameOfYourModel.pth'
# save
def save(model, optimizer, path=None):
    """Save the model and optimizer state dicts in one checkpoint file.

    Args:
        model: module whose ``state_dict()`` is stored under
            'model_state_dict'.
        optimizer: optimizer whose ``state_dict()`` is stored under
            'optimizer_state_dict'.
        path: destination file. Defaults to the module-level ``output_model``.
    """
    if path is None:
        path = output_model
    # Only the state dicts are stored, not the pickled objects, so the file
    # can be loaded without depending on how the objects were constructed.
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, path)
save(model, optimizer)
# load
checkpoint = torch.load(output_model, map_location='cpu')
Source: https://github.com/huggingface/transformers/issues/7849#issuecomment-718726121
I have LSTM training in tensorflow and the whole sess is saved with saver = tf.train.Saver().
The whole code is shown below.
def LSTM_RNN(_X, _weights, _biases):
    """Build a two-layer stacked LSTM classifier and return its logits.

    Model architecture based on "guillaume-chevalier" and "aymericdamien"
    under the MIT license. A single output is produced, in the style of a
    "many to one" classifier; see
    http://karpathy.github.io/2015/05/21/rnn-effectiveness/ for details.

    Args:
        _X: input tensor; its first two axes are swapped below and its last
            axis is reshaped to ``n_input`` columns.
        _weights: dict with 'hidden' and 'out' weight matrices.
        _biases: dict with 'hidden' and 'out' bias vectors.

    Returns:
        The linear projection of the last LSTM output.
    """
    # Swap the first two axes (n_steps and batch_size), then flatten to
    # n_input columns.
    time_major = tf.transpose(_X, [1, 0, 2])
    flat_inputs = tf.reshape(time_major, [-1, n_input])

    # ReLU-activated input projection.
    projected = tf.nn.relu(
        tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden'])

    # The static RNN expects a list with one tensor per time step.
    step_inputs = tf.split(projected, n_steps, 0)

    # Two stacked LSTM cells (two recurrent layers deep).
    cells = [
        tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True),
        tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True),
    ]
    stacked_cells = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
    step_outputs, _ = tf.contrib.rnn.static_rnn(stacked_cells, step_inputs, dtype=tf.float32)

    # Linear activation on the output of the final time step only.
    return tf.matmul(step_outputs[-1], _weights['out']) + _biases['out']
# Graph input/output
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Graph weights
weights = {
'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
biases = {
'hidden': tf.Variable(tf.random_normal([n_hidden])),
'out': tf.Variable(tf.random_normal([n_classes]))
pred = LSTM_RNN(x, weights, biases)
# Loss, optimizer and evaluation
l2 = lambda_loss_amount * sum(
tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 loss prevents this overkill neural network to overfit the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Softmax loss
if decaying_learning_rate:
learning_rate = tf.train.exponential_decay(init_learning_rate, global_step*batch_size, decay_steps, decay_rate, staircase=True)
#decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) #exponentially decayed learning rate
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost,global_step=global_step) # Adam Optimizer
while step * batch_size <= training_iters:
#print (sess.run(learning_rate)) #decaying learning rate
#print (sess.run(global_step)) # global number of iterations
if len(unsampled_indices) < batch_size:
unsampled_indices = list(range(0,len(X_train)))
batch_xs, raw_labels, unsampled_indicies = extract_batch_size(X_train, y_train, unsampled_indices, batch_size)
batch_ys = one_hot(raw_labels)
# check that encoded output is same length as num_classes, if not, pad it
if len(batch_ys[0]) < n_classes:
temp_ys = np.zeros((batch_size, n_classes))
temp_ys[:batch_ys.shape[0],:batch_ys.shape[1]] = batch_ys
batch_ys = temp_ys
# Fit training using batch data
_, loss, acc = sess.run(
[optimizer, cost, accuracy],
x: batch_xs,
y: batch_ys
# Evaluate network only at some steps for faster training:
if (step*batch_size % display_iter == 0) or (step == 1) or (step * batch_size > training_iters):
# To not spam console, show training accuracy/loss in this "if"
print("Iter #" + str(step*batch_size) + ": Learning rate = " + "{:.6f}".format(sess.run(learning_rate)) + ": Batch Loss = " + "{:.6f}".format(loss) + ", Accuracy = {}".format(acc))
# Evaluation on the test set (no learning made here - just evaluation for diagnosis)
loss, acc = sess.run([cost, accuracy], feed_dict={x: X_test,y: one_hot(y_test)})
print("PERFORMANCE ON TEST SET: " + "Batch Loss = {}".format(loss) + ", Accuracy = {}".format(acc))
step += 1
print("Optimization Finished!")
save_path = saver.save(sess, "ActivityTrainedModels/model.ckpt")
Then I restore the model for deployment.
At that point, I need to use it together with another model, a human pose estimator. The pose estimator is loaded with get_graph_path().
I can't load both; I can only load one or the other. If I load both, I get the following error:
NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
Key smoothing/gauss_weight not found in checkpoint
[[node save/RestoreV2 (defined at ActivityDetection.py:219) = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
My deployment code is as follow.
n_steps = 32 # 32 timesteps per series
n_input = 36 # num input parameters per timestep
n_hidden = 34 # Hidden layer num of features
n_classes = 3
global_step = tf.Variable(0, trainable=False)
# Graph input/output
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Graph weights
weights = {
'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
biases = {
'hidden': tf.Variable(tf.random_normal([n_hidden])),
'out': tf.Variable(tf.random_normal([n_classes]))
pred = LSTM_RNN(x, weights, biases)
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Activity Recognition')
parser.add_argument('--video', type=str, default='../../tf-openpose/TestVideos/2019_01-Feb Recording/C4-13.mp4')
parser.add_argument('--resolution', type=str, default='640x360', help='network input resolution. default=432x368')
parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')
parser.add_argument('--resize', type=str, default='0x0',
help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
parser.add_argument('--resize-out-ratio', type=float, default=4.0,
help='if provided, resize heatmaps before they are post-processed. default=1.0')
parser.add_argument('--show-process', type=bool, default=False,
help='for debug purpose, if enabled, speed for inference is dropped.')
parser.add_argument('--showBG', type=bool, default=True, help='False to show skeleton only.')
parser.add_argument('--s', type=str, default='00:00', help='start time to crop')
parser.add_argument('--e', type=str, default='00:00', help='end time to crop')
args = parser.parse_args()
w, h = model_wh(args.resize)
if w > 0 and h > 0:
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
e = TfPoseEstimator(get_graph_path(args.model), target_size=(img_w, img_h))
'''with tf.Session() as sess:
saver = tf.train.Saver()
saver.restore(sess, tf.train.latest_checkpoint('/home/coie/venvp3/HumanActivityRecognition/HumanActivityRecognition/ActivityTrainedModels/'))
print("Model restored.")
all_vars = tf.trainable_variables()
for i in range(len(all_vars)):
name = all_vars[i].name
values = sess.run(name)
print('name', name)
#print('value', values)
#result = sess.run(pred, feed_dict={x: X_test[24:27]})
#for r in range(len(result)):
#print("predicted activity:", LABELS[result[r].argmax(0)])
If I load TfPoseEstimator, I can't restore LSTM model.
How can I solve the problem?
I made a separate class for the LSTM; the LSTM graph is loaded with its own session inside the class. So the main Python code has another session with the default graph, and the default graph in the main code loads the pose-estimator graph.
My LSTM class is defined as
class ActivityRecognition:
#Utility functions for training:
def LSTM_RNN(self,_X, _weights, _biases):
# model architecture based on "guillaume-chevalier" and "aymericdamien" under the MIT license.
_X = tf.transpose(_X, [1, 0, 2]) # permute n_steps and batch_size
_X = tf.reshape(_X, [-1, self.n_input])
# Rectifies Linear Unit activation function used
_X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
# Split data because rnn cell needs a list of inputs for the RNN inner loop
_X = tf.split(_X, self.n_steps, 0)
# Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0, state_is_tuple=True)
lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0, state_is_tuple=True)
lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
lstm_last_output = outputs[-1]
return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']
def __init__(self):
    """Build the LSTM graph and load the latest trained checkpoint.

    Creates the input placeholders, the hidden/output weights and biases,
    the prediction op (``self.pred``) and the session (``self.sess``) that
    holds the variables restored from ``ActivityTrainedModels/``.
    """
    self.n_steps = 32  # 32 timesteps per series
    self.n_input = 36  # num input parameters per timestep
    self.n_hidden = 34  # Hidden layer num of features
    self.n_classes = 3
    self.global_step = tf.Variable(0, trainable=False)

    # Graph input/output
    self.x = tf.placeholder(tf.float32, [None, self.n_steps, self.n_input])
    self.y = tf.placeholder(tf.float32, [None, self.n_classes])

    # Graph weights
    self.weights = {
        'hidden': tf.Variable(tf.random_normal([self.n_input, self.n_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([self.n_hidden, self.n_classes], mean=1.0)),
    }
    self.biases = {
        'hidden': tf.Variable(tf.random_normal([self.n_hidden])),
        'out': tf.Variable(tf.random_normal([self.n_classes])),
    }
    self.pred = self.LSTM_RNN(self.x, self.weights, self.biases)

    self.sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
    self.init = tf.global_variables_initializer()

    # Restore into self.sess, the session later used for prediction.
    # Restoring into a separate `with tf.Session()` block would discard the
    # loaded values as soon as that block exits, leaving self.sess with
    # uninitialized variables.
    saver = tf.train.Saver()
    saver.restore(self.sess, tf.train.latest_checkpoint('ActivityTrainedModels/'))
    print("Model restored.")
def inference(self, test):
    """Run the restored model on ``test`` and return one class index per row.

    Args:
        test: batch of input series fed to the ``self.x`` placeholder.

    Returns:
        A list holding, for every row of the prediction output, the index
        of that row's highest score.
    """
    # Fetch self.pred and feed self.x: the bare names `selfpred` and `x`
    # are not defined in this scope.
    result = self.sess.run(self.pred, feed_dict={self.x: test})
    # NOTE(review): the loop body that originally produced `activity` is not
    # present in the source; taking the arg-max of each row is an assumed
    # reconstruction -- confirm the intended label mapping.
    activity = [max(range(len(row)), key=row.__getitem__) for row in result]
    return activity
if __name__ == "__main__":
I wrote a CNN with TensorFlow that works properly, and I want to inspect the classified images during the testing stage.
My dataset has 5 different categories. During the test stage I am looking for a way to save the classified images into a separate folder for each category, so that I can check whether the results from my network make sense.
This is my complete code for the test stage:
# Command-line flags for the test script; read through FLAGS below.
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate for adam optimizer')
tf.app.flags.DEFINE_integer('num_classes', 3, 'Number of classes')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Batch size')
tf.app.flags.DEFINE_float('keep_prob', 0.8, 'Dropout keep probability')
tf.app.flags.DEFINE_integer('num_channel',3 , 'Image channel, RGB=3, Grayscale=1')
tf.app.flags.DEFINE_integer('img_size', 80, 'Size of images')
tf.app.flags.DEFINE_string('test_file', 'data/test.txt', 'Test dataset file')
FLAGS = tf.app.flags.FLAGS
# Despite its name this is the checkpoint path handed to saver.restore(),
# not a directory.
checkpoint_dir = '/home/xyrio/Desktop/classier/training/checkpoints/model_epoch.ckpt89'
# Entry point of the test script: builds the CNN graph, restores the
# checkpoint and evaluates it on the test set.
def main(_):
# Image batch placeholder. Its batch dimension is fixed to FLAGS.batch_size,
# so only full batches can be fed.
x = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, FLAGS.img_size, FLAGS.img_size, FLAGS.num_channel], name='x')
# One-hot ground-truth labels; the batch dimension is left open.
y_true = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name='y_true')
# Class index of each ground-truth row.
y_true_cls = tf.argmax(y_true, axis=1)
# Kernel size and number of filters for each of the five convolutional layers.
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 64
filter_size_conv4 = 3
num_filters_conv4 = 128
filter_size_conv5 = 3
num_filters_conv5 = 256
# Output widths of the first two fully connected layers.
fc_layer_size = 512
fc_layer_size2 = 128
def create_weights(shape):
    """Return a variable of ``shape`` initialised from a truncated normal (mean 0, stddev 0.01)."""
    initial_values = tf.truncated_normal(shape, mean=0, stddev=0.01)
    return tf.Variable(initial_values)
def create_biases(size):
    """Return a bias variable of length ``size`` with every entry set to 0.01."""
    initial_values = tf.constant(0.01, shape=[size])
    return tf.Variable(initial_values)
def create_convolutional_layer(input, num_input_channels, conv_filter_size, num_filters, useBatchNorm=False,
                               usePooling=True):
    """Build a stride-1 'SAME' convolution + bias + ReLU block.

    Args:
        input: input tensor of the layer.
        num_input_channels: number of channels of ``input``.
        conv_filter_size: height and width of the square kernel.
        num_filters: number of output channels.
        useBatchNorm: when true, apply batch normalization after the ReLU.
        usePooling: when true, finish with a 2x2 max-pool of stride 2.

    Returns:
        The output tensor of the block.
    """
    # NOTE(review): the end of the signature is missing from the source.
    # `usePooling` is restored because every call site passes it and the
    # body reads it; its default value is an assumption.
    weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
    biases = create_biases(num_filters)
    layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
    layer += biases
    layer = tf.nn.relu(layer)
    if useBatchNorm:
        layer = tf.layers.batch_normalization(layer)
    if usePooling:
        layer = tf.nn.max_pool(value=layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return layer
def create_flatten_layer(layer):
    """Reshape ``layer`` to 2-D, merging dimensions 1..3 into one feature axis."""
    num_features = layer.get_shape()[1:4].num_elements()
    return tf.reshape(layer, [-1, num_features])
def create_fc_layer(input, num_inputs, num_outputs, useRelu=True, useDropout=False, keep_prob=None):
    """Build a fully connected layer with optional ReLU and dropout.

    Args:
        input: 2-D input tensor.
        num_inputs: size of the second dimension of ``input``.
        num_outputs: number of output units.
        useRelu: when true, apply ReLU to the affine output.
        useDropout: when true, apply dropout after the activation.
        keep_prob: dropout keep probability; defaults to ``FLAGS.keep_prob``.
            Pass 1.0 to make dropout a no-op, e.g. when only evaluating.

    Returns:
        The output tensor of the layer.
    """
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)
    layer = tf.matmul(input, weights) + biases
    if useRelu:
        layer = tf.nn.relu(layer)
    if useDropout:
        if keep_prob is None:
            keep_prob = FLAGS.keep_prob
        layer = tf.nn.dropout(layer, keep_prob=keep_prob)
    return layer
# Five convolutional blocks chained together; each one uses batch
# normalization and max pooling.
layer_conv1 = create_convolutional_layer(x, FLAGS.num_channel, filter_size_conv1, num_filters_conv1,
useBatchNorm=True, usePooling=True)
layer_conv2 = create_convolutional_layer(layer_conv1, num_filters_conv1, filter_size_conv2, num_filters_conv2,
useBatchNorm=True, usePooling=True)
layer_conv3 = create_convolutional_layer(layer_conv2, num_filters_conv2, filter_size_conv3, num_filters_conv3,
useBatchNorm=True, usePooling=True)
layer_conv4 = create_convolutional_layer(layer_conv3, num_filters_conv3, filter_size_conv4, num_filters_conv4,
useBatchNorm=True, usePooling=True)
layer_conv5 = create_convolutional_layer(layer_conv4, num_filters_conv4, filter_size_conv5, num_filters_conv5,
useBatchNorm=True, usePooling=True)
# Flatten the last feature map and feed it through three fully connected layers.
layer_flat = create_flatten_layer(layer_conv5)
# NOTE(review): the continuation of the next call is missing from the source.
layer_fc1 = create_fc_layer(layer_flat, layer_flat.get_shape()[1:4].num_elements(), fc_layer_size, useRelu=True,
layer_fc2 = create_fc_layer(layer_fc1, fc_layer_size, fc_layer_size2, useRelu=True, useDropout=True)
# Final layer has no ReLU: its output is passed to softmax as logits.
layer_fc3 = create_fc_layer(layer_fc2, fc_layer_size2, FLAGS.num_classes, useRelu=False)
y_pred = tf.nn.softmax(layer_fc3, name='y_pred', axis=1)
# Predicted class index of each row.
y_pred_cls = tf.argmax(y_pred, axis=1)
# Accuracy is the mean of the per-row "prediction equals truth" indicator.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
# Evaluate the restored checkpoint on the test set: average the per-batch
# accuracy and sum the per-batch confusion matrices.
test_preprocessor = BatchPreprocessor(dataset_file_path=FLAGS.test_file, num_classes=FLAGS.num_classes,
                                      output_size=[FLAGS.img_size, FLAGS.img_size])
# Only whole batches are evaluated. Integer division replaces the
# float -> np.int16 round trip, which would overflow above 32767 batches.
test_batches_per_epoch = len(test_preprocessor.labels) // FLAGS.batch_size
conf_mat = tf.confusion_matrix(y_true_cls, y_pred_cls, FLAGS.num_classes)
with tf.Session() as sess:
    saver.restore(sess, checkpoint_dir)
    # Start Testing
    test_acc = 0.
    test_count = 0
    cm_total = None
    for _ in range(test_batches_per_epoch):
        batch_tx, batch_ty = test_preprocessor.next_batch(FLAGS.batch_size)
        acc, conf_m = sess.run([accuracy, conf_mat],
                               feed_dict={x: batch_tx, y_true: batch_ty})
        # The first batch initialises the running total and later batches are
        # added to it, so the first batch is no longer counted twice.
        cm_total = conf_m if cm_total is None else cm_total + conf_m
        test_acc += acc
        test_count += 1
    # Guard against a test set smaller than one batch.
    if test_count:
        test_acc /= test_count
    print("{} Testing Accuracy = {:.2%}".format(datetime.now(), test_acc))
This code is for the test data. As you can see, I restore the checkpoint that I saved during training and validation, and then I use that best checkpoint to predict on my test data.
batch_tx is my test data and batch_ty holds my test labels.
Does anyone have an idea how I can do this?
Thanks in advance.
Ok, after the discussion above, you added the line
sess.run(y_pred_cls, {x:batch_tx})
after your confusion matrix summation and now you have your predicted labels. Print them out in a format that you can turn into a np array for the code below. Provided your test code runs in a single thread, and it does not shuffle the test batches, you now have your predicted labels in the same order as the images appear in the input file. Assuming your input file is a .bin file, you should be able to extract images (using PIL) from it like this:
import numpy as np
from PIL import Image

# your image dimensions here
width = 80
height = 80
channels = 3
# most labels are 1 byte
labelSize = 1
pixelSize = width * height * channels
recordSize = labelSize + pixelSize
label_names = ['cat', 'horse', 'dog']  # replace with your class names, ordered by label id
predictions = [...]  # put your predictions here, one class id per record

with open(inputFilename, "rb") as f:
    allTheData = np.fromfile(f, 'u1')
# Integer division: reshape() needs integer dimensions, and `/` yields a
# float on Python 3.
numRecords = allTheData.shape[0] // recordSize
allTheData = allTheData.reshape(numRecords, recordSize)
for idx, d in enumerate(allTheData):
    label = label_names[d[0]]
    rgbData = d[1:]  # records are label first, then all pixel data and rgb
    # Look the predicted class up in `predictions`, the list defined above.
    predlabel = label_names[predictions[idx]]
    filename = "{}_pred{}_actual{}.png".format(idx, predlabel, label)
    # Pixel data is stored channel-first; move the channel axis last so the
    # bytes are interleaved RGB.
    # NOTE(review): assumes rows-then-columns order inside each channel --
    # confirm for non-square images.
    pictureA = rgbData.reshape(channels, height, width).transpose(1, 2, 0)
    # PIL expects the size as (width, height).
    imageA = Image.frombytes('RGB', (width, height), pictureA.tobytes())
    imageA.save(filename, "PNG")
Please note that the code above won't run until you add in proper label names and your predictions. Also, if the input file is a .csv, you will have to change the reading of it slightly.
I use the tf.data Dataset API to fetch images, labels, and edges when training on a GPU, but I find that the Dataset API does not load all of the data.
I use this code:
def get_dataset(filenames, shuffle_buffer, repeat_times, batch_size):
    """Build the TFRecord input pipeline: parse, repeat, shuffle, batch.

    Args:
        filenames: TFRecord file name; it is wrapped in a list before being
            handed to ``TFRecordDataset``.
        shuffle_buffer: buffer size passed to ``shuffle``.
        repeat_times: number of passes over the data; ``None`` repeats
            indefinitely.
        batch_size: number of consecutive elements per batch.

    Returns:
        The batched dataset of (image, label, edge) tuples.
    """
    dataset = tf.data.TFRecordDataset([filenames])
    dataset = dataset.map(tfrecord_preprocess)
    # repeat(None) already means "repeat indefinitely", so no separate branch
    # is needed for None (the previous code applied repeat twice in that case).
    dataset = dataset.repeat(repeat_times)
    dataset = dataset.shuffle(shuffle_buffer).batch(batch_size)
    return dataset
def tfrecord_preprocess(example):
    """Parse one serialized example into float32 image, label and edge tensors.

    The image and label are reshaped to [1, 128, 128]; the edge map is
    reshaped to [128, 128].
    """
    schema = {
        key: tf.FixedLenFeature((), tf.string, default_value="")
        for key in ("image", "label", "edge")
    }
    parsed = tf.parse_single_example(example, schema)

    # Decode the three raw byte strings first, then reshape and cast each one.
    raw_image = tf.decode_raw(parsed["image"], out_type=tf.uint8)
    raw_label = tf.decode_raw(parsed["label"], out_type=tf.uint8)
    raw_edge = tf.decode_raw(parsed["edge"], out_type=tf.uint8)

    image = tf.cast(tf.reshape(raw_image, shape=[1, 128, 128]), tf.float32)
    label = tf.cast(tf.reshape(raw_label, shape=[1, 128, 128]), tf.float32)
    edge = tf.cast(tf.reshape(raw_edge, shape=[128, 128]), tf.float32)
    return image, label, edge
I wrote some simple code to test the API:
# Count how many samples the pipeline yields in up to 40 batches.
dataset = get_dataset(filenames, shuffle_buffer, repeat_times, batchsize)
# shuffle=1000, repeat_times=2, batchsize=13
# make_one_shot_iterator must be called; the bare attribute is only a bound
# method and has no get_next(). Named `iterator` so the builtin `iter` is not
# shadowed.
iterator = dataset.make_one_shot_iterator()
images, labels, edges = iterator.get_next()
count = 0
with tf.Session() as sess:
    try:
        for _ in xrange(40):
            edges_value = sess.run(edges)
            count = count + len(edges_value)
        print(count)
    except tf.errors.OutOfRangeError:
        # Raised by sess.run once the dataset is exhausted.
        # NOTE(review): the original handler body is missing from the source;
        # this message mirrors the other snippets.
        print("end of data")
The dataset holds 260 samples, so after repeating it twice and batching by 13 there should be 40 batches. It works.
However, when I use similar code for training, the total number of samples counted is less than 260: only 140 (according to the variable count). Does anyone know how to solve this problem? Please help me.
I use tensorflow-gpu 1.4.
My training code is:
# Training script: builds the input pipeline and the model, then runs up to
# ten training steps while counting the samples that were consumed.
shuffle_buffer = params["shuffle_buffer"]  # 1000
repeat_times = params["repeat_times"]  # 1
batch_size = params["batch_size"]  # 26
num_classes = params["num_classes"]  # 2
dataset = model.get_dataset(filenames, shuffle_buffer, repeat_times, batch_size)
iterator = dataset.make_one_shot_iterator()
# NOTE(review): indentation is missing from the source, so the extent of the
# device block is an assumption.
with tf.device('/gpu:1'):
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(params["learning_rate"],
                                               global_step, 100, 0.99)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    images, labels, edges = iterator.get_next()
    # Size of the batch consumed by the current step, computed in-graph so it
    # can be fetched together with the training op.
    batch_count = tf.shape(edges)[0]
    _, probs = model.interence(features=images, training=True)
    loss, reg = model.get_loss(probs, labels, edges, num_classes)
    _, acc_mean, _ = model.get_acc(probs, labels)
    train_op = optimizer.minimize(loss, global_step=global_step)
    variables_average = tf.train.ExponentialMovingAverage(0.99, global_step)
    var_list = tf.trainable_variables(scope='.*(kernel|bias)')
    variables_average_op = variables_average.apply(var_list)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_all_op = tf.group(train_op, variables_average_op)
tf.summary.scalar("loss", loss)
tf.summary.scalar("reg", reg)
tf.summary.scalar("acc_mean", acc_mean)
merged = tf.summary.merge_all()
saver = tf.train.Saver(max_to_keep=5)
# NOTE(review): the continuation line of this call is missing from the
# source; any further ConfigProto arguments must be re-added.
config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True
count = 0
step = 0
loss_value = None
acc_mean_value = None
with tf.Session(config=config) as sess:
    # NOTE(review): no variable initialization is visible in the source; it is
    # added here so the script can run -- remove if done elsewhere.
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('./train', sess.graph)
    try:
        for _ in xrange(10):
            # Fetch everything in ONE sess.run: each separate run of a tensor
            # that depends on iterator.get_next() pulls a fresh batch, which
            # silently skipped data before.
            _, step, summary, loss_value, acc_mean_value, n_samples = sess.run(
                [train_all_op, global_step, merged, loss, acc_mean, batch_count])
            count = count + n_samples
            writer.add_summary(summary, step)
            if step % 5 == 0:
                print(loss_value)
                print(acc_mean_value)
                saver.save(sess, params["save_dir"], step)
    except tf.errors.OutOfRangeError:
        # The iterator is exhausted, so loss/acc_mean cannot be evaluated
        # again; report the values from the last completed step instead.
        print("end of data")
        print(count)
        print("the final step is %d" % step)
        print(loss_value)
        print(acc_mean_value)
        saver.save(sess, params["save_dir"], step)
Finally, I got this output in the terminal:
end of data
the final step is 5
To test the code I set repeat_times to 1.
But when I use this test code:
def test():
    """Read up to ten batches of train_output.tfrecords and report the sample count.

    When the dataset runs out, prints "end of data", the number of samples
    read and the index of the batch that failed.
    """
    dataset = get_dataset("train_output.tfrecords", 1000, 1, 26)
    # Bind the iterator to the name that get_next() is called on below.
    iterator = dataset.make_one_shot_iterator()
    images, labels, edges = iterator.get_next()
    count = 0
    with tf.Session() as sess:
        try:
            for i in xrange(10):
                images_value, labels_value, edges_value = sess.run([images, labels, edges])
                count = count + len(edges_value)
        except tf.errors.OutOfRangeError:
            # NOTE(review): indentation is missing from the source; all three
            # prints are kept in the handler, as their position suggests.
            print("end of data")
            print(count)
            print(i)
The terminal shows:
The problem is that sess.run(edges) causes this part of the graph to execute again: images, labels, edges = iterator.get_next(). Therefore, every time you run it, you consume one iteration that is not counted by your counter.
To get the count of edges, keep a counter inside the with tf.device('/gpu:1') block. You can even plot it in TensorBoard using tf.summary.scalar, similar to what you do with loss.
Declare a edges_count = tf.Variable(1, name='edges_count', trainable=False, dtype=tf.int32)
images, labels, edges = iterator.get_next()
# len() does not work on a tensor; tf.shape(edges)[0] yields the size of the
# current batch at run time.
# NOTE: initialise edges_count with 0 rather than 1 if it should equal the
# number of consumed samples.
edges_count_update_op = tf.assign_add(edges_count, tf.shape(edges)[0])
Then add edges_count_update_op to your train_op group.
I am very new to TensorFlow. I am doing image classification using my own training dataset.
However, after training on my own dataset, I have no idea how to classify an input image.
Here is my code for preparing my own dataset:
# Queue-based input pipeline: read each JPEG, decode it to 3 channels, cast it
# to float32, resize it and group the results into batches of 8.
filenames = ['01.jpg', '02.jpg', '03.jpg', '04.jpg']
label = [0,1,1,1]
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, content = reader.read(filename_queue)
image = tf.image.decode_jpeg(content, channels=3)
image = tf.cast(image, tf.float32)
resized_image = tf.image.resize_images(image, 224, 224)
# NOTE(review): `label` is the complete 4-element list, so every batched image
# is paired with all four labels rather than with its own label.
image_batch , label_batch= tf.train.batch([resized_image,label], batch_size=8, num_threads = 3, capacity=5000)
Is this the correct code for preparing the training dataset?
Afterwards, I try to classify input images with the following code:
# Reads, decodes and resizes a test image, then evaluates the resized tensor.
test = ['test.jpg', 'test2.jpg']
reader = tf.WholeFileReader()
# NOTE(review): `test_queue` is not defined anywhere in the visible snippet.
testname, test_content = reader.read(test_queue)
test = tf.image.decode_jpeg(test_content, channels=3)
test = tf.cast(test, tf.float32)
resized_image = tf.image.resize_images(test, 224,224)
with tf.Session() as sess:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# Only the resized image is evaluated here; no model or prediction op is run,
# so `res` holds pixel data, not a label.
res = sess.run(resized_image)
However, it does not return the predicted label for the input images.
I am looking for someone to show me how to classify images using my own dataset.
Thank you.
Maybe you could try this after you have installed the PIL Python library:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import math
import numpy
import numpy as np
import random
from PIL import Image
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# Basic model parameters as external flags.
# Command-line flags of the training script.
flags = tf.app.flags
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 2000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
# The two adjacent string literals are concatenated into one help text.
flags.DEFINE_integer('batch_size', 4, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', 'data', 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
'for unit testing.')
def inference(images, hidden1_units, hidden2_units):
    """Build the forward pass: two ReLU hidden layers and a linear output layer.

    Args:
        images: input tensor whose second dimension is IMAGE_PIXELS.
        hidden1_units: number of units in the first hidden layer.
        hidden2_units: number of units in the second hidden layer.

    Returns:
        The logits tensor, with NUM_CLASSES columns.
    """
    # NOTE(review): each tf.Variable(...) call was left unterminated in the
    # source; the `name=` arguments follow the TensorFlow MNIST tutorial
    # pattern -- confirm against existing checkpoints.
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]),
                             name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Hidden 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]),
                             name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    # Linear
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                             name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits
def cal_loss(logits, labels):
    """Return the mean sparse softmax cross-entropy between logits and labels.

    Args:
        logits: unscaled class scores, one row per example.
        labels: integer class ids, one per example.

    Returns:
        Scalar loss tensor named 'xentropy_mean'.
    """
    labels = tf.to_int64(labels)
    # Pass labels/logits by keyword: TensorFlow 1.x rejects positional
    # arguments here, and keywords rule out a silent swap.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    return loss
def training(loss, learning_rate):
    """Return the op that applies one gradient-descent update minimizing ``loss``.

    A non-trainable 'global_step' variable is created and passed to
    ``minimize`` as its global step.
    """
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    return sgd.minimize(loss, global_step=step_counter)
def evaluation(logits, labels):
    """Return an int32 tensor counting examples whose label is the top-1 prediction."""
    hits = tf.nn.in_top_k(logits, labels, 1)
    hit_flags = tf.cast(hits, tf.int32)
    return tf.reduce_sum(hit_flags)
def placeholder_inputs(batch_size):
    """Create the image and label placeholders for one batch.

    Args:
        batch_size: number of examples per batch.

    Returns:
        Tuple ``(images_placeholder, labels_placeholder)``: a float32
        placeholder of shape (batch_size, IMAGE_PIXELS) and an int32
        placeholder of shape (batch_size,).
    """
    images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
    # `(batch_size)` is a parenthesised value, not a tuple; the trailing comma
    # makes the label shape explicitly one-dimensional.
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder
def fill_feed_dict(images_feed, labels_feed, images_pl, labels_pl):
    """Map the image and label placeholders to the values to feed.

    Args:
        images_feed: image data for the step.
        labels_feed: label data for the step.
        images_pl: placeholder (dict key) for the images.
        labels_pl: placeholder (dict key) for the labels.

    Returns:
        Dict ``{images_pl: images_feed, labels_pl: labels_feed}``.
    """
    # The dict literal was never closed in the previous version.
    feed_dict = {
        images_pl: images_feed,
        labels_pl: labels_feed,
    }
    return feed_dict
# Runs the evaluation op over the training data and prints the number of
# examples, the number of correct predictions and the resulting precision.
# NOTE(review): the rest of this signature and the continuation of the
# fill_feed_dict call are missing from the source.
def do_eval(sess,
# And run one epoch of eval.
true_count = 0 # Counts the number of correct predictions.
# The literal 4 is the hard-coded number of training images.
steps_per_epoch = 4 // FLAGS.batch_size
num_examples = steps_per_epoch * FLAGS.batch_size
for step in xrange(steps_per_epoch):
# The full train_images/train_labels arrays are fed on every step.
feed_dict = fill_feed_dict(train_images,train_labels,
true_count += sess.run(eval_correct, feed_dict=feed_dict)
# True division is in effect through `from __future__ import division`.
precision = true_count / num_examples
print(' Num examples: %d Num correct: %d Precision # 1: %0.04f' %
(num_examples, true_count, precision))
# Get the sets of images and labels for training, validation, and
# Load every training image, resize it to IMAGE_SIZE x IMAGE_SIZE and stack
# the results as one flat row of IMAGE_PIXELS values per image.
train_filenames = ['01.jpg', '02.jpg', '03.jpg', '04.jpg']
train_images = []
for filename in train_filenames:
    image = Image.open(filename)
    image = image.resize((IMAGE_SIZE, IMAGE_SIZE))
    # Collect the pixel data; without this the list stays empty and the
    # reshape below fails.
    train_images.append(np.array(image))
train_images = np.array(train_images)
# One row per file instead of a hard-coded 4.
train_images = train_images.reshape(len(train_filenames), IMAGE_PIXELS)
label = [0, 1, 1, 1]
train_labels = np.array(label)
# Builds the graph (placeholders, inference, loss, training and evaluation
# ops), then runs FLAGS.max_steps training steps, printing the loss every 100
# steps and saving a checkpoint every 1000 steps and at the last step.
# NOTE(review): several continuation lines are missing from the source (the
# remaining arguments of inference/fill_feed_dict/sess.run, and whatever
# followed the final print).
def run_training():
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
# Generate placeholders for the images and labels.
images_placeholder, labels_placeholder = placeholder_inputs(4)
# Build a Graph that computes predictions from the inference model.
logits = inference(images_placeholder,
# Add to the Graph the Ops for loss calculation.
loss = cal_loss(logits, labels_placeholder)
# Add to the Graph the Ops that calculate and apply gradients.
train_op = training(loss, FLAGS.learning_rate)
# Add the Op to compare the logits to the labels during evaluation.
eval_correct = evaluation(logits, labels_placeholder)
# Create a saver for writing training checkpoints.
saver = tf.train.Saver()
# Create a session for running Ops on the Graph.
sess = tf.Session()
# Run the Op to initialize the variables.
# NOTE(review): `init` is created here but no sess.run(init) is visible.
init = tf.initialize_all_variables()
# And then after everything is built, start the training loop.
for step in xrange(FLAGS.max_steps):
start_time = time.time()
feed_dict = fill_feed_dict(train_images,train_labels,
_, loss_value = sess.run([train_op, loss],
duration = time.time() - start_time
if step % 100 == 0:
# Print status to stdout.
print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
saver.save(sess, FLAGS.train_dir, global_step=step)
print('Training Data Eval:')
def main(_):
    """Entry point invoked by ``tf.app.run``; the argument is ignored."""
    # NOTE(review): both bodies below are missing from the source; they are
    # restored following the TensorFlow tutorial this script is modelled on.
    run_training()


if __name__ == '__main__':
    tf.app.run()