Tensorflow Slim: 'module' object has no attribute 'sum_of_squares' - python

I am following the tutorial at TF Slim. However, at
loss = slim.losses.sum_of_squares(predictions, targets)
I get AttributeError: 'module' object has no attribute 'sum_of_squares'. I have installed TF version 0.12 head, running on Ubuntu 16.04 (CPU version). The complete code I am running is as follows:
import matplotlib.pyplot as plt
import math
import numpy as np
import tensorflow as tf
import time
from datasets import dataset_utils
# Main slim library
slim = tf.contrib.slim
def regression_model(inputs, is_training=True, scope="deep_regression"):
    """Creates the regression model.

    Args:
        inputs: A node that yields a `Tensor` of size [batch_size, dimensions].
        is_training: Whether or not we're currently training the model.
        scope: An optional variable_op scope for the model.

    Returns:
        predictions: 1-D `Tensor` of shape [batch_size] of responses.
        end_points: A dict of end points representing the hidden layers.
    """
    with tf.variable_scope(scope, 'deep_regression', [inputs]):
        end_points = {}
        # Set the default weight regularizer and activation for each fully_connected layer.
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_regularizer=slim.l2_regularizer(0.01)):
            # Creates a fully connected layer from the inputs with 32 hidden units.
            net = slim.fully_connected(inputs, 32, scope='fc1')
            end_points['fc1'] = net
            # Adds a dropout layer to prevent over-fitting.
            net = slim.dropout(net, 0.8, is_training=is_training)
            # Adds another fully connected layer with 16 hidden units.
            net = slim.fully_connected(net, 16, scope='fc2')
            end_points['fc2'] = net
            # Creates a fully-connected layer with a single hidden unit. Note that the
            # layer is made linear by setting activation_fn=None.
            predictions = slim.fully_connected(net, 1, activation_fn=None, scope='prediction')
            end_points['out'] = predictions
            return predictions, end_points

with tf.Graph().as_default():
    # Dummy placeholders for arbitrary number of 1d inputs and outputs
    inputs = tf.placeholder(tf.float32, shape=(None, 1))
    outputs = tf.placeholder(tf.float32, shape=(None, 1))

    # Build model
    predictions, end_points = regression_model(inputs)

    # Print name and shape of each tensor.
    print "Layers"
    for k, v in end_points.iteritems():
        print 'name = {}, shape = {}'.format(v.name, v.get_shape())

    # Print name and shape of parameter nodes (values not yet initialized)
    print "\n"
    print "Parameters"
    for v in slim.get_model_variables():
        print 'name = {}, shape = {}'.format(v.name, v.get_shape())

def produce_batch(batch_size, noise=0.3):
    xs = np.random.random(size=[batch_size, 1]) * 10
    ys = np.sin(xs) + 5 + np.random.normal(size=[batch_size, 1], scale=noise)
    return [xs.astype(np.float32), ys.astype(np.float32)]

x_train, y_train = produce_batch(200)
x_test, y_test = produce_batch(200)
plt.scatter(x_train, y_train)

def convert_data_to_tensors(x, y):
    inputs = tf.constant(x)
    inputs.set_shape([None, 1])
    outputs = tf.constant(y)
    outputs.set_shape([None, 1])
    return inputs, outputs

# The following snippet trains the regression model using a sum_of_squares loss.
ckpt_dir = '/tmp/regression_model/'
with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.INFO)
    inputs, targets = convert_data_to_tensors(x_train, y_train)

    # Make the model.
    predictions, nodes = regression_model(inputs, is_training=True)

    # Add the loss function to the graph.
    loss = slim.losses.sum_of_squares(predictions, targets)

    # The total loss is the user's loss plus any regularization losses.
    total_loss = slim.losses.get_total_loss()

    # Specify the optimizer and create the train op:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.005)
    train_op = slim.learning.create_train_op(total_loss, optimizer)

    # Run the training inside a session.
    final_loss = slim.learning.train(
        train_op,
        logdir=ckpt_dir,
        number_of_steps=5000,
        save_summaries_secs=5,
        log_every_n_steps=500)

    print("Finished training. Last batch loss:", final_loss)
    print("Checkpoint saved in %s" % ckpt_dir)

Apparently it has been removed in recent builds, as can be seen in the GitHub repo. I switched to loss = slim.losses.mean_squared_error(predictions, targets), which I assume serves the same purpose.
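For reference, a minimal sketch of the swap (assuming the rest of the training snippet above is unchanged; depending on the TF build, the loss may also live under tf.losses):

# sum_of_squares was removed; mean_squared_error computes the same squared-error loss.
loss = slim.losses.mean_squared_error(predictions, targets)
# Regularization losses added by the arg_scope are still collected here.
total_loss = slim.losses.get_total_loss()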

Related

How does tensorflow handle training data passed to a neural network?

I am having an issue with my code, which I modified from https://keras.io/examples/generative/wgan_gp/ . Instead of the data being images, my data is a (1001, 2) array of sequential data, the first column being the time and the second the velocity measurements. I'm getting this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14704/3651127346.py in <module>
21 # Training the WGAN-GP model
22 tic = time.perf_counter()
---> 23 WGAN.fit(dataset, batch_size=batch_Size, epochs=n_epochs, callbacks=[cbk])
24 toc = time.perf_counter()
25 time_elapsed(toc-tic)
~\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: in user code:
File "C:\Users\sissonn\Anaconda3\lib\site-packages\keras\engine\training.py", line 1021, in train_function *
return step_function(self, iterator)
File "C:\Users\sissonn\Anaconda3\lib\site-packages\keras\engine\training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\sissonn\Anaconda3\lib\site-packages\keras\engine\training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "C:\Users\sissonn\AppData\Local\Temp/ipykernel_14704/3074469771.py", line 141, in train_step
gp = self.gradient_penalty(batch_size, x_real, x_fake)
File "C:\Users\sissonn\AppData\Local\Temp/ipykernel_14704/3074469771.py", line 106, in gradient_penalty
alpha = tf.random.uniform(batch_size,1,1)
ValueError: Shape must be rank 1 but is rank 0 for '{{node random_uniform/RandomUniform}} = RandomUniform[T=DT_INT32, dtype=DT_FLOAT, seed=0, seed2=0](strided_slice)' with input shapes: [].
And here is my code:
import time
from tqdm.notebook import tqdm
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input
import numpy as np
import matplotlib.pyplot as plt
def define_generator(latent_dim):
# This function creates the generator model using the functional API.
# Layers...
# Input Layer
inputs = Input(shape=latent_dim, name='INPUT_LAYER')
# 1st hidden layer
x = Dense(50, activation='relu', name='HIDDEN_LAYER_1')(inputs)
# 2nd hidden layer
x = Dense(150, activation='relu', name='HIDDEN_LAYER_2')(x)
# 3rd hidden layer
x = Dense(300, activation='relu', name='HIDDEN_LAYER_3')(x)
# 4th hidden layer
x = Dense(150, activation='relu', name='HIDDEN_LAYER_4')(x)
# 5th hidden layer
x = Dense(50, activation='relu', name='HIDDEN_LAYER_5')(x)
# Output layer
outputs = Dense(2, activation='linear', name='OUPUT_LAYER')(x)
# Instantiating the generator model
model = Model(inputs=inputs, outputs=outputs, name='GENERATOR')
return model
def generator_loss(fake_logits):
# This function calculates and returns the WGAN-GP generator loss.
# Expected value of critic ouput from fake images
expectation_fake = tf.reduce_mean(fake_logits)
# Loss to minimize
loss = -expectation_fake
return loss
def define_critic():
# This function creates the critic model using the functional API.
# Layers...
# Input Layer
inputs = Input(shape=2, name='INPUT_LAYER')
# 1st hidden layer
x = Dense(50, activation='relu', name='HIDDEN_LAYER_1')(inputs)
# 2nd hidden layer
x = Dense(150, activation='relu', name='HIDDEN_LAYER_2')(x)
# 3rd hidden layer
x = Dense(300, activation='relu', name='HIDDEN_LAYER_3')(x)
# 4th hidden layer
x = Dense(150, activation='relu', name='HIDDEN_LAYER_4')(x)
# 5th hidden layer
x = Dense(50, activation='relu', name='HIDDEN_LAYER_5')(x)
# Output layer
outputs = Dense(1, activation='linear', name='OUPUT_LAYER')(x)
# Instantiating the critic model
model = Model(inputs=inputs, outputs=outputs, name='CRITIC')
return model
def critic_loss(real_logits, fake_logits):
# This function calculates and returns the WGAN-GP critic loss.
# Expected value of critic output from real images
expectation_real = tf.reduce_mean(real_logits)
# Expected value of critic output from fake images
expectation_fake = tf.reduce_mean(fake_logits)
# Loss to minimize
loss = expectation_fake - expectation_real
return loss
class define_wgan(keras.Model):
# This class creates the WGAN-GP object.
# Attributes:
# critic = the critic model.
# generator = the generator model.
# latent_dim = defines generator input dimension.
# critic_steps = defines how many times the discriminator gets trained for each training cycle.
# gp_weight = defines and returns the critic gradient for the gradient penalty term.
# Methods:
# compile() = defines the optimizer and loss function of both the critic and generator.
# gradient_penalty() = calcuates and returns the gradient penalty term in the WGAN-GP loss function.
# train_step() = performs the WGAN-GP training by updating the critic and generator weights
# and returns the loss for both. Called by fit().
def __init__(self, gen, critic, latent_dim, n_critic_train, gp_weight):
super().__init__()
self.critic = critic
self.generator = gen
self.latent_dim = latent_dim
self.critic_steps = n_critic_train
self.gp_weight = gp_weight
def compile(self, generator_loss, critic_loss):
super().compile()
self.generator_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
self.critic_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
self.generator_loss_function = generator_loss
self.critic_loss_function = critic_loss
def gradient_penalty(self, batch_size, x_real, x_fake):
# Random uniform samples of points between distribution.
# "alpha" must be a tensor so that "x_interp" will also be a tensor.
alpha = tf.random.uniform(batch_size,1,1)
# Data interpolated between real and fake distributions
x_interp = alpha*x_real + (1-alpha)*x_fake
# Calculating critic output gradient wrt interpolated data
with tf.GradientTape() as gp_tape:
gp_tape.watch(x_interp)
critc_output = self.discriminator(x_interp, training=True)
grad = gp_tape.gradient(critic_output, x_interp)[0]
# Calculating norm of gradient
grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grad)))
# calculating gradient penalty
gp = tf.reduce_mean((norm - 1.0)**2)
return gp
def train_step(self, x_real):
# Critic training
# Getting batch size for creating latent vectors
print(x_real)
batch_size = tf.shape(x_real)[0]
print(batch_size)
# Critic training loop
for i in range(self.critic_steps):
# Generating latent vectors
latent = tf.random.normal(shape=(batch_size, self.latent_dim))
with tf.GradientTape() as tape:
# Obtaining fake data from generator
x_fake = self.generator(latent, training=True)
# Critic output from fake data
fake_logits = self.critic(x_fake, training=True)
# Critic output from real data
real_logits = self.critic(x_real, training=True)
# Calculating critic loss
c_loss = self.critic_loss_function(real_logits, fake_logits)
# Calcuating gradient penalty
gp = self.gradient_penalty(batch_size, x_real, x_fake)
# Adjusting critic loss with gradient penalty
c_loss = c_loss + gp_weight*gp
# Calculating gradient of critic loss wrt critic weights
critic_grad = tape.gradient(c_loss, self.critic.trainable_variables)
# Updating critic weights
self.critic_optimizer.apply_gradients(zip(critic_gradient, self.critic.trainable_variables))
# Generator training
# Generating latent vectors
latent = tf.random.normal(shape=(batch_size, self.latent_dim))
with tf.GradientTape() as tape:
# Obtaining fake data from generator
x_fake = self.generator(latent, training=True)
# Critic output from fake data
fake_logits = self.critic(x_fake, training=True)
# Calculating generator loss
g_loss = self.generator_loss_function(fake_logits)
# Calculating gradient of generator loss wrt generator weights
genertor_grad = tape.gradient(g_loss, self.generator.trainable_variables)
# Updating generator weights
self.generator_optimizer.apply_gradients(zip(generator_gradient, self.generator.trainable_variables))
return g_loss, c_loss
class GAN_monitor(keras.callbacks.Callback):
def __init__(self, n_samples, latent_dim):
self.n_samples = n_samples
self.latent_dim = latent_dim
def on_epoch_end(self, epoch, logs=None):
latent = tf.random.normal(shape=(self.n_samples, self.latent_dim))
generated_data = self.model.generator(latent)
plt.plot(generated_data)
plt.savefig('Epoch _'+str(epoch)+'.png', dpi=300)
data = np.genfromtxt('Flight_1.dat', dtype='float', encoding=None, delimiter=',')[0:1001,0]
time_span = np.linspace(0,20,1001)
dataset = np.concatenate((time_sapn[:,np.newaxis], data[:,np.newaxis]), axis=1)
dataset.shape
# Training Parameters
latent_dim = 100
n_epochs = 10
n_critic_train = 5
gp_weight = 10
batch_Size = 100
# Instantiating the generator and discriminator models
gen = define_generator(latent_dim)
critic = define_critic()
# Instantiating the WGAN-GP object
WGAN = define_wgan(gen, critic, latent_dim, n_critic_train, gp_weight)
# Compling the WGAN-GP model
WGAN.compile(generator_loss, critic_loss)
# Instantiating custom Keras callback
cbk = GAN_monitor(n_samples=1, latent_dim=latent_dim)
# Training the WGAN-GP model
tic = time.perf_counter()
WGAN.fit(dataset, batch_size=batch_Size, epochs=n_epochs, callbacks=[cbk])
toc = time.perf_counter()
time_elapsed(toc-tic)
The issue is the shape I am providing to tf.random.uniform() for the assignment of alpha. I don't fully understand why the shape input is (batch_size, 1, 1, 1) in the Keras example, so I don't know how to specify the shape for my example. Furthermore, I don't understand this line in the Keras example:
batch_size = tf.shape(real_images)[0]
In this example 'real_images' is a (60000, 28, 28, 1) array and it gets passed to the fit() method which then passes it to the train_step() method. (It gets passed as "train_images", but they are the same variable.) If I add a line that prints out 'real_images' before this tf.shape() this is what it produces:
Tensor("IteratorGetNext:0", shape=(None, 28, 28, 1), dtype=float32)
Why is the 60000 now None? Then, I added a line that printed out "batch_size" after the tf.shape() and this is what it produces:
Tensor("strided_slice:0", shape=(), dtype=int32)
I googled "tf strided_slice", but all I could find is the method tf.strided_slice(). So what exactly is the value of "batch_size", and why is the output of variables so ambiguous when they are tensors? In fact, if I type:
tf.shape(train_images)[0]
in another cell of the Jupyter notebook, I get a completely different output:
<tf.Tensor: shape=(), dtype=int32, numpy=60000>
I really need to understand this Keras example in order to successfully implement this code for my data. Any help is appreciated.
BTW: I am using only one set of data for now, but once I get the GAN running, I will provide multiple sets of these (1001,2) datasets. Also, if you want to test the code yourself, replacing the "dataset" variable with any (1001,2) numpy array should suffice. Thank You.
'Why is the 60000 now None?': When defining TensorFlow models, the first dimension (batch_size) is None. What goes on under the hood with TensorFlow and its computation graphs can be quite complex, but for now all you need to know is that batch_size does not need to be specified when defining the model, hence None. This is essential, as it allows a model to be defined once and then trained with and applied to datasets with an arbitrary number of examples. For example, during training you may feed the model a batch of 256 images at a time, but when using the trained model for inference you might only want the input to be a single image. The actual value of the first dimension of the input shape therefore only matters once the computation actually begins.
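As an illustrative sketch (not from the original answer; the model and shapes here are made up), the same model definition accepts any batch size at call time:

import numpy as np
import tensorflow as tf

# The batch dimension is left unspecified: shape=(2,) describes a single example.
inp = tf.keras.Input(shape=(2,))
out = tf.keras.layers.Dense(1)(inp)
model = tf.keras.Model(inp, out)

print(model.input_shape)                 # (None, 2)
print(model(np.zeros((5, 2))).shape)     # (5, 1) -- a batch of 5 examples
print(model(np.zeros((1, 2))).shape)     # (1, 1) -- a single example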
'I don't fully understand why the shape input is (batch_size, 1, 1, 1) in the Keras example': The reason for this shape is that you want a different random value, alpha, for each image. You have batch_size images, hence batch_size in the first dimension, but it is a single value per image in tensor format, so it only needs size 1 in all other dimensions. The reason it has 4 dimensions overall is so that it can be broadcast in calculations with your inputs, which are 4-D image tensors with a shape like (batch_size, img_h, img_w, 3) for colour images with 3 RGB channels.
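A quick sketch of the broadcasting this enables (illustrative only; the tensor names are placeholders, not the question's variables):

import tensorflow as tf

batch_size = 4
x_real = tf.random.normal([batch_size, 28, 28, 3])   # a batch of RGB "images"
x_fake = tf.random.normal([batch_size, 28, 28, 3])

# One alpha per example; the trailing size-1 dims broadcast over height, width, channels.
alpha = tf.random.uniform([batch_size, 1, 1, 1])
x_interp = alpha * x_real + (1 - alpha) * x_fake
print(x_interp.shape)   # shape (4, 28, 28, 3)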
In terms of understanding your error, Shape must be rank 1 but is rank 0: this says that the function you are using, tf.random.uniform, expects a rank-1 tensor for its shape argument, i.e. something with 1 dimension, but is being passed a rank-0 tensor, i.e. a scalar value. From your code it looks like you are passing it the bare value of batch_size rather than a shape list. This might work instead:
alpha = tf.random.uniform([batch_size, 1, 1, 1])
The first parameter of this function is its shape and so it is important to have the [] there. Check out the documentation on this function in order to make sure you're using it correctly - https://www.tensorflow.org/api_docs/python/tf/random/uniform.
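For your (batch_size, 2) sequential data, rather than 4-D images, the alpha tensor presumably only needs one trailing dimension to broadcast correctly (a sketch, assuming the rest of your gradient_penalty method is unchanged):

# x_real and x_fake have shape (batch_size, 2) here, so a rank-2 alpha suffices.
alpha = tf.random.uniform([batch_size, 1])        # one alpha per example
x_interp = alpha * x_real + (1 - alpha) * x_fake  # broadcasts over the 2 columns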

Weird Error with TensorFlow 2.0 Being Incompatible with TensorFlow 1.0

I am testing some TensorFlow code; I'm seeing this error:
AttributeError: module 'tensorflow' has no attribute 'variable_scope'
I am running TensorFlow version 2.1.0.
Here is the code that I am testing.
# imports
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Input data:
# For this tutorial we use the MNIST dataset. MNIST is a dataset of handwritten digits. If you are into machine learning, you might have heard of this dataset by now. MNIST is kind of benchmark of datasets for deep learning. One other reason that we use the MNIST is that it is easily accesible through Tensorflow. If you want to know more about the MNIST dataset you can check Yann Lecun's website. We can easily import the dataset and see the size of training, test and validation set:
# Import MNIST data
# from tensorflow.examples.tutorials.mnist import input_data
#import tensorflow_datasets as tfds
# Construct a tf.data.Dataset
#mnist = tfds.load(name="mnist", split=tfds.Split.TRAIN)
mnist = tf.keras.datasets.mnist
#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
#print("Size of:")
#print("- Training-set:\t\t{}".format(len(mnist.train.labels)))
#print("- Test-set:\t\t{}".format(len(mnist.test.labels)))
#print("- Validation-set:\t{}".format(len(mnist.validation.labels)))
# hyper-parameters
logs_path = "C:/Users/ryans/MNIST_data/logs/embedding/" # path to the folder that we want to save the logs for Tensorboard
learning_rate = 0.001 # The optimization learning rate
epochs = 10 # Total number of training epochs
batch_size = 100 # Training batch size
display_freq = 100 # Frequency of displaying the training results
# Network Parameters
# We know that MNIST images are 28 pixels in each dimension.
img_h = img_w = 28
# Images are stored in one-dimensional arrays of this length.
img_size_flat = img_h * img_w
# Number of classes, one class for each of 10 digits.
n_classes = 10
# number of units in the first hidden layer
h1 = 200
# Graph:
# Like before, we start by constructing the graph. But, we need to define some functions that we need rapidly in our code.
# weight and bais wrappers
def weight_variable(name, shape):
"""
Create a weight variable with appropriate initialization
:param name: weight name
:param shape: weight shape
:return: initialized weight variable
"""
initer = tf.truncated_normal_initializer(stddev=0.01)
return tf.get_variable('W_' + name,
dtype=tf.float32,
shape=shape,
initializer=initer)
def bias_variable(name, shape):
"""
Create a bias variable with appropriate initialization
:param name: bias variable name
:param shape: bias variable shape
:return: initialized bias variable
"""
initial = tf.constant(0., shape=shape, dtype=tf.float32)
return tf.get_variable('b_' + name,
dtype=tf.float32,
initializer=initial)
def fc_layer(x, num_units, name, use_relu=True):
"""
Create a fully-connected layer
:param x: input from previous layer
:param num_units: number of hidden units in the fully-connected layer
:param name: layer name
:param use_relu: boolean to add ReLU non-linearity (or not)
:return: The output array
"""
with tf.variable_scope(name):
in_dim = x.get_shape()[1]
W = weight_variable(name, shape=[in_dim, num_units])
tf.summary.histogram('W', W)
b = bias_variable(name, [num_units])
tf.summary.histogram('b', b)
layer = tf.matmul(x, W)
layer += b
if use_relu:
layer = tf.nn.relu(layer)
return layer
# Now that we have our helper functions we can create our graph.
# Create graph
# Placeholders for inputs (x), outputs(y)
with tf.compat.v1.variable_scope('Input'):
x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
tf.summary.image('input_image', tf.reshape(x, (-1, img_w, img_h, 1)), max_outputs=5)
y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
output_logits = fc_layer(fc1, n_classes, 'Output_layer', use_relu=False)
# Define the loss function, optimizer, and accuracy
with tf.compat.v1.variable_scope('Train'):
with tf.compat.v1.variable_scope('Loss'):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits), name='loss')
tf.summary.scalar('loss', loss)
with tf.compat.v1.variable_scope('Optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
with tf.compat.v1.variable_scope('Accuracy'):
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
tf.summary.scalar('accuracy', accuracy)
# Network predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')
# Initializing the variables
init = tf.global_variables_initializer()
merged = tf.summary.merge_all()
# Session:
# Launch the graph (session)
sess = tf.InteractiveSession() # using InteractiveSession instead of Session to test network in separate cell
sess.run(init)
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
num_tr_iter = int(mnist.train.num_examples / batch_size)
global_step = 0
for epoch in range(epochs):
print('Training epoch: {}'.format(epoch + 1))
for iteration in range(num_tr_iter):
batch_x, batch_y = mnist.train.next_batch(batch_size)
global_step += 1
# Run optimization op (backprop)
feed_dict_batch = {x: batch_x, y: batch_y}
_, summary_tr = sess.run([optimizer, merged], feed_dict=feed_dict_batch)
train_writer.add_summary(summary_tr, global_step)
if iteration % display_freq == 0:
# Calculate and display the batch loss and accuracy
loss_batch, acc_batch = sess.run([loss, accuracy],
feed_dict=feed_dict_batch)
print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
format(iteration, loss_batch, acc_batch))
# Run validation after every epoch
feed_dict_valid = {x: mnist.validation.images, y: mnist.validation.labels}
loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
print('---------------------------------------------------------')
print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
format(epoch + 1, loss_valid, acc_valid))
print('---------------------------------------------------------')
I think the code was designed for an earlier version of TensorFlow. I made a few small modifications to get the code to run on my laptop. Here's the part that I am struggling with.
# Placeholders for inputs (x), outputs(y)
with tf.compat.v1.variable_scope('Input'):
    x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
    tf.summary.image('input_image', tf.reshape(x, (-1, img_w, img_h, 1)), max_outputs=5)
    y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
output_logits = fc_layer(fc1, n_classes, 'Output_layer', use_relu=False)
The 'with' statement runs, but I am getting an error on this line:
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
I thought the change to 'tf.compat.v1' would overcome the issue of different TensorFlow versions, but apparently not.
I found the code sample here.
https://www.easy-tensorflow.com/tf-tutorials/tensorboard/tb-embedding-visualization
Since placeholder has been removed from TensorFlow 2.0, compat.v1 must be used. However, another problem is incompatibility with eager execution, which can be solved by calling tf.compat.v1.disable_eager_execution() before the with tf.compat.v1.variable_scope(...): block.
Conversely, you can turn eager execution back on by calling tf.compat.v1.enable_eager_execution().
You may also check https://www.tensorflow.org/guide/migrate
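A minimal sketch of where that call would go relative to the code above (img_size_flat and n_classes are the variables from the question; this assumes the rest of the script keeps using the compat.v1 graph-mode APIs):

import tensorflow as tf

# TF2 executes eagerly by default; placeholders and variable scopes need graph mode,
# so eager execution has to be disabled before any of the TF1-style ops are created.
tf.compat.v1.disable_eager_execution()

with tf.compat.v1.variable_scope('Input'):
    x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
    y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')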

Tensorflow: value error with variable_scope

This is my code below:
'''
Tensorflow LSTM classification of 16x30 images.
'''
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
from numpy import genfromtxt
from sklearn.cross_validation import train_test_split
import pandas as pd
'''
a Tensorflow LSTM that will sequentially input several lines from each single image
i.e. The Tensorflow graph will take a flat (1,480) features image as it was done in Multi-layer
perceptron MNIST Tensorflow tutorial, but then reshape it in a sequential manner with 16 features each and 30 time_steps.
'''
blaine = genfromtxt('./Desktop/Blaine_CSV_lstm.csv',delimiter=',') # CSV transform to array
target = [row[0] for row in blaine] # 1st column in CSV as the targets
data = blaine[:, 1:481] #flat feature vectors
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.05, random_state=42)
f=open('cs-training.csv','w') #1st split for training
for i,j in enumerate(X_train):
k=np.append(np.array(y_train[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
f=open('cs-testing.csv','w') #2nd split for test
for i,j in enumerate(X_test):
k=np.append(np.array(y_test[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
new_data = genfromtxt('cs-training.csv',delimiter=',') # Training data
new_test_data = genfromtxt('cs-testing.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in new_data])
ss = pd.Series(y_train) #indexing series needed for later Pandas Dummies one-hot vectors
y_train_onehot = pd.get_dummies(ss)
x_test=np.array([ i[1::] for i in new_test_data])
gg = pd.Series(y_test)
y_test_onehot = pd.get_dummies(gg)
# General Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 33
display_step = 10
# Tensorflow LSTM Network Parameters
n_input = 16 # MNIST data input (img shape: 28*28)
n_steps = 30 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 20 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
with tf.variable_scope('cell_def'):
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
with tf.variable_scope('rnn_def'):
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x = np.split(x_train, 15)
batch_y = np.split(y_train_onehot, 15)
for index in range(len(batch_x)):
ouh1 = batch_x[index]
ouh2 = batch_y[index]
# Reshape data to get 28 seq of 28 elements
ouh1 = np.reshape(ouh1,(batch_size, n_steps, n_input))
sess.run(optimizer, feed_dict={x: ouh1, y: ouh2}) # Run optimization op (backprop)
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: ouh1, y: ouh2})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: ouh1, y: ouh2})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
and I am getting the error below. It seems I am re-iterating over the same variable on lines 92 and 97, and I am concerned that it might be a case of incompatibility with TensorFlow 0.10.0 on the RNN def side:
ValueError: Variable RNN/BasicLSTMCell/Linear/Matrix already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/home/mohsen/lstm_mnist.py", line 92, in RNN
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
File "/home/mohsen/lstm_mnist.py", line 97, in <module>
pred = RNN(x, weights, biases)
File "/home/mohsen/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
What could be the origin of this error and how can I resolve it?
EDIT: The same variable_scope problem persists in the original repo that I built my code upon: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
You are not iterating over the same variable in lines 92 and 97, since those will always be in the same namespace, at least in the current setting, because you are calling one namespace from within another (one is embedded in the RNN function). So your effective variable scope will be something like 'backward/forward'.
Hence my guess is that the problem is in lines 89 and 92, since both "live" in the same namespace (see above) and both may introduce a variable called RNN/BasicLSTMCell/Linear/Matrix. So you should change your code to the following:
# Define a lstm cell with tensorflow
with tf.variable_scope('cell_def'):
    lstm_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)

# Get lstm cell output
with tf.variable_scope('rnn_def'):
    outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
This makes the LSTMCell initialization live in one namespace - "cell_def/*", and the initialization of the complete RNN in another - "rnn_def/*".

Pickle python lasagne model

I have trained a simple long short-term memory (LSTM) model in Lasagne following the recipe here: https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py
Here is the architecture:
l_in = lasagne.layers.InputLayer(shape=(None, None, vocab_size))
# We now build the LSTM layer which takes l_in as the input layer
# We clip the gradients at GRAD_CLIP to prevent the problem of exploding gradients.
l_forward_1 = lasagne.layers.LSTMLayer(
l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
nonlinearity=lasagne.nonlinearities.tanh)
l_forward_2 = lasagne.layers.LSTMLayer(
l_forward_1, N_HIDDEN, grad_clipping=GRAD_CLIP,
nonlinearity=lasagne.nonlinearities.tanh)
# The l_forward layer creates an output of dimension (batch_size, SEQ_LENGTH, N_HIDDEN)
# Since we are only interested in the final prediction, we isolate that quantity and feed it to the next layer.
# The output of the sliced layer will then be of size (batch_size, N_HIDDEN)
l_forward_slice = lasagne.layers.SliceLayer(l_forward_2, -1, 1)
# The sliced output is then passed through the softmax nonlinearity to create probability distribution of the prediction
# The output of this stage is (batch_size, vocab_size)
l_out = lasagne.layers.DenseLayer(l_forward_slice, num_units=vocab_size, W = lasagne.init.Normal(), nonlinearity=lasagne.nonlinearities.softmax)
# Theano tensor for the targets
target_values = T.ivector('target_output')
# lasagne.layers.get_output produces a variable for the output of the net
network_output = lasagne.layers.get_output(l_out)
# The loss function is calculated as the mean of the (categorical) cross-entropy between the prediction and target.
cost = T.nnet.categorical_crossentropy(network_output,target_values).mean()
# Retrieve all parameters from the network
all_params = lasagne.layers.get_all_params(l_out)
# Compute AdaGrad updates for training
print("Computing updates ...")
updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
# Theano functions for training and computing cost
print("Compiling functions ...")
train = theano.function([l_in.input_var, target_values], cost, updates=updates, allow_input_downcast=True)
compute_cost = theano.function([l_in.input_var, target_values], cost, allow_input_downcast=True)
# In order to generate text from the network, we need the probability distribution of the next character given
# the state of the network and the input (a seed).
# In order to produce the probability distribution of the prediction, we compile a function called probs.
probs = theano.function([l_in.input_var],network_output,allow_input_downcast=True)
and the model is trained via:
for it in xrange(data_size * num_epochs / BATCH_SIZE):
    try_it_out() # Generate text using the p^th character as the start.
    avg_cost = 0;
    for _ in range(PRINT_FREQ):
        x, y = gen_data(p)
        #print(p)
        p += SEQ_LENGTH + BATCH_SIZE - 1
        if(p+BATCH_SIZE+SEQ_LENGTH >= data_size):
            print('Carriage Return')
            p = 0;
        avg_cost += train(x, y)
    print("Epoch {} average loss = {}".format(it*1.0*PRINT_FREQ/data_size*BATCH_SIZE, avg_cost / PRINT_FREQ))
How can I save the model so I do not need to train it again? With scikit I generally just pickle the model object. However I am unclear on the analogous process with Theano / lasagne.
You can save the weights with numpy:
np.savez('model.npz', *lasagne.layers.get_all_param_values(network_output))
And load them again later on like this:
with np.load('model.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network_output, param_values)
Source: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
As for the model definition itself: One option is certainly to keep the code and regenerate the network, before setting the pretrained weights.
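For example, a minimal sketch of that approach, assuming build_network() is a hypothetical helper of your own that re-creates exactly the architecture shown above and returns l_out:

import numpy as np
import lasagne

# Re-create the architecture with the same code used for training
# (build_network is a hypothetical wrapper around the layer definitions above).
l_out = build_network()

# Load the saved weights and push them into the freshly built network.
with np.load('model.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(l_out, param_values)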
You can also save the model parameters and the model itself with pickle:
import cPickle as pickle
import os

# save the network and its parameters as a dictionary
netInfo = {'network': network, 'params': lasagne.layers.get_all_param_values(network)}
Net_FileName = 'LSTM.pkl'
# save the dictionary as a .pkl file
pickle.dump(netInfo, open(os.path.join('/path/to/a/folder/', Net_FileName), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
After saving your model, it can be retrieved by pickle.load:
net = pickle.load(open(os.path.join('/path/to/a/folder/', Net_FileName), 'rb'))
all_params = net['params']
lasagne.layers.set_all_param_values(net['network'], all_params)
I've had success using dill in combination with the numpy.savez function:
import dill as pickle
...
np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
with open('model.dpkl', 'wb') as p_output:
    pickle.dump(network, p_output)
To import the pickled model:
with open('model.dpkl', 'rb') as p_input:
    network = pickle.load(p_input)
with np.load('model.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network, param_values)

How could I use batch normalization in TensorFlow?

I would like to use batch normalization in TensorFlow. I found the related C++ source code in core/ops/nn_ops.cc. However, I did not find it documented on tensorflow.org.
BN has different semantics in MLP and CNN, so I am not sure what exactly this BN does.
I did not find a method called MovingMoments either.
Update July 2016: The easiest way to use batch normalization in TensorFlow is through the higher-level interfaces provided in contrib/layers, tflearn, or slim.
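For instance, a minimal sketch with the contrib/layers interface (argument names follow the tf.contrib.layers.batch_norm API of that era; the placeholder shapes are made up for illustration):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 100])
is_training = tf.placeholder(tf.bool)

# Linear layer followed by batch norm, then the nonlinearity.
h = tf.contrib.layers.fully_connected(x, 64, activation_fn=None)
h = tf.contrib.layers.batch_norm(h, is_training=is_training,
                                 center=True, scale=True,
                                 updates_collections=None)
h = tf.nn.relu(h)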
Previous answer if you want to DIY:
The documentation string for this has improved since the release - see the docs comment in the master branch instead of the one you found. It clarifies, in particular, that it's the output from tf.nn.moments.
You can see a very simple example of its use in the batch_norm test code. For a more real-world use example, I've included below the helper class and use notes that I scribbled up for my own use (no warranty provided!):
"""A helper class for managing batch normalization state.
This class is designed to simplify adding batch normalization
(http://arxiv.org/pdf/1502.03167v3.pdf) to your model by
managing the state variables associated with it.
Important use note: The function get_assigner() returns
an op that must be executed to save the updated state.
A suggested way to do this is to make execution of the
model optimizer force it, e.g., by:
update_assignments = tf.group(bn1.get_assigner(),
bn2.get_assigner())
with tf.control_dependencies([optimizer]):
optimizer = tf.group(update_assignments)
"""
import tensorflow as tf
class ConvolutionalBatchNormalizer(object):
"""Helper class that groups the normalization logic and variables.
Use:
ewma = tf.train.ExponentialMovingAverage(decay=0.99)
bn = ConvolutionalBatchNormalizer(depth, 0.001, ewma, True)
update_assignments = bn.get_assigner()
x = bn.normalize(y, train=training?)
(the output x will be batch-normalized).
"""
def __init__(self, depth, epsilon, ewma_trainer, scale_after_norm):
self.mean = tf.Variable(tf.constant(0.0, shape=[depth]),
trainable=False)
self.variance = tf.Variable(tf.constant(1.0, shape=[depth]),
trainable=False)
self.beta = tf.Variable(tf.constant(0.0, shape=[depth]))
self.gamma = tf.Variable(tf.constant(1.0, shape=[depth]))
self.ewma_trainer = ewma_trainer
self.epsilon = epsilon
self.scale_after_norm = scale_after_norm
def get_assigner(self):
"""Returns an EWMA apply op that must be invoked after optimization."""
return self.ewma_trainer.apply([self.mean, self.variance])
def normalize(self, x, train=True):
"""Returns a batch-normalized version of x."""
if train:
mean, variance = tf.nn.moments(x, [0, 1, 2])
assign_mean = self.mean.assign(mean)
assign_variance = self.variance.assign(variance)
with tf.control_dependencies([assign_mean, assign_variance]):
return tf.nn.batch_norm_with_global_normalization(
x, mean, variance, self.beta, self.gamma,
self.epsilon, self.scale_after_norm)
else:
mean = self.ewma_trainer.average(self.mean)
variance = self.ewma_trainer.average(self.variance)
local_beta = tf.identity(self.beta)
local_gamma = tf.identity(self.gamma)
return tf.nn.batch_norm_with_global_normalization(
x, mean, variance, local_beta, local_gamma,
self.epsilon, self.scale_after_norm)
Note that I called it a ConvolutionalBatchNormalizer because it pins the use of tf.nn.moments to sum across axes 0, 1, and 2, whereas for non-convolutional use you might only want axis 0.
Feedback appreciated if you use it.
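To make the axis point concrete (an illustrative sketch, not part of the original class): for a dense layer you would compute the moments over the batch axis only.

import tensorflow as tf

conv_act = tf.placeholder(tf.float32, [None, 28, 28, 64])   # NHWC feature maps
dense_act = tf.placeholder(tf.float32, [None, 256])         # fully-connected activations

# Convolutional case: one mean/variance per channel, pooled over batch, height, width.
conv_mean, conv_var = tf.nn.moments(conv_act, axes=[0, 1, 2])   # shape [64]

# Non-convolutional case: one mean/variance per unit, pooled over the batch only.
dense_mean, dense_var = tf.nn.moments(dense_act, axes=[0])      # shape [256]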
As of TensorFlow 1.0 (February 2017) there's also the high-level tf.layers.batch_normalization API included in TensorFlow itself.
It's super simple to use:
# Set this to True for training and False for testing
training = tf.placeholder(tf.bool)
x = tf.layers.dense(input_x, units=100)
x = tf.layers.batch_normalization(x, training=training)
x = tf.nn.relu(x)
...except that it adds extra ops to the graph (for updating its mean and variance variables) in such a way that they won't be dependencies of your training op. You can either just run the ops separately:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
sess.run([train_op, extra_update_ops], ...)
or add the update ops as dependencies of your training op manually, then just run your training op as normal:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_op = optimizer.minimize(loss)
...
sess.run([train_op], ...)
The following works fine for me; it does not require invoking the EMA apply op separately.
import numpy as np
import tensorflow as tf
from tensorflow.python import control_flow_ops

def batch_norm(x, n_out, phase_train, scope='bn'):
    """
    Batch normalization on convolutional maps.
    Args:
        x:           Tensor, 4D BHWD input maps
        n_out:       integer, depth of input maps
        phase_train: boolean tf.Variable, true indicates training phase
        scope:       string, variable scope
    Return:
        normed:      batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed

Example:

import math

n_in, n_out = 3, 16
ksize = 3
stride = 1
phase_train = tf.placeholder(tf.bool, name='phase_train')
input_image = tf.placeholder(tf.float32, name='input_image')
kernel = tf.Variable(tf.truncated_normal([ksize, ksize, n_in, n_out],
                                         stddev=math.sqrt(2.0/(ksize*ksize*n_out))),
                     name='kernel')
conv = tf.nn.conv2d(input_image, kernel, [1, stride, stride, 1], padding='SAME')
conv_bn = batch_norm(conv, n_out, phase_train)
relu = tf.nn.relu(conv_bn)

with tf.Session() as session:
    session.run(tf.initialize_all_variables())
    for i in range(20):
        test_image = np.random.rand(4, 32, 32, 3)
        sess_outputs = session.run([relu],
                                   {input_image.name: test_image, phase_train.name: True})
There is also an "official" batch normalization layer coded by the developers. There aren't very good docs on how to use it, but here is how to use it (according to me):
from tensorflow.contrib.layers.python.layers import batch_norm as batch_norm

def batch_norm_layer(x, train_phase, scope_bn):
    bn_train = batch_norm(x, decay=0.999, center=True, scale=True,
                          updates_collections=None,
                          is_training=True,
                          reuse=None,  # is this right?
                          trainable=True,
                          scope=scope_bn)
    bn_inference = batch_norm(x, decay=0.999, center=True, scale=True,
                              updates_collections=None,
                              is_training=False,
                              reuse=True,  # is this right?
                              trainable=True,
                              scope=scope_bn)
    z = tf.cond(train_phase, lambda: bn_train, lambda: bn_inference)
    return z
To actually use it you need to create a placeholder for train_phase that indicates whether you are in the training or inference phase (as in train_phase = tf.placeholder(tf.bool, name='phase_train')). Its value can be filled during inference or training with a tf.Session, as in:
test_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xtest, y_:batch_ytest, train_phase: False})
or during training:
sess.run(fetches=train_step, feed_dict={x: batch_xs, y_:batch_ys, train_phase: True})
I'm pretty sure this is correct according to the discussion on GitHub.
Seems there is another useful link:
http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
You can simply use the built-in batch_norm layer:
batch_norm = tf.cond(is_train,
                     lambda: tf.contrib.layers.batch_norm(prev, activation_fn=tf.nn.relu, is_training=True, reuse=None),
                     lambda: tf.contrib.layers.batch_norm(prev, activation_fn=tf.nn.relu, is_training=False, reuse=True))
where prev is the output of your previous layer (can be both fully-connected or a convolutional layer) and is_train is a boolean placeholder. Just use batch_norm as the input to the next layer, then.
Since someone recently edited this, I'd like to clarify that this is no longer an issue.
This answer does not seem correct. When phase_train is set to false, it still updates the EMA mean and variance. This can be verified with the following code snippet.
x = tf.placeholder(tf.float32, [None, 20, 20, 10], name='input')
phase_train = tf.placeholder(tf.bool, name='phase_train')
# generate random noise to pass into batch norm
x_gen = tf.random_normal([50,20,20,10])
pt_false = tf.Variable(tf.constant(True))
#generate a constant variable to pass into batch norm
y = x_gen.eval()
[bn, bn_vars] = batch_norm(x, 10, phase_train)
tf.initialize_all_variables().run()
train_step = lambda: bn.eval({x:x_gen.eval(), phase_train:True})
test_step = lambda: bn.eval({x:y, phase_train:False})
train_step_c = lambda: bn.eval({x:y, phase_train:True})
# Verify that this is different as expected, two different x's have different norms
print(train_step()[0][0][0])
print(train_step()[0][0][0])
# Verify that this is same as expected, same x's (y) have same norm
print(train_step_c()[0][0][0])
print(train_step_c()[0][0][0])
# THIS IS DIFFERENT but should be they same, should only be reading from the ema.
print(test_step()[0][0][0])
print(test_step()[0][0][0])
Using the TensorFlow built-in batch_norm layer, below is code to load data, build a network with one hidden ReLU layer and L2 regularization, and introduce batch normalization for both the hidden and output layers. It runs and trains fine. Just FYI, this example is mostly built upon the data and code from the Udacity Deep Learning course.
P.S. Yes, parts of this were discussed one way or another in earlier answers, but I decided to gather everything into one code snippet so that you have an example of the whole network training process with batch normalization and its evaluation.
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
pickle_file = '/home/maxkhk/Documents/Udacity/DeepLearningCourse/SourceCode/tensorflow/examples/udacity/notMNIST.pickle'
with open(pickle_file, 'rb') as f:
save = pickle.load(f)
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
def reformat(dataset, labels):
dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
# Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
#for NeuralNetwork model code is below
#We will use SGD for training to save our time. Code is from Assignment 2
#beta is the new parameter - controls level of regularization.
#Feel free to play with it - the best one I found is 0.001
#notice, we introduce L2 for both biases and weights of all layers
batch_size = 128
beta = 0.001
#building tensorflow graph
graph = tf.Graph()
with graph.as_default():
# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, image_size * image_size))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
#introduce batchnorm
tf_train_dataset_bn = tf.contrib.layers.batch_norm(tf_train_dataset)
#now let's build our new hidden layer
#that's how many hidden neurons we want
num_hidden_neurons = 1024
#its weights
hidden_weights = tf.Variable(
tf.truncated_normal([image_size * image_size, num_hidden_neurons]))
hidden_biases = tf.Variable(tf.zeros([num_hidden_neurons]))
#now the layer itself. It multiplies data by weights, adds biases
#and takes ReLU over result
hidden_layer = tf.nn.relu(tf.matmul(tf_train_dataset_bn, hidden_weights) + hidden_biases)
#adding the batch normalization layerhi()
hidden_layer_bn = tf.contrib.layers.batch_norm(hidden_layer)
#time to go for output linear layer
#out weights connect hidden neurons to output labels
#biases are added to output labels
out_weights = tf.Variable(
tf.truncated_normal([num_hidden_neurons, num_labels]))
out_biases = tf.Variable(tf.zeros([num_labels]))
#compute output
out_layer = tf.matmul(hidden_layer_bn,out_weights) + out_biases
#our real output is a softmax of prior result
#and we also compute its cross-entropy to get our loss
#Notice - we introduce our L2 here
loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
out_layer, tf_train_labels) +
beta*tf.nn.l2_loss(hidden_weights) +
beta*tf.nn.l2_loss(hidden_biases) +
beta*tf.nn.l2_loss(out_weights) +
beta*tf.nn.l2_loss(out_biases)))
#now we just minimize this loss to actually train the network
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
#nice, now let's calculate the predictions on each dataset for evaluating the
#performance so far
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(out_layer)
valid_relu = tf.nn.relu( tf.matmul(tf_valid_dataset, hidden_weights) + hidden_biases)
valid_prediction = tf.nn.softmax( tf.matmul(valid_relu, out_weights) + out_biases)
test_relu = tf.nn.relu( tf.matmul( tf_test_dataset, hidden_weights) + hidden_biases)
test_prediction = tf.nn.softmax(tf.matmul(test_relu, out_weights) + out_biases)
#now is the actual training on the ANN we built
#we will run it for some number of steps and evaluate the progress after
#every 500 steps
#number of steps we will train our ANN
num_steps = 3001
#actual training
with tf.Session(graph=graph) as session:
tf.initialize_all_variables().run()
print("Initialized")
for step in range(num_steps):
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Validation accuracy: %.1f%%" % accuracy(
valid_prediction.eval(), valid_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
So a simple example of the use of this batchnorm class:
from bn_class import *
with tf.name_scope('Batch_norm_conv1') as scope:
    ewma = tf.train.ExponentialMovingAverage(decay=0.99)
    bn_conv1 = ConvolutionalBatchNormalizer(num_filt_1, 0.001, ewma, True)
    update_assignments = bn_conv1.get_assigner()
    a_conv1 = bn_conv1.normalize(a_conv1, train=bn_train)
    h_conv1 = tf.nn.relu(a_conv1)
