No gradients provided in tensorflow (mean_squared_error) - python

I'm trying to build a simple net of 2 input neurons (+1 bias) feeding into 1 output neuron to teach it the "and" function. It's based on the MNIST classification example, so it might be overly complex for the task, but it's about the general structure of such nets for me, so please don't say "you can just do it in numpy" or something; it's about TensorFlow NNs for me. So here is the code:
import tensorflow as tf
import numpy as np

tf.logging.set_verbosity(tf.logging.INFO)

def model_fn(features, labels, mode):
    input_layer = tf.reshape(features["x"], [-1, 2])
    output_layer = tf.layers.dense(inputs=input_layer, units=1, activation=tf.nn.relu, name="output_layer")
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=output_layer)
    loss = tf.losses.mean_squared_error(labels=labels, predictions=output_layer)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    eval_metrics_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=output_layer)}
    return tf.estimator.EstimatorSpec(mode=mode, predictions=output_layer, loss=loss)

def main(unused_arg):
    train_data = np.asarray(np.reshape([[0,0],[0,1],[1,0],[1,1]], [4,2]))
    train_labels = np.asarray(np.reshape([0,0,0,1], [4,1]))
    eval_data = train_data
    eval_labels = train_labels
    classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir="/tmp/NN_AND")
    tensors_to_log = {"The output:": "output_layer"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=10)
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": train_data}, y=train_labels, batch_size=10, num_epochs=None, shuffle=True)
    classifier.train(input_fn=train_input_fn, steps=2000, hooks=[logging_hook])
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data}, y=eval_labels, batch_size=1, shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

if __name__ == "__main__":
    tf.app.run()

I've made a few slight modifications to your code which enable learning the "and" function:
1) Change your train_data to a float32 representation.
train_data = np.asarray(np.reshape([[0,0],[0,1],[1,0],[1,1]],[4,2]), dtype=np.float32)
2) Remove the relu activation from the output layer - generally speaking, using relus in the output layer is not recommended. It can lead to dead relus, where all gradients are equal to zero, which in turn makes any learning impossible.
output_layer = tf.layers.dense(inputs=input_layer, units=1, activation=None, name="output_layer")
3) In your eval_metrics_ops make sure you round the result so you can actually measure accuracy:
eval_metrics_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=tf.round(output_layer))}
4) Don't forget to add the eval_metrics_ops parameter you defined to the estimator:
return tf.estimator.EstimatorSpec(mode=mode, predictions=output_layer, loss=loss, eval_metric_ops=eval_metrics_ops)
In addition, to log the last layer output you should use:
tensors_to_log = {"The output:": "output_layer/BiasAdd:0"}

Related

How do I fix this size of tensor error for my NN classifier PyTorch

I'm having trouble understanding why this is throwing an error. This code is pulled directly from the PyTorch documentation for an NN classifier for the Fashion-MNIST dataset. However, when I try to flip this to the MNIST handwritten digits dataset, it comes up with the following error:
RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1
This occurs when using the loss function during the training loop function. Can anyone help me understand why this is happening? Thanks!
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import torchvision.models as models
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
print(f"Using {device} device")

training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

def save_checkpoint(state, filename="checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)

model = NeuralNetwork().to(device)

learning_rate = 1e-3
batch_size = 64
epochs = 10

# Initialize the loss function
loss_fn = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimiser)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
torch.nn.MSELoss is an implementation of mean squared error. You can't measure the difference between two tensors if they're different sizes (MSELoss does not allow for broadcasting). So if you're using MSELoss, the predictions and the targets must be the same shape. In your case, preds is a tensor of shape [64, 10], and y is a tensor of shape [64].
The reason y is of shape [64] rather than [64, 10] is that most classification dataset implementations represent targets as integer labels rather than one-hot encoded vectors. In theory, you could convert these integer label targets to one-hot encoded targets.
But in reality, since this is a classification problem, you should probably be using something like nn.CrossEntropyLoss rather than nn.MSELoss. The former is a conventional classification loss function, and it allows the targets to be integer labels rather than one-hot labels (so just swapping out MSELoss for CrossEntropyLoss should solve your problem). MSELoss is better suited for regression tasks and such.
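A minimal sketch of that swap against the code above (the one-hot variant is shown only for completeness; names match the question's training loop):
# Recommended: keep the integer labels and use a classification loss.
loss_fn = nn.CrossEntropyLoss()  # expects logits of shape [N, 10] and integer targets of shape [N]

# Only if you insisted on MSELoss, the targets would need to be one-hot and float
# so that they match the [64, 10] predictions:
# y_onehot = torch.nn.functional.one_hot(y, num_classes=10).float()
# loss = nn.MSELoss()(pred, y_onehot)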

pytorch training loss invariant with varying forward pass implementations

The following code (an MNIST MLP in PyTorch) delivers approximately the same training loss regardless of whether the last layer in the forward pass returns:
F.log_softmax(x)
(x)
Option 1 is incorrect because I use criterion = nn.CrossEntropyLoss(), and yet the results are almost identical. Am I missing anything?
import torch
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert data to torch.FloatTensor
transform = transforms.ToTensor()

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                            download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                           download=True, transform=transform)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # linear layer (784 -> 1 hidden node)
        self.fc1 = nn.Linear(28 * 28, 512)
        self.dropout1 = nn.Dropout(p=0.2, inplace=False)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(p=0.2, inplace=False)
        self.dropout = nn.Dropout(p=0.2, inplace=False)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        # flatten image input
        x = x.view(-1, 28 * 28)
        # add hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        # return F.log_softmax(x)
        return x

# initialize the NN
model = Net()
print(model)
model.to('cuda')

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

n_epochs = 10
model.train()  # prep model for training
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    start = time.time()
    for data, target in train_loader:
        data, target = data.to('cuda'), target.to('cuda')
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f} \ttime: {:.6f}'.format(
        epoch+1,
        train_loss,
        time.time()-start
    ))
For numerical stability, nn.CrossEntropyLoss() is implemented with the softmax layer inside it, so you should NOT use a softmax layer in your forward pass.
From the docs (https://pytorch.org/docs/stable/nn.html#crossentropyloss):
This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
Using the softmax layer in the forward pass will lead to worse metrics because the gradient magnitudes are lowered (thus, the weight updates are also lowered). I learned it the hard way!
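A small standalone check of that combination (not part of the question's code, just an illustration):
import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)           # raw outputs, no softmax applied
targets = torch.tensor([1, 0, 7, 3])  # integer class labels

ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), targets)
print(torch.allclose(ce, nll))        # True: CrossEntropyLoss == LogSoftmax + NLLLoss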
I guess your problem is that the losses are similar at the beginning of training, but by the end of training they should not be. It is usually a good sanity check to overfit your model on one batch of data. The model should reach 100% accuracy if the batch is small enough. If the model is taking too long to train, then you probably have a bug somewhere.
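A rough sketch of that single-batch sanity check, reusing the names from the question's training script (model, criterion, optimizer, train_loader):
# Overfit one small batch: the loss should go to ~0 and the batch accuracy to ~100%
# if the training code is correct.
data, target = next(iter(train_loader))
data, target = data.to('cuda'), target.to('cuda')
model.train()
for step in range(200):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
acc = (output.argmax(dim=1) == target).float().mean().item()
print(f"loss: {loss.item():.4f}, batch accuracy: {acc:.2%}")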
Hope that helps =)

Keras .fit giving better performance than manual Tensorflow

I'm new to Tensorflow and Keras. To get started, I followed the https://www.tensorflow.org/tutorials/quickstart/advanced tutorial. I'm now adapting it to train on CIFAR10 instead of MNIST dataset. I recreated this model https://keras.io/examples/cifar10_cnn/ and I'm trying to run it in my own codebase.
Logically, if the model, batch size and optimizer are all the same, then the two should perform identically, but they don't. I thought it might be that I'm making a mistake in preparing the data. So I copied the model.fit function from the keras code into my script, and it still performs better. Using .fit gives me around 75% accuracy in 25 epochs, while with the manual method it takes around 60 epochs. With .fit I also achieve slightly better max accuracy.
What I want to know is: Is .fit doing something behind the scenes that's optimizing training? What do I need to add to my code to get the same performance? Am I doing something obviously wrong?
Thanks for your time.
Main code:
import tensorflow as tf
from tensorflow import keras
import msvcrt
from Plotter import Plotter

#########################Configuration Settings#############################
BatchSize = 32
ModelName = "CifarModel"
############################################################################

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

print("x_train", x_train.shape)
print("y_train", y_train.shape)
print("x_test", x_test.shape)
print("y_test", y_test.shape)

x_train, x_test = x_train / 255.0, x_test / 255.0

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).batch(BatchSize)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BatchSize)

loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Create an instance of the model
model = ModelManager.loadModel(ModelName, 10)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

########### Using this function I achieve better results ##################
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=BatchSize,
          epochs=100,
          validation_data=(x_test, y_test),
          shuffle=True,
          verbose=2)
############################################################################

########### Using the below code I achieve worse results ##################
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

epoch = 0
InterruptLoop = False
while InterruptLoop == False:
    # Shuffle training data
    train_ds.shuffle(1000)
    epoch = epoch + 1
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    test_accuracy = test_accuracy.result() * 100
    train_accuracy = train_accuracy.result() * 100

    # Print update to console
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch,
                          train_loss.result(),
                          train_accuracy,
                          test_loss.result(),
                          test_accuracy))

    # Check if keyboard pressed
    while msvcrt.kbhit():
        char = str(msvcrt.getch())
        if char == "b'q'":
            InterruptLoop = True
            print("Stopping loop")
The model:
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPool2D
from tensorflow.keras import Model

class ModelData(Model):
    def __init__(self, NumberOfOutputs):
        super(ModelData, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu', padding='same', input_shape=(32, 32, 3))
        self.conv2 = Conv2D(32, 3, activation='relu')
        self.maxpooling1 = MaxPool2D(pool_size=(2, 2))
        self.dropout1 = Dropout(0.25)
        ############################
        self.conv3 = Conv2D(64, 3, activation='relu', padding='same')
        self.conv4 = Conv2D(64, 3, activation='relu')
        self.maxpooling2 = MaxPool2D(pool_size=(2, 2))
        self.dropout2 = Dropout(0.25)
        ############################
        self.flatten = Flatten()
        self.d1 = Dense(512, activation='relu')
        self.dropout3 = Dropout(0.5)
        self.d2 = Dense(NumberOfOutputs, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpooling1(x)
        x = self.dropout1(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.maxpooling2(x)
        x = self.dropout2(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.dropout3(x)
        x = self.d2(x)
        return x
I know this question already has an answer, but I faced the same problem, and the solution turned out to be something different that's not specified in the documentation.
I'm copying here the answer (and the relevant link) I found on GitHub, which solved the issue in my case:
The problem is caused by broadcasting in your loss function in the custom loop. Make sure that the dimensions of predictions and labels are equal. At the moment (for MAE) they are [128,1] and [128]. Just make use of tf.squeeze or tf.expand_dims.
Link: https://github.com/tensorflow/tensorflow/issues/28394
Basic translation: when computing the loss, always be sure of the tensors' shapes.
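To see the silent broadcasting the quote warns about, here is a small standalone illustration with made-up values (MAE, as in the linked issue):
labels = tf.constant([1.0, 2.0, 3.0])        # shape [3]
preds = tf.constant([[1.1], [2.1], [3.1]])   # shape [3, 1]

# [3] vs [3, 1] broadcasts to [3, 3] inside the loss: no error, wrong numbers.
bad = tf.keras.losses.MAE(labels, preds)
# Squeeze the extra dimension so both are [3] before computing the loss.
good = tf.keras.losses.MAE(labels, tf.squeeze(preds, axis=-1))
print(bad.shape, good.shape)  # (3,) vs ()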
Mentioning the solution here (in the answer section), even though it is present in the comments, for the benefit of the community.
On the same dataset, accuracy can differ between Keras Model.fit and a model built with plain TensorFlow, mainly when the data is shuffled: shuffling changes how the data is split between training and testing (or validation), so the two runs end up training and testing on different data (Keras vs. TensorFlow).
If we want to observe similar results on the same dataset and with a similar architecture in Keras and in TensorFlow, we can turn off shuffling of the data.
Hope this helps. Happy learning!

How to test a single image on tensorflow MNIST model?

I've followed the TensorFlow tutorial to build an MNIST model for handwritten digit recognition. I want to test the model by feeding a single image to the classifier and getting its predicted output.
Here is the complete code of the classifier.
I've tried reading the image using imread, but it didn't work.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import pylab
import os
from scipy.misc import imread
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

tf.logging.set_verbosity(tf.logging.INFO)

def cnn_model_fn(features, labels, mode):
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=5,
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
    logits = tf.layers.dense(inputs=dropout, units=10)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilites": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

def main(unused_argv):
    # Load training and eval data
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")
    train_data = mnist.train.images  # Returns np.array
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images  # Returns np.array
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

    # Create the Estimator
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)
    mnist_classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)
A resource I found really helpful was Tensorflow's official tutorial, which includes a demo of their Estimator. In it, you can find how to test numpy arrays with the model. Here's the link: https://github.com/tensorflow/models/tree/master/official/mnist
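For reference, here is a rough sketch of a single-image prediction with the Estimator from the question, added at the end of main(). The file name and the preprocessing are assumptions: the image needs to end up as a float32 array in [0, 1] with 784 values per example, like the MNIST training data.
# Hypothetical single-image prediction (the image path is made up).
img = imread("my_digit.png", flatten=True)               # 28x28 grayscale, values 0-255
img = (img / 255.0).astype(np.float32).reshape(1, 784)   # match the training input format

pred_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": img}, num_epochs=1, shuffle=False)
prediction = next(mnist_classifier.predict(input_fn=pred_input_fn))
print("Predicted class:", prediction["classes"])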
Hope this helps!

Tensorflow Embeddings best practice with Estimator

I have a CNN for CIFAR-10 dataset with layers:
[IN] -> [CONV] -> [POOL] -> [CONV] -> [POOL] -> [FC] -> [DROPOUT] -> [LOGITS] -> [OUT]
\-> [EMBEDDINGS]
Estimator code:
config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)
# Create the Estimator
classifier = tf.estimator.Estimator(model_fn=inference, config=config, model_dir=LOG_DIR)
train_images, train_labels, train_labels_onehot = Utils.load_training_data()
hooks = [
    # logging hook
    tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50),
]
train_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_images}, y=train_labels)
classifier.train(input_fn=train_input_fn, steps=FLAGS.steps, hooks=hooks)
Inference function code:
def inference(self, features, labels, mode):
    try:
        images = tf.cast(features['x'], tf.float32)

        # Input Layer
        with tf.name_scope('Data'):
            input_layer = tf.reshape(images, [-1, img_width, img_height, num_channels])

        # Convolutional Layer 1
        with tf.variable_scope('ConvLayer1'):
            conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                                     padding="same", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
        logging.info('Convolutional Layer 1 build successful..')

        # Convolutional Layer 2
        with tf.variable_scope('ConvLayer2'):
            conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                                     padding="same", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
        logging.info('Convolutional Layer 2 build successful..')

        # Fully Connected Layer
        with tf.variable_scope('FullyConnectedLayer'):
            pool2_flat = tf.reshape(pool2, [-1, 8 * 8 * 64])
            dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
            dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                        training=(mode == tf.estimator.ModeKeys.TRAIN))
        logging.info('Fully Connected Layer build successful..')

        tf.summary.histogram('dropout', dropout)

        # Logits Layer
        logits = tf.layers.dense(inputs=dropout, units=10)
        tf.summary.histogram('logits', logits)
        logging.info('Logits Layer build successful..')

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                              evaluation_hooks=[])

        # Calculate Loss (for both TRAIN and EVAL modes)
        onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
        loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                               logits=logits)
        tf.summary.histogram('loss', loss)
        logging.info('Losses build successful..')

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            learning_rate = tf.train.exponential_decay(start_learning_rate,
                                                       tf.train.get_global_step(), 1000, 0.9, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op,
                                              scaffold=tf.train.Scaffold(
                                                  summary_op=tf.summary.merge_all(),
                                              ))

        # Add evaluation metrics (for EVAL mode)
        accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
        tf.summary.histogram('accuracy', accuracy)
        logging.info('Accuracy metric build successful..')

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op,
                                          scaffold=tf.train.Scaffold(
                                              summary_op=tf.summary.merge_all()
                                          ))
I am trying to work with embeddings visualization in TensorFlow, and here I want to visualize the dropout output as embeddings.
Code that I've found for using embeddings:
sess = tf.InteractiveSession()

# Input set for Embedded TensorBoard visualization
# Performed with cpu to conserve memory and processing power
with tf.device("/cpu:0"):
    embedding = tf.Variable(self._data, trainable=False, name='embedding')

sess.run(embedding.initializer)

writer = tf.summary.FileWriter(LOG_DIR + '/projector', sess.graph)
config = projector.ProjectorConfig()
embed = config.embeddings.add()
embed.tensor_name = embedding.name
embed.metadata_path = os.path.join(LOG_DIR + '/projector/metadata.tsv')
embed.sprite.image_path = os.path.join(DATA_DIR + '/cifar_10k_sprite.png')
embed.sprite.single_image_dim.extend([img_width, img_height])

projector.visualize_embeddings(writer, config)

saver = tf.train.Saver([embedding])
saver.save(sess, os.path.join(LOG_DIR, 'projector/a_model.ckpt'))
It does not work in my case, because I am using the Estimator class and don't have access to the session.
Ways that I've tried:
Pass a numpy.array variable to the model_fn of the Estimator, where I could set the value of that variable and then pass it to a SessionRunHook, which has access to the session and could save the data to a file. This did not work, because all arguments passed to the Estimator become tensors, so this approach gains nothing; I already have the dropout layer tensors.
Create a global variable where I could put all the values of the dropout layer. This did not work either, because for that approach I would need to access the tensor values themselves.
As far as I understand the Estimator architecture, the main problem is getting the dropout layer's output out of the Estimator and passing it somehow to a SessionRunHook to save it as embeddings. But I don't think that's the best way.
What is the right way of using Embeddings in Estimator?
This is how I did it (though it may not be the most efficient way):
SessionRunHook:
import tensorflow as tf
from classes.Utils import Utils

class EmbeddingSaverHook(tf.train.SessionRunHook):

    def __init__(self, values, labels, captions):
        self._saver = None
        self._classes = Utils.get_classnames()
        self._dense3 = None
        self._labels = None
        self._emb_values = values
        self._emb_labels = labels
        self._emb_captions = captions

    def begin(self):
        self._dense3 = tf.get_default_graph().get_tensor_by_name("dense3/BiasAdd:0")
        self._labels = tf.get_default_graph().get_tensor_by_name("labels:0")

    def before_run(self, run_context):
        return tf.train.SessionRunArgs([self._dense3, self._labels])

    def after_run(self, run_context, run_values):
        self._emb_values.extend(run_values[0][0])
        self._emb_labels.extend(run_values[0][1])
        self._emb_captions.extend([self._classes[x] for x in run_values[0][1]])

    def end(self, session):
        pass
You can see the full code in my GitHub repo.
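For context, a rough sketch of how such a hook could be wired in (the eval_input_fn and the empty lists are assumptions; the hook itself expects tensors named "dense3/BiasAdd:0" and "labels:0" to exist in the graph, as in the author's model):
# Hypothetical usage: collect the layer outputs, labels and captions during a run.
emb_values, emb_labels, emb_captions = [], [], []
hook = EmbeddingSaverHook(emb_values, emb_labels, emb_captions)

# train/evaluate/predict all accept hooks; evaluate is a natural place to collect them.
classifier.evaluate(input_fn=eval_input_fn, hooks=[hook])

# emb_values now holds one activation vector per example and can be written out
# as an embedding variable plus metadata for TensorBoard's projector.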
