principal component analysis in RNN - python

If I want to map input sequences (features) A, B, and C to a target sequence with a TensorFlow LSTM, how can I find out how much each feature affects the target? Does principal component analysis help? If PCA can help, how do I apply it?
The structure(columns) of data set such as below:
A sequence
B sequence
C sequence
Target sequence

What would the principal components of this sequence be? What you could do is take the PCA of A sequence, B sequence and C sequence and visualise that...
Here is a simple tutorial on visualising PCA with Tensorboard: http://www.pinchofintelligence.com/simple-introduction-to-tensorboard-embedding-visualisation/
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.examples.tutorials.mnist import input_data
# Minimal TensorBoard projector example: a batch of MNIST images is stored
# as a variable, and the projector config plus a checkpoint go into LOG_DIR.
LOG_DIR = 'minimalsample'
NAME_TO_VISUALISE_VARIABLE = "mnistembedding"
TO_EMBED_COUNT = 500
path_for_mnist_sprites = os.path.join(LOG_DIR,'mnistdigits.png')
path_for_mnist_metadata = os.path.join(LOG_DIR,'metadata.tsv')
# one_hot=False: labels are requested in non-one-hot form
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
batch_xs, batch_ys = mnist.train.next_batch(TO_EMBED_COUNT)
# The raw image batch itself is the "embedding" that gets visualised
embedding_var = tf.Variable(batch_xs, name=NAME_TO_VISUALISE_VARIABLE)
summary_writer = tf.summary.FileWriter(LOG_DIR)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
# Specify where you find the metadata
embedding.metadata_path = path_for_mnist_metadata #'metadata.tsv'
# Specify where you find the sprite (we will create this later)
embedding.sprite.image_path = path_for_mnist_sprites #'mnistdigits.png'
# Size of a single thumbnail inside the sprite image
embedding.sprite.single_image_dim.extend([28,28])
# Say that you want to visualise the embeddings
projector.visualize_embeddings(summary_writer, config)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Save the initialised variable so a checkpoint exists in LOG_DIR
saver = tf.train.Saver()
saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"), 1)
# Write one "index<TAB>label" row per embedded image, below a header row.
with open(path_for_mnist_metadata,'w') as f:
    f.write("Index\tLabel\n")
    for index,label in enumerate(batch_ys):
        f.write("%d\t%d\n" % (index,label))
Hope this helps you think about PCA!

Related

Problem initializing Tensorflow variables

I am trying to compute the input signal "maximizing" the activation of a given neuron of an encoder NN (the goal is to understand what my latent features are modelling).
I wrote a little python script which loads the .h5 file with the trained encoder model and builds a tensorflow graph to compute iteratively the "best activation signal".
It seems like my tensorflow implementation is not right. Despite the fact that I run tf.initialize_all_variables(), a FailedPreconditionError: Attempting to use uninitialized value X error is raised.
I am a little new in the use of tensorflow without using keras so this may be a trivial mistake but I could really use some help on this. Here is my code. Thanks a lot.
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
# Gradient-ascent search for the input X that maximises one encoder output.
input_sequence_size = 20
input_dim = 4
encoding_dim = 10
model_save = 'siple_autoencoder_encoder.h5'
model = keras.models.load_model(model_save)
lambda_param = 0.1  # step size applied to the gradient in each update
n_steps = 100
X = tf.Variable(tf.random_uniform([1, input_sequence_size * input_dim], -1.0, 1.0), name = 'X')
# NOTE(review): model.predict() is called here, before `init` is run below;
# presumably it evaluates X immediately rather than adding ops to the graph,
# which would explain the "uninitialized value X" error -- confirm.
prediction = model.predict(X, steps = 1)
y = tf.gather_nd(prediction, [[0]], batch_dims=0, name=None)
gradient = tf.gradients(y, [X])[0]
# One ascent step: move X along the gradient of y
step = tf.assign(X, X + lambda_param * gradient)
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    # output = y.eval()
    for i in range(n_steps):
        sess.run(step)
    # Read back the optimised input while the session is still open
    activation_signal_1 = X.eval()

Why does the TensorBoard display the wrong cosine distance?

I want to visualize word embeddings in the TensorBoard Projector, but the cosine distances don't seem right.
If I compute the cosine distances with sklearn, I get different results.
Am I using the TensorBoard Projector incorrectly?
TensorBoard:
https://i.imgur.com/2hRtXym.png
Sklearn:
https://i.imgur.com/49OaiEU.png
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.tensorboard.plugins import projector
import numpy as np  # needed for np.asarray below; not among this snippet's imports

# Export one embedding vector per category, plus a metadata file of the
# category names, so the TensorBoard projector can display them.
LOG_DIR = 'logs'
metadata = os.path.join(LOG_DIR, 'metadata.tsv')
emb_arr = []
arr = []
# category -> dictionary
# category["Category 1"] -> array([[...,...,...,...,]]) # 300 dimensions
for category in category_embeddings:
    # Take the first row of each category's array as its embedding vector
    arr.append(category_embeddings[category][0])
embds_arr = np.asarray(arr)
# One category name per line, in the same iteration order as the vectors
with open(metadata, 'w', encoding="utf-8") as metadata_file:
    for key in category_embeddings.keys():
        metadata_file.write(key + "\n")
embds = tf.Variable(embds_arr, name='embeds')
with tf.Session() as sess:
    saver = tf.train.Saver([embds])
    sess.run(embds.initializer)
    saver.save(sess, os.path.join(LOG_DIR, 'category.ckpt'))
# NOTE(review): this first config (and its model_checkpoint_path) is discarded
# by the re-assignment two lines below -- confirm whether that is intended.
config = projector.ProjectorConfig()
config.model_checkpoint_path = os.path.join(LOG_DIR, 'checkpoint')
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embds.name
embedding.metadata_path = metadata
projector.visualize_embeddings(tf.summary.FileWriter(LOG_DIR), config)
Solved:
I tested it with different datasets and training cycles, and it seems to be a bug within TensorBoard.
Sklearn returns the correct results for the original vector space, while TensorBoard possibly calculates the distances in a space of reduced dimensionality.
https://github.com/tensorflow/tensorboard/issues/2421

How to use dataset in TensorFlow session for training

I would like to perform image classification on our own large image library (millions of labeled images) with TensorFlow. I'm new to Stack Overflow, Python and TensorFlow; I have worked through a few tutorials (MNIST etc.) and got to the point where I was able to prepare a TensorFlow dataset from a dictionary containing the absolute paths to the images and the corresponding labels. However, I'm stuck at the point of using the dataset in a TensorFlow session. Here is my (example) code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
import mymodule # I build my module to read the images and labels
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow.contrib.data import Iterator
beginTime = time.time()
batch_size = 100
learning_rate = 0.005
max_steps = 2
NUM_CLASSES = 25
def input_parser(img_path, label):
    """Load one image and one-hot encode its label (for use with Dataset.map).

    Args:
        img_path: path of a JPEG file, read with tf.read_file.
        label: class index, encoded with depth NUM_CLASSES.

    Returns:
        Tuple (image decoded with 3 channels, one-hot label).
    """
    one_hot = tf.one_hot(label, NUM_CLASSES)
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_jpeg(img_file, channels = 3)
    return img_decoded, one_hot
#Import Training data (returns the dictionary with paths and labels)
# NOTE(review): labelList and imageList are not defined in the code shown.
train_dict = mymodule.getFileMap(labelList, imageList)
#Import Test data
# NOTE(review): built from the same arguments as train_dict -- confirm that
# the test set is really meant to be identical to the training set.
test_dict = mymodule.getFileMap(labelList, imageList)
#Get train data
# NOTE(review): get_file_label_list is not defined or imported in the code
# shown; presumably it lives in mymodule -- confirm.
train_file_list, train_label_list = get_file_label_list(train_dict)
train_images_tensor = ops.convert_to_tensor(train_file_list, dtype=dtypes.string)
train_labels_tensor = ops.convert_to_tensor(train_label_list, dtype=dtypes.int64)
#Get test data
test_file_list, test_label_list = get_file_label_list(test_dict)
test_images_tensor = ops.convert_to_tensor(test_file_list, dtype=dtypes.string)
test_labels_tensor = ops.convert_to_tensor(test_label_list, dtype=dtypes.int64)
#Create TensorFlow Dataset object
# Each dataset element is a (file path, label) pair
train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor))
test_data = tf.data.Dataset.from_tensor_slices((test_images_tensor, test_labels_tensor))
# Transform the dataset so that it contains decoded images
# and one-hot vector labels
train_data = train_data.map(input_parser)
test_data = test_data.map(input_parser)
# Batching --> How to do it right?
#train_data = train_data.batch(batch_size = 100)
#test_data = train_data.batch(batch_size = 100)
#Define input placeholders
# NOTE(review): the placeholders expect flattened float32 images and integer
# class labels, while input_parser yields decoded 3-channel images and
# one-hot labels -- the two do not line up as written; confirm the intent.
image_size = 990*990*3
images_placeholder = tf.placeholder(tf.float32, shape=[None, image_size])
labels_placeholder = tf.placeholder(tf.int64, shape=[None])
# Define variables (these are the values we want to optimize)
weigths = tf.Variable(tf.zeros([image_size, NUM_CLASSES]))
biases = tf.Variable(tf.zeros([NUM_CLASSES]))
# Define the classifier's result
logits = tf.matmul(images_placeholder, weigths) + biases
# Define the loss function
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = labels_placeholder))
# Define the training operation
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
# Operation comparing prediction with true label
correct_prediciton = tf.equal(tf.argmax(logits, 1), labels_placeholder)
# Operation calculating the accuracy of our predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediciton, tf.float32))
#Create TensorFlow Iterator object
# Built from the dataset structure only, so it can be pointed at either dataset
iterator = Iterator.from_structure(train_data.output_types,
                                   train_data.output_shapes)
next_element = iterator.get_next()
#Create two initialization ops to switch between the datasets
train_init_op = iterator.make_initializer(train_data)
test_init_op = iterator.make_initializer(test_data)
with tf.Session() as sess:
    #Initialize variables
    sess.run(tf.global_variables_initializer())
    # Point the iterator at the training data
    sess.run(train_init_op)
    # Print up to 10 elements, stopping early if the dataset runs out
    for _ in range(10):
        try:
            elem = sess.run(next_element)
            print(elem)
        except tf.errors.OutOfRangeError:
            print("End of training datset.")
            break
Following this and this tutorial, I could not work out how to use the (image and label) dataset in a TensorFlow session for training. I was able to print out the dataset by iterating through it, but wasn't able to use it for learning.
I don't understand how to access the images and labels separately after they have been merged in the train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor)) operation, as required by the 2nd tutorial. I also don't know how to implement batching correctly.
What I want to do in the session is basically this (from the 2nd tutorial):
# Generate input data batch
indices = np.random.choice(data_sets['images_train'].shape[0], batch_size)
images_batch = data_sets['images_train'][indices]
labels_batch = data_sets['labels_train'][indices]
# Periodically print out the model's current accuracy
if i % 100 == 0:
train_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: images_batch, labels_placeholder: labels_batch})
print('Step {:5d}: training accuracy {:g}'.format(i, train_accuracy))
# Perform a single training step
sess.run(train_step, feed_dict={images_placeholder: images_batch,
labels_placeholder: labels_batch})
# After finishing the training, evaluate on the test set
test_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: data_sets['images_test'],
labels_placeholder: data_sets['labels_test']})
print('Test accuracy {:g}'.format(test_accuracy))
endTime = time.time()
print('Total time: {:5.2f}s'.format(endTime - beginTime))
If anyone can tell me how to access the images and labels in the dataset separately and use them for training, I would be really thankful. A tip on where and how to do the batching would also be appreciated.
Thank you.
In your code, next_element is a tuple of two tensors, matching the structure of your datasets: i.e. it is a tuple whose first element is an image, and second element is a label. To access the individual tensors, you can do the following:
next_element = iterator.get_next()
next_image = next_element[0]
next_label = next_element[1]
# Or, in a single line:
next_image, next_label = iterator.get_next()
To batch a tf.data.Dataset, you can use the Dataset.batch() transformation. Your commented out code for this should simply work:
train_data = train_data.batch(batch_size = 100)
# Batch the test set itself; the commented-out line in the question batched
# train_data here by mistake.
test_data = test_data.batch(batch_size = 100)

Attempting to use uninitialized value Variable in Tensorflow ( sess.run(tf.global_variables_initializer()) is used!)

I am trying to split my code into the neural network model and a restore() function that sets randomly chosen weights to zero.
Here's the model code: http://pastebin.com/TqN6kkeb
(it works properly).
And here's the function:
from __future__ import print_function
import tensorflow as tf
tf.GraphKeys.VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES
import random
from LogReg import accuracy
from LogReg import W
from LogReg import x,y
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
def restore(model_file):
    """Reload a saved model, mask W with a random 0/1 row mask and re-save it.

    Restores the graph and variables stored under `model_file`, builds a mask
    of ones in which half of the rows (chosen at random) are overwritten with
    zeros, multiplies W by that mask, saves the result back to `model_file`
    and prints the accuracy on the MNIST test set.

    Args:
        model_file: checkpoint path prefix; `model_file + ".meta"` must exist.
    """
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph(model_file + ".meta")
        new_saver.restore(sess, model_file)
        with tf.variable_scope("foo", reuse=True):
            temp_var = tf.get_variable("W")
            size_2a = tf.get_variable("b")
        size_2 = tf.shape(size_2a).eval()[0]
        size_1 = tf.shape(temp_var).eval()[0]
        ones_mask = tf.Variable(tf.ones([size_1, size_2]))
        # range and floor division behave the same on Python 2 and keep the
        # sample size an integer on Python 3.
        arg = random.sample(range(size_1), size_1 // 2)
        index_num=tf.convert_to_tensor(arg, dtype=tf.int32)
        print("om", ones_mask)
        print("index", index_num)
        print(W)
        zeroes = tf.zeros([size_1 // 2, size_2])
        # Overwrite the sampled rows of the mask with zeros
        update = tf.scatter_update(ones_mask, index_num, zeroes)
        print(update)
        assign_op = W.assign(tf.mul(W, update))
        # NOTE(review): ones_mask is a new variable created after the restore,
        # and `update` is run before init_op below -- this ordering looks like
        # the source of the "uninitialized value Variable" error; confirm.
        sess.run(update)
        sess.run(assign_op)
        # NOTE(review): init_op covers all global variables, so it presumably
        # also resets the restored and masked W before it is saved -- confirm.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        new_saver.save(sess, model_file)
        print("Accuracy_new:", accuracy.eval({x: mnist.test.images, y:mnist.test.labels}))
restore('./MyModel2')
The problems are:
1) is that it keeps writing me
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable in this line:
update = tf.scatter_update(ones_mask, index_num, zeroes)
no matter what. I have read these topics: Prettytensor: Attempting to use uninitialized value and Update a subset of weights in TensorFlow (and many others), but the advice there didn't help me fix my bug.
And I don't understand what the problem with the initialization is, given that I run tf.global_variables_initializer();
2) all of the weights seem to be set to zero instead of half of them, and I can't understand why.
Please help, I'm really stuck.
Just for the record (and others finding this post), the method name has changed, as per the page here: https://www.tensorflow.org/versions/r0.10/how_tos/variables/#initialization
you should run the initialize_all_variables() method like this:
import tensorflow as tf
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

How to load pre-trained tensorflow model named inception by Google?

I have downloaded a tensorflow checkpoint model named inception_resnet_v2_2016_08_30.ckpt.
Do I need to create a graph (with all the variables) that were used when this checkpoint was created?
How do I make use of this model?
First of all, you have to get the network architecture in memory. You can get the network architecture from here
Once you have this program with you, use the following approach to use the model:
import tensorflow as tf
from inception_resnet_v2 import inception_resnet_v2, inception_resnet_v2_arg_scope
# tf and slim are used below but were never brought into scope in this snippet
slim = tf.contrib.slim
height = 299
width = 299
channels = 3
# Batch of input images; the batch dimension is left open
X = tf.placeholder(tf.float32, shape=[None, height, width, channels])
# Build the network in inference mode under the model's default arg scope
with slim.arg_scope(inception_resnet_v2_arg_scope()):
    logits, end_points = inception_resnet_v2(X, num_classes=1001,is_training=False)
With this you have the whole network in memory. Now you can initialize the network from the checkpoint file (ckpt) by using tf.train.Saver:
saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess, "/home/pramod/Downloads/inception_resnet_v2_2016_08_30.ckpt")
If you want to do bottleneck feature extraction, it's simple: say you want to get the features from the last layer, then you simply declare predictions = end_points["Logits"]. If you want them for another intermediate layer, you can get the layer names from the above program, inception_resnet_v2.py
After that you can call: output = sess.run(predictions, feed_dict={X:batch_images})
Do I need to create a graph (with all the variables) that were used when this checkpoint was created?
No, you don't.
As for how to use the checkpoint file (ckpt file):
1.This article (TensorFlow-Slim image classification library) tells you how to train your model from scratch
2.The following is an example code from google blog
import numpy as np
import os
import tensorflow as tf
import urllib2
from datasets import imagenet
from nets import inception
from preprocessing import inception_preprocessing
# Module-level settings and session used by classify_from_url below.
slim = tf.contrib.slim
# NOTE(review): batch_size is not referenced anywhere in the code shown.
batch_size = 3
# NOTE(review): the size is taken from inception_v3 although the model built
# below is inception_resnet_v2 -- presumably both use the same size; confirm.
image_size = inception.inception_v3.default_image_size
checkpoints_dir = '/root/code/model'
checkpoints_filename = 'inception_resnet_v2_2016_08_30.ckpt'
model_name = 'InceptionResnetV2'
sess = tf.InteractiveSession()
# NOTE(review): the session is created before this graph, and the result of
# as_default() is discarded (it is not used in a `with` block), so the new
# graph presumably never becomes the default one -- confirm.
graph = tf.Graph()
graph.as_default()
def classify_from_url(url):
    """Classify the JPEG at `url` with Inception-ResNet-v2 and show the top 5.

    Downloads the image, preprocesses it to image_size x image_size, builds
    the network, loads its weights from checkpoints_dir/checkpoints_filename
    into the module-level `sess`, displays the image and prints the five most
    probable ImageNet labels with their probabilities as percentages.

    Args:
        url: address of a JPEG image.
    """
    # Imported here because this snippet never imports pyplot at the top.
    import matplotlib.pyplot as plt

    image_string = urllib2.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
    # Add a leading batch dimension
    processed_images = tf.expand_dims(processed_image, 0)
    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
        logits, _ = inception.inception_resnet_v2(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, checkpoints_filename),
        slim.get_model_variables(model_name))
    init_fn(sess)
    np_image, probabilities = sess.run([image, probabilities])
    # Keep the single batch row
    probabilities = probabilities[0, 0:]
    # Class indices ordered from most to least probable
    sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]
    plt.figure()
    plt.imshow(np_image.astype(np.uint8))
    plt.axis('off')
    plt.show()
    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
        # Softmax outputs lie in [0, 1]; scale by 100 to match the '%' sign.
        print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
Another way of loading a pre-trained Imagenet model is
ResNet50
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
model = ResNet50()
model.summary()
InceptionV3
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3
# Build the InceptionV3 model and print its layer summary
model = InceptionV3()
model.summary()
You can check a detailed explanation related to this here

Categories