Related
I am trying to build the autoencoder structure detailed in this IEEE article. The autoencoder uses a separable loss function where it is required that I create a custom loss function for the "cluster loss" term of the separable loss function as a function of the average output of the encoder. I create my own layer called RffConnected that calculates the cluster loss and utilizes the add_loss method. Otherwise, this RffConnected layer should act as just a normal deep layer.
Here are my relevant code snippets:
import matplotlib.pyplot as plot
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import math
from matplotlib.figure import Figure
import tensorflow as tf
import keras
from keras import layers
import random
import time
from os import listdir
#loads data from a text file
def loadData(basePath, samplesPerFile, sampleRate):
real = []
imag = []
fileOrder = []
for file in listdir(basePath):
if((file != "READ_ME") and ((file != "READ_ME.txt"))):
fid = open(basePath + "\\" + file, "r")
fileOrder.append(file)
t = 0
sampleEvery = samplesPerFile / sampleRate
temp1 = []
temp2 = []
times = []
for line in fid.readlines():
times.append(t)
samples = line.split("\t")
temp1.append(float(samples[0]))
temp2.append(float(samples[1]))
t = t + sampleEvery
real.append(temp1)
imag.append(temp2)
fid.close()
real = np.array(real)
imag = np.array(imag)
return real, imag, times, fileOrder
#####################################################################################################
#Breaks up and randomizes data
def breakUpData(real, imag, times, numPartitions, basePath):
if(len(real) % numPartitions != 0):
raise ValueError("Error: The length of the dataset must be divisible by the number of partitions.")
newReal = []
newImag = []
newTimes = []
fileOrder = listdir(basePath)
dataFiles = []
interval = int(len(real[0]) / numPartitions)
for i in range(0, interval):
newTimes.append(times[i])
for i in range(0, len(real)):
tempI = []
tempQ = []
for j in range(0, len(real[0])):
tempI.append(real[i, j])
tempQ.append(imag[i, j])
if((j + 1) % interval == 0):
newReal.append(tempI)
newImag.append(tempQ)
#fileName = fileOrder[i][0: fileOrder[i].find("_") + 3]
dataFiles.append(fileOrder[i])
tempI = []
tempQ = []
#randomizes the broken up dataset and the file list
for i in range(0, len(newReal)):
r = random.randint(0, len(newReal) - 1)
tempReal = newReal[i]
tempImag = newImag[i]
newReal[i] = newReal[r]
newImag[i] = newImag[r]
newReal[r] = tempReal
newImag[r] = tempImag
tempFile = dataFiles[i]
dataFiles[i] = dataFiles[r]
dataFiles[r] = tempFile
#return np.array(newReal), np.array(newImag), newTimes, dataFiles
return newReal, newImag, newTimes, dataFiles
#####################################################################################################
#custom loss layer for the RffAe-S that calculates the clustering loss term
class RffConnected(layers.Layer):
def __init__(self, output_dim, batchSize, beta, alpha):
super(RffConnected, self).__init__()
# self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
#array = np.zeros(output_dim)
self.iters = 0.0
self.beta = beta
self.alpha = alpha
self.batchSize = batchSize
self.output_dim = output_dim
self.sum = tf.zeros(output_dim, tf.float64)
self.moving_average = tf.zeros(output_dim, tf.float64)
self.clusterloss = tf.zeros(output_dim, tf.float64)
self.sum = tf.cast(self.sum, tf.float32)
self.moving_average = tf.cast(self.moving_average, tf.float32)
self.clusterloss = tf.cast(self.clusterloss, tf.float32)
# self.sum = keras.Input(shape=(self.output_dim,))
# self.moving_average = keras.Input(shape=(self.output_dim,))
# self.clusterloss = keras.Input(shape=(self.output_dim,))
def build(self, input_shape):
self.kernel = self.add_weight(name = 'kernel', \
shape = (int(input_shape[-1]), self.output_dim), \
initializer = 'normal', trainable = True)
#self.kernel = tf.cast(self.kernel, tf.float64)
super(RffConnected, self).build(int(input_shape[-1]))
def call(self, inputs):
#keeps track of training epochs
self.iters = self.iters + 1
#inputs = tf.cast(inputs, tf.float64)
#where this custom layer acts as a normal layer- the loss then uses this
#calc = keras.backend.dot(inputs, self.kernel)
calc = tf.matmul(inputs, self.kernel)
#cumulative sum of deep encoded features
#self.sum = state_ops.assign(self.sum, tf.reshape(tf.math.add(self.sum, calc), tf.shape(self.sum)))
#self.sum = tf.ops.state_ops.assign(self.sum, tf.math.add(self.sum, calc))
#self.sum.assign_add(calc)
self.sum = tf.math.add(self.sum, calc)
#calculate the moving average and loss if we have already trained one batch
if(self.iters >= self.batchSize):
self.moving_average = tf.math.divide(self.sum, self.iters)
self.clusterloss = tf.math.exp(\
tf.math.multiply(-1 * self.beta, tf.math.reduce_sum(tf.math.square(tf.math.subtract(inputs, self.moving_average)))))
#self.add_loss(tf.math.multiply(self.clusterloss, self.alpha))
self.add_loss(self.clusterloss.numpy() * self.alpha)
return calc
#####################################################################################################
def customloss(y_true, y_pred):
loss = tf.square(y_true - y_pred)
print(loss)
return loss
#####################################################################################################
realTraining = np.array(real[0:2200])
realTesting = np.array(real[2200:-1])
imagTraining = np.array(imag[0:2200])
imagTesting = np.array(imag[2200:-1])
numInputs = len(realTraining[0])
i_sig = keras.Input(shape=(numInputs,))
q_sig = keras.Input(shape=(numInputs,))
iRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(i_sig)
rff1 = keras.Model(inputs=i_sig, outputs=iRff)
qRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(q_sig)
rff2 = keras.Model(inputs=q_sig, outputs=qRff)
combined = layers.Concatenate()([iRff, qRff])
combineRff = tf.keras.layers.experimental.RandomFourierFeatures(4 * numInputs, \
kernel_initializer='gaussian', scale=10.0)(combined)
preprocess = keras.Model(inputs=[iRff, qRff], outputs=combineRff)
#print(realTraining[0:5])
preprocessedTraining = preprocess.predict([realTraining, imagTraining])
preprocessedTesting = preprocess.predict([realTesting, imagTesting])
################## Entering Encoder ######################
encoderIn = keras.Input(shape=(4*numInputs,))
#connected1 = layers.Dense(100, activation="sigmoid")(encoderIn)
clusterLossLayer = RffConnected(100, 30, 1.00, 100.00)(encoderIn)
#clusterLossLayer = myRffConnected(256)(connected1)
encoder = keras.Model(inputs=encoderIn, outputs=clusterLossLayer)
################## Entering Decoder ######################
connected2 = layers.Dense(125, activation="sigmoid")(clusterLossLayer)
relu1 = layers.ReLU()(connected2)
dropout = layers.Dropout(0.2)(relu1)
reshape1 = layers.Reshape((25, 5, 1))(dropout)
bn1 = layers.BatchNormalization()(reshape1)
trans1 = layers.Conv2DTranspose(1, (4, 2))(bn1)
ups1 = layers.UpSampling2D(size=(2, 1))(trans1)
relu2 = layers.ReLU()(ups1)
bn2 = layers.BatchNormalization()(relu2)
trans2 = layers.Conv2DTranspose(1, (4, 2))(bn2)
ups2 = layers.UpSampling2D(size=(2, 1))(trans2)
relu3 = layers.ReLU()(ups2)
bn3 = layers.BatchNormalization()(relu3)
trans3 = layers.Conv2DTranspose(1, (5, 2))(bn3)
ups3 = layers.UpSampling2D(size=(2, 1))(trans3)
relu4 = layers.ReLU()(ups3)
bn4 = layers.BatchNormalization()(relu4)
trans4 = layers.Conv2DTranspose(1, (7, 1))(bn4)
reshape2 = layers.Reshape((4*numInputs, 1, 1))(trans4)
autoencoder = keras.Model(inputs=encoderIn, outputs=reshape2)
encoded_input = keras.Input(shape=(None, 100))
decoder_layer = autoencoder.layers[-1]
#autoencoder.summary()
autoencoder.compile(optimizer='adam', loss=[autoencoder.losses[-1], customloss], metrics=['accuracy', 'accuracy'])
autoencoder.fit(preprocessedTraining, preprocessedTraining, epochs=100, batch_size=20, shuffle=True, validation_data=(preprocessedTesting, preprocessedTesting))
It seems like it runs for two training epochs then it gives me an error. I end up getting this error when I run it:
ValueError: Could not interpret loss function identifier: Tensor("rff_connected_137/Const:0", shape=(100,), dtype=float32)
I've already spent a considerable amount of time debugging this thing, although if you spot any more errors I would appreciate a heads-up. Thank you in advance.
According to the documentation of the keras Keras Model Training-Loss, the 'loss' attribute can take the value of float tensor (except for the sparse loss functions returning integer arrays) with a specific shape.
If it is necessary to combine two loss functions, it would be better to perform mathematical calculations within your custom loss function to return an output of float tensor. This reference might be a help Keras CustomLoss definition.
I am trying an adversarial attack on 10 images and I need to save all the perturbed images in a folder. So, I used torch.utils.save_image in pytorch which works pretty fine. I expect all the images to be saved in the folder but instead, they are being overwritten and the last image seen is the only image saved. I have the following attack() function that takes a single image to perturb
def attack(img, label, net, target=None, pixels=1, maxiter=75, popsize=400, verbose=False):
# img: 1*3*W*H tensor
# label: a number
targeted_attack = target is not None
target_calss = target if targeted_attack else label
bounds = [(0,32), (0,32), (0,255), (0,255), (0,255)] * pixels
popmul = max(1, popsize//len(bounds))
predict_fn = lambda xs: predict_classes(
xs, img, target_calss, net, target is None)
callback_fn = lambda x, convergence: attack_success(
x, img, target_calss, net, targeted_attack, verbose)
inits = np.zeros([popmul*len(bounds), len(bounds)])
count = 1
for init in inits:
for i in range(pixels):
init[i*5+0] = np.random.random()*32
init[i*5+1] = np.random.random()*32
init[i*5+2] = np.random.normal(128,127)
init[i*5+3] = np.random.normal(128,127)
init[i*5+4] = np.random.normal(128,127)
attack_result = differential_evolution(predict_fn, bounds, maxiter=maxiter, popsize=popmul,
recombination=1, atol=-1, callback=callback_fn, polish=False, init=inits)
attack_image = perturb_image(attack_result.x, img)
# attack_var = Variable(attack_image, volatile=True).cuda()
with torch.no_grad():
attack_var = attack_image.to(device)
predicted_probs = F.softmax(net(attack_var), dim=1).data.cpu().numpy()[0]
predicted_class = np.argmax(predicted_probs)
vutils.save_image(vutils.make_grid(attack_image, normalize=True, scale_each=True), 'result_img/adversarial' + str(count) + '.png')
vutils.save_image(vutils.make_grid(img, normalize=True, scale_each=True), 'result_img/original' + str(count) + '.png')
count = count + 1
if (not targeted_attack and predicted_class != label) or (targeted_attack and predicted_class == target_calss):
return 1, attack_result.x.astype(int)
return 0, [None]
Below is the attack_all() function that perturbs batches of images (entire test set) which is 10 images in my case.
def attack_all(net, loader, pixels=1, targeted=False, maxiter=75, popsize=400, verbose=False):
correct = 0
success = 0
for batch_idx, (input, target) in enumerate(loader):
# img_var = Variable(input, volatile=True).cuda()
with torch.no_grad():
img_var = input.to(device)
target = target
prior_probs = F.softmax(net(img_var), dim=1)
_, indices = torch.max(prior_probs, 1)
if target[0] != indices.data.cpu()[0]:
continue
correct += 1
target = target.numpy()
targets = [None] if not targeted else range(10)
for target_calss in targets:
if (targeted):
if (target_calss == target[0]):
continue
flag, x = attack(input, target[0], net, target_calss, pixels=pixels, maxiter=maxiter, popsize=popsize, verbose=verbose)
success += flag
if (targeted):
success_rate = float(success)/(9*correct)
else:
success_rate = float(success)/correct
if flag == 1:
print("success rate: %.4f (%d/%d) [(x,y) = (%d,%d) and (R,G,B)=(%d,%d,%d)]"%(
success_rate, success, correct, x[0],x[1],x[2],x[3],x[4]))
if correct == args.samples:
break
return success_rate
Below is the main() class where I am attacking the 10 images with attack_all(). I expect all the 10 images (both original and perturbed) to be saved but only the last seen image is saved.
def main():
print ("==> Loading data and model...")
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=tranfrom_test)
test_set = Cifar10Dataset(csv_file='mydata/cifar10.csv', root_dir = 'mydata/cifar_selected_10', transform = transform_test)
testloader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=True, num_workers=2)
class_names = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
checkpoint = torch.load('./checkpoint/%s.t7'%args.model)
net = checkpoint['net']
net.cuda()
cudnn.benchmark = True
print ("==> Starting attack...")
results = attack_all(net, testloader, pixels=args.pixels, targeted=args.targeted, maxiter=args.maxiter, popsize=args.popsize, verbose=args.verbose)
print ("Final success rate: %.4f"%results)
So I figured out how to solve it myself.
I noticed that variable count in attack() will not increase no matter how. Instead, I set count = 1 outside attack() and did global count inside same attack(). This way, value of count can change and will not remain thesame everytime attack_all() calls the function attack().
I have a problem with TF dataset generator. I do not why, but when I get picture from dataset by running it through session, it returns Tensors where colors are inverted. I tried to changed BGR to RGB, but this is not the problem.
It is partially solved by inverting the image array (img = 1 - img ), but I would like not this problem to occur in first place. Does somebody know what could be the cause?
import os
import glob
import random
import tensorflow as tf
from tensorflow import Tensor
class PairGenerator(object):
person1 = 'img'
person2 = 'person2'
label = 'same_person'
#def __init__(self, lfw_path='./tf_dataset/resources' + os.path.sep + 'lfw'):
def __init__(self, lfw_path='/home/tom/Devel/ai-dev/tensorflow-triplet-loss/data/augmentor'):
self.all_people = self.generate_all_people_dict(lfw_path)
print(self.all_people.keys())
def generate_all_people_dict(self, lfw_path):
# generates a dictionary between a person and all the photos of that person
all_people = {}
for person_folder in os.listdir(lfw_path):
person_photos = glob.glob(lfw_path + os.path.sep + person_folder + os.path.sep + '*.jpg')
all_people[person_folder] = person_photos
return all_people
def get_next_pair(self):
all_people_names = list(self.all_people.keys())
while True:
# draw a person at random
person1 = random.choice(all_people_names)
# flip a coin to decide whether we fetch a photo of the same person vs different person
same_person = random.random() > 0.5
if same_person:
person2 = person1
else:
# repeatedly pick random names until we find a different name
person2 = person1
while person2 == person1:
person2 = random.choice(all_people_names)
person1_photo = random.choice(self.all_people[person1])
yield ({self.person1: person1_photo,
self.label: same_person})
class Inputs(object):
def __init__(self, img: Tensor, label: Tensor):
self.img = img
self.label = label
def feed_input(self, input_img, input_label=None):
# feed the input images that are necessary to make a prediction
feed_dict = {self.img: input_img}
# optionally also include the label:
# if we're just making a prediction without calculating loss, that won't be necessary
if input_label is not None:
feed_dict[self.label] = input_label
return feed_dict
class Dataset(object):
img_resized = 'img_resized'
label = 'same_person'
def __init__(self, generator=PairGenerator()):
self.next_element = self.build_iterator(generator)
def build_iterator(self, pair_gen: PairGenerator):
batch_size = 10
prefetch_batch_buffer = 5
dataset = tf.data.Dataset.from_generator(pair_gen.get_next_pair,
output_types={PairGenerator.person1: tf.string,
PairGenerator.label: tf.bool})
dataset = dataset.map(self._read_image_and_resize)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(prefetch_batch_buffer)
iter = dataset.make_one_shot_iterator()
element = iter.get_next()
return Inputs(element[self.img_resized],
element[PairGenerator.label])
def _read_image_and_resize(self, pair_element):
target_size = [224, 224]
# read images from disk
img_file = tf.read_file(pair_element[PairGenerator.person1])
print("////")
print(PairGenerator.person1)
img = tf.image.decode_image(img_file, channels=3)
# let tensorflow know that the loaded images have unknown dimensions, and 3 color channels (rgb)
img.set_shape([None, None, 3])
# resize to model input size
img_resized = tf.image.resize_images(img, target_size)
#img_resized = tf.image.flip_up_down(img_resized)
#img_resized = tf.image.rot90(img_resized)
pair_element[self.img_resized] = img_resized
pair_element[self.label] = tf.cast(pair_element[PairGenerator.label], tf.float32)
return pair_element
generator = PairGenerator()
iter = generator.get_next_pair()
for i in range(10):
print(next(iter))
ds = Dataset(generator)
import matplotlib.pyplot as plt
imgplot = plt.imshow(out)
imgplot = plt.imshow(1 - out)
Ok so the solution was
imgplot = plt.imshow(out/255)
Here is my code for reading and passing a tf record batch through a network.
import os, sys
import tensorflow as tf
def read_and_decode_single_example(filename_queue):
# Unlike the TFRecordWriter, the TFRecordReader is symbolic
reader = tf.TFRecordReader()
# One can read a single serialized example from a filename
# serialized_example is a Tensor of type string.
_, serialized_example = reader.read(filename_queue)
# The serialized example is converted back to actual values.
# One needs to describe the format of the objects to be returned
features = tf.parse_single_example(
serialized_example,
features={
# We know the length of both fields. If not the
# tf.VarLenFeature could be used
'click': tf.FixedLenFeature([], tf.int64),
'title': tf.FixedLenFeature([25], tf.int64)
# maybe others eg data1:tf.FixLenFeature([],tf.float64)
})
# now return the converted data
lbl = features['click']
ttl = features['title']
return lbl, ttl
def read_batch_data(files, b_s):
min_after_dequeue = 8
num_threads = 2
batch_size = b_s
capacity = min_after_dequeue + (num_threads + 2) * batch_size
filename_queue = tf.train.string_input_producer(files, num_epochs=1)
c_n_c, tit = read_and_decode_single_example(filename_queue)
label_batch, title_batch = tf.train.shuffle_batch([c_n_c, tit], batch_size=batch_size, capacity=capacity,num_threads=num_threads, min_after_dequeue=min_after_dequeue)
return label_batch, title_batch
And the network code:
import math
import os,sys
import subprocess
import pickle
import load_data_labels
import numpy as np
import tensorflow as tf
import shutil
LOG_DIR = './log_dir'
def init_weights(shape, name):
return tf.Variable(tf.random_normal(shape,stddev=0.01,dtype=tf.float64), name=name)
def init_biases(shape, name):
return tf.Variable(tf.random_normal(shape,dtype=tf.float64),name=name)
def model(titles, w_h, w_h2, w_o, vocab_size,embd_layer):
# Add layer name scopes for better graph visualization
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
W_em = tf.Variable(embd_layer,name="word_embeddings")
embed_l = tf.nn.embedding_lookup(W_em, titles)
# can be reduce sum
embedding = tf.reduce_mean(embed_l, [1])
with tf.name_scope("layer1"):
h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
with tf.name_scope("layer2"):
h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
with tf.name_scope("layer3"):
return tf.add(tf.matmul(h2, w_o), b_o)
def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
initW = np.random.uniform(-1.0,1.0,(voc_len+1, emb_dim))
for word in word_map:
vec = w2v_dict.get(word)
idx = word_map[word]
if vec is not None:
initW[idx,:] = vec
return initW
with open('./data/word_map.pickle', 'rb') as word_map_file:
word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
w2vec = pickle.load(w2vec_file)
dataset_file= "./data/file000000000000_1000lines.tfrecords"
batch_size=4
trY,trX = load_data_labels.read_batch_data([dataset_file],batch_size)
trY=tf.one_hot(trY,depth=2,axis = -1)
trY=tf.reshape(trY,[4,2])
print trY.get_shape()
print trX.get_shape()
w_h = init_weights([300, 625], "w_h")
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length=len(w2vec)
any_vector_in_dict = w2vec.itervalues().next()
emb_dim = len(any_vector_in_dict)
embd_layer=init_word_embedding_with_w2v(w2vec,word_map,emb_dim,vocabulary_length)
b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2],"b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length,embd_layer)
with tf.name_scope("cost"):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))
tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
merged = tf.summary.merge_all()
tf.global_variables_initializer().run()
for i in range(10):
sess.run(train_op)
summary, acc = sess.run([merged, acc_op])
writer.add_summary(summary, i) # Write summary
The problem is that the program crushes because it fills all of the RAM memory.The point that the crush starts is in merge_all statement of the network and it hangs in global_variable_initializer from it never returns but the memory is filled gradually.Maybe a queue left open?I can't find anything relevant and specific and tensorflow's docs are at least bad.I'm searching it around more than a week and I m starting to get extremely tired.Could anyone help?
tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have model already trained. I load it and call two functions:
def main(argv=None):
model = NVDM(args)
sess_saver = tf.train.Saver()
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
loaded = load_for_similar(sess, sess_saver) #my function
wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
vocab_file = '/path/to/vocab.pkl'
t1 = loader_object(vocab_file)
v1 = t1.get_vocab()
v1_rev = {k:v for v, k in v1.iteritems()}
decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias in to an new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
best = argsort(sess, dists, reverse=True)
dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
idx = vocab[word_]
similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
words = tf.gather(vocab_inverse.keys(), indexes[:topN])
return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
""" Neural Variational Document Model -- BOW VAE.
"""
def __init__(self,
vocab_size=15000, #was 2000
n_hidden=500,
n_topic=50,
n_sample=1,
learning_rate=1e-5,
batch_size=100, #was 64
non_linearity=tf.nn.tanh):
self.vocab_size = vocab_size
self.n_hidden = n_hidden
self.n_topic = n_topic
self.n_sample = n_sample
self.non_linearity = non_linearity
self.learning_rate = learning_rate/batch_size #CA
self.batch_size = batch_size
self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
# encoder
with tf.variable_scope('encoder'):
self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
self.logsigm = utils.linear(self.enc_vec,
self.n_topic,
bias_start_zero=True,
matrix_start_zero=False,
scope='logsigm')
self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
self.kld = self.mask*self.kld # mask paddings
with tf.variable_scope('decoder'):
if self.n_sample ==1: # single sample
p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
eps = tf.random_normal((p1, self.n_topic), 0, 1)
doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
# multiple samples
else:
eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
eps_list = tf.split(0, self.n_sample, eps)
recons_loss_list = []
for i in xrange(self.n_sample):
if i > 0: tf.get_variable_scope().reuse_variables()
curr_eps = eps_list[i]
doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
self.objective = self.recons_loss + self.kld
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
fullvars = tf.trainable_variables()
enc_vars = utils.variable_parser(fullvars, 'encoder')
dec_vars = utils.variable_parser(fullvars, 'decoder')
enc_grads = tf.gradients(self.objective, enc_vars)
dec_grads = tf.gradients(self.objective, dec_vars)
self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
available = set(np.arange(n_samples)) - used_ints #
if len(available) < batch_size:
indices = np.array(list(available))
else:
indices = np.random.choice(tuple(available), batch_size, replace=False)
used = used_ints
mb = itemgetter(*indices)(it1)
batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
batch_flattened = np.ravel(batch_xs)
index_positions = np.where(batch_flattened > 0)[0]
return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
n_samples = Instance1.num_reviews()
train_batches = list(data_set) #this is an itertools.chain object
it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.atch_size
if len(it1_train) % batch_size != 0:
total_batch = int(len(it1_train)/batch_size) + 1
else:
total_batch = int(len(it1_train)/batch_size)
trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
#Training
train_time = time.time()
for epoch in range(training_epochs):
for switch in xrange(0, 2):
if switch == 0:
optim = model.optim_dec
print_mode = 'updating decoder'
else:
optim = model.optim_enc
print_mode = 'updating encoder'
with open(trainfilesave, 'w') as f:
for i in xrange(alternate_epochs):
loss_sum = 0.0
kld_sum = 0.0
word_count = 0
used_indices = set()
for idx_batch in range(total_batch): #train_batches:
mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
print('minibatch', idx_batch)
used_indices.update(mb[2])
num_mb = np.ones(mb[0][0].shape[0])
input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
_, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
output_size,
no_bias=False,
bias_start_zero=False,
matrix_start_zero=False,
scope=None):
"""Define a linear connection."""
with tf.variable_scope(scope or 'Linear'):
if matrix_start_zero:
matrix_initializer = tf.constant_initializer(0)
else:
matrix_initializer = None
if bias_start_zero:
bias_initializer = tf.constant_initializer(0)
else:
bias_initializer = None
input_size = inputs.get_shape()[1].value
matrix = tf.get_variable('Matrix', [input_size, output_size],
initializer=matrix_initializer)
bias_term = tf.get_variable('Bias', [output_size],
initializer=bias_initializer)
output = tf.matmul(inputs, matrix)
if not no_bias:
output = output + bias_term
return output
def mlp(inputs,
mlp_hidden=[],
mlp_nonlinearity=tf.nn.tanh,
scope=None):
"""Define an MLP."""
with tf.variable_scope(scope or 'Linear'):
mlp_layer = len(mlp_hidden)
res = inputs
for l in xrange(mlp_layer):
res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
return res