I am trying to implement triplet learning in Keras.
Here are two versions of get_triplet() I used:
"Trivial":
def get_triplet(nSplitIdx, bIsTrain):
positiveClass = np.random.choice(arrLabels)
# Depending on whether we train or validate, select the range.
# Say we have 10 images per class and 70% goes to train. Then 0-6 (train); 7-9 (valid, at least 3).
if(bIsTrain):
nMinIdx = 0
nMaxIdx = nSplitIdx - 1
else:
nMinIdx = nSplitIdx
nMaxIdx = NUM_OF_IMAGES_PER_CLASS - 1 - TESTING_IMAGES_PER_CLASS
# Get 3 indices: for base image and for positive example, from same class. And one more for negative example.
# TBD: figure out (test) whether the SAME image should be used in a positive pair, like [img[1], img[1]]?
nImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)
while nImageIdx[0] == nImageIdx[1]:
nImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))
negativeClass = np.random.choice(arrLabels)
while negativeClass['Id'] == positiveClass['Id']:
negativeClass = np.random.choice(arrLabels)
negativeFileName = negativeClass['ImageNames'][nImageIdx[2]]
# nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass.
# Ex: positiveClass[nImageIdx[0]], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
return nImageIdx, positiveClass, negativeClass
and "semi-hard" (though not so elegant):
# Used to mine ONLY
def getDistance(a, b):
distance = (a - b)**2
distance = sum(distance)
return distance
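# Aside (added for illustration, not part of the original post): getDistance()
# plus the Python loop over predictions further down can be replaced by one
# vectorized NumPy call over all embeddings at once:
def getDistancesVectorized(anchorEmbedding, arrEmbeddings):
    # Squared Euclidean distance from the anchor to every row of a 2-D array.
    return np.sum((arrEmbeddings - anchorEmbedding) ** 2, axis=1)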
graph = tf.get_default_graph()
def getExampleIdx(positiveClass, negativeClass, bIsPositive, nAncorIdx):
global embedding_model
# The TensorFlow model is not loaded and used in the same thread; one workaround is to force TensorFlow to use the global default graph.
global graph
with graph.as_default():
imgAncor = positiveClass['Images'][nAncorIdx]
if(bIsPositive):
cClass = positiveClass
else:
cClass = negativeClass
arrExamples = []
arrExamples.append(np.array(imgAncor, dtype="float32") / 255.) # Add anchor image as 0th element
if(bIsPositive):
for i in range(TRAINING_IMAGES_PER_CLASS):
# img = loadImage(cClass, i, datagen)
# arrExamples.append(np.array(img, dtype="float32") / 255.)
# Note: no augmentation here
img = cClass['Images'][i]
arrExamples.append(np.array(img, dtype="float32") / 255.)
else:
# For a batch
if(BATCH_SIZE > len(arrLabels)):
nMinRange = len(arrLabels)
nStartClassIdx = 0
else:
nMinRange = BATCH_SIZE + 1
nStartClassIdx = np.random.choice(range(len(arrLabels) - nMinRange))
for nClassId in range(nStartClassIdx, nStartClassIdx + nMinRange):
cClass = arrLabels[nClassId]
if cClass['Id'] == positiveClass['Id']:
continue
# We take one random image per class to speed things up; in general, the for loop below should be used instead of the random choice.
i = np.random.choice(range(TRAINING_IMAGES_PER_CLASS))
#for i in range(TRAINING_IMAGES_PER_CLASS):
# Note: no augmentation here
img = cClass['Images'][i]
arrExamples.append(np.array(img, dtype="float32") / 255.)
arrPredictionsPos = embedding_model.predict([arrExamples])
# Get distances between 0th predictions and other predictions
arrDistancesPos = []
for i in range(1, len(arrPredictionsPos)):
arrDistancesPos.append(getDistance(arrPredictionsPos[0], arrPredictionsPos[i]))
#print("{}, ".format(arrDistancesPos[i - 1]))
if(bIsPositive):
#print("Positive: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmax()]))
return np.array(arrDistancesPos).argmax(), positiveClass
else:
#print("Negative: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmin()]))
nNegativeIdx = np.array(arrDistancesPos).argmin() % TRAINING_IMAGES_PER_CLASS
return nNegativeIdx, arrLabels[nStartClassIdx + nNegativeIdx // TRAINING_IMAGES_PER_CLASS]
# ---
def get_triplet(nSplitIdx, bIsTrain):
# Select random class
positiveClass = np.random.choice(arrLabels)
negativeClass = np.random.choice(arrLabels)
while negativeClass['Id'] == positiveClass['Id']:
negativeClass = np.random.choice(arrLabels)
# Depending on whether we train or validate, select the range.
# Say we have 10 images per class and 70% goes to train. Then 0-6 (train); 7-9 (valid, at least 3).
if(bIsTrain):
nMinIdx = 0
nMaxIdx = nSplitIdx
else:
nMinIdx = nSplitIdx
nMaxIdx = NUM_OF_IMAGES_PER_CLASS - TESTING_IMAGES_PER_CLASS
arrImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)
if(bIsTrain):
arrImageIdx[1], positiveClass = getExampleIdx(positiveClass, positiveClass, True, arrImageIdx[0])
arrImageIdx[2], negativeClass = getExampleIdx(positiveClass, negativeClass, False, arrImageIdx[0])
else:
while arrImageIdx[0] == arrImageIdx[1]:
arrImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))
#negativeFileName = negativeClass['ImageNames'][arrImageIdx[2]]
# nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass.
# Ex: positiveClass[nImageIdx[0]], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
return arrImageIdx, positiveClass, negativeClass
Now, the generator:
from time import time
#t0 = time()
#t1 = time()
#print('get_triplet takes %f' %(t1-t0))
def gen(bIsTrain):
#nSplitIdx = int(NUM_OF_IMAGES_PER_CLASS * TESTING_SPLIT)
while True:
arrBaseExamples = []
arrPositiveExamples = []
arrNegativeExamples = []
for i in range(BATCH_SIZE):
nImageIdx, positiveClass, negativeClass = get_triplet(TRAINING_IMAGES_PER_CLASS, bIsTrain)
#t0 = time()
baseExampleImg = loadImage(positiveClass, nImageIdx[0], datagen)
positiveExampleImg = loadImage(positiveClass, nImageIdx[1], datagen)
negativeExampleImg = loadImage(negativeClass, nImageIdx[2], datagen)
#t1 = time()
#print('loaded in %f' %(t1-t0))
arrBaseExamples.append(baseExampleImg)
arrPositiveExamples.append(positiveExampleImg)
arrNegativeExamples.append(negativeExampleImg)
#base = preprocess_input(np.array(arrBaseExamples)) / 255. #'a' #preprocess_input(np.array(arrBaseExamples))
base = np.array(arrBaseExamples) / 255.
#train_datagen.fit(base)
#positive = preprocess_input(np.array(arrPositiveExamples)) / 255.
positive = np.array(arrPositiveExamples) / 255.
#train_datagen.fit(positive)
#negative = preprocess_input(np.array(arrNegativeExamples)) / 255.
negative = np.array(arrNegativeExamples) / 255.
#train_datagen.fit(negative)
label = None
yield ({'anchor_input': base, 'positive_input': positive, 'negative_input': negative}, label)
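A quick way to sanity-check the generator output (a sketch added for illustration; it assumes the globals used above are already defined):
batch_inputs, _ = next(gen(True))
print(batch_inputs['anchor_input'].shape)    # (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3)
print(batch_inputs['positive_input'].shape)  # same shape for positives
print(batch_inputs['negative_input'].shape)  # and for negatives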
and finally, the triplet_loss function (stolen from Kaggle):
ALPHA = 0.2 # Triplet Loss Parameter
def triplet_loss(inputs, dist='sqeuclidean', margin='maxplus'):
anchor, positive, negative = inputs
positive_distance = K.square(anchor - positive)
negative_distance = K.square(anchor - negative)
if dist == 'euclidean':
positive_distance = K.sqrt(K.sum(positive_distance, axis=-1, keepdims=True))
negative_distance = K.sqrt(K.sum(negative_distance, axis=-1, keepdims=True))
elif dist == 'sqeuclidean':
positive_distance = K.sum(positive_distance, axis=-1, keepdims=True)
negative_distance = K.sum(negative_distance, axis=-1, keepdims=True)
loss = positive_distance - negative_distance + ALPHA
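# Note (added): with margin='maxplus' the branch below computes max(0, 1 + loss),
# so the effective margin is 1 + ALPHA rather than ALPHA alone.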
if margin == 'maxplus':
loss = K.maximum(0.0, 1 + loss)
elif margin == 'softplus':
loss = K.log(1 + K.exp(loss))
return K.mean(loss)
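A quick numeric sanity check of this loss (added for illustration; it assumes the same Keras backend import, from keras import backend as K, used by the rest of the code):
anchor = K.constant([[0.0, 0.0]])
positive = K.constant([[0.1, 0.0]])  # close to the anchor
negative = K.constant([[1.0, 0.0]])  # far from the anchor
print(K.eval(triplet_loss([anchor, positive, negative])))
# prints 0.21 = max(0, 1 + (0.01 - 1.0 + ALPHA))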
The model uses transfer learning:
def createModel(nL2):
base_model = ResNet50(weights='imagenet', include_top=False, pooling='max')
for layer in base_model.layers:
layer.trainable = False
x = base_model.output
x = Dropout(0.6, name="classifier_dropout")(x)
x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_0", kernel_regularizer=regularizers.l2(nL2))(x)
x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_1", kernel_regularizer=regularizers.l2(nL2))(x)
x = Dense(EMBEDDING_DIM, activation='softmax', name="classifier_dense", kernel_regularizer=regularizers.l2(nL2))(x)
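# Note (added): softmax makes the EMBEDDING_DIM outputs sum to 1, so a
# 3-dimensional embedding is confined to a 2-D simplex even before the
# L2 normalization below.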
x = Lambda(lambda x: K.l2_normalize(x,axis=1), name="lambda")(x)
embedding_model = Model(base_model.input, x, name="embedding")
input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
anchor_input = Input(input_shape, name='anchor_input')
positive_input = Input(input_shape, name='positive_input')
negative_input = Input(input_shape, name='negative_input')
anchor_embedding = embedding_model(anchor_input)
positive_embedding = embedding_model(positive_input)
negative_embedding = embedding_model(negative_input)
inputs = [anchor_input, positive_input, negative_input]
outputs = [anchor_embedding, positive_embedding, negative_embedding]
triplet_model = Model(inputs, outputs)
triplet_model.add_loss(K.mean(triplet_loss(outputs)))
return embedding_model, triplet_model
I create and train it on four of Kaggle's dog breeds (the original dataset has 120; I only use four):
BATCH_SIZE = 16
EPOCHS = 200
arrParams = [[0.8, 3]]
checkpoint = ModelCheckpoint(best_weights_filepath, monitor="val_loss", save_best_only=True, save_weights_only=True, mode='auto')
callbacks_list = [checkpoint] # , early]
gen_train = gen(True)
gen_valid = gen(False)
for i in range(0, len(arrParams)):
nL2 = arrParams[i][0]
EMBEDDING_DIM = arrParams[i][1]
deleteSavedNet(best_weights_filepath)
embedding_model, triplet_model = createModel(nL2)
nNumOfClasses = len(arrLabels)
nNumOfTrainSamples = TRAINING_IMAGES_PER_CLASS * nNumOfClasses
nNumOfValidSamples = VALIDATION_IMAGES_PER_CLASS * nNumOfClasses
STEP_SIZE_TRAIN = nNumOfTrainSamples // BATCH_SIZE
if(STEP_SIZE_TRAIN == 0):
STEP_SIZE_TRAIN = 1
STEP_SIZE_VALID = nNumOfValidSamples // BATCH_SIZE
if(STEP_SIZE_VALID == 0):
STEP_SIZE_VALID = 1
triplet_model.compile(loss=None, optimizer="adam", metrics=['binary_accuracy']) #metrics=['accuracy'])
history = triplet_model.fit_generator(gen_train, validation_data=gen_valid,
epochs=EPOCHS, verbose=1, steps_per_epoch=STEP_SIZE_TRAIN, validation_steps=STEP_SIZE_VALID, callbacks=callbacks_list)
print(nL2, EMBEDDING_DIM)
plotHistoryLoss()
It learns:
deleteSavedNet():File removed
Initializing model
Finished initializing model
Epoch 1/200
6/6 [==============================] - 44s 7s/step - loss: 8.0306 - val_loss: 7.6388
Epoch 2/200
6/6 [==============================] - 15s 2s/step - loss: 7.0082 - val_loss: 6.7307
...
Epoch 200/200
6/6 [==============================] - 15s 3s/step - loss: 0.7046 - val_loss: 0.7043
But (notice I used a 3-dimensional embedding) when I plot the results, I see a line, not clusters:
nL2 = arrParams[0][0]
EMBEDDING_DIM = arrParams[0][1]
embedding_model, triplet_model = createModel(nL2)
loadBestModel()
def data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
i = 0
arrImages = []
arrImageLabels = []
arrImageClasses = []
for nImageIdx in range(len(arrAllImages)):
if(i == 0):
arrImages = []
arrImageLabels = []
arrImageClasses = []
i += 1
arrImg = img_to_array(arrAllImages[nImageIdx])
arrImg = datagen.random_transform(arrImg) / 255.
arrImg = np.array(arrImg, dtype="float32")
arrImages.append(arrImg)
arrImageLabels.append(arrAllImageLabels[nImageIdx])
arrImageClasses.append(arrAllImageClasses[nImageIdx])
if i == BATCH_SIZE:
i = 0
arrImages = np.array(arrImages)
yield arrImages, arrImageLabels, arrImageClasses
if i != 0:
arrImages = np.array(arrImages)
yield arrImages, arrImageLabels, arrImageClasses
return  # PEP 479: raising StopIteration inside a generator becomes a RuntimeError in Python 3.7+
arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []
for cClass in arrLabels:
for nIdx in range(TRAINING_IMAGES_PER_CLASS):
arrAllImages.append(cClass['Images'][nIdx])
arrAllImageLabels.append(cClass['ImageNames'][nIdx])
arrAllImageClasses.append(cClass['Id'])
train_preds = []
train_file_names = []
train_class_names = []
np.random.seed(7)
for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
predicts = embedding_model.predict(imgs)
predicts = predicts.tolist()
train_preds += predicts
train_file_names += fnames
train_class_names += classes
train_preds = np.array(train_preds)
arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []
for cClass in arrLabels:
#for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
#for nIdx in range(TRAINING_IMAGES_PER_CLASS):
for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
arrAllImages.append(cClass['Images'][nIdx])
arrAllImageLabels.append(cClass['ImageNames'][nIdx])
arrAllImageClasses.append(cClass['Id'])
test_preds = []
test_file_names = []
test_class_names = []
np.random.seed(7)
for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
predicts = embedding_model.predict(imgs)
predicts = predicts.tolist()
test_preds += predicts
test_file_names += fnames
test_class_names += classes
test_preds = np.array(test_preds)
neigh = NearestNeighbors(n_neighbors=6)
neigh.fit(train_preds)
#neigh.fit(arrTrainingClasterCenters)
distances_test, neighbors_test = neigh.kneighbors(test_preds)
distances_test, neighbors_test = distances_test.tolist(), neighbors_test.tolist()
data = pd.read_csv(working_path + "DogRecognizer/dogs/train_dogs.csv")
file_id_mapping = {k: v for k, v in zip(data.Image.values, data.Id.values)}
preds_str = []
arrSearchPositions = []
for filepath, distance, neighbour_ in zip(test_file_names, distances_test, neighbors_test):
sample_result = []
sample_classes = []
for d, n in zip(distance, neighbour_):
#class_train = arrLabels[n]['Id']
train_file = train_file_names[n].split(os.sep)[-1]
class_train = file_id_mapping[train_file]
sample_classes.append(class_train)
sample_result.append((class_train, d))
#if "new_whale" not in sample_classes:
# sample_result.append(("new_whale", 0.1))
sample_result.sort(key=lambda x: x[1], reverse=True)
sample_result = sample_result[:5]
preds_str.append(" ".join([x[0] for x in sample_result]))
nTotalSuccess = 0
for i, strClassNames in enumerate(preds_str):
if(test_class_names[i] in strClassNames):
strContains = ": Yes"
nTotalSuccess += 1
else:
strContains = ": No"
#print(test_class_names[i], ": ", strClassNames, " (", strContains, ")")
print ("Success rate: ", nTotalSuccess / (i+1) )
import pylab as pl
from sklearn import neighbors, datasets
import matplotlib.cm as cm
h = 0.02
knn=neighbors.KNeighborsClassifier(n_neighbors=6)
knn.fit(test_preds, test_class_names)
x_min, x_max = test_preds[:,0].min() - .01, test_preds[:,0].max() + .02
y_min, y_max = test_preds[:,1].min() - .01, test_preds[:,1].max() + .02
z_min, z_max = test_preds[:,2].min() - .01, test_preds[:,2].max() + .01
xx, yy, zz = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h), np.arange(z_min, z_max, h))
#Z = knn.predict(np.c_[xx.ravel(), yy.ravel(), zz.ravel()])
color_space = []
for i in range(len(test_class_names)):
if(test_class_names[i] == 'pembroke'):
color_space.append('red')
#test_preds[:,1][i] += 0.01
elif(test_class_names[i] == 'maltese_dog'):
color_space.append('green')
#test_preds[:,1][i] += 0.02
elif(test_class_names[i] == 'gordon_setter'):
color_space.append('orange')
#test_preds[:,1][i] += 0.03
else:
color_space.append('blue')
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D
import random
fig = pyplot.figure(figsize=(16, 8))
ax = Axes3D(fig)
pl.set_cmap(pl.cm.Paired)
ax.scatter(test_preds[:,0], test_preds[:,1], test_preds[:,2], c= color_space)
pyplot.show()
Here is the plot:
Why is it a line, not clusters?
Related
I'm new to PyTorch, and I'm trying to train a simple classifier with MNIST data. However, my classifier's accuracy is about 10%. I tried several methods to adjust the network, but failed; the classifier's output label is always the same: all 0, or all 7, or all 6. Please tell me what is wrong with the code. (I know I should use DataLoader; I will take a look at it later. For now I just want to make the classifier's accuracy look good.)
# coding=utf-8
# Data: the handwritten_digit files under ../data/
import struct
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
data_folder = '../data/handwritten_digit/'
dt = torch.get_default_dtype()
train_label_file = 'train-labels-idx1-ubyte'
train_img_file = 'train-images-idx3-ubyte'
test_img_file = 't10k-images-idx3-ubyte'
test_label_file = 't10k-labels-idx1-ubyte'
model_path = './handwritten_digit_recognition_net3.pth'
def timer(func):
def cal_time(*args, **kw):
start_time = time.time()
out = func(*args, **kw)
end_time = time.time()
print('Function ', func.__name__, ' ran in ', end_time-start_time, ' seconds', sep = '')
return out
return cal_time
def read_imgs(file):
with open(data_folder+file, 'rb') as frb:
# Read the metadata first
magic_num, img_num, row_num, col_num = struct.unpack('>IIII', frb.read(16))
# print(magic_num, img_num, row_num, col_num)
# img = np.fromfile(frb, dtype = np.uint8, count = row_num*col_num).reshape(row_num, col_num)
# print(img, img.shape, 'img')
imgs = np.fromfile(frb, dtype = np.uint8).reshape(img_num, row_num, col_num)
# imgs = np.fromfile(frb, dtype = np.uint8, count = row_num*col_num*img_num).reshape(img_num, row_num, col_num)
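# Note (added): the double unsqueeze yields shape (N, 1, 1, 28, 28), so each
# imgs[i] is a (1, 1, 28, 28) single-image batch ready for the conv net.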
return torch.from_numpy(imgs).type(dt).unsqueeze(1).unsqueeze(1)
def read_labels(file):
with open(data_folder+file, 'rb') as frb:
# Read the metadata first
magic_num, label_num = struct.unpack('>II', frb.read(8))
# print(magic_num, label_num)
labels = np.fromfile(frb, dtype = np.uint8)
return torch.from_numpy(labels).type(dt)
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 12, 5)
self.conv2 = nn.Conv2d(12, 12, 5)
self.pool = nn.MaxPool2d(2, 2)
self.linear1 = nn.Linear(12*16, 30)
self.linear2 = nn.Linear(30, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.pool(x)
x = F.relu(self.conv2(x))
x = self.pool(x)
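# Shape check (added): conv1 takes 28x28 to 24x24, pool to 12x12; conv2 to 8x8,
# pool to 4x4 -- so we have 12 channels of 4x4 = 12*16 features per image.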
x = x.view(-1, 12*16)
# print(x.size(), 'x.size()')
x = F.relu(self.linear1(x))
x = self.linear2(x)
return x
@timer
def train_and_save_net():
train_imgs = read_imgs(train_img_file)
train_labels = read_labels(train_label_file)
test_imgs = read_imgs(test_img_file)
test_labels = read_labels(test_label_file)
# label = torch.zeros(1, 10)
# label[0][int(train_labels[0])] = 1
# print(label)
# print(train_labels[0])
# return
net = Net()
# criterion = nn.MSELoss()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)
print('Start Training')
sum_loss = 0
for i, img in enumerate(train_imgs):
optimizer.zero_grad()
predicted = net(img)
# label = torch.zeros(1, 10)
# label[0][int(train_labels[i])] = 1
label = torch.tensor([train_labels[i]], dtype = torch.long)
# print(predicted, predicted.size(), 'predicted')
# print(label, label.size(), 'label')
loss = criterion(predicted, label)
loss.backward()
optimizer.step()
sum_loss += loss.item()
if i % 2000 == 1999:
print('Trained on ', i+1, ' images; progress: ', '%.2f'%((i+1)/len(train_labels)*100), '%', sep = '')
print('loss:', sum_loss/2000)
sum_loss = 0
print('End Training')
torch.save(net.state_dict(), model_path)
print('End Saving Net Parameters')
def load_net():
net = Net()
net.load_state_dict(torch.load(model_path))
return net
@timer
def evaluate():
train_imgs = read_imgs(train_img_file)
train_labels = read_labels(train_label_file)
test_imgs = read_imgs(test_img_file)
test_labels = read_labels(test_label_file)
net = load_net()
# Sanity check: eyeball a few predictions
for i in range(5):
img = train_imgs[i]
# plt.imshow(img.squeeze(), cmap = 'gray')
# plt.show()
predicted_vector = net(img)
_, predicted = torch.max(predicted_vector, 1)
predicted = predicted.item()
print('Predicted class: ', predicted, ', actual class: ', int(train_labels[i].item()), sep = '')
# Training-set accuracy
total = len(train_labels)
correct = 0
for i in range(len(train_labels)):
img = train_imgs[i]
predicted_vector = net(img)
_, predicted = torch.max(predicted_vector, 1)
label = int(train_labels[i].item())
if predicted == label:
correct += 1
print('Training-set accuracy: ', '%.2f'%(correct/total*100), '%', sep = '')
total = len(test_labels)
correct = 0
pre_arr = []
for i in range(len(test_labels)):
img = test_imgs[i]
predicted_vector = net(img)
_, predicted = torch.max(predicted_vector, 1)
label = int(test_labels[i].item())
pre_arr.append(predicted)
if predicted == label:
correct += 1
print('Test-set accuracy: ', '%.2f'%(correct/total*100), '%', sep = '')
print('Predictions of class 0 / total predictions: ', pre_arr.count(0), '/', len(pre_arr), sep = '')
@timer
def test():
predicted_vector = torch.randn(1,10)
_, predicted = torch.max(predicted_vector, 1)
print(predicted.item())
if __name__ == '__main__':
train_and_save_net()
# test()
evaluate()
Well, I seem to have figured out where the problem is: I changed the learning rate from 1e-3 to 1e-4, and the accuracy reached about 97%...
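In code, the only change was the optimizer's learning rate (the fix described above):
optimizer = optim.SGD(net.parameters(), lr = 1e-4, momentum = 0.9)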
I save the trained model after a certain number of episodes with the special save() function of the DDPG class (the network is saved when the reward reaches zero), but when I restore the model again using saver.restore(), the network gives a reward of approximately -1800. Why is this happening? Maybe I'm doing something wrong? My network:
import tensorflow as tf
import numpy as np
import gym
epsiode_steps = 500
# learning rate for actor
lr_a = 0.001
# learning rate for critic
lr_c = 0.002
gamma = 0.9
alpha = 0.01
memory = 10000
batch_size = 32
render = True
class DDPG(object):
def __init__(self, no_of_actions, no_of_states, a_bound, ):
self.memory = np.zeros((memory, no_of_states * 2 + no_of_actions + 1), dtype=np.float32)
# initialize pointer to point to our experience buffer
self.pointer = 0
self.sess = tf.Session()
self.noise_variance = 3.0
self.no_of_actions, self.no_of_states, self.a_bound = no_of_actions, no_of_states, a_bound,
self.state = tf.placeholder(tf.float32, [None, no_of_states], 's')
self.next_state = tf.placeholder(tf.float32, [None, no_of_states], 's_')
self.reward = tf.placeholder(tf.float32, [None, 1], 'r')
with tf.variable_scope('Actor'):
self.a = self.build_actor_network(self.state, scope='eval', trainable=True)
a_ = self.build_actor_network(self.next_state, scope='target', trainable=False)
with tf.variable_scope('Critic'):
q = self.build_crtic_network(self.state, self.a, scope='eval', trainable=True)
q_ = self.build_crtic_network(self.next_state, a_, scope='target', trainable=False)
self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval')
self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target')
self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval')
self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target')
# update target value
self.soft_replace = [
[tf.assign(at, (1 - alpha) * at + alpha * ae), tf.assign(ct, (1 - alpha) * ct + alpha * ce)]
for at, ae, ct, ce in zip(self.at_params, self.ae_params, self.ct_params, self.ce_params)]
q_target = self.reward + gamma * q_
td_error = tf.losses.mean_squared_error(labels=(self.reward + gamma * q_), predictions=q)
self.ctrain = tf.train.AdamOptimizer(lr_c).minimize(td_error, name="adam-ink", var_list=self.ce_params)
a_loss = - tf.reduce_mean(q)
# train the actor network with adam optimizer for minimizing the loss
self.atrain = tf.train.AdamOptimizer(lr_a).minimize(a_loss, var_list=self.ae_params)
tf.summary.FileWriter("logs2", self.sess.graph)
# initialize all variables
self.sess.run(tf.global_variables_initializer())
self.saver = tf.train.Saver()
self.saver.restore(self.sess, "Pendulum/nn.ckpt")
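# Note (added): the variables initialized just above are immediately overwritten
# by this restore, so the agent always starts from the saved checkpoint.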
def choose_action(self, s):
a = self.sess.run(self.a, {self.state: s[np.newaxis, :]})[0]
a = np.clip(np.random.normal(a, self.noise_variance), -2, 2)
return a
def learn(self):
# soft target replacement
self.sess.run(self.soft_replace)
indices = np.random.choice(memory, size=batch_size)
batch_transition = self.memory[indices, :]
batch_states = batch_transition[:, :self.no_of_states]
batch_actions = batch_transition[:, self.no_of_states: self.no_of_states + self.no_of_actions]
batch_rewards = batch_transition[:, -self.no_of_states - 1: -self.no_of_states]
batch_next_state = batch_transition[:, -self.no_of_states:]
self.sess.run(self.atrain, {self.state: batch_states})
self.sess.run(self.ctrain, {self.state: batch_states, self.a: batch_actions, self.reward: batch_rewards,
self.next_state: batch_next_state})
# we define a function store_transition which stores all the transition information in the buffer
def store_transition(self, s, a, r, s_):
trans = np.hstack((s, a, [r], s_))
index = self.pointer % memory
self.memory[index, :] = trans
self.pointer += 1
if self.pointer > memory:
self.noise_variance *= 0.99995
self.learn()
# we define the function build_actor_network for building our actor network, and afterwards the critic network
def build_actor_network(self, s, scope, trainable):
with tf.variable_scope(scope):
l1 = tf.layers.dense(s, 30, activation=tf.nn.tanh, name='l1', trainable=trainable)
a = tf.layers.dense(l1, self.no_of_actions, activation=tf.nn.tanh, name='a', trainable=trainable)
return tf.multiply(a, self.a_bound, name="scaled_a")
def build_crtic_network(self, s, a, scope, trainable):
with tf.variable_scope(scope):
n_l1 = 30
w1_s = tf.get_variable('w1_s', [self.no_of_states, n_l1], trainable=trainable)
w1_a = tf.get_variable('w1_a', [self.no_of_actions, n_l1], trainable=trainable)
b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
net = tf.nn.tanh(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
q = tf.layers.dense(net, 1, trainable=trainable)
return q
def save(self):
self.saver.save(self.sess, "Pendulum/nn.ckpt")
env = gym.make("Pendulum-v0")
env = env.unwrapped
env.seed(1)
no_of_states = env.observation_space.shape[0]
no_of_actions = env.action_space.shape[0]
a_bound = env.action_space.high
ddpg = DDPG(no_of_actions, no_of_states, a_bound)
total_reward = []
no_of_episodes = 300
# for each episodes
for i in range(no_of_episodes):
# initialize the environment
s = env.reset()
# episodic reward
ep_reward = 0
for j in range(epsiode_steps):
env.render()
# select an action by adding exploration noise
a = ddpg.choose_action(s)
# perform the action and move to the next state s_
s_, r, done, info = env.step(a)
# store the transition in our experience buffer
# sample some minibatch of experience and train the network
ddpg.store_transition(s, a, r, s_)
# update current state as next state
s = s_
# add episodic rewards
ep_reward += r
if int(ep_reward) == 0 and i > 200:
ddpg.save()
print("save")
quit()
if j == epsiode_steps - 1:
total_reward.append(ep_reward)
print('Episode:', i, ' Reward: %i' % int(ep_reward))
break
After finishing Coursera's Practical RL course on A3C, I'm trying to implement my own A3C agent using TensorFlow 2. To start, I'm training it on the CartPole environment, but I can't get good results. So far I've launched several trainings with the following code, changing the entropy coefficient to see its impact (the results are shown below). Does the problem come from my implementation, or is it more of a fine-tuning issue?
class A3C:
def __init__(self, state_dim, n_actions, optimizer=tf.keras.optimizers.Adam(1e-3)):
self.state_input = Input(shape=state_dim)
self.x = Dense(256, activation='relu')(self.state_input)
self.head_v = Dense(1, activation='linear')(self.x)
self.head_p = Dense(n_actions, activation='linear')(self.x)
self.network = tf.keras.Model(inputs=[self.state_input], outputs=[self.head_v, self.head_p])
self.optimizer = optimizer
def forward(self, state):
return self.network(state)
def sample(self, logits):
policy = np.exp(logits.numpy()) / np.sum(np.exp(logits.numpy()), axis=-1, keepdims=True)
return np.array([np.random.choice(len(p), p=p) for p in policy])
def evaluate(agent, env, n_games=1):
    """Plays a game from start till done; returns per-game rewards."""
game_rewards = []
for _ in range(n_games):
state = env.reset()
total_reward = 0
while True:
action = agent.sample(agent.forward(np.array([state]))[1])[0]
state, reward, done, info = env.step(action)
total_reward += reward
if done: break
game_rewards.append(total_reward)
return game_rewards
class EnvBatch:
def __init__(self, n_envs = 10):
self.envs = [gym.make(env_id) for _ in range(n_envs)]
def reset(self):
return np.array([env.reset() for env in self.envs])
def step(self, actions):
results = [env.step(a) for env, a in zip(self.envs, actions)]
new_obs, rewards, done, infos = map(np.array, zip(*results))
for i in range(len(self.envs)):
if done[i]:
new_obs[i] = self.envs[i].reset()
return new_obs, rewards, done, infos
env_id = "CartPole-v0"
env = gym.make(env_id)
state_dim = env.observation_space.shape
n_actions = env.action_space.n
agent = A3C(state_dim, n_actions)
env_batch = EnvBatch(10)
batch_states = env_batch.reset()
gamma=0.99
rewards_history = []
entropy_history = []
for i in trange(200000):
with tf.GradientTape() as t:
batch_values, batch_logits = agent.forward(batch_states)
batch_actions = agent.sample(batch_logits)
batch_next_states, batch_rewards, batch_dones, _ = env_batch.step(batch_actions)
batch_next_values, batch_next_logits = agent.forward(batch_next_states)
batch_next_values *= (1 - batch_dones)
probs = tf.nn.softmax(batch_logits)
logprobs = tf.nn.log_softmax(batch_logits)
logp_actions = tf.reduce_sum(logprobs * tf.one_hot(batch_actions, n_actions), axis=-1)
advantage = batch_rewards + gamma*batch_next_values - batch_values
entropy = -tf.reduce_sum(probs * logprobs, 1, name="entropy")
actor_loss = - tf.reduce_mean(logp_actions * tf.stop_gradient(advantage)) - 0.005 * tf.reduce_mean(entropy)
target_state_values = batch_rewards + gamma*batch_next_values
critic_loss = tf.reduce_mean((batch_values - tf.stop_gradient(target_state_values))**2 )
loss = actor_loss + critic_loss
var_list = agent.network.trainable_variables
grads = t.gradient(loss,var_list)
agent.optimizer.apply_gradients(zip(grads, var_list))
batch_states = batch_next_states
entropy_history.append(np.mean(entropy))
if i % 500 == 0:
if i % 2500 == 0:
rewards_history.append(np.mean(evaluate(agent, env, n_games=3)))
clear_output(True)
plt.figure(figsize=[8, 4])
plt.subplot(1, 2, 1)
plt.plot(rewards_history, label='rewards')
plt.title("Session rewards")
plt.grid()
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(entropy_history, label='entropy')
plt.title("Policy entropy")
plt.grid()
plt.legend()
plt.show()
Beta = 0.005 - Training 1
Beta = 0.005 - Training 2
Beta = 0.005 - Training 3
Beta = 0.05 - Training 1
Beta = 0.05 - Training 2
Beta = 0.05 - Training 3
I've looked through your code, and it doesn't look like there's any problem with the algorithm. It seems to me that the hyperparameters were chosen incorrectly; try different hyperparameter sets. If it still doesn't work properly, refer to the repository.
The critic loss is wrong. You should first compute the expected returns: predict the value of the next state, then iterate backwards over the rewards with the Bellman equation.
Here is an example:
def getExpectedReturns(self, states, next_states, done, rewards, standarize=True):
# Get next value
if done[-1] == 1.0:
arr_idx = np.zeros((rewards.shape[0], 1))
arr_idx[-1] = 1.0
values_rewards_sum_one_hot = tf.convert_to_tensor(arr_idx, dtype=tf.float32)
next_value = tf.reduce_sum(rewards * values_rewards_sum_one_hot, axis=0)
else:
values_rewards_sum = self.model_a2c(next_states)[-1]
arr_idx = np.zeros((rewards.shape[0], 1))
arr_idx[0] = 1.0
values_rewards_sum_one_hot = tf.convert_to_tensor(arr_idx, dtype=tf.float32)
next_value = tf.reduce_sum(values_rewards_sum * values_rewards_sum_one_hot, axis=0)
# Iterate over rewards
list_true_values = []
for i in reversed(range(0, len(rewards))):
if done[i]==0.0:
next_value = rewards[i] + next_value * self.gamma
else:
next_value = rewards[i]
list_true_values.append(next_value)
list_true_values.reverse()
list_true_values = tf.convert_to_tensor(list_true_values, dtype=tf.float32)
if standarize:
list_true_values = ((list_true_values - tf.math.reduce_mean(list_true_values)) /
(tf.math.reduce_std(list_true_values) + tf.constant(1e-12)))
return list_true_values
The expected returns are then used to form the advantage inside the training step:
with tf.GradientTape() as tape:
# Advantage
returns = self.getExpectedReturns(states, next_states, done, rewards, standarize=False)
actions_probs_logits, values = self.model_a2c(states)
advantage = returns - values
advantage = tf.squeeze(advantage)
# Actions probs
actions_probs_softmax = tf.nn.softmax(actions_probs_logits)
actions_log_probs_softmax = tf.nn.log_softmax(actions_probs_logits)
actions_one_hot = tf.one_hot(actions, self.num_actions, 1.0, 0.0)
actions_log_probs = tf.reduce_sum(actions_log_probs_softmax * actions_one_hot, axis=-1)
# Entropy
entropy = self.entropy_coef * tf.reduce_mean(actions_probs_softmax * actions_log_probs_softmax, axis=1)
# Losses
actor_loss = -tf.reduce_mean(actions_log_probs * tf.stop_gradient(advantage), axis=0)
critic_loss = self.critic_coef * tf.reduce_mean(tf.math.pow(advantage, 2), axis=0)
total_loss = actor_loss + critic_loss - entropy
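For intuition, here is the backward Bellman recursion from getExpectedReturns on a tiny hand-made episode (a plain-Python sketch added for illustration, with made-up numbers):
gamma = 0.99
rewards = [1.0, 1.0, 1.0]
done = [0.0, 0.0, 1.0]  # the episode terminates at the last step
next_value = 0.0        # no bootstrap needed since the episode is done
returns = []
for i in reversed(range(len(rewards))):
    next_value = rewards[i] if done[i] else rewards[i] + gamma * next_value
    returns.append(next_value)
returns.reverse()
print(returns)  # [2.9701, 1.99, 1.0]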
I have two Python files, lstm.py and test.py, in the same folder. I'm executing test.py in the Python interactive shell. It shows:
AttributeError: LstmNetwork instance has no attribute 'x_list_add'
lstm.py
import random
import numpy as np
import math
def sigmoid(x):
return 1. / (1 + np.exp(-x))
def sigmoid_derivative(values):
return values*(1-values)
def tanh_derivative(values):
return 1. - values ** 2
# creates uniform random array w/ values in [a,b) and shape args
def rand_arr(a, b, *args):
np.random.seed(0)
return np.random.rand(*args) * (b - a) + a
class LstmParam:
def __init__(self, mem_cell_ct, x_dim):
self.mem_cell_ct = mem_cell_ct
self.x_dim = x_dim
concat_len = x_dim + mem_cell_ct
# weight matrices
self.wg = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
self.wi = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
self.wf = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
self.wo = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
# bias terms
self.bg = rand_arr(-0.1, 0.1, mem_cell_ct)
self.bi = rand_arr(-0.1, 0.1, mem_cell_ct)
self.bf = rand_arr(-0.1, 0.1, mem_cell_ct)
self.bo = rand_arr(-0.1, 0.1, mem_cell_ct)
# diffs (derivative of loss function w.r.t. all parameters)
self.wg_diff = np.zeros((mem_cell_ct, concat_len))
self.wi_diff = np.zeros((mem_cell_ct, concat_len))
self.wf_diff = np.zeros((mem_cell_ct, concat_len))
self.wo_diff = np.zeros((mem_cell_ct, concat_len))
self.bg_diff = np.zeros(mem_cell_ct)
self.bi_diff = np.zeros(mem_cell_ct)
self.bf_diff = np.zeros(mem_cell_ct)
self.bo_diff = np.zeros(mem_cell_ct)
def apply_diff(self, lr = 1):
self.wg -= lr * self.wg_diff
self.wi -= lr * self.wi_diff
self.wf -= lr * self.wf_diff
self.wo -= lr * self.wo_diff
self.bg -= lr * self.bg_diff
self.bi -= lr * self.bi_diff
self.bf -= lr * self.bf_diff
self.bo -= lr * self.bo_diff
# reset diffs to zero
self.wg_diff = np.zeros_like(self.wg)
self.wi_diff = np.zeros_like(self.wi)
self.wf_diff = np.zeros_like(self.wf)
self.wo_diff = np.zeros_like(self.wo)
self.bg_diff = np.zeros_like(self.bg)
self.bi_diff = np.zeros_like(self.bi)
self.bf_diff = np.zeros_like(self.bf)
self.bo_diff = np.zeros_like(self.bo)
class LstmState:
def __init__(self, mem_cell_ct, x_dim):
self.g = np.zeros(mem_cell_ct)
self.i = np.zeros(mem_cell_ct)
self.f = np.zeros(mem_cell_ct)
self.o = np.zeros(mem_cell_ct)
self.s = np.zeros(mem_cell_ct)
self.h = np.zeros(mem_cell_ct)
self.bottom_diff_h = np.zeros_like(self.h)
self.bottom_diff_s = np.zeros_like(self.s)
class LstmNode:
def __init__(self, lstm_param, lstm_state):
# store reference to parameters and to activations
self.state = lstm_state
self.param = lstm_param
# non-recurrent input concatenated with recurrent input
self.xc = None
def bottom_data_is(self, x, s_prev = None, h_prev = None):
# if this is the first lstm node in the network
if s_prev is None: s_prev = np.zeros_like(self.state.s)
if h_prev is None: h_prev = np.zeros_like(self.state.h)
# save data for use in backprop
self.s_prev = s_prev
self.h_prev = h_prev
# concatenate x(t) and h(t-1)
xc = np.hstack((x, h_prev))
self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
self.state.s = self.state.g * self.state.i + s_prev * self.state.f
self.state.h = self.state.s * self.state.o
self.xc = xc
def top_diff_is(self, top_diff_h, top_diff_s):
# notice that top_diff_s is carried along the constant error carousel
ds = self.state.o * top_diff_h + top_diff_s
do = self.state.s * top_diff_h
di = self.state.g * ds
dg = self.state.i * ds
df = self.s_prev * ds
# diffs w.r.t. vector inside sigma / tanh function
di_input = sigmoid_derivative(self.state.i) * di
df_input = sigmoid_derivative(self.state.f) * df
do_input = sigmoid_derivative(self.state.o) * do
dg_input = tanh_derivative(self.state.g) * dg
# diffs w.r.t. inputs
self.param.wi_diff += np.outer(di_input, self.xc)
self.param.wf_diff += np.outer(df_input, self.xc)
self.param.wo_diff += np.outer(do_input, self.xc)
self.param.wg_diff += np.outer(dg_input, self.xc)
self.param.bi_diff += di_input
self.param.bf_diff += df_input
self.param.bo_diff += do_input
self.param.bg_diff += dg_input
# compute bottom diff
dxc = np.zeros_like(self.xc)
dxc += np.dot(self.param.wi.T, di_input)
dxc += np.dot(self.param.wf.T, df_input)
dxc += np.dot(self.param.wo.T, do_input)
dxc += np.dot(self.param.wg.T, dg_input)
# save bottom diffs
self.state.bottom_diff_s = ds * self.state.f
self.state.bottom_diff_h = dxc[self.param.x_dim:]
class LstmNetwork():
def __init__(self, lstm_param):
self.lstm_param = lstm_param
self.lstm_node_list = []
# input sequence
self.x_list = []
def y_list_is(self, y_list, loss_layer):
"""
Updates diffs by setting target sequence
with corresponding loss layer.
Will *NOT* update parameters. To update parameters,
call self.lstm_param.apply_diff()
"""
assert len(y_list) == len(self.x_list)
idx = len(self.x_list) - 1
# first node only gets diffs from label ...
loss = loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
# here s is not affecting loss due to h(t+1), hence we set equal to zero
diff_s = np.zeros(self.lstm_param.mem_cell_ct)
self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
idx -= 1
### ... following nodes also get diffs from next nodes, hence we add diffs to diff_h
### we also propagate error along constant error carousel using diff_s
while idx >= 0:
loss += loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
diff_h += self.lstm_node_list[idx + 1].state.bottom_diff_h
diff_s = self.lstm_node_list[idx + 1].state.bottom_diff_s
self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
idx -= 1
return loss
def x_list_clear(self):
self.x_list = []
def x_list_add(self, x):
self.x_list.append(x)
if len(self.x_list) > len(self.lstm_node_list):
# need to add new lstm node, create new state mem
lstm_state = LstmState(self.lstm_param.mem_cell_ct, self.lstm_param.x_dim)
self.lstm_node_list.append(LstmNode(self.lstm_param, lstm_state))
# get index of most recent x input
idx = len(self.x_list) - 1
if idx == 0:
# no recurrent inputs yet
self.lstm_node_list[idx].bottom_data_is(x)
else:
s_prev = self.lstm_node_list[idx - 1].state.s
h_prev = self.lstm_node_list[idx - 1].state.h
self.lstm_node_list[idx].bottom_data_is(x, s_prev, h_prev)
test.py
import numpy as np
import sys
sys.path.append("/home/vj/Downloads/tfdeep/core_lstm/lstm/")
from lstm import LstmParam, LstmNetwork
class ToyLossLayer:
"""
Computes square loss with first element of hidden layer array."""
@classmethod
def loss(self, pred, label):
return (pred[0] - label) ** 2
@classmethod
def bottom_diff(self, pred, label):
diff = np.zeros_like(pred)
diff[0] = 2 * (pred[0] - label)
return diff
def example_0():
# learns to repeat simple sequence from random inputs
np.random.seed(0)
# parameters for input data dimension and lstm cell count
mem_cell_ct = 100
x_dim = 50
lstm_param = LstmParam(mem_cell_ct, x_dim)
lstm_net = LstmNetwork(lstm_param)
y_list = [-0.5, 0.2, 0.1, -0.5]
input_val_arr = [np.random.random(x_dim) for _ in y_list]
for cur_iter in range(100):
print("iter", "%2s" % str(cur_iter),": ")
for ind in range(len(y_list)):
lstm_net.x_list_add(input_val_arr[ind])
#print("y_pred = [" + ", ".join(["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(y_list))]) +"]," end=":")
loss = lstm_net.y_list_is(y_list, ToyLossLayer)
print("loss:", "%.3e" % loss)
lstm_param.apply_diff(lr=0.1)
lstm_net.x_list_clear()
if __name__ == "__main__":
example_0()
While executing this test.py file I am getting the following error:
Traceback (most recent call last):
File "", line 2, in
File "", line 14, in example_0
AttributeError: LstmNetwork instance has no attribute 'x_list_add'
I am struggling to get the output. I did not find any post related to my problem.
tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have a model that is already trained. I load it and call two functions:
def main(argv=None):
model = NVDM(args)
sess_saver = tf.train.Saver()
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
loaded = load_for_similar(sess, sess_saver) #my function
wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder) and normalizes them. It also swaps the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
vocab_file = '/path/to/vocab.pkl'
t1 = loader_object(vocab_file)
v1 = t1.get_vocab()
v1_rev = {k:v for v, k in v1.iteritems()}
decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias into a new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
best = argsort(sess, dists, reverse=True)
dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
idx = vocab[word_]
similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
words = tf.gather(vocab_inverse.keys(), indexes[:topN])
return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
""" Neural Variational Document Model -- BOW VAE.
"""
def __init__(self,
vocab_size=15000, #was 2000
n_hidden=500,
n_topic=50,
n_sample=1,
learning_rate=1e-5,
batch_size=100, #was 64
non_linearity=tf.nn.tanh):
self.vocab_size = vocab_size
self.n_hidden = n_hidden
self.n_topic = n_topic
self.n_sample = n_sample
self.non_linearity = non_linearity
self.learning_rate = learning_rate/batch_size #CA
self.batch_size = batch_size
self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
# encoder
with tf.variable_scope('encoder'):
self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
self.logsigm = utils.linear(self.enc_vec,
self.n_topic,
bias_start_zero=True,
matrix_start_zero=False,
scope='logsigm')
self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
self.kld = self.mask*self.kld # mask paddings
with tf.variable_scope('decoder'):
if self.n_sample ==1: # single sample
p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
eps = tf.random_normal((p1, self.n_topic), 0, 1)
doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
# multiple samples
else:
eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
eps_list = tf.split(0, self.n_sample, eps)
recons_loss_list = []
for i in xrange(self.n_sample):
if i > 0: tf.get_variable_scope().reuse_variables()
curr_eps = eps_list[i]
doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
self.objective = self.recons_loss + self.kld
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
fullvars = tf.trainable_variables()
enc_vars = utils.variable_parser(fullvars, 'encoder')
dec_vars = utils.variable_parser(fullvars, 'decoder')
enc_grads = tf.gradients(self.objective, enc_vars)
dec_grads = tf.gradients(self.objective, dec_vars)
self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
available = set(np.arange(n_samples)) - used_ints #
if len(available) < batch_size:
indices = np.array(list(available))
else:
indices = np.random.choice(tuple(available), batch_size, replace=False)
used = used_ints
mb = itemgetter(*indices)(it1)
batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
batch_flattened = np.ravel(batch_xs)
index_positions = np.where(batch_flattened > 0)[0]
return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
n_samples = Instance1.num_reviews()
train_batches = list(data_set) #this is an itertools.chain object
it1_train = list(itertools.chain(*train_batches)) # length is 732,356: all the reviews
if len(it1_train) % batch_size != 0:
total_batch = int(len(it1_train)/batch_size) + 1
else:
total_batch = int(len(it1_train)/batch_size)
trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
#Training
train_time = time.time()
for epoch in range(training_epochs):
for switch in xrange(0, 2):
if switch == 0:
optim = model.optim_dec
print_mode = 'updating decoder'
else:
optim = model.optim_enc
print_mode = 'updating encoder'
with open(trainfilesave, 'w') as f:
for i in xrange(alternate_epochs):
loss_sum = 0.0
kld_sum = 0.0
word_count = 0
used_indices = set()
for idx_batch in range(total_batch): #train_batches:
mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
print('minibatch', idx_batch)
used_indices.update(mb[2])
num_mb = np.ones(mb[0][0].shape[0])
input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
_, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
output_size,
no_bias=False,
bias_start_zero=False,
matrix_start_zero=False,
scope=None):
"""Define a linear connection."""
with tf.variable_scope(scope or 'Linear'):
if matrix_start_zero:
matrix_initializer = tf.constant_initializer(0)
else:
matrix_initializer = None
if bias_start_zero:
bias_initializer = tf.constant_initializer(0)
else:
bias_initializer = None
input_size = inputs.get_shape()[1].value
matrix = tf.get_variable('Matrix', [input_size, output_size],
initializer=matrix_initializer)
bias_term = tf.get_variable('Bias', [output_size],
initializer=bias_initializer)
output = tf.matmul(inputs, matrix)
if not no_bias:
output = output + bias_term
return output
def mlp(inputs,
mlp_hidden=[],
mlp_nonlinearity=tf.nn.tanh,
scope=None):
"""Define an MLP."""
with tf.variable_scope(scope or 'Linear'):
mlp_layer = len(mlp_hidden)
res = inputs
for l in xrange(mlp_layer):
res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
return res