Related
I have implemented PPO for Cartpole-VO environment. However, it does not converge in certain iterations of the game. Sometimes it gets stuck in local optima. I have implemented the algorithm using the TD-0 advantage i.e.
A(s_t) = R(t+1) + \gamma V(S_{t+1}) - V(S_t)
Here is my code:
def running_average(x, n):
N = n
kernel = np.ones(N)
conv_len = x.shape[0]-N
y = np.zeros(conv_len)
for i in range(conv_len):
y[i] = kernel # x[i:i+N] # matrix multiplication operator: np.mul
y[i] /= N
return y
class ActorNetwork(nn.Module):
def __init__(self, state_dim, n_actions, learning_rate=0.0003, epsilon_clipping=0.3, update_epochs=10):
super().__init__()
self.n_actions = n_actions
self.model = nn.Sequential(
nn.Linear(state_dim, 64),
nn.ReLU(),
nn.Linear(64, 32),
nn.ReLU(),
nn.Linear(32, n_actions),
nn.Softmax(dim=-1)
).float()
self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
self.epsilon_clipping = epsilon_clipping
self.update_epochs = update_epochs
def forward(self, X):
return self.model(X)
def predict(self, state):
if state.ndim < 2:
action_probs = self.model(torch.FloatTensor(state).unsqueeze(0).float())
else:
action_probs = self.model(torch.FloatTensor(state))
return action_probs.squeeze(0).data.numpy()
def update(self, states, actions, deltas, old_prob):
batch_size = len(states)
state_batch = torch.Tensor(states)
action_batch = torch.Tensor(actions)
delta_batch = torch.Tensor(deltas)
old_prob_batch = torch.Tensor(old_prob)
for k in range(self.update_epochs):
pred_batch = self.model(state_batch)
prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
ratio = torch.exp(torch.log(prob_batch) - torch.log(old_prob_batch))
clipped = torch.clamp(ratio, 1 - self.epsilon_clipping, 1 + self.epsilon_clipping) * delta_batch
loss_r = -torch.min(ratio*delta_batch, clipped)
loss = torch.mean(loss_r)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
class CriticNetwork(nn.Module):
def __init__(self, state_dim, learning_rate=0.001):
super().__init__()
self.model = nn.Sequential(
nn.Linear(state_dim, 64),
nn.ReLU(),
nn.Linear(64, 32),
nn.ReLU(),
nn.Linear(32, 1),
).float()
self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
def forward(self, X):
return self.model(X)
def predict(self, state):
if state.ndim < 2:
values = self.model(torch.FloatTensor(state).unsqueeze(0).float())
else:
values = self.model(torch.FloatTensor(state))
return values.data.numpy()
def update(self, states, targets):
state_batch = torch.Tensor(states)
target_batch = torch.Tensor(targets)
pred_batch = self.model(state_batch)
loss = torch.nn.functional.mse_loss(pred_batch, target_batch.unsqueeze(1))
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
def train_ppo_agent(env, episode_length, max_episodes, gamma, visualize_step, learning_rate_actor=0.0003, learning_rate_critic=0.001, epsilon_clipping=0.2, actor_update_epochs=10):
model_actor = ActorNetwork(env.observation_space.shape[0], env.action_space.n, learning_rate=learning_rate_actor,
epsilon_clipping=epsilon_clipping, update_epochs=actor_update_epochs)
model_critic = CriticNetwork(env.observation_space.shape[0], learning_rate=learning_rate_critic)
EPISODE_LENGTH = episode_length
MAX_EPISODES = max_episodes
GAMMA = gamma
VISUALIZE_STEP = max(1, visualize_step)
score = []
for episode in range(MAX_EPISODES):
curr_state = env.reset()
done = False
all_episode_t = []
score_episode = 0
for t in range(EPISODE_LENGTH):
act_prob = model_actor.predict(curr_state)
action = np.random.choice(np.array(list(range(env.action_space.n))), p=act_prob)
value = model_critic.predict(curr_state)
prev_state = curr_state
curr_state, reward, done, info = env.step(action)
score_episode += reward
e_t = {'state': prev_state, 'action':action, 'action_prob':act_prob[action],'reward': reward, 'value': value}
all_episode_t.append(e_t)
if done:
break
score.append(score_episode)
episode_values = [all_episode_t[t]['value'] for t in range(len(all_episode_t))]
next_state_estimates = [episode_values[i].item() for i in range(1, len(episode_values))]
next_state_estimates.append(0)
boostrap_estimate = []
for t in range(len(all_episode_t)):
G = all_episode_t[t]['reward'] + GAMMA * next_state_estimates[t]
boostrap_estimate.append(G)
episode_target = np.array(boostrap_estimate)
episode_values = np.array(episode_values)
# compute the advantage for each state in the episode: R_{t+1} + \gamma * V(S_{t+1}) - V_{t}
adv_batch = episode_target-episode_values
state_batch = np.array([all_episode_t[t]['state'] for t in range(len(all_episode_t))])
action_batch = np.array([all_episode_t[t]['action'] for t in range(len(all_episode_t))])
old_actor_prob = np.array([all_episode_t[t]['action_prob'] for t in range(len(all_episode_t))])
model_actor.update(state_batch, action_batch, adv_batch, old_actor_prob)
model_critic.update(state_batch, episode_target)
# print the status after every VISUALIZE_STEP episodes
if episode % VISUALIZE_STEP == 0 and episode > 0:
print('Episode {}\tAverage Score: {:.2f}'.format(episode, np.mean(score[-VISUALIZE_STEP:-1])))
# domain knowledge applied to stop training: if the average score across last 100 episodes is greater than 195, game is solved
if np.mean(score[-100:-1]) > 195:
break
# Training plot: Episodic reward over Training Episodes
score = np.array(score)
avg_score = running_average(score, visualize_step)
plt.figure(figsize=(15, 7))
plt.ylabel("Episodic Reward", fontsize=12)
plt.xlabel("Training Episodes", fontsize=12)
plt.plot(score, color='gray', linewidth=1)
plt.plot(avg_score, color='blue', linewidth=3)
plt.scatter(np.arange(score.shape[0]), score, color='green', linewidth=0.3)
plt.savefig("temp/cartpole_ppo_training_plot.pdf")
# return the trained models
return model_actor, model_critic
def main():
env = gym.make('CartPole-v0')
episode_length = 300
n_episodes = 5000
gamma = 0.99
vis_steps = 100
learning_rate_actor = 0.0003
actor_update_epochs = 10
epsilon_clipping = 0.2
learning_rate_critic = 0.001
# train the PPO agent
model_actor, model_critic = train_ppo_agent(env, episode_length, n_episodes, gamma, vis_steps,
learning_rate_actor=learning_rate_actor,
learning_rate_critic=learning_rate_critic,
epsilon_clipping=epsilon_clipping,
actor_update_epochs=actor_update_epochs)
Am I missing something, or is this kind of behaviour expected if one uses simple TD-0 advantages for PPO, given the nature of the Cartpole environment?
If you remove the "-" (the negative marker) in line:
loss_r = -torch.min(ratio*delta_batch, clipped)
The score will then start to steadily increase over time. Before this fix you had negative loss which would increase over time. This is not how loss should work for neural networks. As gradient descent works to minimize the loss. So you want a positive loss which can be minimized by optimizer.
Hope my answer is somewhat clear, and sorry I cannot go into deeper detail.
My run can be seen in the attached image:
I am trying to use tensorflow addon's multioptimizer for discriminative layer training, different learning rates for different layers, but it does not work with the callback ReduceLROnPlateau.
from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(patience=5, min_delta=1e-4, min_lr=1e-7, verbose=0)
with tpu_strategy.scope():
roberta_model = create_model(512)
optimizers = [
AdamWeightDecay(learning_rate=0.00001, weight_decay_rate=0.00001),
AdamWeightDecay(learning_rate=0.0001, weight_decay_rate=0.0001)
]
# specifying the optimizers and layers in which it will operate
optimizers_and_layers = [
(optimizers[0], roberta_model.layers[:3]),
(optimizers[1], roberta_model.layers[3:])
]
# Using Multi Optimizer from Tensorflow Addons
opt = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
roberta_model.compile(optimizer=opt,
loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1), metrics=["accuracy"])
history=roberta_model.fit(train, epochs=50, validation_data=val, callbacks=[reduce_lr])
At the end of the first epoch it produces this error:
AttributeError: 'MultiOptimizer' object has no attribute 'lr'
It works fine without the ReduceLROnPlateau callback.
I tried several things to solve this where the last attempt was to modify the callback - writing my own reduce learning rate on plateau callback. But this is far beyond my coding skills. I have commented where i made a couple of changes to the orginal callback.
I tried like this:
class My_ReduceLROnPlateau(tf.keras.callbacks.Callback):
def __init__(self,
monitor='val_loss',
factor=0.1,
patience=10,
verbose=0,
mode='auto',
min_delta=1e-4,
cooldown=0,
min_lr=0,
**kwargs):
super(My_ReduceLROnPlateau, self).__init__()
self.monitor = monitor
if factor >= 1.0:
raise ValueError(
f'ReduceLROnPlateau does not support a factor >= 1.0. Got {factor}')
if 'epsilon' in kwargs:
min_delta = kwargs.pop('epsilon')
logging.warning('`epsilon` argument is deprecated and '
'will be removed, use `min_delta` instead.')
self.factor = factor
self.min_lr = min_lr
self.min_delta = min_delta
self.patience = patience
self.verbose = verbose
self.cooldown = cooldown
self.cooldown_counter = 0 # Cooldown counter.
self.wait = 0
self.best = 0
self.mode = mode
self.monitor_op = None
self._reset()
def _reset(self):
"""Resets wait counter and cooldown counter.
"""
if self.mode not in ['auto', 'min', 'max']:
logging.warning('Learning rate reduction mode %s is unknown, '
'fallback to auto mode.', self.mode)
self.mode = 'auto'
if (self.mode == 'min' or
(self.mode == 'auto' and 'acc' not in self.monitor)):
self.monitor_op = lambda a, b: np.less(a, b - self.min_delta)
self.best = np.Inf
else:
self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta)
self.best = -np.Inf
self.cooldown_counter = 0
self.wait = 0
def on_train_begin(self, logs=None):
self._reset()
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
logs['lr'] = backend.get_value(self.model.optimizer[1].lr)
current = logs.get(self.monitor)
if current is None:
logging.warning('Learning rate reduction is conditioned on metric `%s` '
'which is not available. Available metrics are: %s',
self.monitor, ','.join(list(logs.keys())))
else:
if self.in_cooldown():
self.cooldown_counter -= 1
self.wait = 0
if self.monitor_op(current, self.best):
self.best = current
self.wait = 0
elif not self.in_cooldown():
self.wait += 1
if self.wait >= self.patience:
# Here below i tried to subscript the self.model.optimizer
#, guessing that each pointed to one of the optimzers.
# And using the same code as in the original ReduceLROnPlateau to
# update the optimizers.
old_lr1 = backend.get_value(self.model.optimizer[1].lr)
old_lr0 = backend.get_value(self.model.optimizer[0].lr)
if old_lr1 > np.float32(self.min_lr):
new_lr1 = old_lr1 * self.factor
new_lr1 = max(new_lr1, self.min_lr)
backend.set_value(self.model.optimizer[1].lr, new_lr1)
new_lr0 = old_lr0 * self.factor
new_lr0 = max(new_lr0, self.min_lr)
backend.set_value(self.model.optimizer[0].lr, new_lr0)
if self.verbose > 0:
io_utils.print_msg(
f'\nEpoch {epoch +1}: '
f'ReduceLROnPlateau reducing learning rate to {new_lr0} and {new_lr1}.')
self.cooldown_counter = self.cooldown
self.wait = 0
def in_cooldown(self):
return self.cooldown_counter > 0
Then i created the callback
reduce_lr = My_ReduceLROnPlateau(patience=5, min_delta=1e-4, min_lr=1e-7, verbose=0)
and started to train again. At the end of the first epoch i got the following error.
TypeError: 'MultiOptimizer' object is not subscriptable
i.e. you cant do this self.model.optimizer[1], self.model.optimizer[0].
So my question is how to solve this? I.e using discriminative layer training with ReduceLROnPlateau.
Either via some other method or modify my attempt of creating a new callback class.
Here is a link to the orginal ReduceLROnPlateau callback, i.e. without the few changes i did above in my custom callback.
A solution would maybe be possible using this:
Note: Currently, tfa.optimizers.MultiOptimizer does not support callbacks that modify optimizers. However, you can instantiate optimizer layer pairs with tf.keras.optimizers.schedules.LearningRateSchedule instead of a static learning rate
Looking in the code of tfa.optimizers.MultiOptimizer (in the method create_optimizer_spec, it seems that optimizers can be accessed via
self.model.optimizer.optimizer_specs[0]["optimizer"] and self.model.optimizer.optimizer_specs[1]["optimizer"] to change the learning rate (which is why self.model.optimizer[1] raises an error).
Then your custom callback seems to work.
I have a big issue with memory. I am developing a big application with GUI for testing and optimizing neural networks. The main program is showing the GUI, but training is done in thread. In my app I need to train many models with different parameters one after one. To do this I need to create a model for each attempt. When I train one I want to delete it and train new one, but I cannot delete old model. I am trying to do something like this:
del model
torch.cuda.empty_cache()
but GPU memory doesn't change,
then i tried to do this:
model.cpu()
del model
When I move model to CPU, GPU memory is freed but CPU memory increase.
In each attempt of training, memory is increasing all the time. Only when I close my app and run it again the all memory is freed.
Is there a way to delete model permanently from GPU or CPU?
Edit:
Code:
Thread, where the procces of training take pleace:
class uczeniegridsearcch(QObject):
endofoneloop = pyqtSignal()
endofonesample = pyqtSignal()
finished = pyqtSignal()
def __init__(self, train_loader, test_loader, epoch, optimizer, lenoftd, lossfun, numberofsamples, optimparams, listoflabels, model_name, num_of_class, pret):
super(uczeniegridsearcch, self).__init__()
self.train_loaderup = train_loader
self.test_loaderup = test_loader
self.epochup = epoch
self.optimizername = optimizer
self.lenofdt = lenoftd
self.lossfun = lossfun
self.numberofsamples = numberofsamples
self.acc = 0
self.train_loss = 0
self.sendloss = 0
self.optimparams = optimparams
self.listoflabels = listoflabels
self.sel_Net = model_name
self.num_of_class = num_of_class
self.sel_Pret = pret
self.modelforsend = []
def setuptrainmodel(self):
if self.sel_Net == "AlexNet":
model = models.alexnet(pretrained=self.sel_Pret)
model.classifier[6] = torch.nn.Linear(4096, self.num_of_class)
elif self.sel_Net == "ResNet50":
model = models.resnet50(pretrained=self.sel_Pret)
model.fc = torch.nn.Linear(model.fc.in_features, self.num_of_class)
elif self.sel_Net == "VGG13":
model = models.vgg13(pretrained=self.sel_Pret)
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, self.num_of_class)
elif self.sel_Net == "DenseNet201":
model = models.densenet201(pretrained=self.sel_Pret)
model.classifier = torch.nn.Linear(model.classifier.in_features, self.num_of_class)
elif self.sel_Net == "MNASnet":
model = models.mnasnet1_0(pretrained=self.sel_Pret)
model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, self.num_of_class)
elif self.sel_Net == "ShuffleNet v2":
model = models.shufflenet_v2_x1_0(pretrained=self.sel_Pret)
model.fc = torch.nn.Linear(model.fc.in_features, self.num_of_class)
elif self.sel_Net == "SqueezeNet":
model = models.squeezenet1_0(pretrained=self.sel_Pret)
model.classifier[1] = torch.nn.Conv2d(512, self.num_of_class, kernel_size=(1, 1), stride=(1, 1))
model.num_classes = self.num_of_class
elif self.sel_Net == "GoogleNet":
model = models.googlenet(pretrained=self.sel_Pret)
model.fc = torch.nn.Linear(model.fc.in_features, self.num_of_class)
return model
def train(self):
for x in range(self.numberofsamples):
torch.cuda.empty_cache()
modelup = self.setuptrainmodel()
device = torch.device('cuda')
optimizerup = TableWidget.setupotimfun(self, modelup, self.optimizername, self.optimparams[(x, 0)],
self.optimparams[(x, 1)], self.optimparams[(x, 2)],
self.optimparams[(x, 3)],
self.optimparams[(x, 4)], self.optimparams[(x, 5)])
modelup = modelup.to(device)
best_accuracy = 0.0
train_error_count = 0
for epoch in range(self.epochup):
for images, labels in iter(self.train_loaderup):
images = images.to(device)
labels = labels.to(device)
optimizerup.zero_grad()
outputs = modelup(images)
loss = TableWidget.setuplossfun(self, lossfun=self.lossfun, outputs=outputs, labels=labels)
self.train_loss += loss
loss.backward()
optimizerup.step()
train_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))
self.train_loss /= len(self.train_loaderup)
test_error_count = 0.0
for images, labels in iter(self.test_loaderup):
images = images.to(device)
labels = labels.to(device)
outputs = modelup(images)
test_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))
test_accuracy = 1.0 - float(test_error_count) / float(self.lenofdt)
print('%s, %d,%d: %f %f' % ("PrĂ³ba nr:", x+1, epoch, test_accuracy, self.train_loss), "Parametry: ", self.optimparams[x,:])
self.acc = test_accuracy
self.sendloss = self.train_loss.item()
self.endofoneloop.emit()
self.endofonesample.emit()
modelup.cpu()
del modelup,optimizerup,device,test_accuracy,test_error_count,train_error_count,loss,labels,images,outputs
torch.cuda.empty_cache()
self.finished.emit()
How I call thread in main block:
self.qtest = uczeniegridsearcch(self.train_loader,self.test_loader, int(self.InputEpoch.text()),
self.sel_Optim,len(self.test_dataset), self.sel_Loss,
int(self.numberofsamples.text()), self.params, self.listoflabels,
self.sel_Net,len(self.sel_ImgClasses),self.sel_Pret)
self.qtest.endofoneloop.connect(self.inkofprogress)
self.qtest.endofonesample.connect(self.inksamples)
self.qtest.finished.connect(self.prints)
testtret = threading.Thread(target=self.qtest.train)
testtret.start()
Assuming that the model creation code is run iteratively inside a loop,I suggest the following
Put code for model creation, training,evaluation and model deletion code inside a separate function and call that function from the loop body.
Call gc.collect() after the function call
The rational for first point is that the model creation, deletion and cache clearing would happen in a separate stack and it would force the GPU memory clearance when the method returns.
I use a very custom LSTM-cell inspired by http://mlexplained.com/2019/02/15/building-an-lstm-from-scratch-in-pytorch-lstms-in-depth-part-1/.
I use it to look at intermediate gating values. My question is, how would I expand this class to have an option for adding more layers and for adding bidirectionality? Should it be wrapped in a new class or added in the present one?
class Dim(IntEnum):
batch = 0
seq = 1
class simpleLSTM(nn.Module):
def __init__(self, input_sz: int, hidden_sz: int):
super().__init__()
self.input_size = input_sz
self.hidden_size = hidden_sz
# input gate
self.W_ii = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hi = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_i = Parameter(torch.Tensor(hidden_sz))
# forget gate
self.W_if = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hf = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_f = Parameter(torch.Tensor(hidden_sz))
# ???
self.W_ig = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hg = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_g = Parameter(torch.Tensor(hidden_sz))
# output gate
self.W_io = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_ho = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_o = Parameter(torch.Tensor(hidden_sz))
self.init_weights()
self.out = nn.Linear(hidden_sz, len(TRG.vocab))
def init_weights(self):
for p in self.parameters():
if p.data.ndimension() >= 2:
nn.init.xavier_uniform_(p.data)
else:
nn.init.zeros_(p.data)
def forward(self, x, init_states=None ):
"""Assumes x is of shape (batch, sequence, feature)"""
seq_sz, bs, = x.size()
hidden_seq = []
prediction = []
if init_states is None:
h_t, c_t = torch.zeros(self.hidden_size).to(x.device), torch.zeros(self.hidden_size).to(x.device)
else:
h_t, c_t = init_states
for t in range(seq_sz): # iterate over the time steps
x_t = x[t, :].float()
#LOOK HERE!!!
i_t = torch.sigmoid(x_t # self.W_ii + h_t # self.W_hi + self.b_i)
f_t = torch.sigmoid(x_t # self.W_if + h_t # self.W_hf + self.b_f)
g_t = torch.tanh(x_t # self.W_ig + h_t # self.W_hg + self.b_g)
o_t = torch.sigmoid(x_t # self.W_io + h_t # self.W_ho + self.b_o)
c_t = f_t * c_t + i_t * g_t
h_t = o_t * torch.tanh(c_t)
hidden_seq.append(h_t.unsqueeze(Dim.batch))
pred_t = self.out(h_t.unsqueeze(Dim.batch))
#pred_t = F.softmax(pred_t)
prediction.append(pred_t)
hidden_seq = torch.cat(hidden_seq, dim=Dim.batch)
prediction = torch.cat(prediction, dim=Dim.batch)
# reshape from shape (sequence, batch, feature) to (batch, sequence, feature)
hidden_seq = hidden_seq.transpose(Dim.batch, Dim.seq).contiguous()
prediction = prediction.transpose(Dim.batch, Dim.seq).contiguous()
return prediction, hidden_seq, (h_t, c_t)
I call it and train using the following as an example.
lstm = simpleLSTM(1, 100)
hidden_size = lstm.hidden_size
optimizer = optim.Adam(lstm.parameters())
h_0, c_0 = (torch.zeros(hidden_size, requires_grad=True),
torch.zeros(hidden_size, requires_grad=True))
grads = []
h_t, c_t = h_0, c_0
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
epoch_loss = 0
for i, batch in enumerate(train):
optimizer.zero_grad()
src, src_len = batch.src
trg = batch.trg
trg = trg.view(-1)
predict, output, hidden_states = lstm(src)
predict = predict.t().unsqueeze(1)
predict= predict.view(-1, predict.shape[-1])
loss = criterion(predict,trg)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
print(epoch_loss)
The easiest would be to create another module (say Bidirectional) and pass any cell you want to it.
Implementation itself is quite easy to do. Notice that I'm using concat operation for joining bi-directional output, you may want to specify other modes like summation etc.
Please read the comments in the code below, you may have to change it appropriately.
import torch
class Bidirectional(torch.nn.Module):
def __init__(self, cell):
super().__init__()
self.cell = cell
def __call__(self, x, init_states=None):
prediction, hidden_seq, (h_t, c_t) = self.cell(x, init_states)
backward_prediction, backward_hidden_seq, (
backward_h_t,
backward_c_t,
# Assuming sequence is first dimension, otherwise change 0 appropriately
# Reverses sequences so the LSTM cell acts on the reversed sequence
) = self.cell(torch.flip(x, (0,)), init_states)
return (
# Assuming you transpose so it has (batch, seq, features) dimensionality
torch.cat((prediction, backward_prediction), 2),
torch.cat((hidden_seq, backward_hidden_seq), 2),
# Assuming it has (batch, features) dimensionality
torch.cat((h_t, backward_ht), 1),
torch.cat((c_t, backward_ct), 1),
)
When it comes to multiple layers you could do something similiar in principle:
import torch
class Multilayer(torch.nn.Module):
def __init__(self, *cells):
super().__init__()
self.cells = torch.nn.ModuleList(cells)
def __call__(self, x, init_states=None):
inputs = x
for cell in self.cells:
prediction, hidden_seq, (h_t, c_t) = cell(inputs, init_states)
inputs = hidden_seq
return prediction, hidden_seq, (h_t, c_t)
Please note you have to pass created cell objects into Multilayer e.g.:
# For three layers of LSTM, each needs features to be set up correctly
multilayer_LSTM = Multilayer(LSTM(), LSTM(), LSTM())
You may also pass classes instead of instances into constructor and create those inside Multilayer (so hidden_size matches automatically), but those ideas should get you started.
tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have model already trained. I load it and call two functions:
def main(argv=None):
model = NVDM(args)
sess_saver = tf.train.Saver()
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
loaded = load_for_similar(sess, sess_saver) #my function
wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
vocab_file = '/path/to/vocab.pkl'
t1 = loader_object(vocab_file)
v1 = t1.get_vocab()
v1_rev = {k:v for v, k in v1.iteritems()}
decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias in to an new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
best = argsort(sess, dists, reverse=True)
dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
idx = vocab[word_]
similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
words = tf.gather(vocab_inverse.keys(), indexes[:topN])
return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
""" Neural Variational Document Model -- BOW VAE.
"""
def __init__(self,
vocab_size=15000, #was 2000
n_hidden=500,
n_topic=50,
n_sample=1,
learning_rate=1e-5,
batch_size=100, #was 64
non_linearity=tf.nn.tanh):
self.vocab_size = vocab_size
self.n_hidden = n_hidden
self.n_topic = n_topic
self.n_sample = n_sample
self.non_linearity = non_linearity
self.learning_rate = learning_rate/batch_size #CA
self.batch_size = batch_size
self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
# encoder
with tf.variable_scope('encoder'):
self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
self.logsigm = utils.linear(self.enc_vec,
self.n_topic,
bias_start_zero=True,
matrix_start_zero=False,
scope='logsigm')
self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
self.kld = self.mask*self.kld # mask paddings
with tf.variable_scope('decoder'):
if self.n_sample ==1: # single sample
p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
eps = tf.random_normal((p1, self.n_topic), 0, 1)
doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
# multiple samples
else:
eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
eps_list = tf.split(0, self.n_sample, eps)
recons_loss_list = []
for i in xrange(self.n_sample):
if i > 0: tf.get_variable_scope().reuse_variables()
curr_eps = eps_list[i]
doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
self.objective = self.recons_loss + self.kld
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
fullvars = tf.trainable_variables()
enc_vars = utils.variable_parser(fullvars, 'encoder')
dec_vars = utils.variable_parser(fullvars, 'decoder')
enc_grads = tf.gradients(self.objective, enc_vars)
dec_grads = tf.gradients(self.objective, dec_vars)
self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
available = set(np.arange(n_samples)) - used_ints #
if len(available) < batch_size:
indices = np.array(list(available))
else:
indices = np.random.choice(tuple(available), batch_size, replace=False)
used = used_ints
mb = itemgetter(*indices)(it1)
batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
batch_flattened = np.ravel(batch_xs)
index_positions = np.where(batch_flattened > 0)[0]
return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
n_samples = Instance1.num_reviews()
train_batches = list(data_set) #this is an itertools.chain object
it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.atch_size
if len(it1_train) % batch_size != 0:
total_batch = int(len(it1_train)/batch_size) + 1
else:
total_batch = int(len(it1_train)/batch_size)
trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
#Training
train_time = time.time()
for epoch in range(training_epochs):
for switch in xrange(0, 2):
if switch == 0:
optim = model.optim_dec
print_mode = 'updating decoder'
else:
optim = model.optim_enc
print_mode = 'updating encoder'
with open(trainfilesave, 'w') as f:
for i in xrange(alternate_epochs):
loss_sum = 0.0
kld_sum = 0.0
word_count = 0
used_indices = set()
for idx_batch in range(total_batch): #train_batches:
mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
print('minibatch', idx_batch)
used_indices.update(mb[2])
num_mb = np.ones(mb[0][0].shape[0])
input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
_, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
output_size,
no_bias=False,
bias_start_zero=False,
matrix_start_zero=False,
scope=None):
"""Define a linear connection."""
with tf.variable_scope(scope or 'Linear'):
if matrix_start_zero:
matrix_initializer = tf.constant_initializer(0)
else:
matrix_initializer = None
if bias_start_zero:
bias_initializer = tf.constant_initializer(0)
else:
bias_initializer = None
input_size = inputs.get_shape()[1].value
matrix = tf.get_variable('Matrix', [input_size, output_size],
initializer=matrix_initializer)
bias_term = tf.get_variable('Bias', [output_size],
initializer=bias_initializer)
output = tf.matmul(inputs, matrix)
if not no_bias:
output = output + bias_term
return output
def mlp(inputs,
mlp_hidden=[],
mlp_nonlinearity=tf.nn.tanh,
scope=None):
"""Define an MLP."""
with tf.variable_scope(scope or 'Linear'):
mlp_layer = len(mlp_hidden)
res = inputs
for l in xrange(mlp_layer):
res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
return res