I am new to Python and machine learning. Can someone please tell me what the problem is in my implementation of the ANN backpropagation algorithm? The error value seems to be increasing instead of decreasing, as can be seen in the output. The code is as follows:
import math
import random
from random import seed

n_inputs = 3
n_hidden = 3
n_outputs = 1

dataset = [[1, 0, 1], [1]]

wih = [[random.random() for i in range(n_hidden)] for i in range(n_inputs)]
who = [random.random() for i in range(n_hidden)]

def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

def derivative_sigmoid(x):
    return x * (1 - x)

def activate_ih(data):
    activation = [0, 0, 0]
    for i in range(n_inputs):
        for j in range(n_hidden):
            activation[j] += data[i] * wih[i][j]
    return activation

def activate_ho(data):
    activation = 0
    for i in range(n_hidden):
        activation += data[i] + who[i]
    return activation

def forward_pass():
    input = []
    for x in dataset[0]:
        input.append(sigmoid(x))
    input_h = activate_ih(input)
    output_h = []
    for x in input_h:
        output_h.append(sigmoid(x))
    input_o = activate_ho(output_h)
    output_o = sigmoid(input_o)
    return input_h, output_h, input_o, output_o

def backpropagate_oh(learning_rate, output_h, input_o, output_o):
    error_o = dataset[1][0] - output_o
    output_delta = error_o * derivative_sigmoid(input_o)
    for i in range(n_hidden):
        delta_weight = output_h[i] * output_delta
        who[i] = who[i] + learning_rate*delta_weight
    return output_delta

def backpropagate_hi(learning_rate, input_h, output_delta):
    hidden_delta = []
    for i in range(n_hidden):
        error = who[i] * output_delta
        hidden_delta.append(error * derivative_sigmoid(input_h[i]))
    for i in range(n_input):
        for j in range(n_hidden):
            delta_weight = hidden_delta[j] * dataset[0][j]
            wih[i][j] = wih[i][j] + learning_rate * delta_weight

def trainNetwork(epochs, learning_rate):
    for i in range(epochs):
        sum_error = 0
        inp_h, out_h, inp_o, out_o = forward_pass()
        sum_error = dataset[1][0] - out_o
        print('Epoch {0} \tError'.format(i), sum_error, '\tOuput: ', out_o,
              '\tTarget: ', dataset[1][0])
        out_delta = backpropagate_oh(learning_rate, out_h, inp_o, out_o)
        backpropagate_hi(learning_rate, inp_h, out_delta)

trainNetwork(epochs=20, learning_rate=0.5)
From a quick look, it seems you are taking a step in the wrong direction.
After you compute the gradient, you want to step in the OTHER direction, because you want to go down the slope.
Try who[i] = who[i] - learning_rate*delta_weight
(This is not a full answer, but I can't comment yet, so I need to post this.)
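To make the direction point concrete, here is a tiny, self-contained gradient-descent sketch that is not tied to your network: minimizing f(w) = (w - 3)^2 means repeatedly stepping against the derivative, which drives w toward 3.

def f_prime(w):
    return 2 * (w - 3)          # derivative of f(w) = (w - 3)**2

w = 0.0
learning_rate = 0.1
for _ in range(50):
    w = w - learning_rate * f_prime(w)   # subtract: move down the slope
print(w)  # approaches 3.0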
Github link for entire code
I have coded the forward and backpropagation algorithms for a deep neural network from scratch. I am using gradient descent to reduce the cost, but the cost does not seem to decrease when I use 3 hidden layers. Any recommendations to improve my code? Is there a bug in it?
I have included the link to my entire GitHub code here.
import numpy as np

def backward_propagation(parameters, AL, cache, X, Y):
    m = X.shape[1]
    L = len(parameters)//2
    grads = {}
    cache['A' + str(0)] = X

    # output layer
    dZ = AL - Y
    A_prev = cache['A' + str(L-1)]
    dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    grads["dW"+str(L)] = dW
    grads["db"+str(L)] = db
    grads["dZ"+str(L)] = dZ

    for l in range(L-1, 0, -1):  # from reverse
        A = cache['A' + str(l)]
        W = parameters['W' + str(l+1)]
        dZ_next = grads["dZ"+str(l+1)]
        A_prev = cache['A' + str(l-1)]

        # derivative of the hidden-layer activation
        temp = np.zeros(A.shape)
        temp[A < 0] = 0
        temp[A >= 0] = 1

        dZ = np.dot(W.T, dZ_next)*temp
        dW = (1/m)*np.dot(dZ, A_prev.T)
        db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
        grads["dW"+str(l)] = dW
        grads["db"+str(l)] = db
        grads["dZ"+str(l)] = dZ
    return grads
I followed https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/?unapproved=573495&moderation-hash=e897cca43ec874103aed4efd605b10aa#comment-573495 and made a neural network, but every time I run it an error with the lists occurs. Please help me debug it. My code is on Google Colab: https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn?usp=sharing
"""ml_nolibs.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Skfq3A1u7Mwdo72YBRWOm4x0SCp8mIFn
"""
from random import seed,random
import numpy as np
def make_nn(n_inps,n_midd,n_outs):
network_weights = []
weights_input_hidden = [{'weights':[random() for i in range(n_inps+1)]} for i in range(n_midd) ]
weights_hidden_output = [{'weights':[random() for i in range(n_midd+1)]} for i in range(n_outs) ]
network_weights.append(weights_input_hidden)
network_weights.append(weights_hidden_output)
return network_weights
print(make_nn(1,2,2))
def sigmoid(n):
return 1/(1+np.exp(-n))
def activate(weights,inputs):
if (len(weights)-1) != len(inputs):
return "length error ____ activat"
ret = weights[-1]
for i in range(len(inputs)):
ret+= inputs[i] * weights[i]
return sigmoid(ret)
def forward_propagate(network,inputs):
print(inputs)
outs = []
for layer_of_connections in network:
outs = []
for neuron in layer_of_connections:
neuron['output']=activate(neuron['weights'],inputs)
outs.append(neuron['output'])
inputs = outs
return outs
print(forward_propagate([[{'weights': [0.7160471173051909, 0.5215147566159989]}, {'weights': [0.604197405116923, 0.4628263091169783]}], [{'weights': [0.4638546941280478, 0.5191037591740162, 0.8253877642443779]}, {'weights': [0.4635745950402146, 0.6078498529022434, 0.0074536694308950935]}]]
,[1]))
"""testing"""
network = make_nn(2,4,3)
print(forward_propagate(network,[6.9,9.6]))
"""assigning blame"""
def transfer_d(output):
return output*(1-output)
def backpropagate(network,expected):
for i in reversed(range(len(network))):
layer = network[i]
errors = []
if i == (len(network) -1):
for j in range(len(layer)):
neuron = layer[j]
errors.append(expected - neuron['output'])
else:
for j in range(len(layer)):
err = 0
for neuron in network[i+1]:
err+=neuron['weights'][j]*neuron['delta']
errors.append(err)
for j in range(len(layer)):
neuron = layer[j]
neuron['delta'] = errors[j]*transfer_d(neuron['output'])
"""# TRAINING TIME!"""
def update_weights(network,inps,l_rate = .1):
processed_inps = inps[:-1]
for i in range(len(network)):
if i!=0:
processed_inps = [neuron['output'] for neuron in network[i-1]]
for neuron in network[i]:
for j in range(len(processed_inps)):
neuron['weights'][j]+=l_rate*neuron['delta']*inputs[j]
neuron['weights'][-1]+=l_rate*neuron['delta ']
def choose_ele(l):
return l[int(random()*(len(l)-1))]
def train_netw(network,data,n_outputs,l_rate = .1,n_epoch = 10000):#n_outputs is used for onr hot encoding using binary vector
for epoch in range(n_epoch):
sum_error = 0
row = choose_ele(data)
nn_outs = forward_propagate(network,row[:-1])
expected = [0 for i in range(n_outputs)]
expected[row[-1]] = 1
sum_error = sum([(expected[i]-nn_outs[i])**2 for i in range(len(expected))])
backpropagate(network,expected)
update_weights(network,row[:-1])#possible mistake
if epoch%100 == 0:
print('epoch = %d err = %.3f'%(epoch,sum_error))
data = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
n_inps = data.__len__()-1
n_outs = len(set([d[-1] for d in data]))
netw = make_nn(n_inps,2,n_outs)
train_netw(netw,data,n_outs,.5,200)
print(network)
The error is shown in the attached screenshot (not reproduced here).
I save the trained model after a certain number of episodes with the dedicated save() function of the DDPG class (the network is saved when the reward reaches zero), but when I restore the model again using saver.restore(), the network gives a reward of approximately -1800. Why is this happening? Am I doing something wrong? My network:
import tensorflow as tf
import numpy as np
import gym

epsiode_steps = 500

# learning rate for actor
lr_a = 0.001

# learning rate for critic
lr_c = 0.002

gamma = 0.9
alpha = 0.01
memory = 10000
batch_size = 32
render = True

class DDPG(object):
    def __init__(self, no_of_actions, no_of_states, a_bound, ):
        self.memory = np.zeros((memory, no_of_states * 2 + no_of_actions + 1), dtype=np.float32)

        # initialize pointer to point to our experience buffer
        self.pointer = 0

        self.sess = tf.Session()

        self.noise_variance = 3.0

        self.no_of_actions, self.no_of_states, self.a_bound = no_of_actions, no_of_states, a_bound,

        self.state = tf.placeholder(tf.float32, [None, no_of_states], 's')
        self.next_state = tf.placeholder(tf.float32, [None, no_of_states], 's_')
        self.reward = tf.placeholder(tf.float32, [None, 1], 'r')

        with tf.variable_scope('Actor'):
            self.a = self.build_actor_network(self.state, scope='eval', trainable=True)
            a_ = self.build_actor_network(self.next_state, scope='target', trainable=False)

        with tf.variable_scope('Critic'):
            q = self.build_crtic_network(self.state, self.a, scope='eval', trainable=True)
            q_ = self.build_crtic_network(self.next_state, a_, scope='target', trainable=False)

        self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval')
        self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target')
        self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval')
        self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target')

        # soft update of the target networks
        self.soft_replace = [
            [tf.assign(at, (1 - alpha) * at + alpha * ae), tf.assign(ct, (1 - alpha) * ct + alpha * ce)]
            for at, ae, ct, ce in zip(self.at_params, self.ae_params, self.ct_params, self.ce_params)]

        q_target = self.reward + gamma * q_

        td_error = tf.losses.mean_squared_error(labels=(self.reward + gamma * q_), predictions=q)

        self.ctrain = tf.train.AdamOptimizer(lr_c).minimize(td_error, name="adam-ink", var_list=self.ce_params)

        a_loss = - tf.reduce_mean(q)

        # train the actor network with the Adam optimizer to minimize the loss
        self.atrain = tf.train.AdamOptimizer(lr_a).minimize(a_loss, var_list=self.ae_params)

        tf.summary.FileWriter("logs2", self.sess.graph)

        # initialize all variables
        self.sess.run(tf.global_variables_initializer())

        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, "Pendulum/nn.ckpt")

    def choose_action(self, s):
        a = self.sess.run(self.a, {self.state: s[np.newaxis, :]})[0]
        a = np.clip(np.random.normal(a, self.noise_variance), -2, 2)
        return a

    def learn(self):
        # soft target replacement
        self.sess.run(self.soft_replace)

        indices = np.random.choice(memory, size=batch_size)
        batch_transition = self.memory[indices, :]
        batch_states = batch_transition[:, :self.no_of_states]
        batch_actions = batch_transition[:, self.no_of_states: self.no_of_states + self.no_of_actions]
        batch_rewards = batch_transition[:, -self.no_of_states - 1: -self.no_of_states]
        batch_next_state = batch_transition[:, -self.no_of_states:]

        self.sess.run(self.atrain, {self.state: batch_states})
        self.sess.run(self.ctrain, {self.state: batch_states, self.a: batch_actions, self.reward: batch_rewards,
                                    self.next_state: batch_next_state})

    # store_transition stores all the transition information in the buffer
    def store_transition(self, s, a, r, s_):
        trans = np.hstack((s, a, [r], s_))
        index = self.pointer % memory
        self.memory[index, :] = trans
        self.pointer += 1

        if self.pointer > memory:
            self.noise_variance *= 0.99995
            self.learn()

    # build_actor_network builds the actor network; the critic network follows
    def build_actor_network(self, s, scope, trainable):
        with tf.variable_scope(scope):
            l1 = tf.layers.dense(s, 30, activation=tf.nn.tanh, name='l1', trainable=trainable)
            a = tf.layers.dense(l1, self.no_of_actions, activation=tf.nn.tanh, name='a', trainable=trainable)
            return tf.multiply(a, self.a_bound, name="scaled_a")

    def build_crtic_network(self, s, a, scope, trainable):
        with tf.variable_scope(scope):
            n_l1 = 30
            w1_s = tf.get_variable('w1_s', [self.no_of_states, n_l1], trainable=trainable)
            w1_a = tf.get_variable('w1_a', [self.no_of_actions, n_l1], trainable=trainable)
            b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
            net = tf.nn.tanh(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
            q = tf.layers.dense(net, 1, trainable=trainable)
            return q

    def save(self):
        self.saver.save(self.sess, "Pendulum/nn.ckpt")


env = gym.make("Pendulum-v0")
env = env.unwrapped
env.seed(1)

no_of_states = env.observation_space.shape[0]
no_of_actions = env.action_space.shape[0]

a_bound = env.action_space.high
ddpg = DDPG(no_of_actions, no_of_states, a_bound)

total_reward = []
no_of_episodes = 300

# for each episode
for i in range(no_of_episodes):
    # initialize the environment
    s = env.reset()

    # episodic reward
    ep_reward = 0

    for j in range(epsiode_steps):
        env.render()

        # select action by adding noise through OU process
        a = ddpg.choose_action(s)

        # perform the action and move to the next state s_
        s_, r, done, info = env.step(a)

        # store the transition in our experience buffer,
        # sample a minibatch of experience and train the network
        ddpg.store_transition(s, a, r, s_)

        # update current state to next state
        s = s_

        # add episodic rewards
        ep_reward += r

        if int(ep_reward) == 0 and i > 200:
            ddpg.save()
            print("save")
            quit()

        if j == epsiode_steps - 1:
            total_reward.append(ep_reward)
            print('Episode:', i, ' Reward: %i' % int(ep_reward))
            break
After finishing Coursera's Practical RL course on A3C, I'm trying to implement my own A3C agent using TensorFlow 2. To start, I'm training it on the CartPole environment, but I can't get good results. So far I have launched several training runs with the following code, changing the entropy coefficient to see its impact (the results are shown below). Does the problem come from my implementation, or is it more a fine-tuning issue?
# Imports assumed from the usage below (gym, NumPy, TensorFlow/Keras, tqdm,
# matplotlib, IPython display); they were not shown in the original snippet.
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tqdm import trange
import matplotlib.pyplot as plt
from IPython.display import clear_output

class A3C:
    def __init__(self, state_dim, n_actions, optimizer=tf.keras.optimizers.Adam(1e-3)):
        self.state_input = Input(shape=state_dim)
        self.x = Dense(256, activation='relu')(self.state_input)
        self.head_v = Dense(1, activation='linear')(self.x)
        self.head_p = Dense(n_actions, activation='linear')(self.x)
        self.network = tf.keras.Model(inputs=[self.state_input], outputs=[self.head_v, self.head_p])
        self.optimizer = optimizer

    def forward(self, state):
        return self.network(state)

    def sample(self, logits):
        policy = np.exp(logits.numpy()) / np.sum(np.exp(logits.numpy()), axis=-1, keepdims=True)
        return np.array([np.random.choice(len(p), p=p) for p in policy])

def evaluate(agent, env, n_games=1):
    """Plays a game from start till done, returns per-game rewards."""
    game_rewards = []
    for _ in range(n_games):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.sample(agent.forward(np.array([state]))[1])[0]
            state, reward, done, info = env.step(action)
            total_reward += reward
            if done: break
        game_rewards.append(total_reward)
    return game_rewards

class EnvBatch:
    def __init__(self, n_envs=10):
        self.envs = [gym.make(env_id) for _ in range(n_envs)]

    def reset(self):
        return np.array([env.reset() for env in self.envs])

    def step(self, actions):
        results = [env.step(a) for env, a in zip(self.envs, actions)]
        new_obs, rewards, done, infos = map(np.array, zip(*results))
        for i in range(len(self.envs)):
            if done[i]:
                new_obs[i] = self.envs[i].reset()
        return new_obs, rewards, done, infos

env_id = "CartPole-v0"
env = gym.make(env_id)
state_dim = env.observation_space.shape
n_actions = env.action_space.n

agent = A3C(state_dim, n_actions)
env_batch = EnvBatch(10)
batch_states = env_batch.reset()
gamma = 0.99

rewards_history = []
entropy_history = []

for i in trange(200000):
    with tf.GradientTape() as t:
        batch_values, batch_logits = agent.forward(batch_states)
        batch_actions = agent.sample(batch_logits)
        batch_next_states, batch_rewards, batch_dones, _ = env_batch.step(batch_actions)

        batch_next_values, btach_next_logits = agent.forward(batch_next_states)
        batch_next_values *= (1 - batch_dones)

        probs = tf.nn.softmax(batch_logits)
        logprobs = tf.nn.log_softmax(batch_logits)
        logp_actions = tf.reduce_sum(logprobs * tf.one_hot(batch_actions, n_actions), axis=-1)

        advantage = batch_rewards + gamma*batch_next_values - batch_values
        entropy = -tf.reduce_sum(probs * logprobs, 1, name="entropy")
        actor_loss = - tf.reduce_mean(logp_actions * tf.stop_gradient(advantage)) - 0.005 * tf.reduce_mean(entropy)

        target_state_values = batch_rewards + gamma*batch_next_values
        critic_loss = tf.reduce_mean((batch_values - tf.stop_gradient(target_state_values))**2)

        loss = actor_loss + critic_loss

    var_list = agent.network.trainable_variables
    grads = t.gradient(loss, var_list)
    agent.optimizer.apply_gradients(zip(grads, var_list))

    batch_states = batch_next_states
    entropy_history.append(np.mean(entropy))

    if i % 500 == 0:
        if i % 2500 == 0:
            rewards_history.append(np.mean(evaluate(agent, env, n_games=3)))

        clear_output(True)
        plt.figure(figsize=[8, 4])
        plt.subplot(1, 2, 1)
        plt.plot(rewards_history, label='rewards')
        plt.title("Session rewards")
        plt.grid()
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(entropy_history, label='entropy')
        plt.title("Policy entropy")
        plt.grid()
        plt.legend()
        plt.show()
Results (reward and entropy plots, not reproduced here): Beta = 0.005, trainings 1-3; Beta = 0.05, trainings 1-3.
I've looked through your code, and it doesn't look like there is any problem with the algorithm. That is, it seems to me that the hyperparameters were chosen incorrectly. Try different hyperparameter sets. If it still doesn't work properly, refer to the repository.
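As a rough illustration of that suggestion (purely a sketch: train_and_evaluate is a hypothetical wrapper around your training loop that returns the mean evaluation reward; it is not part of your code):

import itertools

entropy_coefs = [0.001, 0.005, 0.01, 0.05]   # candidate beta values
learning_rates = [1e-4, 3e-4, 1e-3]

results = {}
for beta, lr in itertools.product(entropy_coefs, learning_rates):
    # hypothetical helper: re-runs the training loop with these settings
    results[(beta, lr)] = train_and_evaluate(entropy_coef=beta, learning_rate=lr)

best = max(results, key=results.get)
print("best (beta, lr):", best, "mean reward:", results[best])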
The critic loss is wrong. You should first get the expected returns by predicting the value of the next state and iterating backwards over the rewards with the Bellman equation.
Here is an example:
def getExpectedReturns(self, states, next_states, done, rewards, standarize=True):
    # Get next value
    if done[-1] == 1.0:
        arr_idx = np.zeros((rewards.shape[0], 1))
        arr_idx[-1] = 1.0
        values_rewards_sum_one_hot = tf.convert_to_tensor(arr_idx, dtype=tf.float32)
        next_value = tf.reduce_sum(rewards * values_rewards_sum_one_hot, axis=0)
    else:
        values_rewards_sum = self.model_a2c(next_states)[-1]
        arr_idx = np.zeros((rewards.shape[0], 1))
        arr_idx[0] = 1.0
        values_rewards_sum_one_hot = tf.convert_to_tensor(arr_idx, dtype=tf.float32)
        next_value = tf.reduce_sum(values_rewards_sum * values_rewards_sum_one_hot, axis=0)

    # Iterate over rewards
    list_true_values = []
    for i in reversed(range(0, len(rewards))):
        if done[i] == 0.0:
            next_value = rewards[i] + next_value * self.gamma
        else:
            next_value = rewards[i]
        list_true_values.append(next_value)
    list_true_values.reverse()
    list_true_values = tf.convert_to_tensor(list_true_values, dtype=tf.float32)

    if standarize:
        list_true_values = ((list_true_values - tf.math.reduce_mean(list_true_values)) /
                            (tf.math.reduce_std(list_true_values) + tf.constant(1e-12)))
    return list_true_values

with tf.GradientTape() as tape:
    # Advantage
    returns = self.getExpectedReturns(states, next_states, done, rewards, standarize=False)
    actions_probs_logits, values = self.model_a2c(states)
    advantage = returns - values
    advantage = tf.squeeze(advantage)

    # Action probabilities
    actions_probs_softmax = tf.nn.softmax(actions_probs_logits)
    actions_log_probs_softmax = tf.nn.log_softmax(actions_probs_logits)
    actions_one_hot = tf.one_hot(actions, self.num_actions, 1.0, 0.0)
    actions_log_probs = tf.reduce_sum(actions_log_probs_softmax * actions_one_hot, axis=-1)

    # Entropy
    entropy = self.entropy_coef * tf.reduce_mean(actions_probs_softmax * actions_log_probs_softmax, axis=1)

    # Losses
    actor_loss = -tf.reduce_mean(actions_log_probs * tf.stop_gradient(advantage), axis=0)
    critic_loss = self.critic_coef * tf.reduce_mean(tf.math.pow(advantage, 2), axis=0)
    total_loss = actor_loss + critic_loss - entropy
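One detail the snippet stops short of is applying the gradients. A minimal sketch of that final step, assuming the tape block above lives inside a method of an agent object that also exposes an optimizer attribute (that attribute is my assumption; it is not shown in the answer):

# Assumed final step: self.optimizer is hypothetical; model_a2c is from the answer above.
grads = tape.gradient(total_loss, self.model_a2c.trainable_variables)
self.optimizer.apply_gradients(zip(grads, self.model_a2c.trainable_variables))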
I have two Python files, lstm.py and test.py, in the same folder. I'm executing test.py in the Python interactive shell, and it shows:
AttributeError: LstmNetwork instance has no attribute 'x_list_add'
lstm.py
import random
import numpy as np
import math

def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def sigmoid_derivative(values):
    return values*(1-values)

def tanh_derivative(values):
    return 1. - values ** 2

# creates uniform random array w/ values in [a,b) and shape args
def rand_arr(a, b, *args):
    np.random.seed(0)
    return np.random.rand(*args) * (b - a) + a

class LstmParam:
    def __init__(self, mem_cell_ct, x_dim):
        self.mem_cell_ct = mem_cell_ct
        self.x_dim = x_dim
        concat_len = x_dim + mem_cell_ct
        # weight matrices
        self.wg = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wi = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wf = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wo = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        # bias terms
        self.bg = rand_arr(-0.1, 0.1, mem_cell_ct)
        self.bi = rand_arr(-0.1, 0.1, mem_cell_ct)
        self.bf = rand_arr(-0.1, 0.1, mem_cell_ct)
        self.bo = rand_arr(-0.1, 0.1, mem_cell_ct)
        # diffs (derivative of loss function w.r.t. all parameters)
        self.wg_diff = np.zeros((mem_cell_ct, concat_len))
        self.wi_diff = np.zeros((mem_cell_ct, concat_len))
        self.wf_diff = np.zeros((mem_cell_ct, concat_len))
        self.wo_diff = np.zeros((mem_cell_ct, concat_len))
        self.bg_diff = np.zeros(mem_cell_ct)
        self.bi_diff = np.zeros(mem_cell_ct)
        self.bf_diff = np.zeros(mem_cell_ct)
        self.bo_diff = np.zeros(mem_cell_ct)

    def apply_diff(self, lr=1):
        self.wg -= lr * self.wg_diff
        self.wi -= lr * self.wi_diff
        self.wf -= lr * self.wf_diff
        self.wo -= lr * self.wo_diff
        self.bg -= lr * self.bg_diff
        self.bi -= lr * self.bi_diff
        self.bf -= lr * self.bf_diff
        self.bo -= lr * self.bo_diff
        # reset diffs to zero
        self.wg_diff = np.zeros_like(self.wg)
        self.wi_diff = np.zeros_like(self.wi)
        self.wf_diff = np.zeros_like(self.wf)
        self.wo_diff = np.zeros_like(self.wo)
        self.bg_diff = np.zeros_like(self.bg)
        self.bi_diff = np.zeros_like(self.bi)
        self.bf_diff = np.zeros_like(self.bf)
        self.bo_diff = np.zeros_like(self.bo)

class LstmState:
    def __init__(self, mem_cell_ct, x_dim):
        self.g = np.zeros(mem_cell_ct)
        self.i = np.zeros(mem_cell_ct)
        self.f = np.zeros(mem_cell_ct)
        self.o = np.zeros(mem_cell_ct)
        self.s = np.zeros(mem_cell_ct)
        self.h = np.zeros(mem_cell_ct)
        self.bottom_diff_h = np.zeros_like(self.h)
        self.bottom_diff_s = np.zeros_like(self.s)

class LstmNode:
    def __init__(self, lstm_param, lstm_state):
        # store reference to parameters and to activations
        self.state = lstm_state
        self.param = lstm_param
        # non-recurrent input concatenated with recurrent input
        self.xc = None

    def bottom_data_is(self, x, s_prev=None, h_prev=None):
        # if this is the first lstm node in the network
        if s_prev is None: s_prev = np.zeros_like(self.state.s)
        if h_prev is None: h_prev = np.zeros_like(self.state.h)
        # save data for use in backprop
        self.s_prev = s_prev
        self.h_prev = h_prev
        # concatenate x(t) and h(t-1)
        xc = np.hstack((x, h_prev))
        self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
        self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
        self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
        self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
        self.state.s = self.state.g * self.state.i + s_prev * self.state.f
        self.state.h = self.state.s * self.state.o
        self.xc = xc

    def top_diff_is(self, top_diff_h, top_diff_s):
        # notice that top_diff_s is carried along the constant error carousel
        ds = self.state.o * top_diff_h + top_diff_s
        do = self.state.s * top_diff_h
        di = self.state.g * ds
        dg = self.state.i * ds
        df = self.s_prev * ds
        # diffs w.r.t. vector inside sigma / tanh function
        di_input = sigmoid_derivative(self.state.i) * di
        df_input = sigmoid_derivative(self.state.f) * df
        do_input = sigmoid_derivative(self.state.o) * do
        dg_input = tanh_derivative(self.state.g) * dg
        # diffs w.r.t. inputs
        self.param.wi_diff += np.outer(di_input, self.xc)
        self.param.wf_diff += np.outer(df_input, self.xc)
        self.param.wo_diff += np.outer(do_input, self.xc)
        self.param.wg_diff += np.outer(dg_input, self.xc)
        self.param.bi_diff += di_input
        self.param.bf_diff += df_input
        self.param.bo_diff += do_input
        self.param.bg_diff += dg_input
        # compute bottom diff
        dxc = np.zeros_like(self.xc)
        dxc += np.dot(self.param.wi.T, di_input)
        dxc += np.dot(self.param.wf.T, df_input)
        dxc += np.dot(self.param.wo.T, do_input)
        dxc += np.dot(self.param.wg.T, dg_input)
        # save bottom diffs
        self.state.bottom_diff_s = ds * self.state.f
        self.state.bottom_diff_h = dxc[self.param.x_dim:]

class LstmNetwork():
    def __init__(self, lstm_param):
        self.lstm_param = lstm_param
        self.lstm_node_list = []
        # input sequence
        self.x_list = []

    def y_list_is(self, y_list, loss_layer):
        """
        Updates diffs by setting target sequence
        with corresponding loss layer.
        Will *NOT* update parameters. To update parameters,
        call self.lstm_param.apply_diff()
        """
        assert len(y_list) == len(self.x_list)
        idx = len(self.x_list) - 1
        # first node only gets diffs from label ...
        loss = loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
        diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
        # here s is not affecting loss due to h(t+1), hence we set equal to zero
        diff_s = np.zeros(self.lstm_param.mem_cell_ct)
        self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
        idx -= 1
        ### ... following nodes also get diffs from next nodes, hence we add diffs to diff_h
        ### we also propagate error along constant error carousel using diff_s
        while idx >= 0:
            loss += loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h += self.lstm_node_list[idx + 1].state.bottom_diff_h
            diff_s = self.lstm_node_list[idx + 1].state.bottom_diff_s
            self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
            idx -= 1
        return loss

    def x_list_clear(self):
        self.x_list = []

    def x_list_add(self, x):
        self.x_list.append(x)
        if len(self.x_list) > len(self.lstm_node_list):
            # need to add new lstm node, create new state mem
            lstm_state = LstmState(self.lstm_param.mem_cell_ct, self.lstm_param.x_dim)
            self.lstm_node_list.append(LstmNode(self.lstm_param, lstm_state))
        # get index of most recent x input
        idx = len(self.x_list) - 1
        if idx == 0:
            # no recurrent inputs yet
            self.lstm_node_list[idx].bottom_data_is(x)
        else:
            s_prev = self.lstm_node_list[idx - 1].state.s
            h_prev = self.lstm_node_list[idx - 1].state.h
            self.lstm_node_list[idx].bottom_data_is(x, s_prev, h_prev)
test.py
import numpy as np
import sys
sys.path.append("/home/vj/Downloads/tfdeep/core_lstm/lstm/")
from lstm import LstmParam, LstmNetwork

class ToyLossLayer:
    """
    Computes square loss with first element of hidden layer array."""
    #classmethod
    def loss(self, pred, label):
        return (pred[0] - label) ** 2

    #classmethod
    def bottom_diff(self, pred, label):
        diff = np.zeros_like(pred)
        diff[0] = 2 * (pred[0] - label)
        return diff

def example_0():
    # learns to repeat simple sequence from random inputs
    np.random.seed(0)
    # parameters for input data dimension and lstm cell count
    mem_cell_ct = 100
    x_dim = 50
    lstm_param = LstmParam(mem_cell_ct, x_dim)
    lstm_net = LstmNetwork(lstm_param)
    y_list = [-0.5, 0.2, 0.1, -0.5]
    input_val_arr = [np.random.random(x_dim) for _ in y_list]
    for cur_iter in range(100):
        print("iter", "%2s" % str(cur_iter), ": ")
        for ind in range(len(y_list)):
            lstm_net.x_list_add(input_val_arr[ind])
        #print("y_pred = [" + ", ".join(["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(y_list))]) +"]," end=":")
        loss = lstm_net.y_list_is(y_list, ToyLossLayer)
        print("loss:", "%.3e" % loss)
        lstm_param.apply_diff(lr=0.1)
        lstm_net.x_list_clear()

if __name__ == "__main__":
    example_0()
While executing this test.py file, I get the following error:

Traceback (most recent call last):
  File "", line 2, in
  File "", line 14, in example_0
AttributeError: LstmNetwork instance has no attribute 'x_list_add'

I am struggling to get the output, and I did not find any post related to my problem.