I am trying to use the Deep Galerkin Method (DGM) to solve high-dimensional PDEs and I am facing a problem. For illustrative purposes, I am posting a simple optimization problem below. The feed-forward network successfully recovers the optimal function, but the DGM network fails to do so. Any help is highly appreciated.
import logging, os
os.system('clear')
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# CLASS DEFINITIONS FOR NEURAL NETWORKS USED IN DEEP GALERKIN METHOD
#%% import needed packages
import tensorflow as tf
#%% LSTM-like layer used in DGM (see Figure 5.3 and set of equations on p. 45) - modification of Keras layer class
class LSTMLayer(tf.keras.layers.Layer):
# constructor/initializer function (automatically called when new instance of class is created)
def __init__(self, output_dim, input_dim, trans1 = "tanh", trans2 = "tanh"):
'''
Args:
input_dim (int): dimensionality of input data
output_dim (int): number of outputs for LSTM layers
trans1, trans2 (str): activation functions used inside the layer;
one of: "tanh" (default), "relu" or "sigmoid"
Returns: customized Keras layer object used as intermediate layers in DGM
'''
# create an instance of a Layer object (call initialize function of superclass of LSTMLayer)
super(LSTMLayer, self).__init__()
# add properties for layer including activation functions used inside the layer
self.output_dim = output_dim
self.input_dim = input_dim
if trans1 == "tanh":
self.trans1 = tf.nn.tanh
elif trans1 == "relu":
self.trans1 = tf.nn.relu
elif trans1 == "sigmoid":
self.trans1 = tf.nn.sigmoid
if trans2 == "tanh":
self.trans2 = tf.nn.tanh
elif trans2 == "relu":
self.trans2 = tf.nn.relu
elif trans2 == "sigmoid":
self.trans2 = tf.nn.sigmoid
### define LSTM layer parameters (use Xavier initialization)
# u vectors (weighting vectors for inputs original inputs x)
self.Uz = self.add_variable("Uz", shape=[self.input_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Ug = self.add_variable("Ug", shape=[self.input_dim ,self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Ur = self.add_variable("Ur", shape=[self.input_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Uh = self.add_variable("Uh", shape=[self.input_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
# w vectors (weighting vectors for output of previous layer)
self.Wz = self.add_variable("Wz", shape=[self.output_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Wg = self.add_variable("Wg", shape=[self.output_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Wr = self.add_variable("Wr", shape=[self.output_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
self.Wh = self.add_variable("Wh", shape=[self.output_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
# bias vectors
self.bz = self.add_variable("bz", shape=[1, self.output_dim])
self.bg = self.add_variable("bg", shape=[1, self.output_dim])
self.br = self.add_variable("br", shape=[1, self.output_dim])
self.bh = self.add_variable("bh", shape=[1, self.output_dim])
# main function to be called
def call(self, S, X):
'''Compute the output of an LSTMLayer for given inputs S, X.
Args:
S: output of previous layer
X: data input
Returns: S_new, the output of the LSTM-like layer
'''
# compute components of LSTM layer output (note H uses a separate activation function)
Z = self.trans1(tf.add(tf.add(tf.matmul(X,self.Uz), tf.matmul(S,self.Wz)), self.bz))
G = self.trans1(tf.add(tf.add(tf.matmul(X,self.Ug), tf.matmul(S, self.Wg)), self.bg))
R = self.trans1(tf.add(tf.add(tf.matmul(X,self.Ur), tf.matmul(S, self.Wr)), self.br))
H = self.trans2(tf.add(tf.add(tf.matmul(X,self.Uh), tf.matmul(tf.multiply(S, R), self.Wh)), self.bh))
# compute LSTM layer output
S_new = tf.add(tf.multiply(tf.subtract(tf.ones_like(G), G), H), tf.multiply(Z,S))
return S_new
#%% Fully connected (dense) layer - modification of Keras layer class
class DenseLayer(tf.keras.layers.Layer):
# constructor/initializer function (automatically called when new instance of class is created)
def __init__(self, output_dim, input_dim, transformation=None):
'''
Args:
input_dim: dimensionality of input data
output_dim: number of outputs for dense layer
transformation: activation function used inside the layer; using
None is equivalent to the identity map
Returns: customized Keras (fully connected) layer object
'''
# create an instance of a Layer object (call initialize function of superclass of DenseLayer)
super(DenseLayer,self).__init__()
self.output_dim = output_dim
self.input_dim = input_dim
### define dense layer parameters (use Xavier initialization)
# w vectors (weighting vectors for output of previous layer)
self.W = self.add_variable("W", shape=[self.input_dim, self.output_dim],
initializer = tf.contrib.layers.xavier_initializer())
# bias vectors
self.b = self.add_variable("b", shape=[1, self.output_dim])
if transformation:
if transformation == "tanh":
self.transformation = tf.tanh
elif transformation == "relu":
self.transformation = tf.nn.relu
else:
self.transformation = transformation
# main function to be called
def call(self,X):
'''Compute output of a dense layer for a given input X
Args:
X: input to layer
'''
# compute dense layer output
S = tf.add(tf.matmul(X, self.W), self.b)
if self.transformation:
S = self.transformation(S)
return S
#%% Neural network architecture used in DGM - modification of Keras Model class
class DGMNet(tf.keras.Model):
# constructor/initializer function (automatically called when new instance of class is created)
def __init__(self, layer_width, n_layers, input_dim, final_trans=None):
'''
Args:
layer_width: number of neurons in each layer
n_layers: number of intermediate LSTM layers
input_dim: spatial dimension of input data (EXCLUDES time dimension)
final_trans: transformation used in final layer
Returns: customized Keras model object representing DGM neural network
'''
# create an instance of a Model object (call initialize function of superclass of DGMNet)
super(DGMNet,self).__init__()
# define initial layer as fully connected
# NOTE: the original DGM code uses input_dim+1 here to account for a time input; this example has no time input, so input_dim is used directly
self.initial_layer = DenseLayer(layer_width, input_dim, transformation = "tanh")
# define intermediate LSTM layers
self.n_layers = n_layers
self.LSTMLayerList = []
for _ in range(self.n_layers):
self.LSTMLayerList.append(LSTMLayer(layer_width, input_dim))
# define final layer as fully connected with a single output (function value)
self.final_layer = DenseLayer(1, layer_width, transformation = final_trans)
# main function to be called
def call(self,x):
'''
Args:
x: sampled space inputs
Run the DGM model and obtain the fitted function value at the inputs x
'''
# define input vector as time-space pairs
X = tf.concat([x],1)
# call initial layer
S = self.initial_layer.call(X)
# call intermediate LSTM layers
for i in range(self.n_layers):
S = self.LSTMLayerList[i].call(S,X)
# call final dense layer
result = self.final_layer.call(S)
return result
#%% main class
class check():
def __init__(self,v,x,layers,learning_rate,adam_iter,params):
self.params=params
self.v=v
self.x=x
self.learning_rate = learning_rate
self.adam_iter = adam_iter
self.lb = np.array([self.x[0][0]])
self.ub = np.array([self.x[-1][0]])
self.sess = tf.Session(config = tf.ConfigProto(allow_soft_placement = True, log_device_placement = True))
self.x_tf = tf.placeholder(tf.float32, shape=[None,self.x.shape[1]])
self.v_tf = tf.placeholder(tf.float32, shape=[None,self.v.shape[1]])
self.x_u_tf = tf.placeholder(tf.float32, shape=[None,self.x.shape[1]])
self.v_u_tf = tf.placeholder(tf.float32, shape=[None,self.v.shape[1]])
self.weights_v,self.biases_v = self.initialize_nn(layers)
self.weights_i,self.biases_i = self.initialize_nn(layers)
with tf.variable_scope("control",reuse=True):
self.i_pred = self.net_i(self.x_tf)
with tf.variable_scope("value",reuse=True):
self.v_pred = self.net_v(self.x_tf)
self.error_i = self.policy_error(self.x_tf)
self.loss_v = tf.math.reduce_max(tf.abs(self.v_pred-self.v_tf))
self.loss = tf.math.reduce_max(tf.abs(self.v_pred-self.v_tf)) + tf.reduce_mean(tf.square(self.error_i))
self.optimizer_Adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)
self.optimizer_Adam_v = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
self.train_op_Adam_v = self.optimizer_Adam_v.minimize(self.loss_v)
init = tf.global_variables_initializer()
self.sess.run(init)
def policy_error(self,x):
i=self.net_i(x)
v_ = self.net_v(x+i)
l = v_ - i*x**2
error_i = tf.gradients(l,i)[0]
return error_i
def initialize_nn(self,layers):
weights = []
biases = []
num_layers = len(layers)
for l in range(num_layers-1):
W = self.xavier_init(size = [layers[l],layers[l+1]])
b = tf.Variable(tf.zeros([1,layers[l+1]], dtype=tf.float32), dtype = tf.float32)
weights.append(W)
biases.append(b)
return weights,biases
def xavier_init(self,size):
in_dim = size[0]
out_dim = size[1]
xavier_stddev = np.sqrt(2/(in_dim + out_dim))
try:
val = tf.Variable(tf.random.truncated_normal([in_dim,out_dim], stddev = xavier_stddev), dtype = tf.float32)
except:
val = tf.Variable(tf.truncated_normal([in_dim,out_dim], stddev = xavier_stddev), dtype = tf.float32)
return val
def neural_net(self,X,weights,biases):
num_layers = len(weights) +1
H = 2.0*(X - self.lb)/(self.ub - self.lb) -1
#H=X
for l in range(num_layers-2):
W = weights[l]
b = biases[l]
H = tf.tanh(tf.add(tf.matmul(H,W),b))
W = weights[-1]
b = biases[-1]
Y = tf.add(tf.matmul(H,W),b)
return Y
def net_v(self,eta):
if self.params['DGM']==True:
model_v = DGMNet(self.params['neurons_per_layer'],self.params['num_layers'],1)
v_u = model_v(eta)
else:
X = tf.concat([eta],1)
v_u = self.neural_net(X,self.weights_v,self.biases_v)
return v_u
def net_i(self,eta):
if self.params['DGM']==True:
model_i = DGMNet(self.params['neurons_per_layer'],self.params['num_layers'],1)
i_u = model_i(eta)
else:
X = tf.concat([eta],1)
i_u = self.neural_net(X,self.weights_i,self.biases_i)
return i_u
def callback(self,loss):
print('Loss: ',loss)
def train(self):
#K.clear_session()
start_time = time.time()
if True: #set this to true if you want adam to run
tf_dict = {self.v_tf:self.v, self.x_tf:self.x}
for it in range(self.adam_iter):
self.sess.run(self.train_op_Adam_v, tf_dict)
# Print
if it % 1000 == 0:
elapsed = time.time() - start_time
loss_value = self.sess.run(self.loss_v, tf_dict)
print('It: %d, Loss: %.3e, Time: %.2f' %
(it, loss_value, elapsed))
start_time = time.time()
start_time = time.time()
if True: #set this to true if you want adam to run
tf_dict = {self.v_tf:self.v, self.x_tf:self.x}
for it in range(self.adam_iter):
self.sess.run(self.train_op_Adam, tf_dict)
# Print
if it % 1000 == 0:
elapsed = time.time() - start_time
loss_value = self.sess.run(self.loss, tf_dict)
print('It: %d, Loss: %.3e, Time: %.2f' %
(it, loss_value, elapsed))
start_time = time.time()
start_time = time.time()
def predict(self,X_star):
i_star = self.sess.run(self.i_pred,{self.x_tf: X_star[:,0:1]})
v_star = self.sess.run(self.v_pred,{self.x_tf: X_star[:,0:1]})
error = self.sess.run(self.error_i,{self.x_tf: X_star[:,0:1]})
tf.reset_default_graph()
return i_star,v_star,error
#%%
if __name__=="__main__":
params={'DGM':True,'neurons_per_layer':50,'num_layers':4}
x=np.linspace(-1,1,100).reshape(-1,1).astype(np.float32)
v=(10 - x**2).reshape(-1,1).astype(np.float32)
#architecture for feed-forward network
layers = [1, 10,1]
learning_rate = 0.001
adam_iter = 5000
run = check(v,x,layers,learning_rate,adam_iter,params)
run.train()
i_star,v_star,error=run.predict(x)
The problem is to find the optimal function i that maximizes the objective v = 10 - (x+i)^2 - i*x^2, where x is the state variable. That is, the optimal function i will depend on x. If I set 'DGM' to False in the parameter dictionary and run the code, I get the right solution (in this case the functions are coded as feed-forward neural networks); the correct analytical solution is i_star = 0.5*(-2x - x^2). If I set 'DGM' to True, the solution is incorrect. I have tried different numbers of layers and neurons per layer, but DGM always gives an incorrect solution.
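For reference, the closed-form solution can be checked numerically with a small standalone sketch (independent of the networks above); the first-order condition of l(i) = 10 - (x+i)^2 - i*x^2 gives i = 0.5*(-2x - x^2):
import numpy as np

# Sketch: verify the closed-form maximizer of l(i) = 10 - (x + i)**2 - i * x**2
# by grid search over i for a few values of x.
for x in np.linspace(-1.0, 1.0, 5):
    i_grid = np.linspace(-3.0, 3.0, 60001)
    l = 10.0 - (x + i_grid) ** 2 - i_grid * x ** 2    # objective as a function of i
    i_numeric = i_grid[np.argmax(l)]                  # grid-search maximizer
    i_analytic = 0.5 * (-2.0 * x - x ** 2)            # from -2(x + i) - x^2 = 0
    print(f"x={x:+.2f}  numeric={i_numeric:+.4f}  analytic={i_analytic:+.4f}")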
Am I doing something wrong? Many thanks.
Related
I have the following code:
import torch
from torch.nn.utils.stateless import functional_call
import torch.autograd as autograd
import torch.nn as nn
# This is the model
class Encoder(nn.Module):
def __init__(self, action_dim, z_dim, skill_length):
super().__init__()
print(action_dim)
self.lin1 = nn.Linear(action_dim, action_dim)
self.lstm = nn.LSTM(input_size=action_dim, hidden_size=z_dim, batch_first=True)
self.lin2 = nn.Linear(z_dim, z_dim)
def forward(self, skill):
a, b, c = skill.shape
skill = skill.reshape(-1, skill.shape[-1])
embed = self.lin1(skill)
embed = embed.reshape(a, b, c)
mean, _ = self.lstm(embed)
mean = mean[:, -1, :]
mean = self.lin2(mean)
return mean
# This is the initialization function
def pars(model):
params = {}
for name, param in model.named_parameters():
if len(param.shape) == 1:
init = torch.nn.init.constant_(param, 0)
else:
init = torch.nn.init.orthogonal_(param)
params[name] = nn.Parameter(init)
return params
# Initializating the model
model = Encoder(4, 2, 5)
x = torch.rand(3, 5, 4)
params = pars(model)
# Running the model with functional_call and calculating gradient.
samp = functional_call(model, params, x)
grad_f = autograd.grad(torch.mean(samp), params.values(),
retain_graph=True, allow_unused=True)
print(grad_f)
# grad_f has gradient for the linear layer, but None for the LSTM layer.
# Running the model without functional_call and calculating gradient.
samp = model(x)
grad = autograd.grad(torch.mean(samp), model.parameters(), retain_graph=True)
print(grad)
# grad has gradient for all layers, e.g., linears and lstm.
I know the problem is with the LSTM layer because when I use a linear layer with nn.Linear, then the gradient depends on std as well as the linear layer. Unfortunately, I do not know to resolve this problem. I'd appreciate any help.
*Edit: I heavily edited the code provided just to further simplify the example. This code can be copied and run.
Update Dec 11, 2022
class Encoder(nn.Module):
def __init__(self, action_dim, z_dim, skill_length):
super().__init__()
print(action_dim)
self.lin1 = nn.Linear(action_dim, action_dim)
self.lstm = nn.LSTM(input_size=action_dim, hidden_size=z_dim, batch_first=True)
self.lin2 = nn.Linear(z_dim, z_dim)
def forward(self, skill):
a, b, c = skill.shape
skill = skill.reshape(-1, skill.shape[-1])
embed = self.lin1(skill)
embed = embed.reshape(a, b, c)
mean, _ = self.lstm(embed)
pdb.set_trace()
grad1 = autograd.grad(mean.mean(), params.values(),
retain_graph=True, allow_unused=True)
# This gives gradient for the self.lin1 layer, and None for the LSTM
grad2 = autograd.grad(mean.mean(), self.parameters(),
retain_graph=True, allow_unused=True)
# This gives gradient the LSTM, but None for the self.lin1 layer
mean = mean[:, -1, :]
mean = self.lin2(mean)
return mean
When I run it the regular way, without functional_call, calling the model directly, then autograd.grad(mean.mean(), self.parameters(), allow_unused=True, retain_graph=True) has gradients for both the self.lin1 and LSTM layers.
I don't know if this information is useful, but I am putting it out there just in case.
I am performing reinforcement learning and need to train an actor and a critic neural network over a custom environment. I have the following code for my networks and RL agent:
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras.losses as kls
class critic(tf.keras.Model):
## Critic NN
def __init__(self):
super().__init__()
self.d1 = tf.keras.layers.Dense(64,activation='relu')
self.v = tf.keras.layers.Dense(1, activation = None)
def call(self, input_data):
x = self.d1(input_data)
v = self.v(x)
return v
class actor(tf.keras.Model):
## Actor NN
def __init__(self):
super().__init__()
self.d1 = tf.keras.layers.Dense(64,activation='relu')
self.a = tf.keras.layers.Dense(4,activation='softmax')
def call(self, input_data):
x = self.d1(input_data)
a = self.a(x)
return a
class rlalgorithm:
## RL Agent that trains the above NNs based on data from environment
def __init__(self, actions, learning_rate=0.1):
## Display name for graphing performance
self.display_name="A2C"
## Root Mean Square Optimizer for minimizing A2C losses
self.a_opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
self.c_opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
## Initialize models
self.actor = actor()
self.critic = critic()
## Define training constants and variables
learn_rate = tf.constant(learning_rate, dtype=tf.float32)
self.reward = tf.Variable(initial_value=0, dtype=tf.float32)
self.state = tf.Variable(initial_value=tf.zeros(shape=(1, 4)), dtype=tf.float32, shape=(1, 4))
self.next_state = tf.Variable(initial_value=tf.zeros(shape=(1, 4)), dtype=tf.float32, shape=(1, 4))
self.action = tf.Variable(initial_value=0, dtype=tf.float32)
## The graph that produces the advantage
advantage = self.reward + learn_rate*self.critic(self.next_state) - self.critic(self.state)
## Graph that produces losses
dist = tfp.distributions.Categorical(probs=self.actor(self.state), dtype=tf.float32)
self.actor_loss = dist.log_prob(self.action)*advantage
self.critic_loss = advantage**2
def choose_action(self, state):
## Agent chooses action to proceed to next state
prob = self.actor(tf.convert_to_tensor([state]))
dist = tfp.distributions.Categorical(probs=prob, dtype=tf.float32)
action = dist.sample()
return int(action.numpy()[0])
def learn(self, s, a, r, s_):
## Based on chosen action, learn from result
## Assign training variables for this state-action outcome
self.reward = self.reward.assign(r)
self.state = self.state.assign(tf.convert_to_tensor([s]))
self.next_state = self.next_state.assign(tf.convert_to_tensor([s_]))
self.action = self.action.assign(a)
## Generate the loss gradient for actor
with tf.GradientTape() as tape:
actor_grad = tape.gradient(self.actor_loss, self.actor.trainable_variables)
self.a_opt.apply_gradients(zip(actor_grad, self.actor.trainable_variables))
## Generate the loss gradient for critic
with tf.GradientTape() as tape:
critic_grad = tape.gradient(self.critic_loss, self.critic.trainable_variables)
self.c_opt.apply_gradients(zip(critic_grad, self.critic.trainable_variables))
## Environment uses this, not relevant to learning
return s_, self.choose_action(s_)
I am getting the following error:
ValueError: No gradients provided for any variable: ['actor/dense/kernel:0', 'actor/dense/bias:0', 'actor/dense_1/kernel:0', 'actor/dense_1/bias:0'].
I have seen this question asked multiple times, but none of the previous solutions seem to work for my case. Unfortunately, I cannot provide the environment that this agent runs on but the error is only contained within the above file.
I have read through the docs and tried a similar implementation using the optimizer minimize function, with the same results.
I suspect the issue is related to how the tensorflow graphs are defined, but am unsure of what exactly the problem is.
Any and all help is appreciated.
I solved this on my own.
What I didn't understand was the proper usage of tf.GradientTape. Within the with block, I need to perform the operations that compute loss, so that the gradients can be found.
Here is the updated learn function, for anybody else's reference:
def learn(self, s, a, r, s_):
## Based on chosen action, learn from result
## Assign training variables for this state-action outcome
self.reward = self.reward.assign(r)
self.state = self.state.assign(tf.convert_to_tensor([s]))
self.next_state = self.next_state.assign(tf.convert_to_tensor([s_]))
self.action = self.action.assign(a)
## Generate the loss gradient for critic
with tf.GradientTape() as tape:
advantage = self.reward + self.learn_rate*self.critic(self.next_state) - self.critic(self.state)
critic_loss = advantage**2
critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables)
self.c_opt.apply_gradients(zip(critic_grad, self.critic.trainable_variables))
## Generate the loss gradient for actor
with tf.GradientTape() as tape:
dist = tfp.distributions.Categorical(probs=self.actor(self.state), dtype=tf.float32)
actor_loss = dist.log_prob(self.action)*advantage
actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables)
self.a_opt.apply_gradients(zip(actor_grad, self.actor.trainable_variables))
## Environment uses this, not relevant to learning
return s_, self.choose_action(s_)
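To see the point in isolation, here is a minimal sketch with made-up variables w and x (unrelated to the agent above): only operations executed inside the tape context are recorded, so a loss computed outside the with block yields a None gradient.
import tensorflow as tf

w = tf.Variable(2.0)
x = tf.constant(3.0)

# Loss computed OUTSIDE the tape: nothing is recorded, so the gradient is None.
loss_outside = w * x
with tf.GradientTape() as tape:
    pass
print(tape.gradient(loss_outside, w))   # None

# Loss computed INSIDE the tape: the multiplication is recorded.
with tf.GradientTape() as tape:
    loss_inside = w * x
print(tape.gradient(loss_inside, w))    # tf.Tensor(3.0, ...)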
I am trying to run the following code (as given in Tensorflow documentation) to create windows of my data and then flatten the dataset of datasets.
import tensorflow as tf

# range_ds is not defined in the snippet; the TF guide it comes from builds it
# with tf.data.Dataset.range, so an example definition is used here
range_ds = tf.data.Dataset.range(100)

window_size = 5
windows = range_ds.window(window_size, shift=1)
for sub_ds in windows.take(5):
    print(sub_ds)
flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns a cardinality of -2, which is creating problems for me during training. I tried looking for ways to set the cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again no success.
Edit-1: The problem with training is that the shape is unknown (at the Linear and Dense layers) when I train a subclassed model (given below). The model trains well when I run it eagerly (through tf.config.run_functions_eagerly(True)), but that is slow. Therefore I want the input shape to be known for model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
def __init__(self):
super(NeuralNetworkModel, self).__init__()
self.encoder = Encoder()
def train_step(self, inputs):
X = inputs[0]
Y = inputs[1]
with tf.GradientTape() as tape:
enc_X = self.encoder(X)
enc_Y = self.encoder(Y)
# loss:
loss = tf.norm(enc_Y - enc_X, axis = [0, 1], ord = 'fro')
# Compute gradients
trainable_vars = self.encoder.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Compute our own metrics
loss_tracker.update_state(loss)
# Return a dict mapping metric names to current value.
# Note that it will include the loss (tracked in self.metrics).
return {"loss": loss_tracker.result()}
@property
def metrics(self):
# We list our `Metric` objects here so that `reset_states()` can be
# called automatically at the start of each epoch
# or at the start of `evaluate()`.
# If you don't implement this property, you have to call
# `reset_states()` yourself at the time of your choosing.
return [loss_tracker]
def test_step(self, inputs):
X = inputs[0]
Y = inputs[1]
Psi_X = self.encoder(X)
Psi_Y = self.encoder(Y)
# loss:
loss = tf.norm(Psi_Y - Psi_X, axis = [0, 1], ord = 'fro')
# Compute our own metrics
loss_tracker.update_state(loss)
# Return a dict mapping metric names to current value.
# Note that it will include the loss (tracked in self.metrics).
return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
def __init__(self):
super(Encoder, self).__init__(dtype = 'float64', name = 'Encoder')
self.input_layer = DenseLayer(128)
self.hidden_layer1 = DenseLayer(128)
self.hidden_layer2 = DenseLayer(64)
self.hidden_layer3 = DenseLayer(64)
self.output_layer = LinearLayer(64)
def call(self, input_data, training):
fx = self.input_layer(input_data)
fx = self.hidden_layer1(fx)
fx = self.hidden_layer2(fx)
fx = self.hidden_layer3(fx)
return self.output_layer(fx)
class LinearLayer(tf.keras.layers.Layer):
def __init__(self, units):
super(LinearLayer, self).__init__(dtype = 'float64')
self.units = units
def build(self, input_shape):
input_dim = input_shape[-1]
self.w = self.add_weight(shape = (input_dim, self.units),
initializer = "random_normal",
trainable = True)
self.b = self.add_weight(shape = (self.units,),
initializer = tf.zeros_initializer(),
trainable = True)
def call(self, inputs):
return tf.matmul(inputs, self.w) + self.b
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, units):
super(DenseLayer, self).__init__(dtype = 'float64')
self.units = units
def build(self, input_shape):
input_dim = input_shape[-1]
self.w = self.add_weight(shape = (input_dim, self.units),
initializer = "random_normal",
trainable = True)
self.b = self.add_weight(shape = (self.units,),
initializer = tf.zeros_initializer(),
trainable = True)
def call(self, inputs):
x = tf.matmul(inputs, self.w) + self.b
return tf.nn.elu(x)
I was wondering about this as well. Turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which represents that TF doesn't know how many elements the flat_map returns per item.
I just asked Windowing a TensorFlow dataset without losing cardinality information? to see if anyone knows a way to window datasets without losing cardinality.
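If you can work out how many elements the flattened dataset should contain, one possible workaround (assuming a reasonably recent TF 2.x release) is to stamp that count back onto the dataset with tf.data.experimental.assert_cardinality. A small sketch, using drop_remainder=True so that the count is easy to compute:
import tensorflow as tf

range_ds = tf.data.Dataset.range(20)      # small example source dataset
window_size = 5

# drop_remainder=True keeps only full windows, so the element count is known exactly
windows = range_ds.window(window_size, shift=1, drop_remainder=True)
flat_windows = windows.flat_map(lambda x: x)

n_windows = 20 - window_size + 1          # 16 full windows
n_elements = n_windows * window_size      # 80 elements after flattening
flat_windows = flat_windows.apply(
    tf.data.experimental.assert_cardinality(n_elements))

print(flat_windows.cardinality().numpy())  # 80 instead of -2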
I'm currently setting up a Deep Deterministic Policy Gradient agent to interact with a crypto trading environment. The code works when I'm using Dense layers for the function approximator, but when I switch to LSTM or GRUs this error pops up when I call the learn method of Agent:
No gradients provided for any variable: ['actor_network_4/lstm/kernel:0', 'actor_network_4/lstm/recurrent_kernel:0', 'actor_network_4/lstm/bias:0', 'actor_network_4/lstm_1/kernel:0', 'actor_network_4/lstm_1/recurrent_kernel:0', 'actor_network_4/lstm_1/bias:0', 'actor_network_4/dense_8/kernel:0', 'actor_network_4/dense_8/bias:0'].
I'm using GradientTape to record the gradients and optimizer.apply_gradients to update the actor and critic networks. Please find the code snippets below:
#RNN version
class CriticNetwork(keras.Model):
def __init__(self, n_actions,name='critic', chkpt_dir='ddpg'):
super(CriticNetwork, self).__init__()
self.n_actions = n_actions
self.model_name = name
self.checkpoint_dir = chkpt_dir
self.checkpoint_file = os.path.join(self.checkpoint_dir, self.model_name+'_ddpg.h5')
self.lstm1 = LSTM(128,return_sequences=True,unroll=False)
self.lstm2 = LSTM(128)
self.q = Dense(1, activation=None)
def call(self, state, action):
action_value = tf.concat([state, action], axis=1)
action_value = np.reshape(action_value,(32,1,44))
action_value = self.lstm1(action_value)
action_value = self.lstm2(action_value)
q = self.q(action_value)
return q
class ActorNetwork(keras.Model):
def __init__(self,n_actions=1, name='actor',chkpt_dir='ddpg'):
super(ActorNetwork, self).__init__()
self.n_actions = n_actions
self.model_name = name
self.checkpoint_dir = chkpt_dir
self.checkpoint_file = os.path.join(self.checkpoint_dir,
self.model_name+'_ddpg.h5')
self.lstm1 = LSTM(128,return_sequences=True,unroll=False)
self.lstm2 = LSTM(128)
self.mu = Dense(self.n_actions, activation='tanh')
def call(self, state):
state = np.reshape(state,(32,1,43))
prob = self.lstm1(state)
prob = self.lstm2(prob)
mu = self.mu(prob)
return mu
class Agent:
def __init__(self, alpha=0.001, beta=0.002, input_dims=[33], env=None,
gamma=0.99, n_actions=1, max_size=3000000, tau=0.005,
fc1=128, fc2=128, fc3=64, batch_size=32):
self.gamma = gamma
self.tau = tau
self.memory = ReplayBuffer(max_size, input_dims, n_actions)
self.batch_size = batch_size
self.n_actions = n_actions
self.max_action = 1
self.min_action = -1
self.actor = ActorNetwork(n_actions=n_actions, name='actor')
self.critic = CriticNetwork(n_actions=n_actions, name='critic')
self.target_actor = ActorNetwork(n_actions=n_actions, name='target_actor')
self.target_critic = CriticNetwork(n_actions=n_actions, name='target_critic')
self.actor.compile(optimizer=Adam(learning_rate=alpha))
self.critic.compile(optimizer=Adam(learning_rate=beta))
self.target_actor.compile(optimizer=Adam(learning_rate=alpha))
self.target_critic.compile(optimizer=Adam(learning_rate=beta))
self.update_network_parameters(tau=1)
def update_network_parameters(self, tau=None):
if tau is None:
tau = self.tau
weights = []
targets = self.target_actor.weights
for i, weight in enumerate(self.actor.weights):
weights.append(weight * tau + targets[i]*(1-tau))
self.target_actor.set_weights(weights)
weights = []
targets = self.target_critic.weights
for i, weight in enumerate(self.critic.weights):
weights.append(weight * tau + targets[i]*(1-tau))
self.target_critic.set_weights(weights)
def remember(self, state, action, reward, new_state, done):
self.memory.store_transition(state, action, reward, new_state, done)
def save_models(self):
print('... saving models ...')
self.actor.save_weights(self.actor.checkpoint_file)
self.target_actor.save_weights(self.target_actor.checkpoint_file)
self.critic.save_weights(self.critic.checkpoint_file)
self.target_critic.save_weights(self.target_critic.checkpoint_file)
def load_models(self):
print('... loading models ...')
self.actor.load_weights(self.actor.checkpoint_file)
self.target_actor.load_weights(self.target_actor.checkpoint_file)
self.critic.load_weights(self.critic.checkpoint_file)
self.target_critic.load_weights(self.target_critic.checkpoint_file)
def choose_action(self, observation, evaluate=False):
state = tf.convert_to_tensor([observation], dtype=tf.float32)
actions = self.actor(state)
if not evaluate:
actions += tf.random.normal(shape=[self.n_actions],
mean=0.0, stddev=0.05)
actions = tf.clip_by_value(actions, self.min_action, self.max_action)
return actions
def learn(self):
if self.memory.mem_cntr < self.batch_size:
return
state, action, reward, new_state, done = \
self.memory.sample_buffer(self.batch_size)
states = tf.convert_to_tensor(state, dtype=tf.float32)
states_ = tf.convert_to_tensor(new_state, dtype=tf.float32)
rewards = tf.convert_to_tensor(reward, dtype=tf.float32)
actions = tf.convert_to_tensor(action, dtype=tf.float32)
with tf.GradientTape() as tape:
target_actions = self.target_actor(states_)
critic_value_ = tf.squeeze(self.target_critic(
states_, target_actions), 1)
critic_value = tf.squeeze(self.critic(states, actions), 1)
target = reward + self.gamma*critic_value_*(1-done)
critic_loss = keras.losses.MSE(target, critic_value)
critic_network_gradient = tape.gradient(critic_loss, self.critic.trainable_variables)
self.critic.optimizer.apply_gradients(zip(critic_network_gradient, self.critic.trainable_variables))
with tf.GradientTape() as tape:
new_policy_actions = self.actor(states)
actor_loss = -self.critic(states, new_policy_actions)
actor_loss = tf.math.reduce_mean(actor_loss)
actor_network_gradient = tape.gradient(actor_loss, self.actor.trainable_variables)
self.actor.optimizer.apply_gradients(zip(actor_network_gradient, self.actor.trainable_variables))
self.update_network_parameters()
Any advice on how to record and apply the gradients for LSTMs would be appreciated.
Actually, the problem is that you are using Numpy operations to define the computation logic (hence, the gradients cannot flow from that point onward); however, this should be done entirely using TF Ops or Keras layers. Specifically, in call method of CriticNetwork as well as ActorNetwork, instead of using np.reshape you should either use tf.expand_dims (if you only want to add a new axis of size one to the tensor), or tf.reshape, or tf.keras.layers.Reshape layer (for more involved reshaping). For example, using tf.expand_dims:
class CriticNetwork(keras.Model):
#...
def call(self, state, action):
action_value = tf.concat([state, action], axis=1)
action_value = tf.expand_dims(action_value, axis=1)
or using tf.reshape:
class CriticNetwork(keras.Model):
#...
def call(self, state, action):
action_value = tf.concat([state, action], axis=1)
action_value = tf.reshape(action_value, (-1, 1, 44)) # Use `-1` for the first axis so that any batch size would be supported
or using Reshape layer:
class CriticNetwork(keras.Model):
def __init__(self, n_actions,name='critic', chkpt_dir='ddpg'):
# ...
self.reshape = tf.keras.layers.Reshape((1, 44))
def call(self, state, action):
action_value = tf.concat([state, action], axis=1)
action_value = self.reshape(action_value)
And you need to do the same thing for the ActorNetwork.
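For completeness, a sketch of the same change applied to ActorNetwork (only the call method changes; the rest of the class stays as in the question, and -1 is used for the batch axis):
class ActorNetwork(keras.Model):
    # ... __init__ as in the question ...
    def call(self, state):
        # replace np.reshape with a TF op so gradients can flow through
        state = tf.reshape(state, (-1, 1, 43))
        prob = self.lstm1(state)
        prob = self.lstm2(prob)
        mu = self.mu(prob)
        return mu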
Side note: I am not sure if this is just a demo code or not, but note that using RNN layers on a sequence of length one (i.e. having only one timestep) may not prove to be very beneficial.
I'm working on a NeuralNetwork class using the back-propagation algorithm from Artificial Intelligence: A Modern Approach. I stepped through a run of the train function with a debugger and everything seems to be working properly, but the error isn't going down when I run it. Can anybody spot what I'm doing wrong?
import math, random
import numpy as np
CLOSE = 0.2
class Perceptron:
'''A single perceptron using sigmoid activation'''
def __init__(self, inputs):
'''Set up the perceptron with the given number of inputs'''
self.weights = np.empty(inputs)
for i in range(inputs):
self.weights[i] = random.random()
self.bias = random.random()
def getOutput(self, inputs):
'''Calculates, stores, and returns the output'''
assert len(inputs) == len(self.weights)
inj = np.sum(inputs * self.weights) + self.bias # Sum inputs
g = 1.0 / (1.0 + math.exp(-inj)) # Sigmoid activation
self.aj = g
return g
def adjust(self, delta):
'''Adjusts the weights and bias'''
self.bias += self.aj * delta
for i in range(len(self.weights)):
self.weights[i] += self.aj * delta
class Layer:
'''Creates a single layer in a single feed-forward neural network'''
def __init__(self, width, inputSize, prevLayer=False):
'''Create a new layer'''
self.prevLayer = prevLayer
self.nextLayer = False
self.nodes = []
for _ in range(width):
self.nodes.append(Perceptron(inputSize))
def setNext(self, nextLayer):
'''Set the next layer in the network'''
self.nextLayer = nextLayer
def getOutput(self, inputs):
'''Get an array of the output of the network'''
output = np.empty(len(self.nodes))
for i in range(len(self.nodes)):
output[i] = self.nodes[i].getOutput(inputs)
if isinstance(self.nextLayer, Layer):
# If this isn't the output layer, recurse to the next layer down
return self.nextLayer.getOutput(output)
return output
def backProp(self, deltas):
'''Back-propagate error through all the layers'''
if isinstance(self.prevLayer, Layer):
# If this isn't the input layer, calculate deltas for the next layer up
crossprod = np.empty((len(deltas), len(self.nodes[0].weights)))
for j in range(len(deltas)):
crossprod[j][:] = self.nodes[j].weights * deltas[j]
nextDeltas = crossprod.sum(axis=0)
for i in range(len(nextDeltas)):
# multiply by g'
nextDeltas[i] *= self.prevLayer.nodes[i].aj * (1.0 - self.prevLayer.nodes[i].aj)
# Recurse upwards
self.prevLayer.backProp(nextDeltas)
# Adjust the weights of neurons in this layer
for i in range(len(self.nodes)):
self.nodes[i].adjust(deltas[i])
class NeuralNetwork:
def __init__(self, layerSizes=np.array(0), filename=""):
'''Creates a neural network with the given layer sizes.'''
prev = False
inputLayer = False
for i in range(len(layerSizes)-1):
inputSize = layerSizes[i]
outputSize = layerSizes[i+1]
layer = Layer(outputSize, inputSize, prev)
if isinstance(prev, Layer):
prev.setNext(layer)
if not isinstance(inputLayer, Layer):
inputLayer = layer
prev = layer
self.inputLayer = inputLayer
self.outputLayer = prev
def train(self, inputs, outputs):
'''Train the network on the given sample'''
pred = self.inputLayer.getOutput(inputs)
# calculate error of output layer
error = outputs - pred
deltas = error * pred * (1.0 - pred)
# back-propagate the error
self.outputLayer.backProp(deltas)
# return error
return np.max(abs(error))
def test(self, inputs, outputs):
'''Test the network on the given sample'''
pred = self.inputLayer.getOutput(inputs)
correct = True
for i in range(len(pred)):
if abs(pred[i] - outputs[i]) > CLOSE:
correct = False
return correct
You can try any of these measures (a small sketch applying two of them follows the list):
Shuffle your data well.
Use a smaller learning rate like 0.001
Use ReLU instead of sigmoid.
Initialize your biases as 1 and not random.
Use softmax at output layer if you are using ReLU.
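A minimal sketch of how the learning-rate and bias-initialization suggestions could look in the Perceptron class from the question (the lr argument is an addition for illustration; getOutput and the rest of the code stay unchanged):
import random
import numpy as np

class Perceptron:
    '''A single perceptron using sigmoid activation'''
    def __init__(self, inputs, lr=0.001):
        '''Set up the perceptron with the given number of inputs'''
        self.weights = np.empty(inputs)
        for i in range(inputs):
            self.weights[i] = random.random()
        self.bias = 1.0   # suggestion: initialize the bias as 1, not randomly
        self.lr = lr      # suggestion: use a small learning rate such as 0.001

    def adjust(self, delta):
        '''Adjusts the weights and bias, scaled by the learning rate'''
        self.bias += self.lr * self.aj * delta
        for i in range(len(self.weights)):
            self.weights[i] += self.lr * self.aj * delta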