I am trying to implement early stopping in the TF Object Detection API. I used this code.
Here is my EarlyStoppingHook (it is essentially just a copy of the above code):
class EarlyStoppingHook(session_run_hook.SessionRunHook):
"""Hook that requests stop at a specified step."""
def __init__(self, monitor='val_loss', min_delta=0, patience=0,
mode='auto'):
"""
"""
self.monitor = monitor
self.patience = patience
self.min_delta = min_delta
self.wait = 0
self.max_wait = 0
self.ind = 0
if mode not in ['auto', 'min', 'max']:
logging.warning('EarlyStopping mode %s is unknown, '
'fallback to auto mode.', mode, RuntimeWarning)
mode = 'auto'
if mode == 'min':
self.monitor_op = np.less
elif mode == 'max':
self.monitor_op = np.greater
else:
if 'acc' in self.monitor:
self.monitor_op = np.greater
else:
self.monitor_op = np.less
if self.monitor_op == np.greater:
self.min_delta *= 1
else:
self.min_delta *= -1
self.best = np.Inf if self.monitor_op == np.less else -np.Inf
def begin(self):
# Convert names to tensors if given
graph = tf.get_default_graph()
self.monitor = graph.as_graph_element(self.monitor)
if isinstance(self.monitor, tf.Operation):
self.monitor = self.monitor.outputs[0]
def before_run(self, run_context): # pylint: disable=unused-argument
return session_run_hook.SessionRunArgs(self.monitor)
def after_run(self, run_context, run_values):
self.ind += 1
current = run_values.results
if self.ind % 200 == 0:
print(f"loss value (inside hook!!! ): {current}, best: {self.best}, wait: {self.wait}, max_wait: {self.max_wait}")
if self.monitor_op(current - self.min_delta, self.best):
self.best = current
if self.max_wait < self.wait:
self.max_wait = self.wait
self.wait = 0
else:
self.wait += 1
if self.wait >= self.patience:
run_context.request_stop()
And I use the class like this:
early_stopping_hook = EarlyStoppingHook(
monitor='total_loss',
patience=2000)
train_spec = tf.estimator.TrainSpec(
input_fn=train_input_fn, max_steps=train_steps, hooks=[early_stopping_hook])
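For context, in the OD API this train_spec is typically passed to train_and_evaluate together with an eval spec; a sketch, where estimator and eval_input_fn are assumed to come from the usual model/input setup:
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=None)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)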
What I don't understand is what total_loss is. Is it the validation loss or the training loss? Also, I don't understand where these losses ('total_loss', 'loss_1', 'loss_2') are defined.
So, here is what worked for me
from matplotlib import pyplot as plt
import numpy as np
import collections
import os
import tensorflow as tf
_EVENT_FILE_GLOB_PATTERN = 'events.out.tfevents.*'
def _summaries(eval_dir):
"""Yields `tensorflow.Event` protos from event files in the eval dir.
Args:
eval_dir: Directory containing summary files with eval metrics.
Yields:
`tensorflow.Event` object read from the event files.
"""
if tf.compat.v1.gfile.Exists(eval_dir):
for event_file in tf.compat.v1.gfile.Glob(
os.path.join(eval_dir, _EVENT_FILE_GLOB_PATTERN)):
for event in tf.compat.v1.train.summary_iterator(event_file):
yield event
def read_eval_metrics(eval_dir):
"""Helper to read eval metrics from eval summary files.
Args:
eval_dir: Directory containing summary files with eval metrics.
Returns:
A `dict` with global steps mapping to `dict` of metric names and values.
"""
eval_metrics_dict = collections.defaultdict(dict)
for event in _summaries(eval_dir):
if not event.HasField('summary'):
continue
metrics = {}
for value in event.summary.value:
if value.HasField('simple_value'):
metrics[value.tag] = value.simple_value
if metrics:
eval_metrics_dict[event.step].update(metrics)
return collections.OrderedDict(
sorted(eval_metrics_dict.items(), key=lambda t: t[0]))
met_dict_2 = read_eval_metrics('/content/gdrive2/My Drive/models/retinanet/eval_0')
x = []
y = []
for k, v in met_dict_2.items():
x.append(k)
y.append(v['Loss/total_loss'])
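For reference, matplotlib is imported above but not used yet; a minimal sketch to plot the collected loss curve from the x/y lists built above:
plt.plot(x, y)
plt.xlabel('global step')
plt.ylabel('Loss/total_loss')
plt.show()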
The read_eval_metrics function returns a dictionary whose keys are iteration numbers and whose values are the different metrics and losses computed at that evaluation step. But you can also use this function for train event files; you just need to change the path.
Example of one key-value pair from the returned dictionary:
(4988, {'DetectionBoxes_Precision/Precision#.50IOU': 0.12053315341472626,
'DetectionBoxes_Precision/mAP': 0.060865387320518494,
'DetectionBoxes_Precision/mAP (large)': 0.07213596999645233,
'DetectionBoxes_Precision/mAP (medium)': 0.062120337039232254,
'DetectionBoxes_Precision/mAP (small)': 0.02642354555428028,
'DetectionBoxes_Precision/mAP#.50IOU': 0.11469704657793045,
'DetectionBoxes_Precision/mAP#.75IOU': 0.06001879647374153,
'DetectionBoxes_Recall/AR#1': 0.13470394909381866,
'DetectionBoxes_Recall/AR#10': 0.20102562010288239,
'DetectionBoxes_Recall/AR#100': 0.2040158212184906,
'DetectionBoxes_Recall/AR#100 (large)': 0.2639017701148987,
'DetectionBoxes_Recall/AR#100 (medium)': 0.20173722505569458,
'DetectionBoxes_Recall/AR#100 (small)': 0.10018187761306763,
'Loss/classification_loss': 1.0127471685409546,
'Loss/localization_loss': 0.3542810380458832,
'Loss/regularization_loss': 0.708609938621521,
'Loss/total_loss': 2.0756208896636963,
'learning_rate': 0.0006235376931726933,
'loss': 2.0756208896636963})
So I ended up setting the monitor argument to 'Loss/total_loss' instead of 'total_loss' in EarlyStoppingHook.
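For completeness, the hook construction from the question then only changes the monitor name:
early_stopping_hook = EarlyStoppingHook(
    monitor='Loss/total_loss',
    patience=2000)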
Related
I have a simple piece of code below for testing an RNN cell by feeding the previous output as the current input.
I want to do this after training.
When I call
tf.compat.v1.nn.raw_rnn(cell, rnn_loop)
after training I want it to use the weights that were achieved in training using another
tf.compat.v1.nn.raw_rnn(cell, rnn_loop)
Will the weights be the same, or will the weights for raw_rnn during testing be initialized from zero? I will not run sess.run(tf.initialize_all_variables). I want to know if I can safely call
tf.compat.v1.nn.raw_rnn(cell, rnn_loop) twice and still be using the same weights.
I also want to know how to inspect the trained weight values, so that I can confirm this.
The shape of rnn_outputs_tensor is (None, 64, 128), but I am expecting (10, 64, 128) because there are 10 steps (HORIZON), right?
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
state_size = 128
BATCH_SIZE = 64
HORIZON = 10
cell = tf.compat.v1.nn.rnn_cell.BasicRNNCell(state_size)
class RnnLoop:
def __init__(self, initial_state, cell):
self.initial_state = initial_state
self.cell = cell
def __call__(self, time, cell_output, cell_state, loop_state):
emit_output = cell_output # == None for time == 0
if cell_output is None: # time == 0
initial_input = tf.fill([BATCH_SIZE, state_size], 0.0)
next_input = initial_input
next_cell_state = self.initial_state
else:
next_input = cell_output
next_cell_state = cell_state
elements_finished = (time >= HORIZON)
next_loop_state = None
return elements_finished, next_input, next_cell_state, emit_output, next_loop_state
initial_state_tensor = tf.zeros((BATCH_SIZE,state_size),dtype=tf.float32)
rnn_loop = RnnLoop(initial_state=initial_state_tensor, cell=cell)
rnn_outputs_tensor_array, _, _ = tf.compat.v1.nn.raw_rnn(cell, rnn_loop)
rnn_outputs_tensor = rnn_outputs_tensor_array.stack()
print(rnn_outputs_tensor.shape)
var = [v for v in tf.compat.v1.trainable_variables()]
print(var)
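As for inspecting the trained values: inside the session used for training (so the variables still hold the trained weights), a minimal sketch would be the following, where sess is assumed to be that training session:
for v in tf.compat.v1.trainable_variables():
    value = sess.run(v)                       # numpy array with the variable's current (trained) weights
    print(v.name, value.shape, value.mean())  # inspect shapes/statistics, or print `value` itself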
I wanted to create an implementation of a swarm optimization algorithm for deep neural networks, namely the Fireworks Algorithm.
I was finally able to create a TensorFlow optimizer class that implements it, using THIS repository.
But even after the implementation, my accuracy seems to be around ~11% during training (please refer to THIS Colab notebook for the code).
To test out the code, SEE MY IMPLEMENTATION ON A COLAB NOTEBOOK.
How can I resolve this issue?
Also, my main optimizer code is:
# https://github.com/cilatpku/firework-algorithm/blob/master/fwa/BBFWA.py
class Firework(optimizer.Optimizer):
def __init__(self,
# params for prob
evaluator = None,
dim = 2,
upper_bound = 100,
lower_bound = -100,
max_iter = 10000,
max_eval = 20000,
# params for method
sp_size = 200,
init_amp = 200,
name="Firework", use_locking=False, **kwargs):
super(Firework, self).__init__(use_locking, name)
## Parameters
# params of method
self.sp_size = sp_size # total spark size
self.init_amp = init_amp # initial dynamic amplitude
# load params
self.evaluator = evaluator
self.dim = dim
self.upper_bound = upper_bound
self.lower_bound = lower_bound
self.max_iter = max_iter
self.max_eval = max_eval
## States
# private init states
self._num_iter = 0
self._num_eval = 0
self._dyn_amp = init_amp
# public states
self.best_idv = None # best individual found
self.best_fit = None # best fitness found
self.trace = [] # trace of best individual in each generation
## Fireworks
self.fireworks = np.random.uniform(self.lower_bound, self.upper_bound, [1, self.dim])
self.fireworks = self.fireworks.tolist()
self.fits = self.evaluator(self.fireworks)
## Tensor versions of the constructor arguments, created in _prepare().
self.dim_t = None
self.upper_bound_t = None
self.lower_bound_t = None
self.max_iter_t = None
self.max_eval_t = None
self.sp_size_t = None
self.init_amp_t = None
self.fireworks_t = None
self.fits_t = None
def _create_slots(self, var_list):
"""For each model variable, create the optimizer variable associated with it.
TensorFlow calls these optimizer variables "slots"."""
# Create slots for the first and second moments.
for v in var_list:
self._zeros_slot(v, "fireworks", self._name)
for v in var_list:
self._zeros_slot(v, "fits", self._name)
def _prepare(self):
# self.evaluator_t = ops.convert_to_tensor(self.evaluator, name="evaloator")
self.dim_t = ops.convert_to_tensor(self.dim, name="dimention")
self.upper_bound_t = ops.convert_to_tensor(self.upper_bound, name="upper_bound")
self.lower_bound_t = ops.convert_to_tensor(self.lower_bound, name="lower_bound")
self.max_iter_t = ops.convert_to_tensor(self.max_iter, name="max_iterations")
self.max_eval_t = ops.convert_to_tensor(self.max_eval, name="max_eval")
self.sp_size_t = ops.convert_to_tensor(self.sp_size, name="sp_size")
self.init_amp_t = ops.convert_to_tensor(self.init_amp, name="init_amp")
self.fireworks_t = ops.convert_to_tensor(self.fireworks, name="fireworks")
self.fits_t = ops.convert_to_tensor(self.fits, name="fits")
print(self.fireworks_t)
def _resource_apply_dense(self, grad, var):
evaluator = self.evaluator
dim_t = math_ops.cast(self.dim_t, var.dtype.base_dtype)
upper_bound_t = math_ops.cast(self.upper_bound_t, var.dtype.base_dtype)
lower_bound_t = math_ops.cast(self.lower_bound_t, var.dtype.base_dtype)
max_iter_t = math_ops.cast(self.max_iter_t, var.dtype.base_dtype)
max_eval_t = math_ops.cast(self.max_eval_t, var.dtype.base_dtype)
sp_size_t = math_ops.cast(self.sp_size_t, var.dtype.base_dtype)
init_amp_t = math_ops.cast(self.init_amp_t, var.dtype.base_dtype)
fits = self.get_slot(grad, "fits")
fireworks = self.get_slot(var, "fireworks")
fireworks_update, fits_update = self.iter(self.fireworks, self.fits)
self.fireworks = fireworks_update
self.fits = fits_update
fireworks_update_t = math_ops.cast(fireworks_update, var.dtype.base_dtype)
fits_update_t = math_ops.cast(fits_update, var.dtype.base_dtype)
self.fireworks_t = fireworks_update_t
self.fits_t = fits_update_t
print("fireworks_update : ", fireworks_update)
print("fits_update : ", fits_update)
#Create an op that groups multiple operations
#When this op finishes, all ops in input have finished
return control_flow_ops.group(*[fireworks_update_t, fits_update_t])
## Helper functions
def iter(self, fireworks, fits):
print("...\n")
e_sparks, e_fits = self._explode(fireworks, fits)
n_fireworks, n_fits = self._select(fireworks, fits, e_sparks, e_fits)
# update states
if n_fits[0] < fits[0]:
self._dyn_amp *= 1.2
else:
self._dyn_amp *= 0.9
self._num_iter += 1
self._num_eval += len(e_sparks)
self.best_idv = n_fireworks[0]
self.best_fit = n_fits[0]
self.trace.append([n_fireworks[0], n_fits[0], self._dyn_amp])
fireworks = n_fireworks
fits = n_fits
return fireworks, fits
def _explode(self, fireworks, fits):
bias = np.random.uniform(-self._dyn_amp, self._dyn_amp, [self.sp_size, self.dim])
rand_samples = np.random.uniform(self.lower_bound, self.upper_bound, [self.sp_size, self.dim])
e_sparks = fireworks + bias
in_bound = (e_sparks > self.lower_bound) * (e_sparks < self.upper_bound)
e_sparks = in_bound * e_sparks + (1 - in_bound) * rand_samples
e_sparks = e_sparks.tolist()
e_fits = self.evaluator(e_sparks)
return e_sparks, e_fits
def _select(self, fireworks, fits, e_sparks, e_fits):
idvs = fireworks + e_sparks
fits = fits + e_fits
idx = np.argmin(fits)
return [idvs[idx]], [fits[idx]]
##################################################
##################################################
def get_config(self):
base_config = super().get_config()
return {
**base_config,
"learning_rate": self._serialize_hyperparameter("learning_rate"),
"decay": self._serialize_hyperparameter("decay"),
"momentum": self._serialize_hyperparameter("momentum"),
}
def _apply_dense(self, grad, var):
raise NotImplementedError("Dense gradient updates are not supported.")
def _apply_sparse(self, grad, var):
raise NotImplementedError("Sparse gradient updates are not supported.")
def _resource_apply_sparse(self, grad, var):
raise NotImplementedError("Sparse Resource gradient updates are not supported.")
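As a side note, the underlying bare-bones FWA loop (mirroring the _explode/_select/iter helpers above) can be sanity-checked outside TensorFlow on a toy objective. A minimal sketch, using the sphere function as an assumed evaluator:
import numpy as np

def sphere(xs):  # toy evaluator: list of points -> list of fitness values
    return [float(np.sum(np.square(x))) for x in xs]

dim, lower, upper, sp_size, amp = 2, -100.0, 100.0, 200, 200.0
fireworks = np.random.uniform(lower, upper, [1, dim]).tolist()
fits = sphere(fireworks)

for _ in range(300):
    bias = np.random.uniform(-amp, amp, [sp_size, dim])
    rand = np.random.uniform(lower, upper, [sp_size, dim])
    sparks = np.array(fireworks) + bias                         # explode around the current firework
    in_bound = (sparks > lower) * (sparks < upper)
    sparks = (in_bound * sparks + (1 - in_bound) * rand).tolist()
    spark_fits = sphere(sparks)
    idvs, all_fits = fireworks + sparks, fits + spark_fits      # pool firework and sparks
    best = int(np.argmin(all_fits))                             # select the best individual
    amp = amp * 1.2 if all_fits[best] < fits[0] else amp * 0.9  # dynamic amplitude update
    fireworks, fits = [idvs[best]], [all_fits[best]]

print(fireworks, fits)  # should approach the optimum at [0, 0]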
I am trying to use TensorFlow Addons' MultiOptimizer for discriminative layer training (different learning rates for different layers), but it does not work with the ReduceLROnPlateau callback.
from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(patience=5, min_delta=1e-4, min_lr=1e-7, verbose=0)
with tpu_strategy.scope():
roberta_model = create_model(512)
optimizers = [
AdamWeightDecay(learning_rate=0.00001, weight_decay_rate=0.00001),
AdamWeightDecay(learning_rate=0.0001, weight_decay_rate=0.0001)
]
# specifying the optimizers and layers in which it will operate
optimizers_and_layers = [
(optimizers[0], roberta_model.layers[:3]),
(optimizers[1], roberta_model.layers[3:])
]
# Using Multi Optimizer from Tensorflow Addons
opt = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
roberta_model.compile(optimizer=opt,
loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1), metrics=["accuracy"])
history=roberta_model.fit(train, epochs=50, validation_data=val, callbacks=[reduce_lr])
At the end of the first epoch it produces this error:
AttributeError: 'MultiOptimizer' object has no attribute 'lr'
It works fine without the ReduceLROnPlateau callback.
I tried several things to solve this; the last attempt was to modify the callback, i.e. writing my own reduce-learning-rate-on-plateau callback. But this is far beyond my coding skills. I have commented where I made a couple of changes to the original callback.
I tried like this:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.python.platform import tf_logging as logging
from keras.utils import io_utils  # used below for print_msg, as in the original callback
class My_ReduceLROnPlateau(tf.keras.callbacks.Callback):
def __init__(self,
monitor='val_loss',
factor=0.1,
patience=10,
verbose=0,
mode='auto',
min_delta=1e-4,
cooldown=0,
min_lr=0,
**kwargs):
super(My_ReduceLROnPlateau, self).__init__()
self.monitor = monitor
if factor >= 1.0:
raise ValueError(
f'ReduceLROnPlateau does not support a factor >= 1.0. Got {factor}')
if 'epsilon' in kwargs:
min_delta = kwargs.pop('epsilon')
logging.warning('`epsilon` argument is deprecated and '
'will be removed, use `min_delta` instead.')
self.factor = factor
self.min_lr = min_lr
self.min_delta = min_delta
self.patience = patience
self.verbose = verbose
self.cooldown = cooldown
self.cooldown_counter = 0 # Cooldown counter.
self.wait = 0
self.best = 0
self.mode = mode
self.monitor_op = None
self._reset()
def _reset(self):
"""Resets wait counter and cooldown counter.
"""
if self.mode not in ['auto', 'min', 'max']:
logging.warning('Learning rate reduction mode %s is unknown, '
'fallback to auto mode.', self.mode)
self.mode = 'auto'
if (self.mode == 'min' or
(self.mode == 'auto' and 'acc' not in self.monitor)):
self.monitor_op = lambda a, b: np.less(a, b - self.min_delta)
self.best = np.Inf
else:
self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta)
self.best = -np.Inf
self.cooldown_counter = 0
self.wait = 0
def on_train_begin(self, logs=None):
self._reset()
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
logs['lr'] = backend.get_value(self.model.optimizer[1].lr)
current = logs.get(self.monitor)
if current is None:
logging.warning('Learning rate reduction is conditioned on metric `%s` '
'which is not available. Available metrics are: %s',
self.monitor, ','.join(list(logs.keys())))
else:
if self.in_cooldown():
self.cooldown_counter -= 1
self.wait = 0
if self.monitor_op(current, self.best):
self.best = current
self.wait = 0
elif not self.in_cooldown():
self.wait += 1
if self.wait >= self.patience:
# Here below i tried to subscript the self.model.optimizer
#, guessing that each pointed to one of the optimzers.
# And using the same code as in the original ReduceLROnPlateau to
# update the optimizers.
old_lr1 = backend.get_value(self.model.optimizer[1].lr)
old_lr0 = backend.get_value(self.model.optimizer[0].lr)
if old_lr1 > np.float32(self.min_lr):
new_lr1 = old_lr1 * self.factor
new_lr1 = max(new_lr1, self.min_lr)
backend.set_value(self.model.optimizer[1].lr, new_lr1)
new_lr0 = old_lr0 * self.factor
new_lr0 = max(new_lr0, self.min_lr)
backend.set_value(self.model.optimizer[0].lr, new_lr0)
if self.verbose > 0:
io_utils.print_msg(
f'\nEpoch {epoch +1}: '
f'ReduceLROnPlateau reducing learning rate to {new_lr0} and {new_lr1}.')
self.cooldown_counter = self.cooldown
self.wait = 0
def in_cooldown(self):
return self.cooldown_counter > 0
Then I created the callback:
reduce_lr = My_ReduceLROnPlateau(patience=5, min_delta=1e-4, min_lr=1e-7, verbose=0)
and started to train again. At the end of the first epoch I got the following error:
TypeError: 'MultiOptimizer' object is not subscriptable
i.e. you can't do self.model.optimizer[1] or self.model.optimizer[0].
So my question is how to solve this, i.e. use discriminative layer training together with ReduceLROnPlateau, either via some other method or by modifying my attempt at a new callback class.
Here is a link to the original ReduceLROnPlateau callback, i.e. without the few changes I made above in my custom callback.
A solution might be possible using this note:
Note: Currently, tfa.optimizers.MultiOptimizer does not support callbacks that modify optimizers. However, you can instantiate optimizer layer pairs with tf.keras.optimizers.schedules.LearningRateSchedule instead of a static learning rate.
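A sketch of that alternative, assuming AdamWeightDecay (from the question's setup) accepts a LearningRateSchedule like the standard Keras optimizers do; note this gives a step-based decay rather than a metric-driven one:
lr_schedule_low = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-5, decay_steps=1000, decay_rate=0.9)
lr_schedule_high = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4, decay_steps=1000, decay_rate=0.9)
optimizers = [
    AdamWeightDecay(learning_rate=lr_schedule_low, weight_decay_rate=0.00001),
    AdamWeightDecay(learning_rate=lr_schedule_high, weight_decay_rate=0.0001)
]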
Looking at the code of tfa.optimizers.MultiOptimizer (in the method create_optimizer_spec), it seems that the optimizers can be accessed via
self.model.optimizer.optimizer_specs[0]["optimizer"] and self.model.optimizer.optimizer_specs[1]["optimizer"] to change the learning rate (which is why self.model.optimizer[1] raises an error).
With that change, your custom callback seems to work.
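For illustration, the learning-rate accesses in the custom callback above could then look roughly like this (a sketch that assumes the two-optimizer setup from the question and keeps the rest of on_epoch_end unchanged):
# inside My_ReduceLROnPlateau.on_epoch_end, replacing the subscripted accesses:
opt0 = self.model.optimizer.optimizer_specs[0]["optimizer"]
opt1 = self.model.optimizer.optimizer_specs[1]["optimizer"]
logs['lr'] = backend.get_value(opt1.lr)
# ... and later, inside the `if self.wait >= self.patience:` branch:
old_lr0 = backend.get_value(opt0.lr)
old_lr1 = backend.get_value(opt1.lr)
if old_lr1 > np.float32(self.min_lr):
    backend.set_value(opt0.lr, max(old_lr0 * self.factor, self.min_lr))
    backend.set_value(opt1.lr, max(old_lr1 * self.factor, self.min_lr))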
Hello, my DDPG model implemented in TF 2 gets horrible results on every OpenAI Gym env with continuous actions, and I need help finding the problem. I run this on my GPU. On the Pendulum env I get -1200/-1000 rewards every episode. This code is from a course I took on Udemy; it was written in TF 1.x and I rewrote it in TF 2, but the TF 1.x implementation had better results. Here is the code:
import tensorflow as tf
import numpy as np
import os
import gym
import random
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model
class ReplayBuffer():
def __init__(self, obs_dim, act_dim, size):
self.obs1_buf = np.zeros([size, obs_dim, ], dtype=np.float32)
self.obs2_buf = np.zeros([size, obs_dim, ], dtype=np.float32)
self.act_buf = np.zeros([size, act_dim], dtype=np.float32)
self.reward_buf = np.zeros(size, dtype=np.float32)
self.done_buf = np.zeros(size, dtype=np.float32)
self.current = 0
self.count = 0
self.size = size
def add_experience(self, state, action, reward, next_state, done):
self.obs1_buf[self.current] = state
self.act_buf[self.current] = action
self.reward_buf[self.current] = reward
self.obs2_buf[self.current] = next_state
self.done_buf[self.current] = done
self.current = (self.current + 1) % self.size
self.count = min(self.count+1, self.size)
def sample_batch(self, batch_size=32):
idx = np.random.randint(0, self.count, size=batch_size)
return dict(s=self.obs1_buf[idx],
s2=self.obs2_buf[idx],
a=self.act_buf[idx],
r=self.reward_buf[idx],
d=self.done_buf[idx])
class DDPG():
def __init__(self, env, num_states, num_actions, action_max):
self.env = env
self.num_states = num_states
self.num_actions = num_actions
self.action_max = action_max
self.gamma = 0.99
self.decay = 0.995
self.mu_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
self.q_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
def mu_model(hidden_layers):
inp = Input(shape=(self.num_states, ))
x = inp
for layers in hidden_layers[:-1]:
x = Dense(layers, activation='relu')(x)
x = Dense(hidden_layers[-1], activation='tanh')(x)
mu_model = Model(inp, x)
return mu_model
self.mu_model = mu_model([300, self.num_actions])
def q_model(inp_state, inp_act, hidden_layers):
inp_state = Input(shape=(inp_state, ))
inp_mu = Input(shape=(inp_act, ))
inp = concatenate([inp_state, inp_mu])
x = inp
for layers in hidden_layers[:-1]:
x = Dense(layers, activation='relu')(x)
x = Dense(hidden_layers[-1], activation='linear')(x)
q_model = Model([inp_state, inp_mu], x)
return q_model
self.q_model = q_model(self.num_states, self.num_actions, hidden_layers=[300, 1])
self.q_target_model = q_model(self.num_states, self.num_actions, hidden_layers=[300, 1])
#Eself.mu_do_minimize = tf.function(self.mu_minimize, input_signature=[
#tf.TensorSpec(shape=(None, self.num_states), dtype=tf.float32, name='state')])
self.q_do_minimize = tf.function(self.q_minimize, input_signature=[
tf.TensorSpec(shape=(None, self.num_states), dtype=tf.float32, name='state'),
tf.TensorSpec(shape=(None, self.num_actions), dtype=tf.float32, name='action'),
tf.TensorSpec(shape=(None, self.num_states), dtype=tf.float32, name='next_state'),
tf.TensorSpec(shape=(None, ), dtype=tf.float32, name='reward'),
tf.TensorSpec(shape=(None, ), dtype=tf.float32, name='done_flags')])
#tf.function
def train_mu(self, state):
with tf.GradientTape() as tape:
actions = self.mu_model(state, training=True)
critic_value = self.q_model([state, actions], training=True)
# Used `-value` as we want to maximize the value given
# by the critic for our actions
actor_loss = -tf.math.reduce_mean(critic_value)
actor_grad = tape.gradient(actor_loss, self.mu_model.trainable_variables)
self.mu_optimizer.apply_gradients(
zip(actor_grad, self.mu_model.trainable_variables)
)
def q_minimize(self, state, action, next_state, reward, done):
def calc_loss():
q_targ = reward + self.gamma * (1 - done) * self.q_target_model([next_state, action])
q = self.q_model([state, action])
cost = tf.reduce_mean((q - q_targ)**2)
return cost
self.q_optimizer.minimize(calc_loss, self.q_model.trainable_variables)
def train(self, state, action, reward, done, next_state):
state = np.atleast_2d(state)
next_state = np.atleast_2d(next_state)
action = np.atleast_2d(action)
reward = np.atleast_1d(reward)
done = np.atleast_1d(done)
self.update_target_net()
self.train_mu(state)
self.q_do_minimize(state, action, next_state, reward, done)
def update_target_net(self):
mu_weights = np.array(self.mu_model.get_weights())
q_weights = np.array(self.q_model.get_weights())
#print(mu_weights.shape)
#print(q_weights.shape)
mu_target_weights = np.array(self.mu_target_model.get_weights())
q_target_weights = np.array(self.q_target_model.get_weights())
self.q_target_model.set_weights(self.decay * q_weights + (1 - self.decay) * q_target_weights)
def get_action(self, states, noise=None):
if noise is None: noise = self.ACT_NOISE_SCALE
if len(states.shape) == 1: states = states.reshape(1,-1)
action = self.mu_model.predict_on_batch(states)[0]
if noise != 0:
action += noise * np.random.randn(self.num_actions)
action = np.clip(action, -self.action_max, self.action_max)
return action
def play_one(env, agent, replay_buffer, gamma=0.99, noise=0.1, max_episode_len=1000, start_steps=10000, num_train_ep=100, batch_size=100, test_ep_agent=25):
returns = []
num_steps = 0
for ep in range(num_train_ep):
s, ep_return, ep_len, d = env.reset(), 0, 0, False
while not (d or ep_len == max_episode_len):
env.render()
if num_steps > start_steps:
a = agent.get_action(s, noise)
else:
a = env.action_space.sample()
num_steps+=1
if num_steps == start_steps:
print("USING AGENT ACTIONS NOW")
s2, r, d, _ = env.step(a)
ep_return+=r
ep_len+=1
#print(s.shape)
d = False if ep_len == max_episode_len else d
replay_buffer.add_experience(s, a, r, s2, d)
s = s2
for _ in range(ep_len):
batch = replay_buffer.sample_batch()
state, next_state, action, reward, done = batch['s'], batch['s2'], batch['a'], batch['r'], batch['d']
loss = agent.train(state, action, reward, done, next_state)
returns.append(ep_return)
print('Iter:', ep, 'Rewards:', ep_return)
return returns
if __name__ == '__main__':
env = gym.make('Pendulum-v0')
obs_dim1 = env.observation_space.shape[0]
act_dim1 = env.action_space.shape[0]
action_max1 = env.action_space.high[0]
actor = DDPG(env, obs_dim1, act_dim1, action_max1)
replay_buffer = ReplayBuffer(obs_dim1, act_dim1, size=100000)
returns = play_one(env, actor, replay_buffer)
Thank you in advance!
The first thing that comes to mind is the learning rate: 0.01 is too high, even for Pendulum. Try a lower learning rate (e.g. 1e-3 for the actor and 5e-3 for the critic).
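Concretely, that would just mean changing the two optimizers created in DDPG.__init__ (the exact values are only suggestions):
self.mu_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)  # actor
self.q_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-3)   # critic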
Also a couple of things look off in your code:
There is no target network for the actor. Why is that? IIRC DDPG has target networks for both the actor and the critic.
Usually it is better to initialize the main and target networks with the same parameters. You can do that with target_model.set_weights(model.get_weights()); see the sketch after these points.
In the function play_one, the training steps are done after playing a whole episode. This is probably OK, but there is no need for it: because Pendulum is not real time, you don't need your code to be fast, so you can train while playing.
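For illustration, a minimal sketch of what the actor/critic target networks and their updates could look like, reusing the attribute names from the DDPG class in the question (mu_target_model is an assumed new attribute):
# in DDPG.__init__, after the main and target models are built:
self.mu_target_model = mu_model([300, self.num_actions])
self.mu_target_model.set_weights(self.mu_model.get_weights())  # start identical to the main actor
self.q_target_model.set_weights(self.q_model.get_weights())    # start identical to the main critic

# update_target_net would then softly update both targets
# (target = decay * target + (1 - decay) * main is the usual convention):
def update_target_net(self):
    for main, target in [(self.mu_model, self.mu_target_model),
                         (self.q_model, self.q_target_model)]:
        target.set_weights([self.decay * tw + (1 - self.decay) * mw
                            for mw, tw in zip(main.get_weights(), target.get_weights())])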
If you want to take a look, I implemented DDPG in TensorFlow 2 a while back. It solves Pendulum in about 80 episodes.
From the code below, the ROC curve and its AUC can be computed, and gnuplot is used to display the plot output. But the output is set to 'onscreen', so the plot just appears for a few seconds and then closes. I want to direct the plot output to a file in a folder instead. How do I modify the code?
Thanks for the assistance.
The program was taken from plotroc.py https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/#roc_curve_for_binary_svm
#!/usr/bin/env python
#This tool allow users to plot SVM-prob ROC curve from data
from svmutil import *
from sys import argv, platform
from os import path, popen
from random import randrange , seed
from operator import itemgetter
from time import sleep
#search path for gnuplot executable
#be careful on using windows LONG filename, surround it with double quotes.
#and leading 'r' to make it raw string, otherwise, repeat \\.
gnuplot_exe_list = [r'"C:\Users\Lakshmi\Documents\gnuplot\bin\pgnuplot.exe"', "/usr/bin/gnuplot","/usr/local/bin/gnuplot"]
def get_pos_deci(train_y, train_x, test_y, test_x, param):
model = svm_train(train_y, train_x, param)
#predict and grab decision value, assure deci>0 for label+,
#the positive descision value = val[0]*labels[0]
labels = model.get_labels()
py, evals, deci = svm_predict(test_y, test_x, model)
deci = [labels[0]*val[0] for val in deci]
return deci,model
#get_cv_deci(prob_y[], prob_x[], svm_parameter param, nr_fold)
#input raw attributes, labels, param, cv_fold in decision value building
#output list of decision value, remember to seed(0)
def get_cv_deci(prob_y, prob_x, param, nr_fold):
if nr_fold == 1 or nr_fold==0:
deci,model = get_pos_deci(prob_y, prob_x, prob_y, prob_x, param)
return deci
deci, model = [], []
prob_l = len(prob_y)
#random permutation by swapping i and j instance
for i in range(prob_l):
j = randrange(i,prob_l)
prob_x[i], prob_x[j] = prob_x[j], prob_x[i]
prob_y[i], prob_y[j] = prob_y[j], prob_y[i]
#cross training : folding
for i in range(nr_fold):
begin = i * prob_l // nr_fold
end = (i + 1) * prob_l // nr_fold
train_x = prob_x[:begin] + prob_x[end:]
train_y = prob_y[:begin] + prob_y[end:]
test_x = prob_x[begin:end]
test_y = prob_y[begin:end]
subdeci, submdel = get_pos_deci(train_y, train_x, test_y, test_x, param)
deci += subdeci
return deci
#a simple gnuplot object
class gnuplot:
def __init__(self, term='onscreen'):
# -persists leave plot window on screen after gnuplot terminates
if platform == 'win32':
cmdline = gnuplot_exe
self.__dict__['screen_term'] = 'windows'
else:
cmdline = gnuplot_exe + ' -persist'
self.__dict__['screen_term'] = 'x11'
self.__dict__['iface'] = popen(cmdline,'w')
self.set_term(term)
def set_term(self, term):
if term=='onscreen':
self.writeln("set term %s" % self.screen_term)
else:
#term must be either x.ps or x.png
if term.find('.ps')>0:
self.writeln("set term postscript eps color 22")
elif term.find('.png')>0:
self.writeln("set term png")
else:
print("You must set term to either *.ps or *.png")
raise SystemExit
self.output = term
def writeln(self,cmdline):
self.iface.write(cmdline + '\n')
def __setattr__(self, attr, val):
if type(val) == str:
self.writeln('set %s \"%s\"' % (attr, val))
else:
print("Unsupport format:", attr, val)
raise SystemExit
#terminate gnuplot
def __del__(self):
self.writeln("quit")
self.iface.flush()
self.iface.close()
def __repr__(self):
return "<gnuplot instance: output=%s>" % term
#data is a list of [x,y]
def plotline(self, data):
self.writeln("plot \"-\" notitle with lines linewidth 1")
for i in range(len(data)):
self.writeln("%f %f" % (data[i][0], data[i][1]))
sleep(0) #delay
self.writeln("e")
if platform=='win32':
sleep(3)
#processing argv and set some global variables
def proc_argv(argv = argv):
#print("Usage: %s " % argv[0])
#The command line : ./plotroc.py [-v cv_fold | -T testing_file] [libsvm-options] training_file
train_file = argv[-1]
test_file = None
fold = 5
options = []
i = 1
while i < len(argv)-1:
if argv[i] == '-T':
test_file = argv[i+1]
i += 1
elif argv[i] == '-v':
fold = int(argv[i+1])
i += 1
else :
options += [argv[i]]
i += 1
return ' '.join(options), fold, train_file, test_file
def plot_roc(deci, label, output, title):
#count of postive and negative labels
db = []
pos, neg = 0, 0
for i in range(len(label)):
if label[i]>0:
pos+=1
else:
neg+=1
db.append([deci[i], label[i]])
#sorting by decision value
db = sorted(db, key=itemgetter(0), reverse=True)
#calculate ROC
xy_arr = []
tp, fp = 0., 0. #assure float division
for i in range(len(db)):
if db[i][1]>0: #positive
tp+=1
else:
fp+=1
xy_arr.append([fp/neg,tp/pos])
#area under curve
aoc = 0.
prev_x = 0
for x,y in xy_arr:
if x != prev_x:
aoc += (x - prev_x) * y
prev_x = x
#begin gnuplot
if title == None:
title = output
#also write to file
g = gnuplot(output)
g.xlabel = "False Positive Rate"
g.ylabel = "True Positive Rate"
g.title = "ROC curve of %s (AUC = %.4f)" % (title,aoc)
g.plotline(xy_arr)
#display on screen
s = gnuplot('onscreen')
s.xlabel = "False Positive Rate"
s.ylabel = "True Positive Rate"
s.title = "ROC curve of %s (AUC = %.4f)" % (title,aoc)
s.plotline(xy_arr)
def check_gnuplot_exe():
global gnuplot_exe
gnuplot_exe = None
for g in gnuplot_exe_list:
if path.exists(g.replace('"','')):
gnuplot_exe=g
break
if gnuplot_exe == None:
print("You must add correct path of 'gnuplot' into gnuplot_exe_list")
raise SystemExit
def main():
check_gnuplot_exe()
if len(argv) <= 1:
print("Usage: %s [-v cv_fold | -T testing_file] [libsvm-options] training_file" % argv[0])
raise SystemExit
param,fold,train_file,test_file = proc_argv()
output_file = path.split(train_file)[1] + '-roc.png'
#read data
train_y, train_x = svm_read_problem(train_file)
if set(train_y) != set([1,-1]):
print("ROC is only applicable to binary classes with labels 1, -1")
raise SystemExit
#get decision value, with positive = label+
seed(0) #reset random seed
if test_file: #go with test_file
output_title = "%s on %s" % (path.split(test_file)[1], path.split(train_file)[1])
test_y, test_x = svm_read_problem(test_file)
if set(test_y) != set([1,-1]):
print("ROC is only applicable to binary classes with labels 1, -1")
raise SystemExit
deci,model = get_pos_deci(train_y, train_x, test_y, test_x, param)
plot_roc(deci, test_y, output_file, output_title)
else: #single file -> CV
output_title = path.split(train_file)[1]
deci = get_cv_deci(train_y, train_x, param, fold)
plot_roc(deci, train_y, output_file, output_title)
if __name__ == '__main__':
    main()
I downloaded the code and the heart_scale data file, and after running
python script.py -v 5 -c 10 heart_scale
I found the file heart_scale-roc.png in the folder in which I executed the script.
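If you want the PNG written into a specific folder rather than the current working directory, one option is to build output_file with a directory prefix in main(). A sketch, where the folder name is only an example and makedirs has to be added to the existing from os import ... line:
# in main(), replacing the original output_file line:
out_dir = 'roc_plots'                    # example folder, adjust as needed
if not path.exists(out_dir):
    makedirs(out_dir)                    # requires: from os import path, popen, makedirs
output_file = path.join(out_dir, path.split(train_file)[1] + '-roc.png')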