I want to save a model that contains two models inside it.
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")
Full code:
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def call(self, features, training=False):
        # Place each encoder on a separate GPU (if available).
        # TF will fall back on available devices if there are fewer than 2 GPUs.
        with tf.device("/gpu:0"):
            # Get the embeddings for the captions.
            caption_embeddings = self.text_encoder(features["caption"], training=training)
        with tf.device("/gpu:1"):
            # Get the embeddings for the images.
            image_embeddings = self.image_encoder(features["image"], training=training)
        return caption_embeddings, image_embeddings

    def compute_loss(self, caption_embeddings, image_embeddings):
        paddings = tf.constant([[0, 0], [62, 61], [0, 0]])
        image_em = tf.pad(image_embeddings, paddings, mode="CONSTANT", constant_values=0)
        print("image em is", image_em.shape)
        logits = (
            tf.matmul(caption_embeddings, image_em, transpose_b=True)
            / self.temperature
        )
        print("logits shape is", logits.shape)
        images_similarity = tf.matmul(image_em, image_em, transpose_b=True)
        print("images similarity shape is", images_similarity.shape)
        captions_similarity = tf.matmul(
            caption_embeddings, caption_embeddings, transpose_b=True
        )
        print("captions similarity shape is", captions_similarity.shape)
        targets = keras.activations.softmax(
            (captions_similarity + images_similarity) / (2 * self.temperature)
        )
        print("targets shape is", targets.shape)
        # Compute the loss for the captions.
        captions_loss = keras.losses.kl_divergence(
            y_true=targets, y_pred=logits,  # from_logits=True
        )
        print("caption loss is", captions_loss.shape)
        # Compute the loss for the images.
        images_loss = keras.losses.kl_divergence(
            y_true=tf.transpose(targets), y_pred=tf.transpose(logits),
        )
        print("image loss is", images_loss.shape)
        # Return the mean of the loss over the batch.
        # return (captions_loss + images_loss) / 2
        return tf.matmul(captions_loss, images_loss) / 2
        # (tf.reduce_sum(captions_loss, axis=0) + images_loss) / 2
        # return (captions_loss[0, :] + images_loss + captions_loss[1, :]) / 2

    def train_step(self, features):
        with tf.GradientTape() as tape:
            # Forward pass
            caption_embeddings, image_embeddings = self(features, training=True)
            loss = self.compute_loss(caption_embeddings, image_embeddings)
        # Backward pass
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        # Monitor loss
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, features):
        caption_embeddings, image_embeddings = self(features, training=False)
        loss = self.compute_loss(caption_embeddings, image_embeddings)
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}
When I save the dual encoder, I get this error: NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.
However, I can save text_encoder and image_encoder individually.
Can I load these models as shown below, and will it work?
class DualEncoder(keras.Model):
    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super(DualEncoder, self).__init__(**kwargs)
        self.text_encoder = tf.keras.models.load_model('text_encoder')
        self.image_encoder = tf.keras.models.load_model('image_encoder')
        self.temperature = temperature
        self.loss_tracker = keras.metrics.Mean(name="loss")
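For reference, a minimal sketch of the save_weights / load_weights route that the error message points to (the file path and the example_batch name are assumptions, not part of the original code):
# Sketch only: instead of HDF5, save the subclassed model's weights.
dual_encoder.save_weights("dual_encoder_weights")   # assumed variable name and path

# Later: rebuild the model with freshly constructed encoders, build its variables
# by calling it once on a batch, then restore the weights.
restored = DualEncoder(text_encoder, image_encoder)
restored(example_batch, training=False)              # example_batch: an assumed features dict
restored.load_weights("dual_encoder_weights")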
Related
I was trying to run a model on multiple GPUs with TensorFlow's MirroredStrategy.
I used a custom loss function like this:
def mae(y_true, y_pred):
    # y_true, y_pred shape = (B, L)
    loss = tf.keras.metrics.mean_absolute_error(y_true, y_pred)
    # loss shape = (B,)
    return loss

class custom_loss(tf.keras.losses.Loss):
    def __init__(self, BATCH_SIZE=1, **kwargs):
        super(custom_loss, self).__init__(**kwargs)
        self.BATCH_SIZE = BATCH_SIZE

    def call(self, y_true, y_pred):
        # y_true, y_pred shape = (B, L, 1)
        loss = mae(tf.squeeze(y_true, [-1]), tf.squeeze(y_pred, [-1]))
        loss = tf.reduce_sum(loss) * (1. / self.BATCH_SIZE)
        return loss

    def get_config(self):
        config = super().get_config().copy()
        config.update({'BATCH_SIZE': self.BATCH_SIZE})
        return config
With MirroredStrategy I train the model like this:
def get_compiled_model(args, BATCH_SIZE):
    # Make a simple 2-layer densely-connected neural network.
    model = MyCustomModel(input_shape=(args.L, 1))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=args.learning_rate, beta_1=args.beta_1,
            beta_2=args.beta_2, epsilon=args.epsilon),
        loss=custom_loss(BATCH_SIZE))
    return model

def run_training(args, steps, model=None):
    # Create a MirroredStrategy.
    strategy = tf.distribute.MirroredStrategy()
    BATCH_SIZE_PER_REPLICA = args.batch_size
    BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
    # Open a strategy scope and create/restore the model.
    with strategy.scope():
        if isinstance(model, type(None)):
            model = get_compiled_model(args, BATCH_SIZE)
    train_dataset, test_dataset, valid_dataset = get_dataset(args, BATCH_SIZE)
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(
                args.checkpoints_dir,
                steps + "_epoch-{epoch:03d}_loss-{loss:.4f}"),
            save_best_only=True)
    ]
    model.fit(
        train_dataset,
        epochs=args.epochs,
        callbacks=callbacks,
        validation_data=valid_dataset,
        steps_per_epoch=(None if args.steps_per_epoch == -1 else args.steps_per_epoch),
        validation_steps=(None if args.steps_per_epoch == -1 else args.steps_per_epoch),
        verbose=1)
But if I run this on 4 GPUs, my loss value becomes 1/4 of the loss I get when running on a single GPU. Does it fail to sum up the losses from the different GPUs?
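For comparison, the custom-training-loop pattern from the tf.distribute tutorial divides the summed per-example loss by the global batch size, so that adding up the per-replica losses reproduces the single-GPU mean; a minimal sketch reusing the mae helper above (everything else here is an assumption, shown only to illustrate the scaling):
# Sketch only: per-replica loss scaled by the GLOBAL batch size.
def compute_loss(y_true, y_pred, global_batch_size):
    # `mae` is the per-sample helper defined above; shapes (B, L, 1) -> (B,)
    per_example_loss = mae(tf.squeeze(y_true, [-1]), tf.squeeze(y_pred, [-1]))
    return tf.nn.compute_average_loss(per_example_loss,
                                      global_batch_size=global_batch_size)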
I'm following the TensorFlow Federated tutorial custom_federated_algorithms_2. Everything works when I just copy and run the tutorial's code, so I wanted to change the code myself to become more familiar with TFF. Then a bug appeared.
My runtime environment:
python: 3.8.12
tensorflow: 2.5.0
tensorflow_federated: 0.19.0
The code below is the original model-definition code from the tutorial:
MODEL_SPEC = collections.OrderedDict(
    weights=tf.TensorSpec(shape=[784, 10], dtype=tf.float32),
    bias=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE)  # <weights=float32[784,10],bias=float32[10]>

BATCH_SPEC = collections.OrderedDict(
    x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
    y=tf.TensorSpec(shape=[None], dtype=tf.int32))
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE)  # <x=float32[?,784],y=int32[?]>
And I changed MODEL_TYPE to:
MODEL_SPEC = collections.OrderedDict(
    fc1=tf.TensorSpec(shape=[784, 256], dtype=tf.float32),
    b1=tf.TensorSpec(shape=[256], dtype=tf.float32),
    fc2=tf.TensorSpec(shape=[256, 128], dtype=tf.float32),
    b2=tf.TensorSpec(shape=[128], dtype=tf.float32),
    fc3=tf.TensorSpec(shape=[128, 10], dtype=tf.float32),
    b3=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
Since the model structure changed, the forward pass needs to change too:
# original
@tf.function
def forward_pass(model, batch):
    predicted_y = tf.nn.softmax(
        tf.matmul(batch['x'], model['weights']) + model['bias'])
    return -tf.reduce_mean(
        tf.reduce_sum(
            tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    return forward_pass(model, batch)

# new
@tf.function
def forward(model, batch):
    logits = batch["x"] @ model["fc1"] + model["b1"]
    logits = logits @ model["fc2"] + model["b2"]
    logits = logits @ model["fc3"] + model["b3"]
    logits = tf.nn.softmax(logits, axis=-1)
    one_hot_y = tf.one_hot(batch["y"], depth=10)
    return -tf.reduce_mean(tf.reduce_sum(tf.math.log(logits) * one_hot_y, axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    return forward(model, batch)
I didn't change the batch_train() code.
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    # Define a group of model variables and set them to `initial_model`. Must
    # be defined outside the @tf.function.
    model_vars = collections.OrderedDict([
        (name, tf.Variable(name=name, initial_value=value))
        for name, value in initial_model.items()
    ])
    optimizer = tf.keras.optimizers.SGD(learning_rate)

    @tf.function
    def _train_on_batch(model_vars, batch):
        # Perform one step of gradient descent using loss from `batch_loss`.
        with tf.GradientTape() as tape:
            loss = forward_pass(model_vars, batch)
        grads = tape.gradient(loss, model_vars)
        optimizer.apply_gradients(
            zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
        return model_vars

    return _train_on_batch(model_vars, batch)
And it works fine so far. But when implementing the local_train() section, errors appeared even though I was just using the original code.
initial_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]),
    b1=tf.zeros([256]),
    fc2=tf.zeros([256, 128]),
    b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]),
    b3=tf.zeros([10]))

LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):

    @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
    def _insert_learning_rate_to_sequence(dataset, learning_rate):
        return dataset.map(lambda x: (x, learning_rate))

    batches_with_learning_rate = _insert_learning_rate_to_sequence(all_batches, learning_rate)

    # Mapping function to apply to each batch.
    @tff.federated_computation(MODEL_TYPE, batches_with_learning_rate.type_signature.element)
    def batch_fn(model, batch_with_lr):
        batch, lr = batch_with_lr
        return batch_train(model, batch, lr)

    return tff.sequence_reduce(batches_with_learning_rate, initial_model, batch_fn)

locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef
One issue I noticed on a quick skim (did not sift through all of the pasted code) is this line:
return batch_train(model, batch, lr)
To invoke a tff.tf_computation from within the context of a tff.federated_computation, you need to use the tff.federated_map operator. So it could look like
return tff.federated_map(batch_train, (model, batch, lr))
Finally, I found that I had made a low-level mistake. 🤦‍♂️
I had been coding in my own Jupyter notebook but forgot to add the following key code from the beginning of the tutorial:
executor_factory = tff.framework.local_executor_factory(
    support_sequence_ops=True)
execution_context = tff.framework.ExecutionContext(
    executor_fn=executor_factory)
tff.framework.set_default_context(execution_context)
My dataset is a network traffic dataset on which we do binary classification. The number of features is 25, and I have normalized the dataset.
My ELM model:
class ELM:
    def __init__(self, num_input_nodes, num_hidden_units, num_out_units, activation='sigmoid',
                 loss='bce', beta_init=None, w_init=None, bias_init=None):
        self._num_input_nodes = num_input_nodes
        self._num_hidden_units = num_hidden_units
        self._num_out_units = num_out_units
        self._activation = getActivation(activation)
        self._loss = getLoss(loss)
        if isinstance(beta_init, np.ndarray):
            self._beta = beta_init
        else:
            self._beta = np.random.uniform(-1., 1., size=(self._num_hidden_units, self._num_out_units))
        if isinstance(w_init, np.ndarray):
            self._w = w_init
        else:
            self._w = np.random.uniform(-1, 1, size=(self._num_input_nodes, self._num_hidden_units))
        if isinstance(bias_init, np.ndarray):
            self._bias = bias_init
        else:
            self._bias = np.zeros(shape=(self._num_hidden_units,))
        print('Bias shape:', self._bias.shape)
        print('W shape:', self._w.shape)
        print('Beta shape:', self._beta.shape)

    def fit(self, X, Y, display_time=False):
        H = self._activation(X.dot(self._w) + self._bias)
        # Moore–Penrose pseudo-inverse
        if display_time:
            start = time.time()
        H_pinv = np.linalg.pinv(H)
        if display_time:
            stop = time.time()
            print(f'Train time: {stop-start}')
        self._beta = H_pinv.dot(Y)
        # print('Fit Beta shape:', self._beta.shape)

    def __call__(self, X):
        H = self._activation(X.dot(self._w) + self._bias)
        return H.dot(self._beta)

    def evaluate(self, X, Y):
        pred = self(X)
        # Loss (based on model setting)
        loss = self._loss(Y, pred)
        # Accuracy
        acc = np.sum(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1)) / len(Y)
        # Unweighted Average Recall
        # TODO
        return loss, acc

# Network settings
num_classes = 1
num_hidden_layers = 512
input_length = 25
When I try to run this on my dataset, the accuracy comes out to zero. I have taken sigmoid as the activation function and binary cross-entropy as the loss for my binary classification task.
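For what it's worth, with a single output unit, np.argmax over the last axis does not measure binary accuracy; one common alternative is to threshold the sigmoid output at 0.5. A minimal sketch (the elm, X_test, and Y_test names are placeholders, not part of the original code):
# Sketch only: thresholded accuracy for a single sigmoid output.
# Y_test is assumed to hold 0/1 labels of shape (N,) or (N, 1).
pred = elm(X_test)                                     # shape (N, 1), values in (0, 1)
pred_labels = (pred.reshape(-1) >= 0.5).astype(np.int32)
acc = np.mean(pred_labels == np.asarray(Y_test).reshape(-1))
print("thresholded accuracy:", acc)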
I am using a Seq2Seq project from Google that uses an encoder/decoder. Here are the two encoder and decoder classes:
# ENCODER
class EncoderNetwork(tf.keras.Model):
    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop('_parents', None)
        return d

    def __init__(self, input_vocab_size, embedding_dims, rnn_units):
        super().__init__()
        self.encoder_embedding = tf.keras.layers.Embedding(input_dim=input_vocab_size,
                                                           output_dim=embedding_dims)
        self.encoder_rnnlayer = tf.keras.layers.LSTM(rnn_units, return_sequences=True,
                                                     return_state=True)
        encoder_embedding = self.encoder_embedding
        encoder_rnnlayer = self.encoder_rnnlayer

# DECODER
class DecoderNetwork(tf.keras.Model):
    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop('_parents', None)
        return d

    def __init__(self, output_vocab_size, embedding_dims, rnn_units):
        super().__init__()
        self.decoder_embedding = tf.keras.layers.Embedding(input_dim=output_vocab_size,
                                                           output_dim=embedding_dims)
        self.dense_layer = tf.keras.layers.Dense(output_vocab_size)
        self.decoder_rnncell = tf.keras.layers.LSTMCell(rnn_units)
        # Sampler
        self.sampler = tfa.seq2seq.sampler.TrainingSampler()
        # Create attention mechanism with memory = None
        self.attention_mechanism = self.build_attention_mechanism(dense_units, None, BATCH_SIZE * [Tx])
        self.rnn_cell = self.build_rnn_cell(BATCH_SIZE)
        self.decoder = tfa.seq2seq.BasicDecoder(self.rnn_cell, sampler=self.sampler,
                                                output_layer=self.dense_layer)

    def build_attention_mechanism(self, units, memory, memory_sequence_length):
        return tfa.seq2seq.LuongAttention(units, memory=memory,
                                          memory_sequence_length=memory_sequence_length)
        # return tfa.seq2seq.BahdanauAttention(units, memory=memory, memory_sequence_length=memory_sequence_length)

    # wrap the decoder RNN cell
    def build_rnn_cell(self, batch_size):
        rnn_cell = tfa.seq2seq.AttentionWrapper(self.decoder_rnncell, self.attention_mechanism,
                                                attention_layer_size=dense_units)
        return rnn_cell

    def build_decoder_initial_state(self, batch_size, encoder_state, Dtype):
        decoder_initial_state = self.rnn_cell.get_initial_state(batch_size=batch_size,
                                                                dtype=Dtype)
        decoder_initial_state = decoder_initial_state.clone(cell_state=encoder_state)
        return decoder_initial_state
I create instances of EncoderNetwork and DecoderNetwork with my arguments and use the loss_function and train_step defined below to train my model:
def loss_function(y_pred, y):
    # shape of y: [batch_size, Ty]
    # shape of y_pred: [batch_size, Ty, output_vocab_size]
    sparsecategoricalcrossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    loss = sparsecategoricalcrossentropy(y_true=y, y_pred=y_pred)
    mask = tf.logical_not(tf.math.equal(y, 0))  # output 0 for y=0, else output 1
    mask = tf.cast(mask, dtype=loss.dtype)
    loss = mask * loss
    loss = tf.reduce_mean(loss)
    return loss

def train_step(input_batch, output_batch, encoder_initial_cell_state):
    # initialize loss = 0
    loss = 0
    with tf.GradientTape() as tape:
        encoder_emb_inp = encoderNetwork.encoder_embedding(input_batch)
        a, a_tx, c_tx = encoderNetwork.encoder_rnnlayer(
            encoder_emb_inp, initial_state=encoder_initial_cell_state)
        # [last step activations, last memory_state] of encoder passed as input to decoder network
        # Prepare correct decoder input & output sequence data
        decoder_input = output_batch[:, :-1]   # ignore <end>
        # compare logits with the time-shifted (+1) version of decoder_input
        decoder_output = output_batch[:, 1:]   # ignore <start>
        # Decoder embeddings
        decoder_emb_inp = decoderNetwork.decoder_embedding(decoder_input)
        # Set up decoder memory from encoder output and zero state for AttentionWrapperState
        decoderNetwork.attention_mechanism.setup_memory(a)
        decoder_initial_state = decoderNetwork.build_decoder_initial_state(
            BATCH_SIZE, encoder_state=[a_tx, c_tx], Dtype=tf.float32)
        # BasicDecoderOutput
        outputs, _, _ = decoderNetwork.decoder(
            decoder_emb_inp, initial_state=decoder_initial_state,
            sequence_length=BATCH_SIZE * [Ty - 1])
        logits = outputs.rnn_output
        # Calculate loss
        loss = loss_function(logits, decoder_output)
    # Returns the list of all layer variables / weights.
    variables = encoderNetwork.trainable_variables + decoderNetwork.trainable_variables
    # differentiate loss w.r.t. variables
    gradients = tape.gradient(loss, variables)
    # grads_and_vars – list of (gradient, variable) pairs
    grads_and_vars = zip(gradients, variables)
    optimizer.apply_gradients(grads_and_vars)
    return loss
The training does not use the fit() method; it runs like this:
epochs = 20
for i in range(1, epochs + 1):
    encoder_initial_cell_state = initialize_initial_state()
    total_loss = 0.0
    for (batch, (input_batch, output_batch)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(input_batch, output_batch, encoder_initial_cell_state)
        total_loss += batch_loss
        if (batch + 1) % 5 == 0:
            print("total loss: {} epoch {} batch {}".format(batch_loss.numpy(), i, batch + 1))
The results are fine and the custom predict function works perfectly, but how can I save the model? I tried pickle and Keras save() but neither works. Any idea?
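Since both networks are subclassed models trained with a custom loop, one route worth sketching is a tf.train.Checkpoint over the two networks and the optimizer (the checkpoint directory name is an assumption, and the objects must be rebuilt with the same constructor arguments before restoring):
# Sketch only: checkpoint the subclassed encoder/decoder instead of pickling them.
checkpoint = tf.train.Checkpoint(encoder=encoderNetwork,
                                 decoder=decoderNetwork,
                                 optimizer=optimizer)
manager = tf.train.CheckpointManager(checkpoint, "./seq2seq_ckpts", max_to_keep=3)  # assumed dir
manager.save()

# Later: rebuild EncoderNetwork / DecoderNetwork with the same arguments, then restore.
checkpoint.restore(manager.latest_checkpoint)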
I use the TensorFlow Keras API and try to add a custom scalar to TensorBoard, but nothing except the loss is displayed.
Here is the code for the model:
embedding_in = Embedding(
    input_dim=vocab_size + 1 + 1,
    output_dim=dim,
    mask_zero=True,
)
embedding_out = Embedding(
    input_dim=vocab_size + 1 + 1,
    output_dim=dim,
    mask_zero=True,
)

input_a = Input((None,))
input_b = Input((None,))
input_c = Input((None, None))

emb_target = embedding_in(input_a)
emb_context = embedding_out(input_b)
emb_negatives = embedding_out(input_c)

emb_gru = GRU(dim, return_sequences=True)(emb_target)

num_negatives = tf.shape(input_c)[-1]

def make_logits(tensors):
    emb_gru, emb_context, emb_negatives = tensors
    true_logits = tf.reduce_sum(tf.multiply(emb_gru, emb_context), axis=2)
    true_logits = tf.expand_dims(true_logits, -1)
    sampled_logits = tf.squeeze(
        tf.matmul(emb_negatives, tf.expand_dims(emb_gru, axis=2),
                  transpose_b=True), axis=3)
    true_logits = true_logits * 0
    sampled_logits = sampled_logits * 0
    logits = K.concatenate([true_logits, sampled_logits], axis=-1)
    return logits

logits = Lambda(make_logits)([emb_gru, emb_context, emb_negatives])
mean = tf.reduce_mean(logits)
tf.summary.scalar('mean_logits', mean)
model = keras.models.Model(inputs=[input_a, input_b, input_c], outputs=[logits])
In particular, I want to see the evolution of the mean_logits scalar after each batch.
I create and compile the model like this:
model = build_model(dim, vocab_size)
model.compile(loss='binary_crossentropy', optimizer='sgd')
callbacks = [
    keras.callbacks.TensorBoard(logdir, histogram_freq=1)
]
I feed the model using the tf Dataset API:
iterator = dataset.make_initializable_iterator()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    sess.run(tf.tables_initializer())
    model.fit(iterator, steps_per_epoch=100,
              callbacks=callbacks,
              validation_data=iterator,
              validation_steps=1)
However, I don't get any mean_logits plot in TensorBoard, and it does not appear in the graph view either.
How can I track the mean_logits scalar in TensorBoard after each batch?
I use tf 1.12 and keras 2.1.
I have also faced the same issue. It seems that the Keras TensorBoard callback won't write all existing summaries automatically, but only those registered as metrics (which appear in the logs dict). Updating the logs object is a nice trick, as it allows the values to be used in other callbacks; see Early stopping and learning rate schedule based on custom metric in Keras. I can see several possibilities:
1. Using Lambda callback
Something like this:
eval_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: logs.update(
        {'mean_logits': K.eval(mean)}
    ))
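As a sketch of how it would be wired up (reusing the callbacks list from the question; callbacks run in list order, so the logs are updated before the TensorBoard callback writes them):
callbacks = [
    eval_callback,                                          # updates logs first
    keras.callbacks.TensorBoard(logdir, histogram_freq=1),  # then writes logs
]
model.fit(iterator, steps_per_epoch=100, callbacks=callbacks)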
2. Custom TensorBoard callback
You can also subclass the callback and define your own logic. For instance, my workaround for learning rate monitoring:
class Tensorboard(Callback):
    def __init__(self,
                 log_dir='./log',
                 write_graph=True):
        self.write_graph = write_graph
        self.log_dir = log_dir

    def set_model(self, model):
        self.model = model
        self.sess = K.get_session()
        if self.write_graph:
            self.writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)

    def on_epoch_end(self, epoch, logs={}):
        logs.update({'learning_rate': float(K.get_value(self.model.optimizer.lr))})
        self._write_logs(logs, epoch)

    def _write_logs(self, logs, index):
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = tf.Summary()
            summary_value = summary.value.add()
            if isinstance(value, np.ndarray):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = value
            summary_value.tag = name
            self.writer.add_summary(summary, index)
        self.writer.flush()

    def on_train_end(self, _):
        self.writer.close()
Here I just add 'learning_rate' to the logs explicitly, but this approach can be made much more flexible and powerful.
3. Metrics trick
Here is another interesting workaround. What you need to do is pass a custom metric function to the model's compile() call which returns the aggregated summary tensor. The idea is to make Keras pass your aggregated summary operation to every session.run call and return its result as a metric:
x_entropy_t = K.sum(p_t * K.log(K.epsilon() + p_t), axis=-1, keepdims=True)
full_policy_loss_t = -res_t + X_ENTROPY_BETA * x_entropy_t
tf.summary.scalar("loss_entropy", K.sum(x_entropy_t))
tf.summary.scalar("loss_policy", K.sum(-res_t))
tf.summary.scalar("loss_full", K.sum(full_policy_loss_t))

summary_writer = tf.summary.FileWriter("logs/" + args.name)

def summary(y_true, y_pred):
    return tf.summary.merge_all()

value_policy_model.compile(optimizer=Adagrad(), loss=loss_dict, metrics=[summary])

l = value_policy_model.train_on_batch(x_batch, y_batch)
l_dict = dict(zip(value_policy_model.metrics_names, l))
summary_writer.add_summary(l_dict['value_summary'], global_step=iter_idx)
summary_writer.flush()