Short story: I am building an autoencoder and would like to store reconstructed images during training. I made a custom callback that writes images to the summary. The only thing that remains is to call my reconstruction layer inside callback.on_epoch_end(...). How can I get access to a named layer inside the callback and run a computation with it?
Layer definition:
decode = layers.Conv2D(1, (5, 5), name='wwae_decode', activation='sigmoid', padding='same')(conv3)
Callback definition:
class TensorBoardImage(tf.keras.callbacks.Callback):
    def __init__(self, tag, logdir):
        super().__init__()
        self.tag = tag
        self.logdir = logdir

    def on_epoch_end(self, epoch, logs=None):
        img_stack = self.validation_data[0][:3]
        # TODO: run img_stack through the 'wwae_decode' layer first
        # img_stack = self?model?get_layer('wwae_decode').evaluate(img_stack)  # ????
        single_image = merge_axis(img_stack, target_axis=2)
        summary_str = []
        single_image = (255 * single_image).astype('uint8')
        summary_str.append(tf.Summary.Value(tag=self.tag, image=make_image(single_image)))
        # multiple summaries can be appended
        writer = tf.summary.FileWriter(self.logdir)
        writer.add_summary(tf.Summary(value=summary_str), epoch)
        return
If this is the last layer in your model (i.e. the output layer), then you can simply call the predict method of the model instance inside the callback:
# ...
img_stack = self.validation_data[0][:3]
preds_img_stack = self.model.predict(img_stack)
# ...
Alternatively, you can directly compute a layer's output by defining a backend function:
from keras import backend as K
func = K.function(model.inputs + [K.learning_phase()], [model.get_layer('wwae_decode').output])
# ...
img_stack = self.validation_data[0][:3]
preds_img_stack = func([img_stack, 0])[0]
# ...
For more information, I suggest you read the relevant section of the Keras FAQ: "How can I obtain the output of an intermediate layer?"
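Putting the first option together with the callback from the question, on_epoch_end could look roughly like this (a minimal sketch; merge_axis and make_image are the question's own helpers, and for an autoencoder the model output is the reconstruction produced by 'wwae_decode'):

class TensorBoardImage(tf.keras.callbacks.Callback):
    def __init__(self, tag, logdir):
        super().__init__()
        self.tag = tag
        self.logdir = logdir

    def on_epoch_end(self, epoch, logs=None):
        img_stack = self.validation_data[0][:3]
        # run the validation images through the whole model to get the reconstructions
        preds_img_stack = self.model.predict(img_stack)
        single_image = merge_axis(preds_img_stack, target_axis=2)
        single_image = (255 * single_image).astype('uint8')
        summary_str = [tf.Summary.Value(tag=self.tag, image=make_image(single_image))]
        writer = tf.summary.FileWriter(self.logdir)
        writer.add_summary(tf.Summary(value=summary_str), epoch)
        writer.close()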
Related
I use the following model:
class DeepGraphInfomax(torch.nn.Module):
    def __init__(self, hidden_channels, pos_summary, summary):  # , encoder): # , corruption):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.pos_summary = pos_summary
        self.summary = summary
        self.weight = Parameter(torch.Tensor(hidden_channels, hidden_channels))

    def loss(self, pos_z, neg_z, summary):
        r"""Computes the mutual information maximization objective."""
        pos_loss = -torch.log(
            self.discriminate(pos_z, summary, sigmoid=True) + EPS).mean()
        neg_loss = -torch.log(1 -
                              self.discriminate(neg_z, summary, sigmoid=True) +
                              EPS).mean()
        return pos_loss + neg_loss
and run my loss function with:
def train():
    model.train()
    optimizer.zero_grad()
    # pos_z, neg_z, summary = model(data.x, data.edge_index)
    loss = model.loss(embedding_pos, embedding_neg, result)
    loss.backward(retain_graph=True)
    optimizer.step()
    return loss.item()
which results in the following error even though I used retain_graph=True:
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
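For what it's worth, this error usually means embedding_pos, embedding_neg and result came from a forward pass that ran only once, outside train(); after the first backward() call that graph is freed, so the next step fails regardless of retain_graph. A sketch that rebuilds the graph every step by recomputing the forward pass inside train() (assuming the model's forward returns pos_z, neg_z and summary, as the commented-out line suggests):

def train():
    model.train()
    optimizer.zero_grad()
    # recompute the forward pass every step so each backward() sees a fresh graph
    pos_z, neg_z, summary = model(data.x, data.edge_index)
    loss = model.loss(pos_z, neg_z, summary)
    loss.backward()  # retain_graph=True is no longer needed
    optimizer.step()
    return loss.item()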
Why is it recommended to save the state dicts and load them, instead of saving the objects themselves with dill (for example) and then just getting usable objects back immediately?
I think I've done the latter without many issues, and it saves users code.
But instead we are recommended to do something like:
def _load_model_and_optimizer_from_checkpoint(args: Namespace, training: bool = True) -> Namespace:
    """
    based on: https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
    """
    import torch
    from torch import optim
    import torch.nn as nn

    # model = Net()
    args.model = nn.Linear(in_features=1, out_features=1)  # placeholder sizes for illustration
    # optimizer = optim.SGD(args.model.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(args.model.parameters(), lr=0.001)
    # scheduler...

    checkpoint = torch.load(args.PATH)
    args.model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    args.epoch_num = checkpoint['epoch_num']
    args.loss = checkpoint['loss']

    args.model.train() if training else args.model.eval()
    return args
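For reference, the save side of that recommended pattern would look roughly like this (a sketch mirroring the keys the loader above reads; epoch_num, model, optimizer and loss are whatever the training loop tracks):

torch.save({
    'epoch_num': epoch_num,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}, args.PATH)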
For example I've saved:
def save_for_meta_learning(args: Namespace, ckpt_filename: str = 'ckpt.pt'):
    if is_lead_worker(args.rank):
        import dill
        args.logger.save_current_plots_and_stats()

        # - ckpt
        assert uutils.xor(args.training_mode == 'epochs', args.training_mode == 'iterations')
        f: nn.Module = get_model_from_ddp(args.base_model)
        # pickle vs torch.save https://discuss.pytorch.org/t/advantages-disadvantages-of-using-pickle-module-to-save-models-vs-torch-save/79016
        args_pickable: Namespace = uutils.make_args_pickable(args)

        torch.save({'training_mode': args.training_mode,  # assert uutils.xor(args.training_mode == 'epochs', args.training_mode == 'iterations')
                    'it': args.it,
                    'epoch_num': args.epoch_num,
                    'args': args_pickable,  # some versions of this might not have args!
                    'meta_learner': args.meta_learner,
                    'meta_learner_str': str(args.meta_learner),  # added later, to make it easier to check what optimizer was used
                    'f': f,
                    'f_state_dict': f.state_dict(),  # added later, to make it easier to check what optimizer was used
                    'f_str': str(f),  # added later, to make it easier to check what optimizer was used
                    # 'f_modules': f._modules,
                    # 'f_modules_str': str(f._modules),
                    'outer_opt': args.outer_opt,  # added later, to make it easier to check what optimizer was used
                    'outer_opt_state_dict': args.outer_opt.state_dict(),  # added later, to make it easier to check what optimizer was used
                    'outer_opt_str': str(args.outer_opt)  # added later, to make it easier to check what optimizer was used
                    },
                   pickle_module=dill,
                   f=args.log_root / ckpt_filename)
then loaded:
def get_model_opt_meta_learner_to_resume_checkpoint_resnets_rfs(args: Namespace,
                                                                path2ckpt: str,
                                                                filename: str,
                                                                device: Optional[torch.device] = None
                                                                ) -> tuple[nn.Module, optim.Optimizer, MetaLearner]:
    """
    Get the model, optimizer, meta_learner to resume training from checkpoint.

    Examples:
        - see: _resume_from_checkpoint_meta_learning_for_resnets_rfs_test
    """
    import uutils
    path2ckpt: Path = Path(path2ckpt).expanduser() if isinstance(path2ckpt, str) else path2ckpt.expanduser()
    ckpt: dict = torch.load(path2ckpt / filename, map_location=torch.device('cpu'))

    # args_ckpt: Namespace = ckpt['args']
    training_mode = ckpt.get('training_mode')
    if training_mode is not None:
        assert uutils.xor(training_mode == 'epochs', training_mode == 'iterations')
        if training_mode == 'epochs':
            args.epoch_num = ckpt['epoch_num']
        else:
            args.it = ckpt['it']

    # - get meta-learner
    meta_learner: MetaLearner = ckpt['meta_learner']
    # - get model
    model: nn.Module = meta_learner.base_model
    # - get outer-opt
    outer_opt_str = ckpt.get('outer_opt_str')
    if outer_opt_str is not None:
        # use the string to create the optimizer, load the state dict, etc.
        outer_opt: optim.Optimizer = get_optimizer(outer_opt_str)
        outer_opt_state_dict: dict = ckpt['outer_opt_state_dict']
        outer_opt.load_state_dict(outer_opt_state_dict)
    else:
        # this is not ideal, but since Adam keeps an exponential moving average for its adaptive learning rate,
        # hopefully this doesn't hurt the checkpoint too much
        outer_opt: optim.Optimizer = optim.Adam(model.parameters(), lr=args.outer_lr)

    # - device setup
    if device is not None:
        # if torch.cuda.is_available():
        #     meta_learner.base_model = meta_learner.base_model.cuda()
        meta_learner.base_model = meta_learner.base_model.to(device)
    return model, outer_opt, meta_learner
without issues.
Related:
Save and load model optimizer state
pytorch save and load model
Save and load a Pytorch model
save and load unserialized pytorch pretrained model
https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
Why is it not recommended to save the optimizer, model etc as pickable/dillable objs in PyTorch but instead get the state dicts and load them?
https://discuss.pytorch.org/t/why-is-it-not-recommended-to-save-the-optimizer-model-etc-as-pickable-dillable-objs-in-pytorch-but-instead-get-the-state-dicts-and-load-them/137933
I use TensorFlow for regression with the following function:
import tensorflow as tf

def ff(*args, **kwargs):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.Input(shape=[inp_train.shape[-1], ]))
    for i in range(n_layer):
        model.add(tf.keras.layers.Dense(n_unit, activation=act))
    model.add(tf.keras.layers.Dense(out_train.shape[1]))
    model.compile(optimizer=opt, loss='mae')

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
    check_point = tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

    model.fit(inp_train, out_train, epochs=n_epoch, batch_size=s_batch,
              validation_data=(inp_val, out_val), callbacks=[early_stop, check_point], verbose=0)

    best_model = tf.keras.models.load_model('best_model.h5')
    return model, best_model
As you can see, I save the best model via the check_point callback and use it later for prediction. The problem is that this way I have to save the best model to disk first and then load it back from disk. If I want to do a couple of runs in parallel, it does not work, since each run creates a file with the same name.
So, how can I assign the best model to a variable without having to save it to disk?
NOTE: I fixed a bug and this is untested.
I had to do this for myself and thought I would share:
Callback:
class SaveBestModel(tf.keras.callbacks.Callback):
    def __init__(self, save_best_metric='val_loss', this_max=False):
        super().__init__()
        self.save_best_metric = save_best_metric
        self.max = this_max
        self.best_weights = None
        if this_max:
            self.best = float('-inf')
        else:
            self.best = float('inf')

    def on_epoch_end(self, epoch, logs=None):
        metric_value = logs[self.save_best_metric]
        if self.max:
            if metric_value > self.best:
                self.best = metric_value
                self.best_weights = self.model.get_weights()
        else:
            if metric_value < self.best:
                self.best = metric_value
                self.best_weights = self.model.get_weights()
usage:
save_best_model = SaveBestModel()
model.fit(data, callbacks=[save_best_model])

# set best weights
model.set_weights(save_best_model.best_weights)
Here is a basic example of creating a callback and saving the model at the time of the callback to an external list. It has to be a list (or another type that can be modified in place through a method). The base tf.keras.callbacks.Callback class is extended with an additional argument, the list, in the callback class's __init__ method. This example shows that it works: when on_train_end is called, the callback appends the current model to the list.
import tensorflow as tf
from tensorflow.python.keras.models import Model

# define a custom callback
class MyCustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, external_list):
        self.list_obj = external_list

    def on_train_end(self, logs=None):
        self.list_obj.append(self.model)

# test the idea works
model_save_list = []
my_callback = MyCustomCallback(model_save_list)

model1 = Model()
my_callback.set_model(model1)
my_callback.on_train_end()

print(model_save_list)
Run this and you will see the internal model gets added to your list object:
[<tensorflow.python.keras.engine.training.Model object at 0x10d230b50>]
Modify your training by adding your new callback to the callbacks like so:
model.fit(inp_train, out_train, epochs=n_epoch, batch_size=s_batch, validation_data=(inp_val, out_val), callbacks=[early_stop, my_callback], verbose=0)
I have a model trained on a single machine without using Estimator and I'm looking to serve the final trained model on Google cloud AI platform (ML engine). I exported the frozen graph as a SavedModel using SavedModelBuilder and deployed it on the AI platform. It works fine for small input images but for it to be able to accept large input images for online prediction, I need to change it to accept b64 encoded strings ({'image_bytes': {'b64': base64.b64encode(jpeg_data).decode()}}) which are converted to the required tensor by a serving_input_fn if using Estimators.
What options do I have if I am not using an Estimator? If I have a frozen graph or a SavedModel created from SavedModelBuilder, is there a way to have something similar to an Estimator's serving_input_fn when exporting/saving?
Here's the code I'm using for exporting:
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants

export_dir = 'serving_model/'
graph_pb = 'model.pb'

builder = tf.saved_model.builder.SavedModelBuilder(export_dir)

with tf.gfile.GFile(graph_pb, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

sigs = {}

with tf.Session(graph=tf.Graph()) as sess:
    # name="" is important to ensure we don't get spurious prefixing
    tf.import_graph_def(graph_def, name="")
    g = tf.get_default_graph()
    inp = g.get_tensor_by_name("image_bytes:0")
    out_f1 = g.get_tensor_by_name("feature_1:0")
    out_f2 = g.get_tensor_by_name("feature_2:0")

    sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \
        tf.saved_model.signature_def_utils.predict_signature_def(
            {"image_bytes": inp}, {"f1": out_f1, "f2": out_f2})

    builder.add_meta_graph_and_variables(sess,
                                         [tag_constants.SERVING],
                                         strip_default_attrs=True,
                                         signature_def_map=sigs)

builder.save()
Use a @tf.function to specify a serving signature. Here's an example that calls Keras:
class ExportModel(tf.keras.Model):
    def __init__(self, model):
        super().__init__()
        self.model = model

    @tf.function(input_signature=[
        tf.TensorSpec([None, ], dtype='int32', name='a'),
        tf.TensorSpec([None, ], dtype='int32', name='b')
    ])
    def serving_fn(self, a, b):
        return {
            'pred': self.model({'a': a, 'b': b})  # , steps=1)
        }

    def save(self, export_path):
        sigs = {
            'serving_default': self.serving_fn
        }
        tf.keras.backend.set_learning_phase(0)  # inference only
        tf.saved_model.save(self, export_path, signatures=sigs)

sm = ExportModel(model)
sm.save(EXPORT_PATH)
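After export, the signature can be sanity-checked locally, roughly like this (a sketch; EXPORT_PATH is the directory used above, and the keyword argument names match the TensorSpec names in the input_signature):

loaded = tf.saved_model.load(EXPORT_PATH)
serving = loaded.signatures['serving_default']
out = serving(a=tf.constant([1], dtype=tf.int32), b=tf.constant([2], dtype=tf.int32))
print(out['pred'])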
First, load your already exported SavedModel with
import tensorflow as tf
loaded_model = tf.saved_model.load(MODEL_DIR)
Then, wrap it with a new Keras model that takes base64 input
class Base64WrapperModel(tf.keras.Model):
    def __init__(self, model):
        super(Base64WrapperModel, self).__init__()
        self.inner_model = model

    @tf.function
    def call(self, base64_input):
        str_input = tf.io.decode_base64(base64_input)
        return self.inner_model(str_input)

wrapper_model = Base64WrapperModel(loaded_model)
Finally, save your wrapped model with the Keras API:
wrapper_model.save(EXPORT_DIR)
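A quick local check before deploying could look roughly like this (a sketch; 'test.jpg' is a placeholder path, and tf.io.encode_base64 produces the web-safe alphabet that tf.io.decode_base64 expects):

with open('test.jpg', 'rb') as fp:
    jpeg_bytes = fp.read()
b64 = tf.io.encode_base64(jpeg_bytes)          # web-safe base64 string tensor
preds = wrapper_model(tf.expand_dims(b64, 0))  # batch of one encoded image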
I am experimenting Watson Neural Network Modeler.
I've created a model from the built-in demo "Single Convolution layer on MNIST". The only customization I did was to specify the training data files.
I then exported the Pytorch code and I am trying to run in on my local computer.
The generated code is pretty readable. The relevant code excerpt is:
# Define network architecture
class Net(nn.Module):
    def __init__(self, inp_c):
        super(Net, self).__init__()

    def forward(self, ImageData_4, target):
        Convolution2D_9 = self.Convolution2D_9(ImageData_4)
        ReLU_1 = self.ReLU_1(Convolution2D_9)
        Pooling2D_8 = self.Pooling2D_8(ReLU_1)
        Flatten_2 = Pooling2D_8.view(-1, 10816)
        Dense_3 = self.Dense_3(Flatten_2)
        Softmax_5 = self.Softmax_5(Dense_3)
        Accuracy_6 = torch.topk(Softmax_5, 1)[0]
        CrossEntropyLoss_7 = self.CrossEntropyLoss_7(Softmax_5, target)
        return Softmax_5, Accuracy_6

# Model Initialization
inp_c = 1
model = Net(inp_c)
model.cuda()

# Define optimizer
learning_rate = 0.001000
decay = 0.000000
beta_1 = 0.900000
beta_2 = 0.999000
optim = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    betas=(beta_1, beta_2),
    weight_decay=decay)
I am getting the error:
"ValueError: optimizer got an empty parameter list"
on the optim = optim.Adam() statement.
Is there any Watson user/expert out there who can shed some light on this issue? I am basically running the demo; it was not supposed to fail.
Thanks!
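A likely cause, independent of Watson itself: the generated __init__ shown above defines no layers, so model.parameters() is empty and optim.Adam raises. A minimal sketch of the layer definitions that forward() implies (the channel and kernel sizes are assumptions chosen so that the 10816-unit flatten works out for 28x28 MNIST input; the real demo may use different values):

class Net(nn.Module):
    def __init__(self, inp_c):
        super(Net, self).__init__()
        # layers referenced by forward(); sizes are guesses for 28x28 MNIST input
        self.Convolution2D_9 = nn.Conv2d(inp_c, 64, kernel_size=3)  # 28x28 -> 26x26
        self.ReLU_1 = nn.ReLU()
        self.Pooling2D_8 = nn.MaxPool2d(2)                          # 26x26 -> 13x13, 64*13*13 = 10816
        self.Dense_3 = nn.Linear(10816, 10)
        self.Softmax_5 = nn.Softmax(dim=1)
        self.CrossEntropyLoss_7 = nn.CrossEntropyLoss()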