Currently trying to make this repo work.
I'm trying to save the trained model to my local machine so it can be applied later. I read in TensorFlow's docs that saving a model seems pretty intuitive: you call tf.saved_model.save(object). But I'm not sure how to apply it here.
Original code is here: model.py
Following are my changes:
import tensorflow as tf

class ICON(tf.Module):  # make it a TensorFlow module
    def __init__(self, config, embeddingMatrix, session=None):
        ...

    def _build_inputs(self):
        ...

    def _build_vars(self):
        ...

    def _convolution(self, input_to_conv):
        ...

    def _inference(self):
        ...

    def batch_fit(self, queries, ownHistory, otherHistory, labels):
        feed_dict = {self._input_queries: queries, self._own_histories: ownHistory,
                     self._other_histories: otherHistory, self._labels: labels}
        loss, _ = self._sess.run([self.loss_op, self.train_op], feed_dict=feed_dict)
        return loss

    def predict(self, queries, ownHistory, otherHistory):
        feed_dict = {self._input_queries: queries, self._own_histories: ownHistory,
                     self._other_histories: otherHistory}
        return self._sess.run(self.predict_op, feed_dict=feed_dict)

    def save(self):  # attempt to save the model
        tf.saved_model.save(self, './output/model')
The code above produces the following ValueError:
ValueError: Tensor("ICON/CNN/embedding_matrix:0", shape=(16832, 300), dtype=float32_ref) must be from the same graph as Tensor("saver_filename:0", shape=(), dtype=string).
I believe you can use the tf.train.Saver class for this
def save(self):  # attempt to save the model
    saver = tf.train.Saver()
    saver.save(self._sess, './output/model')
You can then restore the model this way
saver = tf.train.import_meta_graph('./output/model.meta')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('./output'))
You might also find this tutorial helpful in understanding this more.
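To actually run predictions from the restored checkpoint, you would look up the placeholders and the prediction op by name in the restored graph. A rough sketch (the tensor names below are placeholders, not the real ones from model.py; inspect the graph to find the actual names):

import tensorflow as tf

saver = tf.train.import_meta_graph('./output/model.meta')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('./output'))
    graph = tf.get_default_graph()
    # Hypothetical names: list graph.get_operations() to find the real
    # placeholder / prediction op names created in model.py.
    input_queries = graph.get_tensor_by_name('input_queries:0')
    predict_op = graph.get_tensor_by_name('predictions:0')
    # Then feed a batch prepared the same way as during training:
    # predictions = sess.run(predict_op, feed_dict={input_queries: queries})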
Edit: if you want to use SavedModel
def save(self):
    inputs = {'input_queries': self._input_queries, 'own_histories': self._own_histories,
              'other_histories': self._other_histories}
    outputs = {'output': self.predict_op}
    tf.saved_model.simple_save(self._sess, './output/model', inputs, outputs)
You can then use tf.contrib.predictor.from_saved_model to load and serve the SavedModel:
from tensorflow.contrib.predictor import from_saved_model
predictor = from_saved_model('./output/model')
predictions = predictor({'input_queries': input_queries, 'own_histories': own_histories, 'other_histories': other_histories})
Related
I fine-tuned the pretrained model here by freezing all layers except the classifier layer, and I saved the weights with PyTorch in .bin format.
Now, instead of loading the whole 400 MB fine-tuned model, is there a way to load just the parameters of the classifier layer I retrained? By the way, I know that I still have to load the original pretrained model; I just don't want to load the entire fine-tuned model, due to memory concerns.
I can access the last layer's parameters from the state_dict as below, but how can I save them in a separate file to use them later with lower memory usage?
model = PosTaggingModel(num_pos_tag=num_pos_tag)
state_dict = torch.load("model.bin")
print("state dictionary:", state_dict)
with torch.no_grad():
    model.out_pos_tag.weight.copy_(state_dict['out_pos_tag.weight'])
    model.out_pos_tag.bias.copy_(state_dict['out_pos_tag.bias'])
Here is the model class:
class PosTaggingModel(nn.Module):
    def __init__(self, num_pos_tag):
        super(PosTaggingModel, self).__init__()
        self.num_pos_tag = num_pos_tag
        self.model = AutoModel.from_pretrained("dbmdz/bert-base-turkish-cased")
        for name, param in self.model.named_parameters():
            if 'classifier' not in name:  # classifier layer
                param.requires_grad = False
        self.bert_drop = nn.Dropout(0.3)
        self.out_pos_tag = nn.Linear(768, self.num_pos_tag)

    def forward(self, ids, mask, token_type_ids, target_pos_tag):
        o1, _ = self.model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        bo_pos_tag = self.bert_drop(o1)
        pos_tag = self.out_pos_tag(bo_pos_tag)
        loss = loss_fn(pos_tag, target_pos_tag, mask, self.num_pos_tag)
        return pos_tag, loss
I don't know if this is possible, but I'm looking for a way to save and reuse only the last layer's parameters, without needing the parameters of the frozen layers. I couldn't find it in the documentation.
Thanks in advance to anyone who can help.
You can do it like this
import torch

# creating a dummy model
class Classifier(torch.nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.first = torch.nn.Linear(10, 10)
        self.second = torch.nn.Linear(10, 20)
        self.last = torch.nn.Linear(20, 1)

    def forward(self, x):
        pass

# Creating its object
model = Classifier()

# Extracting the layer to save
to_save = model.last

# Saving the state dict of that layer
torch.save(to_save.state_dict(), './classifier.bin')

# Recreating the object of that model
model = Classifier()

# Updating the saved layer of model
model.last.load_state_dict(torch.load('./classifier.bin'))
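Applied to the PosTaggingModel from the question, the same idea would look roughly like this (a sketch; it assumes the fine-tuned model instance is available as model and num_pos_tag is defined as in the question):

import torch

# After fine-tuning: save only the classifier head, not the whole fine-tuned checkpoint
torch.save(model.out_pos_tag.state_dict(), './out_pos_tag.bin')

# Later: rebuild the model (this still loads the original pretrained base weights)
# and restore just the retrained classifier layer
model = PosTaggingModel(num_pos_tag=num_pos_tag)
model.out_pos_tag.load_state_dict(torch.load('./out_pos_tag.bin'))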
I am working on a TensorFlow model. I have created a custom class where the model is built. Below is the code for training.
sess = tf.Session(config=session_conf)
with sess.as_default():
    model = EntityAttentionLSTM(
        sequence_length=train_x.shape[1],
        num_classes=train_y.shape[1],
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_size,
        pos_vocab_size=len(pos_vocab_processor.vocabulary_),
        pos_embedding_size=FLAGS.pos_embedding_size,
        hidden_size=FLAGS.hidden_size,
        num_heads=FLAGS.num_heads,
        attention_size=FLAGS.attention_size,
        use_elmo=(FLAGS.embeddings == 'elmo'),
        l2_reg_lambda=FLAGS.l2_reg_lambda)

    for train_batch in train_batches:
        (train_bx, train_by, train_btxt, train_be1, train_be2,
         train_bp1, train_bp2) = zip(*train_batch)
        feed_dict = {
            model.input_x: train_bx,
            model.input_y: train_by,
            model.input_text: train_btxt,
            model.input_e1: train_be1,
            model.input_e2: train_be2,
            model.input_p1: train_bp1,
            model.input_p2: train_bp2,
            model.emb_dropout_keep_prob: FLAGS.emb_dropout_keep_prob,
            model.rnn_dropout_keep_prob: FLAGS.rnn_dropout_keep_prob,
            model.dropout_keep_prob: FLAGS.dropout_keep_prob
        }
        _, step, summaries, loss, accuracy = sess.run(
            [train_op, global_step, train_summary_op, model.loss, model.accuracy],
            feed_dict)
        train_summary_writer.add_summary(summaries, step)
While this runs okay, if I have to load the checkpoint and restore the model, how would the prediction logic work? How do I restore an object of the class? Please help.
Let's assume that I have a large number of trained Keras models and I sometimes have to load them and make predictions. I need to load every model and predict data in the fastest possible way. I think the fastest solution would be to keep them all in memory, but I guess that isn't a good approach because sooner or later the RAM will overflow. So at the moment I achieve the best performance with something like this:
K.clear_session()
random_model = load_model('path/to/' + str(random_model))
results = random_model(final_input_row)
In addition, I have five models that I use almost all the time, and in their case performance is even more important. I load them while the server starts up and have constant access to them:
graph = tf.get_default_graph()
with graph.as_default():
    constant_model = load_model('path/to/constant_model')
Prediction:
with graph.as_default():
    results = constant_model(final_input_row)
The problem is that the K.clear_session() call I execute when loading the random_models also drops the constant models from memory, and without K.clear_session() loading the random_models takes too long. Do you have any ideas how I can solve this? I could even use a completely different approach.
UPDATE
I tried something like this:
class LoadModel:
    def __init__(self, path):
        self.path = path
        self.sess = tf.Session(config=config)
        self.graph = tf.get_default_graph()
        K.set_session(self.sess)
        with self.graph.as_default():
            self.model = load_model(self.path)

    def do_predictions(self, x):
        with self.graph.as_default():
            return self.model.predict(x)
And then when I execute:
random_model = LoadModel('./path/to/random_model.h5')
results = random_model.do_predictions(final_input_row)
It takes about 3 seconds to predict data. For the random_models, where I have a lot of models, that is acceptable. However, for the constant_models, where I have five of them and need constant access, it takes too long. Until now I loaded those models while the Django server started, kept them in memory, and could then just run results = constant_model.do_predictions(final_input_row), which was very fast. That works fine until I run the random_models; after that, every request fails with:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Tensor lstm_14_input:0, specified in either feed_devices or fetch_devices was not found in the Graph
[24/Jun/2019 10:11:00] "POST /request/ HTTP/1.1" 500 17326
[24/Jun/2019 10:11:02] "GET /model/ HTTP/1.1" 201 471
Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x130069908>>
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1455, in __del__
self._session._session, self._handle, status)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: No such callable handle: 140576717540032
[24/Jun/2019 10:11:02] "GET /model/ HTTP/1.1" 201 471
[24/Jun/2019 10:11:07] "GET /model/ HTTP/1.1" 201 471
Obviously it works properly if I run constant_model = LoadModel('./path/to/constant_model.h5') before every results = constant_model.do_predictions(final_input_row), but as I mentioned, that is too slow. Any ideas how to solve this?
UPDATE2
I tried it this way:
session = tf.Session(config=config)
K.set_session(session)

class LoadModel:
    def __init__(self, path):
        self.path = path
        self.graph = tf.get_default_graph()
        with self.graph.as_default():
            self.model = load_model(self.path)

    def do_predictions(self, x):
        with self.graph.as_default():
            return self.model.predict(x)
but I still get TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder:0", shape=(1, 128), dtype=float32) is not an element of this graph.
SOLUTION
Below is my working solution. If anyone knows an even more efficient way of loading models that meets the requirements described above, I would be grateful for a message.
class LoadModel:
    def __init__(self, model_path):
        with K.get_session().graph.as_default():
            self.model = load_model(model_path)
            self.model._make_predict_function()
            self.graph = tf.get_default_graph()

    def predict_data(self, data):
        with self.graph.as_default():
            output = self.model.predict(data)
            return output
Since you said you use those models all the time, it is best to keep them in memory so that you are not loading them every time you make a prediction request. For example, you could define a class that loads a model and keep a different instance of this class for each model. This is not tested code, so you may have to make some changes.
# if you are using GPU
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

class LoadModel:
    def __init__(self, path):
        self.path = path
        self.sess = tf.Session(config=config)
        self.graph = tf.get_default_graph()
        K.set_session(self.sess)
        self.model = self.load()

    def load(self):
        with self.graph.as_default():
            model = load_model(self.path)
        return model

    def do_predictions(self, x):
        return self.model.predict(x)
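Usage would then look something like this (the model names and paths below are placeholders): load each model once at server startup and keep the instances around, so every request only calls predict.

# Load once at startup (e.g. when the Django app initializes); names/paths are placeholders.
constant_models = {
    name: LoadModel('./path/to/{}.h5'.format(name))
    for name in ('model_a', 'model_b', 'model_c', 'model_d', 'model_e')
}

# Per request: no reloading, just predict with the already-loaded instance.
results = constant_models['model_a'].do_predictions(final_input_row)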
I use the TensorFlow Keras API and am trying to add a custom scalar to TensorBoard, but nothing except the loss is displayed.
Here is the code for the model:
embedding_in = Embedding(
    input_dim=vocab_size + 1 + 1,
    output_dim=dim,
    mask_zero=True,
)
embedding_out = Embedding(
    input_dim=vocab_size + 1 + 1,
    output_dim=dim,
    mask_zero=True,
)

input_a = Input((None,))
input_b = Input((None,))
input_c = Input((None, None))

emb_target = embedding_in(input_a)
emb_context = embedding_out(input_b)
emb_negatives = embedding_out(input_c)
emb_gru = GRU(dim, return_sequences=True)(emb_target)

num_negatives = tf.shape(input_c)[-1]

def make_logits(tensors):
    emb_gru, emb_context, emb_negatives = tensors
    true_logits = tf.reduce_sum(tf.multiply(emb_gru, emb_context), axis=2)
    true_logits = tf.expand_dims(true_logits, -1)
    sampled_logits = tf.squeeze(
        tf.matmul(emb_negatives, tf.expand_dims(emb_gru, axis=2),
                  transpose_b=True), axis=3)
    true_logits = true_logits * 0
    sampled_logits = sampled_logits * 0
    logits = K.concatenate([true_logits, sampled_logits], axis=-1)
    return logits

logits = Lambda(make_logits)([emb_gru, emb_context, emb_negatives])
mean = tf.reduce_mean(logits)
tf.summary.scalar('mean_logits', mean)
model = keras.models.Model(inputs=[input_a, input_b, input_c], outputs=[logits])
In particular, I want to see the evolution of the mean_logits scalar after each batch.
I create and compile the model like this:
model = build_model(dim, vocab_size)
model.compile(loss='binary_crossentropy', optimizer='sgd')
callbacks = [
keras.callbacks.TensorBoard(logdir, histogram_freq=1)
]
I use the tf.data Dataset API to feed the model:
iterator = dataset.make_initializable_iterator()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    sess.run(tf.tables_initializer())
    model.fit(iterator, steps_per_epoch=100,
              callbacks=callbacks,
              validation_data=iterator,
              validation_steps=1)
However, I don't see any mean_logits scalar in TensorBoard, and it doesn't appear in the graph either.
How can I track mean_logits scalar in tensorboard after each batch?
I use tf 1.12 and keras 2.1.
I have also faced the same issue. It seems that the Keras TensorBoard callback will not write all existing summaries automatically, but only those registered as metrics (which appear in the logs dict). Updating the logs object is a nice trick, since it also lets you use the values in other callbacks; see Early stopping and learning rate schedule based on custom metric in Keras. I can see several possibilities:
1. Using Lambda callback
Something like this:
from keras.callbacks import LambdaCallback

eval_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: logs.update(
        {'mean_logits': K.eval(mean)}
    ))
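The callback then has to run before the TensorBoard callback, so that 'mean_logits' is already in logs when TensorBoard writes them out. For example, reusing the fit call from the question:

# Order matters: eval_callback updates logs first, then TensorBoard writes them.
model.fit(iterator, steps_per_epoch=100,
          callbacks=[eval_callback, keras.callbacks.TensorBoard(logdir)],
          validation_data=iterator,
          validation_steps=1)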
2. Custom TensorBoard callback
You can also subclass the callback and define your own logic. For instance, my workaround for learning rate monitoring:
class Tensorboard(Callback):
    def __init__(self,
                 log_dir='./log',
                 write_graph=True):
        self.write_graph = write_graph
        self.log_dir = log_dir

    def set_model(self, model):
        self.model = model
        self.sess = K.get_session()
        if self.write_graph:
            self.writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)

    def on_epoch_end(self, epoch, logs={}):
        logs.update({'learning_rate': float(K.get_value(self.model.optimizer.lr))})
        self._write_logs(logs, epoch)

    def _write_logs(self, logs, index):
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = tf.Summary()
            summary_value = summary.value.add()
            if isinstance(value, np.ndarray):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = value
            summary_value.tag = name
            self.writer.add_summary(summary, index)
        self.writer.flush()

    def on_train_end(self, _):
        self.writer.close()
Here, I just add 'learning_rate' to logs explicitly, but this approach can be made much more flexible and powerful.
3. Metrics trick
Here is another interesting workaround. What you need to do is pass a custom metric function to the model's compile() call which returns the aggregated summary tensor. The idea is to make Keras pass your aggregated summary operation to every session.run call and return its result as a metric:
x_entropy_t = K.sum(p_t * K.log(K.epsilon() + p_t), axis=-1, keepdims=True)
full_policy_loss_t = -res_t + X_ENTROPY_BETA * x_entropy_t
tf.summary.scalar("loss_entropy", K.sum(x_entropy_t))
tf.summary.scalar("loss_policy", K.sum(-res_t))
tf.summary.scalar("loss_full", K.sum(full_policy_loss_t))

summary_writer = tf.summary.FileWriter("logs/" + args.name)

def summary(y_true, y_pred):
    return tf.summary.merge_all()

value_policy_model.compile(optimizer=Adagrad(), loss=loss_dict, metrics=[summary])

l = value_policy_model.train_on_batch(x_batch, y_batch)
l_dict = dict(zip(value_policy_model.metrics_names, l))
summary_writer.add_summary(l_dict['value_summary'], global_step=iter_idx)
summary_writer.flush()
I saved my model using tf.train.Saver('./model.ckpt'). When I went to the local directory, I found files named model.ckpt.index, model.ckpt.meta and model.ckpt.data-00000-of-00001, but no model.ckpt. As a consequence, I wasn't able to restore the model. Does anyone know if I did anything wrong? Here's my code:
class autoencoder(object):
    def __init__(self, network_architecture, learning_rate=0.001, regularization_constant=1):
        self.network_arch = network_architecture
        self.X = tf.placeholder(tf.float32, [None, network_architecture['n_input']])
        self.c = tf.Variable(regularization_constant, dtype=tf.float32)

        self._initialize_weights()
        self._build_graph()
        self.cost, self.optimizer = self._cost_optimizer(learning_rate)

        init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.sess.run(init)

    ...

    def save(self, path):
        self.saver.save(self.sess, path)

    def load(self, path):
        self.saver.restore(self.sess, path)
The *.meta file contains your MetaGraph, and you can import it with:
saver = tf.train.import_meta_graph("model.ckpt.meta")
Then you can restore the graph's variables.
saver.restore(sess, "model.ckpt")
You can save additional model data to the metagraph by calling tf.train.export_meta_graph.
Alternatively, you can store your application's model using a SavedModel, which can contain multiple MetaGraphs. The documentation is here.
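For reference, a minimal export sketch using the TF1 SavedModel API for the autoencoder above (model.reconstruction is a hypothetical attribute name; substitute the actual output tensor of your graph):

import tensorflow as tf

def export_saved_model(model, export_dir):
    # `model` is an autoencoder instance as defined in the question;
    # model.X is its input placeholder, and model.reconstruction is an
    # assumed attribute holding the decoder output tensor.
    tf.saved_model.simple_save(
        model.sess, export_dir,
        inputs={'X': model.X},
        outputs={'reconstruction': model.reconstruction})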