How to load the pre-trained TensorFlow model named Inception by Google? - python

I have downloaded a TensorFlow checkpoint model named inception_resnet_v2_2016_08_30.ckpt.
Do I need to create the graph (with all the variables) that was used when this checkpoint was created?
How do I make use of this model?

First of all, you have to get the network architecture in memory. You can get the network architecture from here.
Once you have this program with you, use the following approach to use the model:
import tensorflow as tf
from inception_resnet_v2 import inception_resnet_v2, inception_resnet_v2_arg_scope

slim = tf.contrib.slim

height = 299
width = 299
channels = 3

X = tf.placeholder(tf.float32, shape=[None, height, width, channels])
with slim.arg_scope(inception_resnet_v2_arg_scope()):
    logits, end_points = inception_resnet_v2(X, num_classes=1001, is_training=False)
With this, you have the whole network in memory. Now you can initialize the network from the checkpoint file (.ckpt) using tf.train.Saver:
saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess, "/home/pramod/Downloads/inception_resnet_v2_2016_08_30.ckpt")
If you want to do bottleneck feature extraction, it is simple. Say you want features from the last layer; then you simply declare predictions = end_points["Logits"]. If you want features from another intermediate layer, you can get the layer names from the program above, inception_resnet_v2.py.
After that you can call: output = sess.run(predictions, feed_dict={X: batch_images})
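For example, here is a minimal sketch of pulling bottleneck features for a batch (batch_images is a hypothetical NumPy array; in the slim implementation, inception_resnet_v2 also exposes a 'PreLogitsFlatten' end point holding the 1536-d features just before the classifier):

import numpy as np

# Hypothetical batch: real inputs should go through Inception preprocessing
# (values scaled to [-1, 1]) before being fed to the network.
batch_images = np.random.uniform(-1.0, 1.0, (2, height, width, channels)).astype(np.float32)

# Features from the layer just before the classifier.
features = sess.run(end_points['PreLogitsFlatten'], feed_dict={X: batch_images})
print(features.shape)  # expected: (2, 1536)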

Do I need to create a graph (with all the variables) that were used when this checkpoint was created?
No, you don't.
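You can even inspect what is inside the checkpoint without building any graph first. A small sketch using tf.train.NewCheckpointReader (the checkpoint path below is a placeholder):

import tensorflow as tf

# List variable names and shapes stored in the checkpoint; no graph required.
reader = tf.train.NewCheckpointReader('/path/to/inception_resnet_v2_2016_08_30.ckpt')
for name, shape in reader.get_variable_to_shape_map().items():
    print(name, shape)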
As for how to use the checkpoint file (.ckpt file):
1. This article (TensorFlow-Slim image classification library) tells you how to train your model from scratch.
2. The following is example code from the Google blog:
import numpy as np
import os
import tensorflow as tf
import urllib2  # Python 2; on Python 3 use urllib.request instead
import matplotlib.pyplot as plt

from datasets import imagenet
from nets import inception
from preprocessing import inception_preprocessing

slim = tf.contrib.slim

batch_size = 3
image_size = inception.inception_v3.default_image_size
checkpoints_dir = '/root/code/model'
checkpoints_filename = 'inception_resnet_v2_2016_08_30.ckpt'
model_name = 'InceptionResnetV2'

# InteractiveSession installs itself as the default session, so no
# separate tf.Graph() handling is needed here.
sess = tf.InteractiveSession()

def classify_from_url(url):
    image_string = urllib2.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
        logits, _ = inception.inception_resnet_v2(
            processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    # Restore the pre-trained weights into the freshly built graph.
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, checkpoints_filename),
        slim.get_model_variables(model_name))
    init_fn(sess)

    np_image, probabilities = sess.run([image, probabilities])
    probabilities = probabilities[0, 0:]
    sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]

    plt.figure()
    plt.imshow(np_image.astype(np.uint8))
    plt.axis('off')
    plt.show()

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
        print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))
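A quick usage sketch (the URL is just a placeholder for any JPEG on the web):

classify_from_url('http://example.com/some_image.jpg')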

Another way of loading a pre-trained ImageNet model is with tf.keras.applications:
ResNet50
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
model = ResNet50()
model.summary()
InceptionV3
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3
model = InceptionV3()
model.summary()
You can check a detailed explanation related to this here
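As a quick sketch of running an actual prediction with one of these models ('elephant.jpg' is a placeholder image path):

import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image

model = ResNet50(weights='imagenet')

# ResNet50 expects 224x224 RGB inputs.
img = image.load_img('elephant.jpg', target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)

preds = model.predict(x)
# decode_predictions maps class indices to human-readable ImageNet labels.
print(decode_predictions(preds, top=3)[0])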

Related

How to load a fine tuned pytorch huggingface bert model from a checkpoint file?

I had fine-tuned a BERT model in PyTorch and saved its checkpoint via torch.save(model.state_dict(), 'model.pt').
Now when I want to reload the model, I have to define the whole network again, reload the weights, and then push everything to the device.
Can anyone tell me how I can save the BERT model directly and load it directly for use in production/deployment?
Following is the training code; you can try running it in Colab itself! After training completes, you will notice a checkpoint file in the file system. But I want to save the model itself.
LINK TO COLAB NOTEBOOK FOR SAMPLE TRAINING
Following is the current inference code I have written:
import torch
import torch.nn as nn
from transformers import AutoModel, BertTokenizerFast
import numpy as np
import json

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
device = torch.device("cpu")

class BERT_Arch(nn.Module):
    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.1)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(768, 512)
        # dense layer 2 (output layer)
        self.fc2 = nn.Linear(512, 2)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, sent_id, mask):
        # pass the inputs to the model
        _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc2(x)
        # apply softmax activation
        x = self.softmax(x)
        return x

bert = AutoModel.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)
path = './models/saved_weights_new_data.pt'
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)

def inference(comment):
    tokens_test = tokenizer.batch_encode_plus(
        [comment],
        max_length=75,
        pad_to_max_length=True,
        truncation=True,
        return_token_type_ids=False
    )
    test_seq = torch.tensor(tokens_test['input_ids'])
    test_mask = torch.tensor(tokens_test['attention_mask'])
    predictions = model(test_seq.to(device), test_mask.to(device))
    predictions = predictions.detach().cpu().numpy()
    predictions = np.argmax(predictions, axis=1)
    return predictions
I simply want to save a model from this notebook in a way such that I can use it for inferencing anywhere.
Just save your model using model.save_pretrained; here is an example:
model.save_pretrained("<path_to_dummy_folder>")
You can download the model from Colab and save it on your Google Drive or at any other location of your choice. While doing inference, you can just give the path to this model (you may have to upload it) and start inferencing.
To load the model
model = AutoModel.from_pretrained("<path_to_saved_pretrained_model>")
#Note: Instead of AutoModel class, you may use the task specific class as well.
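As a fuller round-trip sketch (paths are placeholders; note that save_pretrained exists on Hugging Face PreTrainedModel and PreTrainedTokenizer objects, so for the custom BERT_Arch wrapper above only the inner bert submodule can be saved this way, and the extra classifier layers would still need torch.save):

from transformers import AutoModel, AutoTokenizer

# Save the Hugging Face parts of the model side by side.
model.bert.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")

# Later, anywhere else:
reloaded_bert = AutoModel.from_pretrained("./saved_model")
reloaded_tokenizer = AutoTokenizer.from_pretrained("./saved_model")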

How do I save an optimizer state of JAX trained model?

I am playing with the mnist_vae example and can't figure out how to properly save/load weights of the trained model.
enc_init_rng, dec_init_rng = random.split(random.PRNGKey(2))
_, init_encoder_params = encoder_init(enc_init_rng, (batch_size, 28 * 28))
_, init_decoder_params = decoder_init(dec_init_rng, (batch_size, 10))
init_params = init_encoder_params, init_decoder_params
opt_init, opt_update, get_params = optimizers.momentum(step_size, mass=0.9)
opt_state = opt_init(init_params)
After that, I train the model using opt_update and want to save it. However, I haven't found any function that saves the optimizer state to disk.
I tried to save the parameters and initialize opt_state with them, but not all of the information is preserved, and the resulting opt_state_1 is not the original opt_state.
weights = get_params(opt_state)
jnp.save(file, weights)

weights = jnp.load(file, allow_pickle=True)
opt_state_1 = opt_init(weights)
How do I properly save the model I trained?
import os
import pickle
from jax.experimental import optimizers

# Unpack the optimizer state into a picklable pytree.
trained_params = optimizers.unpack_optimizer_state(opt_state)
with open(os.path.join(config["ckpt_path"], "best_ckpt.pkl"), "wb") as f:
    pickle.dump(trained_params, f)

# Later: load the pytree and re-pack it into a full optimizer state.
with open(os.path.join(config["ckpt_path"], "best_ckpt.pkl"), "rb") as f:
    best_params = pickle.load(f)
best_opt_state = optimizers.pack_optimizer_state(best_params)
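The restored state then behaves exactly like the original; a sketch, reusing opt_update and get_params from the question:

# Resume training or evaluation as if it had never stopped.
params = get_params(best_opt_state)
# inside a training loop: best_opt_state = opt_update(step, grads, best_opt_state)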

TensorFlow + Keras multi gpu model with inference

I am trying to do image classification using Keras's Xception model, modeled after this code. However, I want to use multiple GPUs to do batch-parallel image classification using this function. I believe it is possible, and I have the original code working without multi-GPU support; however, I cannot get the multi_gpu_model function to work as I would expect. I am following this example for multi-GPU usage. This is my code (it is the backend of a Flask app): it instantiates the model, makes a prediction on an example ndarray when the class is created, and then expects a base64-encoded image in the classify function:
import os
from keras.preprocessing import image as preprocess_image
from keras.applications import Xception
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from keras.utils import multi_gpu_model
import numpy as np
import tensorflow as tf
import PIL.Image
from numpy import array

class ModelManager:
    def __init__(self, model_path):
        self.model_name = 'ImageNet'
        self.model_version = '1.0'
        self.batch_size = 32
        height = 224
        width = 224
        num_classes = 1000
        # self.model = tf.keras.models.load_model(os.path.join(model_path, 'ImageNetXception.h5'))
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes, include_top=True)
        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        self.parallel_model = multi_gpu_model(model, gpus=8)
        self.parallel_model.compile(loss='categorical_crossentropy',
                                    optimizer='rmsprop')
        print("Loaded Xception model.")
        x = np.empty((1, 224, 224, 3))
        self.parallel_model.predict(x, batch_size=self.batch_size)
        self.graph = tf.get_default_graph()
        self.graph.finalize()

    def classify(self, ids, images):
        results = []
        all_images = np.empty((0, 224, 224, 3))
        # all_images = []
        for image_id, image in zip(ids, images):
            # This does the same as keras.preprocessing.image.load_img
            image = image.convert('RGB')
            image = image.resize((224, 224), PIL.Image.NEAREST)
            x = preprocess_image.img_to_array(image)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            all_images = np.append(all_images, x, axis=0)
            # all_images.append(x)
        # a = array(all_images)
        # print(type(a))
        # print(a[0])
        with self.graph.as_default():
            preds = self.parallel_model.predict(all_images, batch_size=288)
        # print(type(preds))
        top3 = decode_predictions(preds, top=3)[0]
        print(top3)
        output = [((t[1],) + t[2:]) for t in top3]
        predictions = [
            {'label': label, 'probability': probability * 100.0}
            for label, probability in output
        ]
        results.append({
            'id': 1,
            'predictions': predictions
        })
        print(len(results))
        return results
The part I am not sure about is what to pass to the predict function. Currently I am creating an ndarray of the preprocessed images I want classified and passing that to predict. The function returns, but the preds variable doesn't hold what I expect. decode_predictions errors when I pass it a single item, but responds with only one prediction when I pass the whole preds ndarray. The example code doesn't use the decode_predictions function, so I'm not sure how to use it with the response from parallel_model.predict. Any help or resources are appreciated, thanks.
The following site illustrates how to do that correctly: link
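One detail worth noting about the question's code: decode_predictions returns one list of (class_id, label, probability) tuples per row of preds, so for a batch you iterate over the decoded result instead of taking only element [0]. A sketch, assuming preds comes from parallel_model.predict as above:

# decode_predictions yields one top-k list per image in the batch.
decoded = decode_predictions(preds, top=3)
for image_id, top3 in zip(ids, decoded):
    predictions = [
        {'label': label, 'probability': float(prob) * 100.0}
        for (_, label, prob) in top3
    ]
    results.append({'id': image_id, 'predictions': predictions})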

Re-train pre-trained ResNet-50 model with tf slim for classification purposes

I would like to re-train a pre-trained ResNet-50 model with TensorFlow slim, and use it later for classifying purposes.
ResNet-50 is designed for 1000 classes, but I would like just 10 classes (land-cover types) as output.
First, I try to code it for only one image, which I can generalize later.
So this is my code:
from tensorflow.contrib.slim.nets import resnet_v1
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np

batch_size = 1
height, width, channels = 224, 224, 3

# Create graph
inputs = tf.placeholder(tf.float32, shape=[batch_size, height, width, channels])
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logits, end_points = resnet_v1.resnet_v1_50(inputs, is_training=False)
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, 'd:/bitbucket/cnn-lcm/data/ckpt/resnet_v1_50.ckpt')
    representation_tensor = sess.graph.get_tensor_by_name('resnet_v1_50/pool5:0')
    # list of files to read
    filename_queue = tf.train.string_input_producer(['d:/bitbucket/cnn-lcm/data/train/AnnualCrop/AnnualCrop_735.jpg'])
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    img = tf.image.decode_jpeg(value, channels=3)
    im = np.array(img)
    im = im.reshape(1, 224, 224, 3)
    predict_values, logit_values = sess.run([end_points, logits], feed_dict={inputs: im})
    print(np.max(predict_values), np.max(logit_values))
    print(np.argmax(predict_values), np.argmax(logit_values))
    # img = ...  # load image here with size [1, 224, 224, 3]
    # features = sess.run(representation_tensor, {'Placeholder:0': img})
I am a bit confused about what comes next: should I open a graph, load the structure of the network and the weights, or load batches? There is a problem with the image shape as well. There is a lot of varied documentation, and none of it is easy to interpret :/
Any advice how to correct the code in order to fit my purposes?
The test image: AnnualCrop735
The resnet layer gives you predictions if you provide the num_classes kwarg. Look at the documentation and code for resnet_v1.
You need to add a loss function and training operations on top of it to fine-tune resnet_v1 with reuse:
...
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logits, end_points = resnet_v1.resnet_v1_50(
        inputs,
        num_classes=10,
        is_training=True,
        reuse=tf.AUTO_REUSE)
...

...
classification_loss = slim.losses.softmax_cross_entropy(
    predict_values, im_label)
regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
total_loss = classification_loss + regularization_loss

# The optimizer must be defined before it is passed to create_train_op;
# training on total_loss makes the regularization term take effect.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = slim.learning.create_train_op(total_loss, optimizer)

slim.learning.train(
    train_op,
    logdir='/tmp/',
    number_of_steps=1000,
    save_summaries_secs=300,
    save_interval_secs=600)
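For completeness, im_label in the snippet above would be a one-hot label tensor matching the 10 classes; a hypothetical sketch of building it:

# Hypothetical label handling: the sample belongs to class index 3 of 10 land-cover classes.
label_index = tf.constant([3])
im_label = tf.one_hot(label_index, depth=10)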

TensorFlow Slim Exporting Transfer Learning to tensorflow serving issue

Any help would be much appreciated.
I have followed this tutorial, then used this simple script to validate that my model works, which it does:
import tensorflow as tf
from nets import inception_v3
from preprocessing import inception_preprocessing
from matplotlib.pyplot import imshow, imread

slim = tf.contrib.slim

batch_size = 5
image_size = 299

with tf.Graph().as_default():
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        imgPath = 'dandelion.jpg'
        # imgPath = '/tmp/rose.jpg'
        testImage_string = tf.gfile.FastGFile(imgPath, 'rb').read()
        testImage = tf.image.decode_jpeg(testImage_string, channels=3)
        processed_image = inception_preprocessing.preprocess_image(testImage, image_size, image_size, is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)
        logits, _ = inception_v3.inception_v3(processed_images, num_classes=5, is_training=False)
        probabilities = tf.nn.softmax(logits)
        checkpoint_path = tf.train.latest_checkpoint('/tmp/flowers-models/inception_v3')
        init_fn = slim.assign_from_checkpoint_fn(
            checkpoint_path, slim.get_model_variables('InceptionV3'))
        with tf.Session() as sess:
            init_fn(sess)
            np_image, probabilities = sess.run([processed_images, probabilities])
            probabilities = probabilities[0, 0:]
            sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]
            names = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
            for i in range(5):
                index = sorted_inds[i]
                print((probabilities[index], names[index]))
However, my end goal is to load this model inside TensorFlow Serving, and I'm not sure what my next step should be to convert the model into a format that Serving understands.
TensorFlow Slim uses the deprecated SessionBundle. I'm currently converting the checkpoints generated by tensorflow/slim (tf.train.Saver) to the SavedModel format (tf.saved_model).
session_bundle.load_session_bundle_from_path from tensorflow/contrib/session_bundle/session_bundle.py returns the session and the meta_graph_def. The intention is to manually build the SavedModel (https://www.tensorflow.org/programmers_guide/saved_model), but I can't get to the inputs from the meta_graph_def.
Using TensorFlow 1.7 for the moment; downgrading to see if it makes any difference.
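For reference, one way to export directly from the validation script above to a SavedModel in TF 1.x is tf.saved_model.builder.SavedModelBuilder; a sketch, assuming processed_images and probabilities are the tensors built in that script and the export path is a placeholder (Serving expects a numeric version subdirectory):

import tensorflow as tf

export_dir = '/tmp/inception_v3_export/1'  # placeholder, versioned for Serving

with tf.Session() as sess:
    init_fn(sess)  # restore the slim checkpoint as in the script above
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    # Declare which tensors Serving should treat as inputs and outputs.
    signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'images': processed_images},
        outputs={'probabilities': probabilities})
    builder.add_meta_graph_and_variables(
        sess,
        [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature})
    builder.save()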
