Python Unittest for a PyTorch Model - python

I have the following function that I am struggling with:
def load_trained_bert(
    num_classes: int, path_to_model: Union[Path, str]
) -> Tuple[BertForSequenceClassification, device]:
    """Returns a bert model and device from four required model files of a folder

    Parameters
    ----------
    num_classes: int
        Number of output layers in the bert model
    path_to_model: Union[Path, str]
        Folder where the four required models files are

    Returns
    -------
    Tuple[BertForSequenceClassification, device]
        BERT model in evaluation mode and device
    """
    # Prefer the GPU when one is available. The local is deliberately not
    # called `device`, so it does not shadow the type used in the annotation.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize BERT
    # NOTE(review): `num_classes` is forwarded verbatim as a keyword argument;
    # confirm this is the keyword the installed transformers version expects.
    model = BertForSequenceClassification.from_pretrained(
        path_to_model,
        num_classes=num_classes,
        output_attentions=False,
        output_hidden_states=False,
    )

    # Load fine tuned model weights
    weight_file = get_weight_file(path_to_model)
    path_to_weights = os.path.join(path_to_model, weight_file)
    # Deserialize onto the CPU first so the checkpoint loads on machines
    # without a GPU; the model is moved to the target device afterwards.
    model.load_state_dict(torch.load(path_to_weights, map_location=torch.device("cpu")))

    # Send model to device
    model.to(target_device)
    # Set model to inference mode
    model.eval()
    return model, target_device
I am in general not sure how to test this function, but I thought it would be a good idea just to check the parameters I call the function with:
class LoadModelTest(TestCase):
    """Checks the arguments ``load_trained_bert`` forwards to ``from_pretrained``."""

    # The decorator supplies the `get_weight_file` argument of the test method.
    @patch("abox.util.model_conversion.get_weight_file", return_value="test.model")
    def test_load_trained_bert(self, get_weight_file):
        # patch.object restores the real classmethod when the block exits, so
        # the replacement does not leak into other tests.
        with patch.object(BertForSequenceClassification, "from_pretrained") as from_pretrained:
            # NOTE: torch.load is not patched here, so this call still tries to
            # read the weight file from disk.
            load_trained_bert(num_classes=16, path_to_model="./model")
        from_pretrained.assert_called_with(
            "./model",
            num_classes=16,
            output_attentions=False,
            output_hidden_states=False,
        )
This results in the following error:
FileNotFoundError: [Errno 2] No such file or directory: './model\\test.model'
Now it's getting difficult... I have no idea what to do with the following snippet:
model.load_state_dict(torch.load(path_to_weights, map_location=torch.device("cpu")))
Can anyone help me here?

Related

huggingface transformers longformer optimizer warning AdamW

I get below warning when I try to run the code from this page.
/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
FutureWarning,
I am super confused because the code doesn't seem to set the optimizer at all. The most probable places where the optimizer was set could be below, but I don't know how to change the optimizer then:
# Training configuration. No `optim` value is passed here, so the Trainer
# uses whatever optimizer is its default.
training_args = TrainingArguments(
    # output locations and run identification
    output_dir='/media/data_files/github/website_tutorials/results',
    logging_dir='/media/data_files/github/website_tutorials/logs',
    run_name='longformer-classification-updated-rtx3090_paper_replication_2_warm',
    # schedule and batch sizes
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=8,
    warmup_steps=200,
    weight_decay=0.01,
    # evaluation, logging and runtime behaviour
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
    logging_steps=4,
    disable_tqdm=False,
    fp16=True,
    dataloader_num_workers=0,
)

# Build the trainer from the model, the arguments above and both datasets.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    compute_metrics=compute_metrics,
)

# Report which device is available (bare expression echoes it in a notebook).
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device
I tried another transformer such as distilbert-base-uncased using the identical code but it seems to run without any warnings.
Is this warning more specific to longformer?
How should I change the optimizer?
import torch_optimizer as optim
# Builds an AdamW optimizer from settings held on `opt`.
# NOTE(review): `params` and `opt` are not defined in this snippet and must be
# provided by the surrounding code; the positional arguments are presumably
# learning rate, the (beta1, beta2) pair and epsilon — confirm against the
# optimizer's signature.
optim.AdamW(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta), opt.optim_epsilon, weight_decay=opt.weight_decay)
It can be used this way.
You need to add optim='adamw_torch', the default is optim='adamw_hf'
Refer here
Can you try the following:
# define the training arguments
training_args = TrainingArguments(
optim='adamw_torch',
# your training arguments
...
...
...
)

Integrating a 2D Medical Imaging X-Ray classifier which was trained on jpegs with a script which receives DCM files to be able to diagnose dicom files

Below is the order of how I am going to present my problem:
First I will show you the script .py that I am using to run the web app in a local host(flask app). This web app is a classifier which shows you whether a person has either Viral Pneumonia, Bacterial Pneumonia or they are Normal. Thus there are three classes(Viral, Bacterial or Normal) looking from chest x-rays which are in jpeg format.
Second I will show you the different .py script for Binary Classification for Pneumonia which is taking in raw dicom files and converting them into numpy arrays before they are diagnosed.
So to achieve diagnosis I am trying to integrate my app.py script which takes in jpegs, with the Pneumonia binary classification which takes in dicom files so as to take advantage of the dicom processing function of the second script but using all of the information and weights of the Viral and Bacterial one that I have, so that it can be used in a clinical setup. Clinical setups use dicom files not jpegs, that is why I am trying to combine these two scripts to reach the goal.
Below is my app.py script for Viral and Bacterial Pneumonia Classification which takes in jpegs, which I am trying to integrate on the other script that I am going to attach further below:
#::: Import modules and packages :::
# Flask utils
from flask import Flask, redirect, url_for, request, render_template
from werkzeug.utils import secure_filename
from gevent.pywsgi import WSGIServer
# Import Keras dependencies
from tensorflow.keras.models import model_from_json
from tensorflow.python.framework import ops
ops.reset_default_graph()
from keras.preprocessing import image
# Import other dependecies
import numpy as np
import h5py
from PIL import Image
import PIL
import os
#::: Flask App Engine :::
# Define a Flask app
app = Flask(__name__)
# ::: Prepare Keras Model :::
# Model files
MODEL_ARCHITECTURE = './model/model_adam.json'
MODEL_WEIGHTS = './model/model_100_eopchs_adam_20190807.h5'

# Load the model from external files.
# The context manager closes the file even if reading raises.
with open(MODEL_ARCHITECTURE) as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Get weights into the model
model.load_weights(MODEL_WEIGHTS)
print('Model loaded. Check http://127.0.0.1:5000/')
# ::: MODEL FUNCTIONS :::
def model_predict(img_path, model):
    '''
    Classify one image file with the given Keras model.

    Args:
        -- img_path : an URL path where a given image is stored.
        -- model : a given Keras CNN model.

    Returns:
        The argmax over axis 1 of the model output, i.e. one class index
        per input image (a single image is passed here).
    '''
    grey = image.load_img(img_path).convert('L')
    print(type(grey))

    # Pre-processing the image.
    # PIL's resize takes (width, height), so the resulting array has
    # 342 rows and 257 columns, matching the reshape below.
    resized = grey.resize((257, 342))
    print(type(resized))
    pixels = np.asarray(resized)
    print(pixels.shape)
    # Scale 8-bit grey values into [0, 1] and add batch and channel axes.
    batch = np.true_divide(pixels, 255).reshape(1, 342, 257, 1)
    print(type(batch), batch.shape)

    print(model)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='rmsprop')
    predict_x = model.predict(batch)
    print(predict_x)
    prediction = np.argmax(predict_x, axis=1)
    print(prediction)
    return prediction
# ::: FLASK ROUTES
@app.route('/', methods=['GET'])
def index():
    """Serve the main page."""
    return render_template('index.html')
@app.route('/predict', methods=['GET', 'POST'])
def upload():
    """Save an uploaded image, classify it and return the class name.

    Returns:
        The lower-cased predicted class name for a POST request, or None
        for any other request method.
    """
    # Constants:
    classes = {'TRAIN': ['BACTERIA', 'NORMAL', 'VIRUS'],
               'VALIDATION': ['BACTERIA', 'NORMAL'],
               'TEST': ['BACTERIA', 'NORMAL', 'VIRUS']}

    if request.method != 'POST':
        # Nothing was uploaded, so there is nothing to classify.
        return None

    # Get the file from post request
    f = request.files['file']

    # Save the file to ./uploads, creating the folder on first use.
    basepath = os.path.dirname(__file__)
    upload_dir = os.path.join(basepath, 'uploads')
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, secure_filename(f.filename))
    f.save(file_path)

    # Make a prediction
    prediction = model_predict(file_path, model)
    predicted_class = classes['TRAIN'][prediction[0]]
    print('We think that is {}.'.format(predicted_class.lower()))
    return str(predicted_class).lower()
# Start the Flask development server when run as a script.
if __name__ == '__main__':
    app.run(debug=True)
Below again is the already functioning script of Pneumonia binary classification which is taking in dicom files that I am trying to integrate with the weights and preprocessing information of the Viral and Bacterial classifier that I want to use:
## Loading standard modules and libraries
import numpy as np
import pandas as pd
import pydicom
%matplotlib inline
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
from skimage.transform import resize
# This function reads in a .dcm file, checks the important fields for our device, and returns a numpy array
# of just the imaging data
def check_dicom(filename):
    """Read a DICOM file and return its pixel data if the header is accepted.

    Args:
        filename: path of the .dcm file to read.

    Returns:
        The file's pixel array, or None when the body part is not 'CHEST',
        the modality is not 'DX' or the patient position is neither 'PA'
        nor 'AP'.
    """
    print('Loading file {} ...'.format(filename))
    ds = pydicom.dcmread(filename)

    # Boolean `or` instead of bitwise `|`: same truth table for these
    # comparisons, and it stops at the first failing check.
    if ds.BodyPartExamined != 'CHEST' or ds.Modality != 'DX' or ds.PatientPosition not in ['PA', 'AP']:
        print('The image is not valid because the image position, the image type or the body part is not as per standards')
        return None

    print('ID:', ds.PatientID,
          'Age:', ds.PatientAge,
          'Modality:', ds.Modality,
          'Postion: ', ds.PatientPosition,
          'Body Part: ', ds.BodyPartExamined,
          'Study Desc: ', ds.StudyDescription)
    return ds.pixel_array
# This function takes the numpy array output by check_dicom and
# runs the appropriate pre-processing needed for our model input
def preprocess_image(img,img_mean,img_std,img_size):
    """Turn a raw pixel array into the tensor fed to the model.

    Args:
        img: pixel array as returned by check_dicom.
        img_mean: mean subtracted from the scaled image.
        img_std: standard deviation the centred image is divided by.
        img_size: shape of the returned array.

    Returns:
        Array of shape img_size holding the standardised grey image
        copied into three channels.
    """
    img = resize(img, (224, 224))
    img = img / 255.0
    grey_img = (img - img_mean) / img_std

    # Copy the single grey channel into all three channels at once.
    proc_img = np.zeros((224, 224, 3))
    proc_img[:, :, :] = grey_img[:, :, np.newaxis]

    # np.resize repeats or truncates data to reach img_size instead of
    # raising, so img_size must hold exactly 224 * 224 * 3 elements.
    proc_img = np.resize(proc_img, img_size)
    return proc_img
# This function loads in our trained model w/ weights and compiles it
def load_model(model_path, weight_path):
    """Build the model from a JSON architecture file and load its weights.

    Args:
        model_path: path of the JSON file describing the architecture.
        weight_path: path of the file holding the trained weights.

    Returns:
        The model with the weights loaded.
    """
    # The context manager closes the file even if reading raises.
    with open(model_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights(weight_path)
    return model
# This function uses our device's threshold parameters to predict whether or not
# the image shows the presence of pneumonia using our trained model
def predict_image(model, img, thresh):
    """Label an image "Positive" or "Negative" by thresholding the model output.

    Args:
        model: object whose predict(img) returns the prediction array.
        img: pre-processed input passed straight to model.predict.
        thresh: value the first prediction must exceed to be "Positive".

    Returns:
        "Positive" when the first prediction is strictly greater than
        thresh, otherwise "Negative".
    """
    result = model.predict(img)
    print('Predicted value:', result)
    # Only the first row of the prediction array is used.
    predict = result[0]
    if predict > thresh:
        return "Positive"
    return "Negative"
test_dicoms = ['test1.dcm','test2.dcm','test3.dcm','test4.dcm','test5.dcm','test6.dcm']
model_path = "my_model2.json" #path to saved model
weight_path = "xray_class_my_model2.best.hdf5" #path to saved best weights
IMG_SIZE=(1,224,224,3) # This might be different if you did not use vgg16
img_mean = 0.49262813 # mean image value from Build and train model line 22
img_std = 0.24496286 # loads the std dev from Build and train model line 22
my_model = load_model(model_path, weight_path) #loads model
thresh = 0.62786263 #threshold value for New Model2 from Build and train model line 66 at 80% Precision

# use the .dcm files to test your prediction
for i in test_dicoms:
    img = check_dicom(i)
    # check_dicom returns None for files whose header is rejected.
    if img is None:
        continue
    img_proc = preprocess_image(img,img_mean,img_std,IMG_SIZE)
    pred = predict_image(my_model,img_proc,thresh)
    print('Model Classification:', pred , 'for Pneumonia' )
    print('--------------------------------------------------------------------------------------------------------')
Output of above script:
Loading file test1.dcm ...
ID: 2 Age: 81 Modality: DX Postion: PA Body Part: CHEST Study Desc: No Finding
Predicted value: [[0.4775539]]
Model Classification: Negative for Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test2.dcm ...
ID: 1 Age: 58 Modality: DX Postion: AP Body Part: CHEST Study Desc: Cardiomegaly
Predicted value: [[0.47687072]]
Model Classification: Negative for Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test3.dcm ...
ID: 61 Age: 77 Modality: DX Postion: AP Body Part: CHEST Study Desc: Effusion
Predicted value: [[0.47764364]]
Model Classification: Negative for Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test4.dcm ...
The image is not valid because the image position, the image type or the body part is not as per standards
Loading file test5.dcm ...
The image is not valid because the image position, the image type or the body part is not as per standards
Loading file test6.dcm ...
The image is not valid because the image position, the image type or the body part is not as per standards
Threshold of 0.62786263 is considered at 80% Precision
Below is what I have tried so far but the diagnosis I am getting is always Viral on each and every dicom sample:
## Loading standard modules and libraries
import numpy as np
import pandas as pd
import pydicom
from PIL import Image
#%matplotlib inline
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
from keras.preprocessing import image
from skimage.transform import resize
# This function reads in a .dcm file, checks the important fields for our device, and returns a numpy array
# of just the imaging data
def check_dicom(filename):
    """Read a DICOM file and return its pixel data if it is a chest image.

    Args:
        filename: path of the .dcm file to read.

    Returns:
        The file's pixel array, or None when the body part is not 'CHEST'.
    """
    print('Loading file {} ...'.format(filename))
    ds = pydicom.dcmread(filename)

    # Only the body part is checked here; the modality and patient-position
    # checks are disabled:
    #| (ds.Modality !='DX'): #| (ds.PatientPosition not in ['PA', 'AP']):
    if ds.BodyPartExamined != 'CHEST':
        print('The image is not valid because the image position, the image type or the body part is not as per standards')
        return None

    print('ID:', ds.PatientID,
          'Age:', ds.PatientAge,
          'Modality:', ds.Modality,
          'Postion: ', ds.PatientPosition,
          'Body Part: ', ds.BodyPartExamined,
          'Study Desc: ', ds.StudyDescription)
    return ds.pixel_array
# This function takes the numpy array output by check_dicom and
# runs the appropriate pre-processing needed for our model input
def preprocess_image(img):
    """Resize and scale a pixel array into a (1, 342, 257, 1) model input.

    Args:
        img: pixel array as returned by check_dicom.

    Returns:
        Array of shape (1, 342, 257, 1).
    """
    #im = np.reshape(img, (342,257 ))
    #im = np.arange(257)
    #img = Image.fromarray(im)
    #img = image.load_img(img).convert('L')
    # NOTE(review): skimage's resize may already rescale integer input into
    # [0, 1] unless preserve_range=True is passed, in which case the division
    # below shrinks the values a second time — confirm against the
    # pre-processing used during training.
    img = resize(img, (342, 257))
    grey_img = img / 255.0
    #grey_img = (img - img_mean) / img_std

    # Place the image into the single channel of a one-image batch.
    proc_img = np.zeros((1, 342, 257, 1))
    proc_img[:, :, :, 0] = grey_img
    #proc_img[:, :, :, 1] = grey_img
    #proc_img[:, :, :, 2] = grey_img
    return proc_img
# This function loads in our trained model w/ weights and compiles it
def load_model(model_path, weight_path):
    """Build the model from JSON, load its weights and compile it.

    Args:
        model_path: path of the JSON file describing the architecture.
        weight_path: path of the file holding the trained weights.

    Returns:
        The compiled model with the weights loaded.
    """
    # The context manager closes the file even if reading raises.
    with open(model_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights(weight_path)
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='rmsprop')
    return model
# This function uses our device's threshold parameters to predict whether or not
# the image shows the presence of pneumonia using our trained model
def predict_image(model, img):
    """Return the index of the highest-scoring class for each input row.

    Args:
        model: object providing compile(...) and predict(img).
        img: pre-processed input passed straight to model.predict.

    Returns:
        The argmax over axis 1 of the model output.
    """
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='rmsprop')
    #x = np.expand_dims(img, axis=0)
    predict_x = model.predict(img)
    print(predict_x)
    prediction = np.argmax(predict_x, axis=1)
    print(prediction)
    return prediction
test_dicoms = ['test3.dcm','test2.dcm','test1.dcm','test4.dcm','test5.dcm','test6.dcm']
model_path = "model_adam.json" #path to saved model
weight_path = "model.h5" #path to saved best weights
#IMG_SIZE=(1,342,257,1) # This might be different if you did not use vgg16
#img_mean = 0.49262813 # mean image value from Build and train model line 22
#img_std = 0.24496286 # loads the std dev from Build and train model line 22
#thresh = 0.62786263 #threshold value for New Model2 from Build and train model line 66 at 80% Precision

# The label table does not change between files, so it is built once.
classes = {'TRAIN': ['BACTERIAL', 'NORMAL', 'VIRAL'],
           'VALIDATION': ['BACTERIA', 'NORMAL'],
           'TEST': ['BACTERIA', 'NORMAL', 'VIRUS']}

# Load the model once instead of once per file: rebuilding it inside the loop
# is slow and creates a new predict function on every iteration.
my_model = load_model(model_path, weight_path) #loads model

# use the .dcm files to test your prediction
for i in test_dicoms:
    img = check_dicom(i)
    # check_dicom returns None for files whose header is rejected.
    if img is None:
        continue
    img_proc = preprocess_image(img)
    prediction = predict_image(my_model, img_proc)
    predicted_class = classes['TRAIN'][int(prediction[0])]
    print('Model Classification:', predicted_class, 'Pneumonia' )
    print('--------------------------------------------------------------------------------------------------------')
Below is the output:
2022-01-02 10:50:00.817561: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-02 10:50:00.817601: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Loading file test3.dcm ...
ID: 61 Age: 77 Modality: DX Postion: AP Body Part: CHEST Study Desc: Effusion
2022-01-02 10:50:02.652828: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-01-02 10:50:02.652859: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-01-02 10:50:02.652899: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Wisdom-HP-250-G3-Notebook-PC): /proc/driver/nvidia/version does not exist
2022-01-02 10:50:02.653123: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
[[0.01132523 0.00254696 0.98612785]]
[2]
Model Classification: VIRAL Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test2.dcm ...
ID: 1 Age: 58 Modality: DX Postion: AP Body Part: CHEST Study Desc: Cardiomegaly
[[0.01112939 0.00251635 0.9863543 ]]
[2]
Model Classification: VIRAL Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test1.dcm ...
ID: 2 Age: 81 Modality: DX Postion: PA Body Part: CHEST Study Desc: No Finding
[[0.01128576 0.00255111 0.9861631 ]]
[2]
Model Classification: VIRAL Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test4.dcm ...
The image is not valid because the image position, the image type or the body part is not as per standards
Loading file test5.dcm ...
ID: 2 Age: 81 Modality: CT Postion: PA Body Part: CHEST Study Desc: No Finding
[[0.01128576 0.00255111 0.9861631 ]]
[2]
Model Classification: VIRAL Pneumonia
--------------------------------------------------------------------------------------------------------
Loading file test6.dcm ...
ID: 2 Age: 81 Modality: DX Postion: XX Body Part: CHEST Study Desc: No Finding
WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fba38ed19d0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
[[0.01128576 0.00255111 0.9861631 ]]
[2]
Model Classification: VIRAL Pneumonia
---------------------------------------
My suspicion is that I did it wrong on the image preprocessing steps when I have integrated these two scripts (Remember: The goal is to take advantage of the Dicom reading function of the second script). Thus the model is taking in and predicting wrong input altogether due to wrong array arrangements on trying to preprocess when I have integrated these two scripts.
If in need of some information on parameters in the jupyter training presentation of the model kindly highlight.
When a classifier works okay in train/test but not when doing inference in production, a very common reason is that the training data was processed differently from the production data. The fix is to make sure it is processed the same, ideally using the same bit of code.
How were the jpegs the classifier was trained on processed? Do they originally come from dicoms? If yes, what was the exact code for the conversion?
How were the jpegs loaded during training? Pay special attention to bits that modify the data rather than merely copy it, such as grey_img = (img - img_mean) / img_std and the other commented out lines in your code (maybe they were not commented out during training)
If you copy the dicom->jpeg conversion from 1 and the jpeg loading from 2, you will probably have a working prediction
The below dicom to jpeg conversion function did the job for me:
def take_dicom(dicomname):
    """Convert one DICOM file to a JPEG and return the JPEG's file name.

    Args:
        dicomname: file name of the DICOM inside 'Dicom_files/'.

    Returns:
        The JPEG file name (dicomname + '.jpg'); the file itself is
        written to './Jpeg/'.
    """
    ds = read_file('Dicom_files/' + dicomname)
    # NOTE(review): whether the save below succeeds may depend on the dtype
    # of pixel_array (e.g. 16-bit data) — confirm for the failing modalities.
    im = fromarray(ds.pixel_array)
    pure_jpg = dicomname + '.jpg'
    # save() returns nothing, so its result is not kept.
    im.save('./Jpeg/' + pure_jpg)
    return pure_jpg
Just had to use the os function to point my prediction function to where it should pick these jpegs before they are preprocessed and diagnosed:
def preprocess_image(pure_jpg):
    '''
    Load a converted JPEG and shape it into the model input.

    Args:
        -- pure_jpg : file name of a JPEG stored in ./Jpeg/.

    Returns:
        Array of shape (1, 342, 257, 1) with values scaled by 1/255.
    '''
    basepath = os.path.dirname('./Jpeg/')
    # Build the path from the function's own argument.
    file_path = os.path.join(basepath, pure_jpg)
    IMG = image.load_img(file_path).convert('L')

    # Pre-processing the image.
    # PIL's resize takes (width, height), so the array has 342 rows and
    # 257 columns, matching the reshape below.
    IMG_ = IMG.resize((257, 342))
    IMG_ = np.asarray(IMG_)
    IMG_ = np.true_divide(IMG_, 255)
    IMG_ = IMG_.reshape(1, 342, 257, 1)
    return IMG_
However, the problem is that it's only working for the following two dicom imaging modalities:
DX (Digital X-Ray)
CT (Computed Tomography)
CR (Computed Radiography) dicom images are failing to convert.

Why is it not recommended to save the optimizer, model etc as pickable/dillable objs in PyTorch but instead get the state dicts and load them?

Why is it recommended to save the state dicts and load them instead of saving stuff with dill for example and then just getting the usable objects immediately?
I think I've done that without many issues and it saves users code.
But instead we are recommended to do something like:
def _load_model_and_optimizer_from_checkpoint(args: Namespace, training: bool = True) -> Namespace:
    """
    Restore model and optimizer state from the checkpoint at ``args.PATH``.

    based from: https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html

    Sets ``args.model``, ``args.epoch_num`` and ``args.loss`` and puts the
    model in train or eval mode depending on ``training``. Returns ``args``.
    """
    import torch
    from torch import optim
    import torch.nn as nn

    # model = Net()
    # NOTE(review): nn.Linear is called without its size arguments here; this
    # is a placeholder for the real model class and must be filled in.
    args.model = nn.Linear()
    # optimizer = optim.SGD(args.model.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(args.model.parameters(), lr=0.001)
    # scheduler...

    checkpoint = torch.load(args.PATH)
    args.model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    args.epoch_num = checkpoint['epoch_num']
    args.loss = checkpoint['loss']

    if training:
        args.model.train()
    else:
        args.model.eval()
    # The signature promises a Namespace, so hand the updated one back.
    return args
For example I've saved:
def save_for_meta_learning(args: Namespace, ckpt_filename: str = 'ckpt.pt'):
    """Write a checkpoint to ``args.log_root / ckpt_filename``.

    Nothing is done unless ``is_lead_worker(args.rank)`` is true. The
    checkpoint stores the objects themselves (model, meta-learner, outer
    optimizer) alongside their state dicts and string forms, serialized
    with dill.
    """
    if not is_lead_worker(args.rank):
        return

    import dill

    args.logger.save_current_plots_and_stats()

    # - ckpt
    assert uutils.xor(args.training_mode == 'epochs', args.training_mode == 'iterations')
    f: nn.Module = get_model_from_ddp(args.base_model)
    # pickle vs torch.save https://discuss.pytorch.org/t/advantages-disadvantages-of-using-pickle-module-to-save-models-vs-torch-save/79016
    args_pickable: Namespace = uutils.make_args_pickable(args)

    # The *_str and *_state_dict entries were added later, to make it easier
    # to check what model and optimizer were used.
    checkpoint = {
        'training_mode': args.training_mode,
        'it': args.it,
        'epoch_num': args.epoch_num,
        'args': args_pickable,  # some versions of this might not have args!
        'meta_learner': args.meta_learner,
        'meta_learner_str': str(args.meta_learner),
        'f': f,
        'f_state_dict': f.state_dict(),
        'f_str': str(f),
        # 'f_modules': f._modules,
        # 'f_modules_str': str(f._modules),
        'outer_opt': args.outer_opt,
        'outer_opt_state_dict': args.outer_opt.state_dict(),
        'outer_opt_str': str(args.outer_opt),
    }
    torch.save(checkpoint,
               pickle_module=dill,
               f=args.log_root / ckpt_filename)
then loaded:
def get_model_opt_meta_learner_to_resume_checkpoint_resnets_rfs(args: Namespace,
                                                                path2ckpt: str,
                                                                filename: str,
                                                                device: Optional[torch.device] = None
                                                                ) -> tuple[nn.Module, optim.Optimizer, MetaLearner]:
    """
    Get the model, optimizer, meta_learner to resume training from checkpoint.

    Updates ``args.epoch_num`` or ``args.it`` from the checkpoint when it
    records a training mode.

    Examples:
        - see: _resume_from_checkpoint_meta_learning_for_resnets_rfs_test
    """
    import uutils

    path2ckpt: Path = Path(path2ckpt).expanduser() if isinstance(path2ckpt, str) else path2ckpt.expanduser()
    ckpt: dict = torch.load(path2ckpt / filename, map_location=torch.device('cpu'))

    # args_ckpt: Namespace = ckpt['args']
    # NOTE(review): the epoch/iteration branch is assumed to apply only when
    # the checkpoint records a training mode — confirm against the original.
    training_mode = ckpt.get('training_mode')
    if training_mode is not None:
        assert uutils.xor(training_mode == 'epochs', training_mode == 'iterations')
        if training_mode == 'epochs':
            args.epoch_num = ckpt['epoch_num']
        else:
            args.it = ckpt['it']

    # - get meta-learner
    meta_learner: MetaLearner = ckpt['meta_learner']

    # - get model
    model: nn.Module = meta_learner.base_model

    # - get outer-opt
    outer_opt_str = ckpt.get('outer_opt_str')
    if outer_opt_str is not None:
        # use the string to create optimizer, load the state dict, etc.
        outer_opt: optim.Optimizer = get_optimizer(outer_opt_str)
        outer_opt_state_dict: dict = ckpt['outer_opt_state_dict']
        outer_opt.load_state_dict(outer_opt_state_dict)
    else:
        # this is not ideal, but since Adam has a exponentially moving average for it's adaptive learning rate,
        # hopefully this doesn't screw my checkpoint to much
        outer_opt: optim.Optimizer = optim.Adam(model.parameters(), lr=args.outer_lr)

    # - device setup
    if device is not None:
        # if torch.cuda.is_available():
        #     meta_learner.base_model = meta_learner.base_model.cuda()
        meta_learner.base_model = meta_learner.base_model.to(device)
    return model, outer_opt, meta_learner
without issues.
Related:
Save and load model optimizer state
pytorch save and load model
Save and load a Pytorch model
save and load unserialized pytorch pretrained model
https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
Why is it not recommended to save the optimizer, model etc as pickable/dillable objs in PyTorch but instead get the state dicts and load them?
https://discuss.pytorch.org/t/why-is-it-not-recommended-to-save-the-optimizer-model-etc-as-pickable-dillable-objs-in-pytorch-but-instead-get-the-state-dicts-and-load-them/137933

How to write serving input function for Tensorflow model trained without using Estimators?

I have a model trained on a single machine without using Estimator and I'm looking to serve the final trained model on Google cloud AI platform (ML engine). I exported the frozen graph as a SavedModel using SavedModelBuilder and deployed it on the AI platform. It works fine for small input images but for it to be able to accept large input images for online prediction, I need to change it to accept b64 encoded strings ({'image_bytes': {'b64': base64.b64encode(jpeg_data).decode()}}) which are converted to the required tensor by a serving_input_fn if using Estimators.
What options do I have if I am not using an Estimator? If I have a frozen graph or SavedModel being created from SavedModelBuilder, is there a way to have something similar to an estimator's serving_input_fn when exporting/ saving?
Here's the code I'm using for exporting:
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants
export_dir = 'serving_model/'
graph_pb = 'model.pb'

builder = tf.saved_model.builder.SavedModelBuilder(export_dir)

# Read the frozen graph definition from disk.
with tf.gfile.GFile(graph_pb, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

sigs = {}

with tf.Session(graph=tf.Graph()) as sess:
    # name="" is important to ensure we don't get spurious prefixing
    tf.import_graph_def(graph_def, name="")
    g = tf.get_default_graph()
    inp = g.get_tensor_by_name("image_bytes:0")
    out_f1 = g.get_tensor_by_name("feature_1:0")
    out_f2 = g.get_tensor_by_name("feature_2:0")

    # Single default serving signature: one input, two named outputs.
    sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
        tf.saved_model.signature_def_utils.predict_signature_def(
            {"image_bytes": inp}, {"f1": out_f1, "f2": out_f2})
    )

    builder.add_meta_graph_and_variables(sess,
                                         [tag_constants.SERVING],
                                         strip_default_attrs=True,
                                         signature_def_map=sigs)

builder.save()
Use a @tf.function to specify a serving signature. Here's an example that calls Keras:
class ExportModel(tf.keras.Model):
    """Wraps a Keras model and exports it with an explicit serving signature."""

    def __init__(self, model):
        # No arguments are forwarded: `self` is already bound by super().
        super().__init__()
        self.model = model

    # The input signature fixes the names, dtypes and rank of the two inputs
    # exposed by the exported serving function.
    @tf.function(input_signature=[
        tf.TensorSpec([None,], dtype='int32', name='a'),
        tf.TensorSpec([None,], dtype='int32', name='b')
    ])
    def serving_fn(self, a, b):
        """Run the wrapped model on inputs `a` and `b`; result is under 'pred'."""
        return {
            'pred' : self.model({'a': a, 'b': b}) #, steps=1)
        }

    def save(self, export_path):
        """Save this wrapper to `export_path` with `serving_fn` as 'serving_default'."""
        sigs = {
            'serving_default' : self.serving_fn
        }
        tf.keras.backend.set_learning_phase(0) # inference only
        tf.saved_model.save(self, export_path, signatures=sigs)
sm = ExportModel(model)
sm.save(EXPORT_PATH)
First, load your already exported SavedModel with
import tensorflow as tf
loaded_model = tf.saved_model.load(MODEL_DIR)
Then, wrap it with a new Keras model that takes base64 input
class Base64WrapperModel(tf.keras.Model):
    """Keras model that base64-decodes its input before calling an inner model."""

    def __init__(self, model):
        super(Base64WrapperModel, self).__init__()
        self.inner_model = model

    @tf.function
    def call(self, base64_input):
        """Decode `base64_input` and return the inner model's output for it."""
        str_input = tf.io.decode_base64(base64_input)
        return self.inner_model(str_input)
wrapper_model = Base64WrapperModel(loaded_model)
Finally, save your wrapped model with Keras API
wrapper_model.save(EXPORT_DIR)

Watson generated Pytorch results in: "ValueError: optimizer got an empty parameter list"

I am experimenting with Watson Neural Network Modeler.
I've created a model from the built-in demo "Single Convolution layer on MNIST". The only customization I did was to specify the training data files.
I then exported the Pytorch code and I am trying to run it on my local computer.
The generated code is pretty readable. The relevant code excerpt is:
# Define network architecture
class Net(nn.Module):
    """Network skeleton as exported by the modeler.

    NOTE: ``__init__`` registers no layers, so ``parameters()`` yields
    nothing, and ``forward`` refers to attributes (``Convolution2D_9``,
    ``ReLU_1``, ``Pooling2D_8``, ``Dense_3``, ``Softmax_5``,
    ``CrossEntropyLoss_7``) that are never assigned in this class.
    """

    def __init__(self, inp_c):
        # `inp_c` is accepted but not used; no submodules are created here.
        super(Net, self).__init__()

    def forward(self, ImageData_4, target):
        """Return the softmax output and the top-1 value of that output."""
        Convolution2D_9 = self.Convolution2D_9(ImageData_4)
        ReLU_1 = self.ReLU_1(Convolution2D_9)
        Pooling2D_8 = self.Pooling2D_8(ReLU_1)
        # Flatten each sample's pooled features to 10816 values.
        Flatten_2 = Pooling2D_8.view(-1, 10816)
        Dense_3 = self.Dense_3(Flatten_2)
        Softmax_5 = self.Softmax_5(Dense_3)
        # topk(...)[0] holds the top values, not their indices.
        Accuracy_6 = torch.topk(Softmax_5, 1)[0]
        # The loss is computed but not returned.
        CrossEntropyLoss_7 = self.CrossEntropyLoss_7(Softmax_5, target)
        return Softmax_5, Accuracy_6
# Build the network with one input channel and move it to the GPU.
inp_c = 1
model = Net(inp_c)
model.cuda()

# Adam hyper-parameters.
learning_rate = 1e-3
decay = 0.0
beta_1 = 0.9
beta_2 = 0.999

# Create the optimizer over the model's parameters.
# NOTE: this assignment rebinds the name `optim` from the module to the
# optimizer instance.
optim = optim.Adam(model.parameters(), lr=learning_rate, betas=(beta_1, beta_2), weight_decay=decay)
I am getting the error:
"ValueError: optimizer got an empty parameter list"
on the optim = optim.Adam() statement.
Is there any Watson user/expert out there who can shed some light on this issue? I am basically running the demo. It was not supposed to fail.
Thanks!

Categories