How to include heavy model training in a FastAPI endpoint? - python

There are many resources online describing how to use FastAPI to serve predictions from a static, pre-trained model. What if we would like an endpoint that initiates retraining on new data? FastAPI's BackgroundTasks does not seem suitable for a training job that runs for several minutes and takes up a lot of CPU.
I am wondering how the following skeleton code could roughly be extended, assuming the underlying model is a deep learning model (so training is quite long).
import numpy as np
import pickle
from fastapi import FastAPI
from pydantic import BaseModel
import os

app = FastAPI(title="dummy app")

# represents a particular datapoint
class Datapoint(BaseModel):
    feature1: float
    feature2: float
    feature3: float

@app.on_event("startup")
def load_model():
    # load model from pickle file
    # print(os.getcwd())
    # print(os.listdir())
    with open("../app/model.pkl", "rb") as file:
        global model
        model = pickle.load(file)

@app.get("/")
async def root():
    return {"message": "Hello World"}

@app.post("/predict")
def predict(data: Datapoint):
    data_point = np.array(
        [
            [
                data.feature1,
                data.feature2,
                data.feature3,
            ]
        ]
    )
    pred = model.predict(data_point).tolist()
    pred = pred[0]
    print(pred)
    return {"Prediction": pred}
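The question is left open here, but one way the skeleton could be extended (a minimal sketch of an assumption, not an accepted answer) is to hand the retraining off to a separate process, so the several-minute, CPU-heavy job never blocks FastAPI's event loop. concurrent.futures.ProcessPoolExecutor and loop.run_in_executor are standard-library APIs; the train_model function and the /train route below are hypothetical placeholders, written as a standalone sketch rather than a patch to the code above.

import asyncio
from concurrent.futures import ProcessPoolExecutor

from fastapi import FastAPI

app = FastAPI(title="dummy app")
executor = ProcessPoolExecutor(max_workers=1)  # at most one training job at a time

def train_model(data_path: str) -> None:
    # hypothetical placeholder: load the new data, fit the deep learning model,
    # then persist it (e.g. pickle it) so the prediction code can reload it
    ...

@app.post("/train")
async def train(data_path: str):
    loop = asyncio.get_running_loop()
    # the heavy work runs in a child process; the request returns immediately
    loop.run_in_executor(executor, train_model, data_path)
    return {"status": "training started"}

For anything heavier or longer-running, a dedicated task queue (e.g. Celery or RQ) keeps the training outside the API process entirely and survives worker restarts.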

Related

Tensorflow: Can I run my tensorflow model directly in flask?

I have created a TensorFlow model, saved it, and tested it. I really don't know how to use TensorFlow Serving, and I'm not sure about the input and output nodes of my model needed to convert it into a protobuf and then use TensorFlow Serving. So I wanted to know: can I directly use the prediction function in Flask and load the model there to make the predictions? I am really confused as to why we have to use only TensorFlow Serving to deploy TensorFlow models. Is there any easier, more direct way?
You can, but you will need to set up a TensorFlow Serving server. Then you send a post request to the server.
Reference link here: Deploying-keras-models-using-tensorflow-serving-and-flask
Reference link here: Serving-TensorFlow flask client
"""This script wraps the client into a Flask server. It receives POST request with
prediction data, and forwards the data to the tensorflow server for inference.
"""
from flask import Flask, render_template, request, url_for, jsonify, Response
import json
import tensorflow as tf
import numpy as np
import os
import argparse
import sys
from datetime import datetime
from grpc.beta import implementations
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2

tf.app.flags.DEFINE_string('server', 'localhost:9000', 'PredictionService host:port')
FLAGS = tf.app.flags.FLAGS

app = Flask(__name__)

class mainSessRunning():
    def __init__(self):
        host, port = FLAGS.server.split(':')
        channel = implementations.insecure_channel(host, int(port))
        self.stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
        self.request = predict_pb2.PredictRequest()
        self.request.model_spec.name = 'example_model'
        self.request.model_spec.signature_name = 'prediction'

    def inference(self, val_x):
        # temp_data = numpy.random.randn(100, 3).astype(numpy.float32)
        temp_data = val_x.astype(np.float32).reshape(-1, 3)
        print("temp_data is:", temp_data)
        data, label = temp_data, np.sum(temp_data * np.array([1, 2, 3]).astype(np.float32), 1)
        self.request.inputs['input'].CopyFrom(
            tf.contrib.util.make_tensor_proto(data, shape=data.shape))
        result = self.stub.Predict(self.request, 5.0)
        return result, label

run = mainSessRunning()
print("Initialization done.")

# Define a route for the default URL, which loads the form
@app.route('/inference', methods=['POST'])
def inference():
    request_data = request.json
    input_data = np.expand_dims(np.array(request_data), 0)
    result, label = run.inference(input_data)
    di = {"result": str(result), 'label': label[0].tolist()}
    return Response(json.dumps(di), mimetype='application/json')
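As a side note not in the original answer: newer TensorFlow Serving builds also expose a REST API (by default on port 8501), so the Flask client above could call it with a plain HTTP POST instead of the gRPC stub. The sketch below assumes a server started with the REST endpoint enabled and the same example_model name used in the snippet above.

# Hedged sketch: querying TensorFlow Serving over REST instead of gRPC.
# Assumes the server exposes its REST API on port 8501 and serves "example_model".
import json
import numpy as np
import requests

data = np.random.randn(2, 3).astype(np.float32)
payload = {"instances": data.tolist()}
resp = requests.post(
    "http://localhost:8501/v1/models/example_model:predict",
    data=json.dumps(payload),
)
print(resp.json())  # e.g. {"predictions": [...]}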

How to log from a custom ai platform model

I recently deployed a custom model to Google Cloud's AI Platform, and I am trying to debug some parts of my preprocessing logic. However, my print statements are not being logged to the Stackdriver output. I have also tried using the logging client imported from google.cloud, to no avail. Here is my custom prediction file:
import os
import pickle
import numpy as np
from sklearn.datasets import load_iris
import tensorflow as tf
from google.cloud import logging

class MyPredictor(object):
    def __init__(self, model, preprocessor):
        self.logging_client = logging.Client()
        self._model = model
        self._preprocessor = preprocessor
        self._class_names = ["Snare", "Kicks", "ClosedHH", "ClosedHH", "Clap", "Crash", "Perc"]

    def predict(self, instances, **kwargs):
        log_name = "Here I am"
        logger = self.logging_client.logger(log_name)
        text = 'Hello, world!'
        logger.log_text(text)
        print('Logged: {}'.format(text), kwargs.get("sr"))
        inputs = np.asarray(instances)
        outputs = self._model.predict(inputs)
        if kwargs.get('probabilities'):
            return outputs.tolist()
            # return "[]"
        else:
            return [self._class_names[index] for index in np.argmax(outputs.tolist(), axis=1)]

    @classmethod
    def from_path(cls, model_dir):
        model_path = os.path.join(model_dir, 'model.h5')
        model = tf.keras.models.load_model(model_path, custom_objects={"adam": tf.keras.optimizers.Adam,
            "categorical_crossentropy": tf.keras.losses.categorical_crossentropy, "lr": 0.01, "name": "Adam"})
        preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl')
        with open(preprocessor_path, 'rb') as f:
            preprocessor = pickle.load(f)
        return cls(model, preprocessor)
I can't find anything online for why my logs are not showing up in stackdriver (neither print statements nor the logging library calls). Has anyone faced this issue?
Thanks,
Nikita
NOTE: If you have enough rep to create tags please add the google-ai-platform tag to this post. I think it would really help people who are in my position. Thanks!
From Documentation:
If you want to enable online prediction logging, you must configure it when you create a model resource or when you create a model version resource, depending on which type of logging you want to enable. There are three types of logging, which you can enable independently:
- Access logging, which logs information like timestamp and latency for each request to Stackdriver Logging. You can enable access logging when you create a model resource.
- Stream logging, which logs the stderr and stdout streams from your prediction nodes to Stackdriver Logging, and can be useful for debugging. This type of logging is in beta, and it is not supported by Compute Engine (N1) machine types. You can enable stream logging when you create a model resource.
- Request-response logging, which logs a sample of online prediction requests and responses to a BigQuery table. This type of logging is in beta. You can enable request-response logging by creating a model version resource, then updating that version.
For your use case, please use the following template to log custom information into StackDriver:
Model
gcloud beta ai-platform models create {MODEL_NAME} \
--regions {REGION} \
--enable-logging \
--enable-console-logging
Model version
gcloud beta ai-platform versions create {VERSION_NAME} \
--model {MODEL_NAME} \
--origin gs://{BUCKET}/{MODEL_DIR} \
--python-version 3.7 \
--runtime-version 1.15 \
--package-uris gs://{BUCKET}/{PACKAGES_DIR}/custom-model-0.1.tar.gz \
--prediction-class=custom_prediction.CustomModelPrediction \
--service-account custom#project_id.iam.gserviceaccount.com
I tried this and it worked fine:
I made some modifications to the constructor due to the @classmethod decorator.
- Create a service account and grant it the "Stackdriver Debugger User" role; use it during model version creation.
- Add the google-cloud-logging library to your setup.py.
- Consider the extra cost of enabling Stackdriver logging.
- When using log_struct, check that the correct type is passed (if using str, make sure you convert bytes to str in Python 3 using .decode('utf-8')).
- Define the project_id parameter during Stackdriver client creation, logging.Client(project='project_id'), otherwise you will get:
ERROR:root:Prediction failed: 400 Name "projects//logs/my-custom-prediction-log" is missing the parent component. Expected the form projects/[PROJECT_ID]/logs/[ID]"
Code below:
%%writefile cloud_logging.py
import os
import pickle
import numpy as np
from datetime import date
from google.cloud import logging
import tensorflow.keras as keras

LOG_NAME = 'my-custom-prediction-log'

class CustomModelPrediction(object):
    def __init__(self, model, processor, client):
        self._model = model
        self._processor = processor
        self._client = client

    def _postprocess(self, predictions):
        labels = ['negative', 'positive']
        return [
            {
                "label": labels[int(np.round(prediction))],
                "score": float(np.round(prediction, 4))
            } for prediction in predictions]

    def predict(self, instances, **kwargs):
        logger = self._client.logger(LOG_NAME)
        logger.log_struct({'instances': instances})
        preprocessed_data = self._processor.transform(instances)
        predictions = self._model.predict(preprocessed_data)
        labels = self._postprocess(predictions)
        return labels

    @classmethod
    def from_path(cls, model_dir):
        client = logging.Client(project='project_id')  # Change to your project
        model = keras.models.load_model(
            os.path.join(model_dir, 'keras_saved_model.h5'))
        with open(os.path.join(model_dir, 'processor_state.pkl'), 'rb') as f:
            processor = pickle.load(f)
        return cls(model, processor, client)

# Verify model locally
from cloud_logging import CustomModelPrediction
classifier = CustomModelPrediction.from_path('.')
requests = ["God I hate the north", "god I love this"]
response = classifier.predict(requests)
response
Then I check with the sample library:
python snippets.py my-custom-prediction-log list
Listing entries for logger my-custom-prediction-log:
* 2020-02-19T19:51:45.809767+00:00: {u'instances': [u'God I hate the north', u'god I love this']}
* 2020-02-19T19:57:18.615159+00:00: {u'instances': [u'God I hate the north', u'god I love this']}
To visualize the logs, go to Stackdriver > Logging, select Global as the resource and your log name; if you want to see the model logs, you should be able to select Cloud ML Model Version instead.
You can use my files here: model and pre-processor
If you just want your print statements to work, and you don't want to use the logging approach in the answer above, you can simply add the flush flag to your print:
print("logged", flush=True)

How to use multiple inputs for custom Tensorflow model hosted by AWS Sagemaker

I have a trained Tensorflow model that uses two inputs to make predictions. I have successfully set up and deployed the model on AWS Sagemaker.
from sagemaker.tensorflow.model import TensorFlowModel

sagemaker_model = TensorFlowModel(model_data='s3://' + sagemaker_session.default_bucket()
                                  + '/R2-model/R2-model.tar.gz',
                                  role=role,
                                  framework_version='1.12',
                                  py_version='py2',
                                  entry_point='train.py')
predictor = sagemaker_model.deploy(initial_instance_count=1,
                                   instance_type='ml.m4.xlarge')
predictor.predict([data_scaled_1.to_csv(),
                   data_scaled_2.to_csv()]
                  )
I always receive an error. I could use an AWS Lambda function, but I don't see any documentation on specifying multiple inputs to deployed models. Does anyone know how to do this?
You need to actually build a correct signature when deploying the model first.
Also, you need to deploy with tensorflow serving.
At inference time, you also need to provide a properly formatted input when making the request: the SageMaker Docker server takes the request input and passes it on to TensorFlow Serving, so the input needs to match the TF Serving inputs.
Here is a simple example of deploying a Keras multi-input, multi-output model to TensorFlow Serving using SageMaker, and of how to run inference afterwards:
import tarfile

from tensorflow.python.saved_model import builder
from tensorflow.python.saved_model.signature_def_utils import predict_signature_def
from tensorflow.python.saved_model import tag_constants
from keras import backend as K
import sagemaker
# nano ~/.aws/config
# get_ipython().system('nano ~/.aws/config')
from sagemaker import get_execution_role
from sagemaker.tensorflow.serving import Model

def serialize_to_tf_and_dump(model, export_path):
    """
    serialize a Keras model to a TF model
    :param model: compiled Keras model
    :param export_path: str, the export path contains the name and the version of the model
    :return:
    """
    # Build the Protocol Buffer SavedModel at 'export_path'
    save_model_builder = builder.SavedModelBuilder(export_path)
    # Create prediction signature to be used by TensorFlow Serving Predict API
    signature = predict_signature_def(
        inputs={
            "input_type_1": model.input[0],
            "input_type_2": model.input[1],
        },
        outputs={
            "decision_output_1": model.output[0],
            "decision_output_2": model.output[1],
            "decision_output_3": model.output[2]
        }
    )
    with K.get_session() as sess:
        # Save the meta graph and variables
        save_model_builder.add_meta_graph_and_variables(
            sess=sess, tags=[tag_constants.SERVING], signature_def_map={"serving_default": signature})
        save_model_builder.save()

# instantiate model
model = ....
# convert to tf model
serialize_to_tf_and_dump(model, 'model_folder/1')
# tar tf model
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add('model_folder', recursive=True)
# upload it to s3
sagemaker_session = sagemaker.Session()
inputs = sagemaker_session.upload_data(path='model.tar.gz')
# convert to sagemaker model
role = get_execution_role()
sagemaker_model = Model(model_data=inputs,
                        name='DummyModel',
                        role=role,
                        framework_version='1.12')
predictor = sagemaker_model.deploy(initial_instance_count=1,
                                   instance_type='ml.t2.medium', endpoint_name='MultiInputMultiOutputModel')
At inference time, here is how to request predictions:
import json
import boto3

x_inputs = ...  # list with 2 np arrays of size (batch_size, ...)
data = {
    'inputs': {
        "input_type_1": x_inputs[0].tolist(),
        "input_type_2": x_inputs[1].tolist()
    }
}

endpoint_name = 'MultiInputMultiOutputModel'
client = boto3.client('runtime.sagemaker')
response = client.invoke_endpoint(EndpointName=endpoint_name, Body=json.dumps(data), ContentType='application/json')
predictions = json.loads(response['Body'].read())
You likely need to customize the inference functions loaded in the endpoints. In the SageMaker TF SDK doc here you can find that there are two options for SageMaker TensorFlow deployment:
- Python Endpoint (the default): check whether modifying the input_fn can accommodate your inference scheme
- TF Serving endpoint
You can diagnose errors in CloudWatch (accessible through the SageMaker endpoint UI), choose the most appropriate serving architecture among the two mentioned above, and customize the inference functions if need be.
Only the TF serving endpoint supports multiple inputs in one inference request. You can follow the documentation here to deploy a TFS endpoint -
https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_tensorflow_serving.rst
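For reference, the linked documentation describes shipping an inference.py with input_handler and output_handler functions alongside the TF Serving model. The sketch below is only an illustration of that interface, assuming the same input_type_1/input_type_2 names from the signature built above; it is not part of the original answer.

# Hedged sketch of inference.py handlers for the SageMaker TF Serving container.
import json

def input_handler(data, context):
    # Pre-process the incoming request before it is forwarded to TF Serving.
    if context.request_content_type == 'application/json':
        payload = json.loads(data.read().decode('utf-8'))
        # pass the named inputs straight through in TF Serving's REST format
        return json.dumps({'inputs': payload['inputs']})
    raise ValueError('Unsupported content type: {}'.format(context.request_content_type))

def output_handler(response, context):
    # Post-process the TF Serving response before returning it to the client.
    if response.status_code != 200:
        raise ValueError(response.content.decode('utf-8'))
    return response.content, context.accept_header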

Tensor is not an element of this graph; deploying Keras model

I'm deploying a Keras model and sending the test data to the model via a Flask API. I have two files:
First: My Flask App:
# Let's startup the Flask application
app = Flask(__name__)

# Model reload from jSON:
print('Load model...')
json_file = open('models/model_temp.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
keras_model_loaded = model_from_json(loaded_model_json)
print('Model loaded...')

# Weights reloaded from .h5 inside the model
print('Load weights...')
keras_model_loaded.load_weights("models/Model_temp.h5")
print('Weights loaded...')

# URL that we'll use to make predictions using get and post
@app.route('/predict', methods=['GET', 'POST'])
def predict():
    data = request.get_json(force=True)
    predict_request = [data["month"], data["day"], data["hour"]]
    predict_request = np.array(predict_request)
    predict_request = predict_request.reshape(1, -1)
    y_hat = keras_model_loaded.predict(predict_request, batch_size=1, verbose=1)
    return jsonify({'prediction': str(y_hat)})

if __name__ == "__main__":
    # Choose the port
    port = int(os.environ.get('PORT', 9000))
    # Run locally
    app.run(host='127.0.0.1', port=port)
Second: the file I'm using to send the JSON data to the API endpoint:
response = rq.get('api url has been removed')
data = response.json()
currentDT = datetime.datetime.now()
month = currentDT.month
day = currentDT.day
hour = currentDT.hour

url = "http://127.0.0.1:9000/predict"
post_data = json.dumps({'month': month, 'day': day, 'hour': hour})
r = rq.post(url, post_data)
I'm getting this response from Flask regarding TensorFlow:
ValueError: Tensor Tensor("dense_6/BiasAdd:0", shape=(?, 1), dtype=float32) is not an element of this graph.
My Keras model is a simple six-dense-layer model and trains with no errors.
Any ideas?
Flask uses multiple threads. The problem you are running into is that the TensorFlow model is not loaded and used in the same thread. One workaround is to force TensorFlow to use the global default graph.
Add this after you load your model
global graph
graph = tf.get_default_graph()
And inside your predict
with graph.as_default():
    y_hat = keras_model_loaded.predict(predict_request, batch_size=1, verbose=1)
It's much simpler to wrap your Keras model in a class that keeps track of its own graph and session. This prevents the problems that multiple threads/processes/models can cause, which is almost certainly the cause of your issue. While other solutions will work, this is by far the most general and scalable catch-all. Use this one:
import os
from keras.models import model_from_json
from keras import backend as K
import tensorflow as tf
import logging

logger = logging.getLogger('root')

class NeuralNetwork:
    def __init__(self):
        self.session = tf.Session()
        self.graph = tf.get_default_graph()
        # the folder in which the model and weights are stored
        self.model_folder = os.path.join(os.path.abspath("src"), "static")
        self.model = None
        # for some reason in a flask app the graph/session needs to be used in the init else it hangs on other threads
        with self.graph.as_default():
            with self.session.as_default():
                logging.info("neural network initialised")

    def load(self, file_name=None):
        """
        :param file_name: [model_file_name, weights_file_name]
        :return:
        """
        with self.graph.as_default():
            with self.session.as_default():
                try:
                    model_name = file_name[0]
                    weights_name = file_name[1]
                    if model_name is not None:
                        # load the model
                        json_file_path = os.path.join(self.model_folder, model_name)
                        json_file = open(json_file_path, 'r')
                        loaded_model_json = json_file.read()
                        json_file.close()
                        self.model = model_from_json(loaded_model_json)
                    if weights_name is not None:
                        # load the weights
                        weights_path = os.path.join(self.model_folder, weights_name)
                        self.model.load_weights(weights_path)
                    logging.info("Neural Network loaded: ")
                    logging.info('\t' + "Neural Network model: " + model_name)
                    logging.info('\t' + "Neural Network weights: " + weights_name)
                    return True
                except Exception as e:
                    logging.exception(e)
                    return False

    def predict(self, x):
        with self.graph.as_default():
            with self.session.as_default():
                y = self.model.predict(x)
        return y
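A possible way to wire this class into the Flask app from the question (a hedged usage sketch, not part of the original answer; the model and weights file names come from the question, and the paths may need adjusting to match self.model_folder):

# Hedged usage sketch: build the wrapper once at startup and reuse it per request.
# Assumes the NeuralNetwork class defined above is in scope (same module or imported).
from flask import Flask, request, jsonify
import numpy as np

app = Flask(__name__)
nn = NeuralNetwork()
nn.load(["model_temp.json", "Model_temp.h5"])  # file names taken from the question

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json(force=True)
    features = np.array([[data["month"], data["day"], data["hour"]]])
    return jsonify({'prediction': nn.predict(features).tolist()})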
Just after loading the model, add model._make_predict_function():
# Model reload from jSON:
print('Load model...')
json_file = open('models/model_temp.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
keras_model_loaded = model_from_json(loaded_model_json)
print('Model loaded...')
# Weights reloaded from .h5 inside the model
print('Load weights...')
keras_model_loaded.load_weights("models/Model_temp.h5")
print('Weights loaded...')
keras_model_loaded._make_predict_function()
It turns out this approach does not need a clear_session call and is at the same time configuration friendly: use the graph object from the configured session, session = tf.Session(config=_config); self.graph = session.graph, and make predictions with the created graph as default via with self.graph.as_default():. This offers a clean approach.
from keras.backend.tensorflow_backend import set_session
...

def __init__(self):
    config = self.keras_resource()
    self.init_model(config)

def init_model(self, _config, *args):
    session = tf.Session(config=_config)
    self.graph = session.graph
    # set configured session
    set_session(session)
    self.model = load_model(file_path)

def keras_resource(self):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return config

def predict_target(self, to_predict):
    with self.graph.as_default():
        predict = self.model.predict(to_predict)
    return predict
I had the same problem. It was resolved by changing from TensorFlow 1 to TensorFlow 2: just uninstall version 1 and install version 2.
Yes, there is a bug when you predict from a model with Keras: Keras is not able to build the graph due to this error. Try to predict from the model with the help of TensorFlow instead. Just replace this line of code:
Keras code:
features = model_places.predict( img )
TensorFlow code:
import tensorflow as tf
graph = tf.get_default_graph()
Import this library in your code and replace:
with graph.as_default():
    features = model_places.predict( img ).tolist()
If the problem is still not solved, try to refresh the graph:
As your code is fine, running in a clean environment should solve it:
Clear the Keras cache at ~/.keras/
Run in a new environment with the right packages (this can be done easily with Anaconda)
Make sure you are in a fresh session; keras.backend.clear_session() should remove all existing TF graphs.
Keras Code:
keras.backend.clear_session()
features = model_places.predict( img )
TensorFlow Code:
import tensorflow as tf
with tf.Session() as sess:
    tf.reset_default_graph()
The simplest solution is to use TensorFlow 2.0: run your code in a TensorFlow 2.0 environment and it will work.
I was facing the same issue while exposing a pre-trained model via a REST server. I was loading the model at server startup and later using the loaded model to make predictions via POST/GET requests. While predicting, it generated an error because the session was not kept between predict calls, although it worked fine when I loaded the model on every prediction.
To avoid this issue with the session, I just ran the code in a TF 2.0 environment and it ran fine.

How to feed a network through REST

I'm new to Python and trying to build an app with TensorFlow. Basically, what I need is to get features from a loaded neural network, which takes around 3 minutes to load.
I would like my script below to load the neural network just once at startup, and then to be able to call a REST function just to feed an image to the network.
from flask import Flask, request
from flask_restful import Resource, Api
from scipy import misc
import tensorflow as tf
import numpy as np
import sys
import os
import argparse

class ImageFeatures(Resource):
    def get(self, img):
        image = misc.imread(os.path.expanduser("Img/Abc_001.jpg"))
        feed_dict = {images_placeholder: image, phase_train_placeholder: False}
        emb = sess.run(embeddings, feed_dict=feed_dict)
        return(emb)

with tf.Graph().as_default():
    with tf.Session() as sess:
        model_dir = 'Model/'
        meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(model_dir))
        facenet.load_model(model_dir, meta_file, ckpt_file)
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        print("Rest Running")

        app = Flask(__name__)
        api = Api(app)
        api.add_resource(ImageFeatures, '/getFeatures/<img>')

        if __name__ == '__main__':
            app.run(port='5002')
Check out https://github.com/PipelineAI/pipeline
We package your TensorFlow model (or any type of model) in a REST-based runtime.
