Flask App with Slow Queries, Multiple Client Users, and Hosted on Kubernetes - python

I've got a Flask app in which I hope to accomplish the following things:
Have an endpoint that will run a series of queries
This endpoint needs to respond to the HTTP request within a limited number of seconds.
The queries can take up to several minutes to finish, so I need them to run in a separate thread, with multiple clients polling the server every so often to see if there is fresh data to be returned to them.
Hopefully hosted on Kubernetes with multiple instances of the pod running.
My below implementation has several issues:
The poll endpoint seems unnecessarily large; most of it is just dealing with the Queue of queries and making sure that each client gets their own results back, and not someone else's.
I'm not sure what is going on, but when I try to host more than one instance of this pod on Kubernetes, it's like some poll requests from some users are being sent to instances in which their uuid does not exist.
I'm hoping for some understanding of what I'm doing wrong with threading and Queues, because this seems like a hacky way of doing it. Also, how can I make the results of these queries available to all instances of the pod running on Kubernetes?
Thanks!
from flask import Flask, render_template, request, jsonify, g
from Queue import Queue
from threading import Thread
from time import sleep

app = Flask(__name__, template_folder='Templates')

@app.route('/')
def index():
    return render_template('index.html')

@app.before_first_request
def before_first_request():
    g.output = Queue()
    g.data_results = {}
    return ""

@app.route('/data')
def data():
    """
    Endpoint hit to fire off a request for data from a given user (uuid)
    """
    params = request.args.to_dict()
    uuid = params['uuid']
    # Create a list for this user, to store their results
    g.data_results[uuid] = []
    list_of_queries = ["SELECT * FROM tbl1;",
                       "SELECT * FROM tbl2;",
                       "SELECT * FROM tbl3;"]
    for query in list_of_queries:
        t = Thread(target=worker, args=(query, uuid, g.output))
        t.daemon = True
        t.start()
    return jsonify({'msg': 'Queries started'})

def worker(*args):
    query, uuid, output = args
    # Will actually be something like `result = run_query(query)`
    result = {'uuid': uuid}
    sleep(10)
    output.put(result)

@app.route('/poll')
def poll():
    """
    Endpoint hit every x seconds from the frontend
    to see if the data is ready
    """
    params = request.args.to_dict()
    uuid_from_client = params['uuid']
    # If client polls for result, but server has no record of this uuid
    # This can happen in kubernetes with multiple instances running
    if g.data_results.get(uuid_from_client) is None:
        return jsonify({'msg': 'pong', 'data': None, 'freshdata': None})
    try:
        output = g.output
        # This line throws an error if there is nothing to get
        results = output.get(False)
        output.task_done()
        # What is the uuid associated with the most recently returned data
        # More than 1 chunk of data can be in here
        uuid_from_data = results['uuid']
        g.data_results[uuid_from_data].append(results)
    except:
        uuid_from_data = None
        results = None
    results_for_client_uuid = g.data_results[uuid_from_client]
    if len(results_for_client_uuid) > 0:
        res = results_for_client_uuid.pop(0)
    else:
        res = None
    return jsonify({'msg': 'pong', 'data': res})

if __name__ == "__main__":
    with app.app_context():
        app.run(host='0.0.0.0')

Set up your app architecture to use queuing software so that there is a separation of concerns between the web process and the workers doing the jobs.
Here is a great article that can give you some insight: http://blog.gorgias.io/deploying-flask-celery-with-docker-and-kubernetes/
and one more: https://endocode.com/blog/2015/03/24/using-googles-kubernetes-to-build-a-distributed-task-management-cluster/
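For the multi-pod problem in particular, the usual shape of that setup is to hand the queries to Celery and have clients poll by task id, since the result backend (for example Redis) is shared by every pod, unlike the per-process Queue and g objects above. A minimal sketch, assuming a Redis service reachable inside the cluster (the URL, task body, and routes here are illustrative, not your production values):

from celery import Celery
from flask import Flask, jsonify, request

REDIS_URL = 'redis://redis:6379/0'  # hypothetical in-cluster Redis service

flask_app = Flask(__name__)
celery_app = Celery('tasks', broker=REDIS_URL, backend=REDIS_URL)

@celery_app.task
def run_queries(uuid, queries):
    # Placeholder for the real query work; the return value is stored in the
    # Redis result backend, so any pod can read it later.
    return {'uuid': uuid, 'rows': ['result of ' + q for q in queries]}

@flask_app.route('/data')
def data():
    uuid = request.args['uuid']
    task = run_queries.delay(uuid, ["SELECT * FROM tbl1;"])
    # Hand the task id back to the client; it polls with this id instead of a
    # uuid that only exists in one pod's memory.
    return jsonify({'task_id': task.id})

@flask_app.route('/poll')
def poll():
    task = run_queries.AsyncResult(request.args['task_id'])
    if task.ready():
        return jsonify({'msg': 'pong', 'data': task.get()})
    return jsonify({'msg': 'pong', 'data': None})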

Related

How to send a POST request in the form of a dictionary to my flask main server

OK, so I'm doing a project on finding the health details of a remote server using Python, and I'm hosting the main server using Flask. But I don't know how to send the health report, which I have created using Python, to the Flask app. The health report is in the form of a dictionary, and I need to pass the values of the dictionary into columns that are the keys of the dictionary in my database. Can someone please help me send the health report to the Flask app? This health report is on another system and I need to send it to my main server.
import psutil
import time
import json
import requests

'''
This program will be loaded on to the target server.
A flask app will transmit health data to the main flask app.
'''

SERVER_NAME = "test_local_server"

def getHealth():  # function for generating health report. Returns a json object.
    print('generating health report')
    report = {}
    report['sever_name'] = SERVER_NAME
    report['cpupercent'] = psutil.cpu_percent(interval=2.0)
    report['ctime'] = psutil.cpu_times()
    report['cpu_total'] = report['ctime'].user + report['ctime'].system
    report['disk_usages'] = psutil.disk_usage("/")
    report['net'] = psutil.net_io_counters()
    report['bytes_sent'] = report['net'].bytes_sent
    report['bytes_received'] = report['net'].bytes_recv
    report['packets_sent'] = report['net'].packets_sent
    report['packets_received'] = report['net'].packets_recv
    report['mem'] = psutil.virtual_memory()
    report['memory_Free'] = report['mem'].free
    json_report = json.dumps(report)
    return json_report

if __name__ == '__main__':
    print(f'starting health report stream for server :\t{SERVER_NAME}')
    while True:
        getHealth()
This is the code for generating the health details. How do I send this back to my Flask app in the form of a dictionary?
Client
I would start by simplifying that code somewhat:
import psutil

STATS_URL = 'http://localhost:5000/'
SERVER_NAME = "test_local_server"

def get_health():
    print('generating health report')
    cpu_percent = psutil.cpu_percent(interval=2.0)
    cpu_times = psutil.cpu_times()
    disk_usage = psutil.disk_usage("/")
    net_io_counters = psutil.net_io_counters()
    virtual_memory = psutil.virtual_memory()

    # The keys in this dict should match the db cols
    report = dict(
        sever_name = SERVER_NAME,
        ctime = cpu_times.__str__(),
        disk_usages = disk_usage.__str__(),
        net = net_io_counters.__str__(),
        mem = virtual_memory.__str__(),
        cpupercent = cpu_percent,
        cpu_total = cpu_times.user + cpu_times.system,
        bytes_sent = net_io_counters.bytes_sent,
        bytes_received = net_io_counters.bytes_recv,
        packets_sent = net_io_counters.packets_sent,
        packets_received = net_io_counters.packets_recv,
        memory_Free = virtual_memory.free,
    )
    return report
This get_health function builds and returns a report dictionary. Notice that for some of the return values from the psutil functions, I've used the built-in __str__ method. This ensures a database-friendly type for insertion.
If you want to check the types yourself, you can do something like:
for item in report:
    print(item, type(report[item]), report[item])
Next, have this function run in a loop, with the desired time delay between requests:
if __name__ == '__main__':
    import time
    import requests

    print(f'starting health report stream for server :\t{SERVER_NAME}')
    while True:
        report = get_health()
        r = requests.post(STATS_URL, json=report)
        print(r, r.json())
        time.sleep(1)
Notice this uses the json argument to requests.post, which automatically sets the correct Content-Type header that Flask's request.get_json function expects.
Server
This is pretty easy to receive:
from flask import Flask, request

app = Flask(__name__)

@app.route('/', methods=['POST'])
def index():
    incoming_report = request.get_json()
    add_to_db(incoming_report)  # We'll build this in a sec.
    return {'message': 'success'}
You can now work with incoming_report which is a dictionary.
This also sends a success message back to the client, so on the client you'll see the output:
starting health report stream for server : test_local_server
generating health report
<Response [200]> {'message': 'success'}
# Repeats until killed
Database
and I need to pass the values of the dictionary into columns which are the keys of the dictionary in my database
Now that you have a dictionary, incoming_report, it should be easy to add it to your database if you're using an ORM.
Something along the lines of this answer should allow you to simply unpack that dictionary. So, assuming your model is called Report, you could do something like:
def add_to_db(d):
    report = Report(**d)
    db.session.add(report)
    db.session.commit()
Note this could probably use some validation, and authentication if your deployment requires this.
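For completeness, here is a minimal sketch of what that Report model might look like with Flask-SQLAlchemy; the column names simply mirror the dictionary keys above, and the types are guesses rather than a schema from the question:

from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy(app)  # assumes app and SQLALCHEMY_DATABASE_URI are configured

class Report(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    # One column per key in the report dict, so Report(**d) lines up.
    sever_name = db.Column(db.String(80))
    ctime = db.Column(db.Text)
    disk_usages = db.Column(db.Text)
    net = db.Column(db.Text)
    mem = db.Column(db.Text)
    cpupercent = db.Column(db.Float)
    cpu_total = db.Column(db.Float)
    bytes_sent = db.Column(db.BigInteger)
    bytes_received = db.Column(db.BigInteger)
    packets_sent = db.Column(db.BigInteger)
    packets_received = db.Column(db.BigInteger)
    memory_Free = db.Column(db.BigInteger)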

Returning 'still loading' response with Flask API

I have a scikit-learn classifier running as a Dockerised Flask app, launched with gunicorn. It receives input data in JSON format as a POST request, and responds with a JSON object of results.
When the app is first launched with gunicorn, a large model (serialised with joblib) is read from a database, and loaded into memory before the app is ready for requests. This can take 10-15 minutes.
A reproducible example isn't feasible, but the basic structure is illustrated below:
from flask import Flask, jsonify, request, Response
import joblib
import json

def classifier_app(model_name):
    # Line below takes 10-15 mins to complete
    classifier = _load_model(model_name)

    app = Flask(__name__)

    @app.route('/classify_invoice', methods=['POST'])
    def apicall():
        query = request.get_json()
        results = _build_results(query['data'])
        return Response(response=results,
                        status=200,
                        mimetype='application/json')

    print('App loaded!')
    return app
How do I configure Flask or gunicorn to return a 'still loading' response (or suitable error message) to any incoming http requests while _load_model is still running?
Basically, you want to return two responses for one request, so there are two possibilities.
The first is to run the time-consuming task in the background and ping the server with simple AJAX requests every couple of seconds to check whether the task has completed. If it has, return the result; if not, return a "Please stand by" message or something similar.
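A minimal sketch of that first option, assuming the model can be loaded in a background thread; this is my illustration (with a stub standing in for the real _load_model), not code from the question:

import threading
import time

from flask import Flask, jsonify, request

app = Flask(__name__)
classifier = None  # set once the background load finishes

def _load_model(model_name):
    # Stub standing in for the 10-15 minute joblib/database load in the question.
    time.sleep(5)
    return lambda data: {'label': 'placeholder', 'input': data}

def _load_in_background(model_name):
    global classifier
    classifier = _load_model(model_name)

threading.Thread(target=_load_in_background, args=('my_model',), daemon=True).start()

@app.route('/classify_invoice', methods=['POST'])
def apicall():
    if classifier is None:
        # Not ready yet: ask the client to retry later.
        return jsonify({'status': 'still loading'}), 503, {'Retry-After': '60'}
    query = request.get_json()
    return jsonify(classifier(query['data']))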
The second is to use websockets via the flask-socketio extension.
Basic server code would be something like this:
from flask import Flask, Response
from flask_socketio import SocketIO

app = Flask(__name__)
socketio = SocketIO(app)

def do_work():
    result = your_heavy_function()
    socketio.emit("result", {"result": result}, namespace="/test/")

@app.route("/api/", methods=["POST"])
def start():
    socketio.start_background_task(target=do_work)
    # return intermediate response
    return Response()
On the client side you should do something like this
var socket = io.connect('http://' + document.domain + ':' + location.port + '/test/');
socket.on('result', function(msg) {
    // Process your request here
});
For further details, see this blog post, the flask-socketio documentation for the server side, and the socketio documentation for the client side.
P.S. Using websockets this way, you can build a progress bar too.
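For the progress bar, the same background task can emit intermediate events before the final result; a rough sketch, reusing the socketio object from the snippet above (do_one_chunk and the event payload are made up):

def do_work():
    total_steps = 10
    for step in range(total_steps):
        do_one_chunk()  # placeholder for one slice of the heavy work
        # Emit an intermediate event the client can render as a progress bar.
        socketio.emit("progress",
                      {"percent": 100 * (step + 1) // total_steps},
                      namespace="/test/")
    socketio.emit("result", {"result": "done"}, namespace="/test/")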

How to handle multi users' requests separately in Flask?

I've written the following flask server:
from flask import Flask, render_template, request
import os

app = Flask(__name__)

# home
@app.route('/')
def home():
    return 'HOME PAGE'

@app.route('/add')
def add():
    global a
    a += 1
    return str(a)

if __name__ == '__main__':
    a = 0
    HOST = '10.10.10.10'
    PORT = 5555
    app.run(HOST, PORT)
Consider two users of my server (from different IP addresses): A and B. When user A requests the URL 10.10.10.10:5555/add, he gets the result 1. After that, if user B requests 10.10.10.10:5555/add, he will get 2, because the two users share the same variable a.
However, I want my server to handle A and B separately, which means users A and B each have their own variable a. The requests of A shouldn't affect the result that B gets. For example, when user A requests, he gets 1; after that, when user B requests, he should get 1 as well.
How should I modify my code to achieve this?
Based on your question, I think you're confused about the definition of "global".
In Flask, you have a server with multiple threads and potentially multiple processes handling requests. You had a global variable a that you keep adding to on every request, while also wanting each user's value to be independent. Sharing state like this is possible in theory and practice, but it's a really bad idea; it invites race conditions and deadlocks.
The problem is that you can't easily control which threads and processes "win".
You should keep the webserver itself as stateless as possible. Each request should be totally independent and not share any state on the server. Instead, use a database or caching layer which will handle the state for you. This seems more complicated but is actually simpler in practice. Check out SQLite, for example; it's pretty simple.
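As an illustration of that idea (not code from the question), here is a minimal sketch that keeps a per-user counter in SQLite, keyed by an id stored in the session cookie; every worker process reads and writes the same file, so no in-process global is needed:

import sqlite3
import uuid

from flask import Flask, session

app = Flask(__name__)
app.secret_key = 'change-me'  # needed for the session cookie

def get_db():
    conn = sqlite3.connect('counters.db')
    conn.execute('CREATE TABLE IF NOT EXISTS counters (user TEXT PRIMARY KEY, value INTEGER)')
    return conn

@app.route('/add')
def add():
    # Give each browser its own id, then keep the counter in SQLite instead of
    # a process-global, so every worker process sees the same per-user value.
    if 'user_id' not in session:
        session['user_id'] = str(uuid.uuid4())
    user = session['user_id']
    conn = get_db()
    conn.execute('INSERT OR IGNORE INTO counters (user, value) VALUES (?, 0)', (user,))
    conn.execute('UPDATE counters SET value = value + 1 WHERE user = ?', (user,))
    value = conn.execute('SELECT value FROM counters WHERE user = ?', (user,)).fetchone()[0]
    conn.commit()
    conn.close()
    return str(value)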
Thanks to @n00dl3's suggestion, I've managed to achieve the goal of my example. Here is the code:
from flask import Flask, render_template, request, session
import os
from datetime import timedelta

app = Flask(__name__)
app.config['SECRET_KEY'] = os.urandom(24)
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)

# login
@app.route('/<username>', methods=['GET', 'POST'])
def home(username):
    if username in session:
        print(session.keys())
        return 'hello {}'.format(username)
    else:
        session[username] = username
        # generate this user's variable
        a[username] = 0
        print(session.keys())
        return 'login as {}'.format(username)

# logout
@app.route('/logout/<username>', methods=['GET', 'POST'])
def logout(username):
    session.pop(username)
    print(session.keys())
    return '{} logout!'.format(username)

# call add function with specific username
@app.route('/add/<username>')
def add(username):
    global a
    a[username] += 1
    return str(a[username])

if __name__ == '__main__':
    a = {}
    # HOST = environ.get('SERVER_HOST', 'localhost')
    HOST = '10.10.50.23'
    try:
        PORT = int(os.environ.get('SERVER_PORT', '5555'))
    except ValueError:
        PORT = 5555
    app.run(HOST, PORT, debug=True)
However, I'm not sure whether my way is a decent solution, so I'm still open to better answers.
Use a production WSGI server to deploy your project; see http://flask.pocoo.org/docs/1.0/deploying/

Run function on Flask server every x seconds to update Redis cache without clients making separate calls

I currently have a flask app that makes a call to S3 as well as an external API with the following structure before rendering the data in javascript:
from flask import Flask, render_template, make_response
from flask import request
import requests
import requests_cache
import redis
from boto3.session import Session
import json

app = Flask(__name__)

@app.route('/test')
def test1():
    bucket_root = 'testbucket'
    session = Session(
        aws_access_key_id='s3_key',
        aws_secret_access_key='s3_secret_key')
    s3 = session.resource('s3')
    bucket = s3.Bucket(bucket_root)
    testvalues = json.dumps(s3.Object(bucket_root, 'all1.json').get()['Body'].read())
    r = requests.get(api_link)
    return render_template('test_html.html', json_s3_test_response=r.content,
                           limit=limit, testvalues=testvalues)

@app.route('/test2')
def test2():
    bucket_root = 'testbucket'
    session = Session(
        aws_access_key_id='s3_key',
        aws_secret_access_key='s3_secret_key')
    s3 = session.resource('s3')
    bucket = s3.Bucket(bucket_root)
    testvalues = json.dumps(s3.Object(bucket_root, 'all2.json').get()['Body'].read())
    r = requests.get(api_link)
    return render_template('test_html.html', json_s3_test_response=r.content,
                           limit=limit, testvalues=testvalues)

@app.errorhandler(500)
def internal_error(error):
    return "500 error"

@app.errorhandler(404)
def not_found(error):
    return "404 error", 404

@app.errorhandler(400)
def custom400(error):
    return "400 error", 400

# catch all?
@app.errorhandler(Exception)
def all_exception_handler(error):
    return 'error', 500
Obviously I have a lot of inefficiencies here, but my main question is:
To me it seems like I'm calling S3 and the external API for each client, every time they refresh the page. This increases the chance of the app crashing due to timeouts (and my poor error handling) and hurts performance. I would like to resolve this by periodically caching the S3 results (say, every 10 minutes) in a local Redis server (already set up and running), and by pinging the external API just once from the server every few seconds before passing the result on to ALL clients.
I have code that can store the data in Redis every 10 minutes as a regular Python script; however, I'm not sure where to place this within the Flask server. Do I put it in its own function, or keep the call to Redis inside the @app.route()?
Thank you everyone for your time and effort. Any help would be appreciated! I'm new to Flask, so some of this has been confusing.
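One common way to structure this (an illustrative sketch rather than a tested answer) is to start a background thread next to the app that refreshes Redis on a schedule, and have the routes read only from the cache; fetch_from_s3, the key name, and the interval below are placeholders:

import json
import threading
import time

import redis
from flask import Flask

app = Flask(__name__)
cache = redis.Redis(host='localhost', port=6379)

def fetch_from_s3():
    # Placeholder for the boto3 calls in the question.
    return {'example': 'payload'}

def refresh_cache_forever(interval=600):
    # Runs in the background: refresh the cached S3 payload every `interval` seconds.
    while True:
        cache.set('s3:all1', json.dumps(fetch_from_s3()))
        time.sleep(interval)

# Started once per process; with several gunicorn workers each one runs its own
# refresher, which is harmless here because they all write the same key.
threading.Thread(target=refresh_cache_forever, daemon=True).start()

@app.route('/test')
def test1():
    # The route only reads the cached value instead of calling S3 per request.
    cached = cache.get('s3:all1')
    return cached or ('cache warming up', 503)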

Shared list among celery workers and Flask using python's multiprocessing

I'm building a Flask application which relies on Celery to process some long running tasks. Each task will essentially append a dictionary to a shared list once it has finished processing - this list is shared by the celery workers and the routes of the Flask application. The Flask component essentially consists of a set of routes to retrieve the contents of the shared list and modify the order of the elements.
I think I have successfully shared the list between the Celery workers using a Manager from Python's multiprocessing module. However, the changes made to this list are not seen by the Flask application. Here is a minimal application which illustrates the issue:
import os
import json
from flask import Flask
from multiprocessing import Manager
from celery import Celery

application = Flask(__name__)

redis_url = os.environ.get('REDIS_URL')
if redis_url is None:
    redis_url = 'redis://localhost:6379/0'

# Set the secret key to enable cookies
application.secret_key = 'some secret key'
application.config['SESSION_TYPE'] = 'filesystem'

# Redis and Celery configuration
application.config['BROKER_URL'] = redis_url
application.config['CELERY_RESULT_BACKEND'] = redis_url

celery = Celery(application.name, broker=redis_url)
celery.conf.update(BROKER_URL=redis_url,
                   CELERY_RESULT_BACKEND=redis_url)

manager = Manager()
shared_queue = manager.list()  # THIS IS THE SHARED LIST

@application.route("/submit", methods=['GET'])
def submit_song():
    add_song_to_queue.delay()
    return 'Added a song to the queue'

@application.route("/playlist", methods=['GET', 'POST'])
def get_playlist():
    playlist = []
    i = 0
    queue_size = len(shared_queue)
    while i < queue_size:
        print(shared_queue[i])
        playlist.append(shared_queue[i])
        i += 1
    return json.dumps(playlist)

@celery.task
def add_song_to_queue():
    shared_queue.append({'some': 'data!'})
    print(len(shared_queue))

if __name__ == "__main__":
    application.run(host='0.0.0.0', debug=True)
In the celery logs I can clearly see that the dictionaries are being appended to the list, and that the size of the list increases. However, when I access the /playlist route on my browser I always get an empty list.
Does anyone know how I can get the list to be shared among all the workers and the Flask application?
I found a solution by moving away from Celery and instead using multiprocessing.Pool as a task queue and shared memory through a Manager, as shown in the sample code in the question. This link has an excellent example of how this solution can be integrated with Flask: http://gouthamanbalaraman.com/blog/python-multiprocessing-as-a-task-queue.html
from multiprocessing import Pool
from flask import Flask

app = Flask(__name__)
_pool = None

def expensive_function(x):
    # import packages that are used in this function
    # do your expensive, time-consuming process
    return x * x

@app.route('/expensive_calc/<int:x>')
def route_expcalc(x):
    f = _pool.apply_async(expensive_function, [x])
    r = f.get(timeout=2)
    return 'Result is %d' % r

if __name__ == '__main__':
    _pool = Pool(processes=4)
    try:
        # insert production server deployment code
        app.run()
    except KeyboardInterrupt:
        _pool.close()
        _pool.join()
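To combine that Pool pattern with the shared list from the question, the proxy returned by Manager().list() can be passed to the pool workers as an argument; a rough sketch under the same assumptions (routes and payloads are illustrative):

from multiprocessing import Manager, Pool

from flask import Flask, jsonify

app = Flask(__name__)
_pool = None
shared_queue = None  # created in __main__, like _pool above

def add_song(queue_proxy, song):
    # Runs in a pool worker; the proxy forwards the append to the manager
    # process, so the Flask process sees the change as well.
    queue_proxy.append(song)

@app.route('/submit')
def submit_song():
    _pool.apply_async(add_song, [shared_queue, {'some': 'data!'}])
    return 'Added a song to the queue'

@app.route('/playlist')
def get_playlist():
    return jsonify(list(shared_queue))

if __name__ == '__main__':
    shared_queue = Manager().list()
    _pool = Pool(processes=4)
    app.run(host='0.0.0.0')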
