How to run a celery task on flask startup? - python

Very simple question, I hope. I have a flask service that needs to listen to a subscription. I have all the code written to listen to the subscription and run some code when triggered.
The flask app code is:
from flask import Flask, jsonify
import logging
from celery.bin.worker import worker
from celery import shared_task
from proj.celery_config import make_celery, get_options
from proj.config import Config
# Flask application configured from the project's Config object.
app = Flask(__name__)
app.config.from_object(Config)
# Celery application and worker options are both derived from the Flask app.
celery = make_celery(app)
options = get_options(app)
# NOTE(review): this rebinds the imported `worker` class name to an instance,
# and passes the Flask app; the worker command presumably expects the Celery
# application (`celery`) instead -- confirm against the Celery version in use.
worker = worker(app)
@shared_task()
def run_listener():
    """Celery task that starts the subscription listener by calling ``listen()``.

    NOTE(review): ``listen`` is neither defined nor imported in this snippet;
    it has to be provided elsewhere in the module.
    """
    listen()
@app.route('/actuator/health')
def health_check():
    """Health endpoint: respond with the JSON document ``{"status": "UP"}``."""
    return jsonify({'status': 'UP'})
@app.route('/')
def hello_world():
    """
    A simple test route for verifying flask is working.

    :return: string containing "Hello, World!"
    """
    # No module-level ``logger`` is defined in this file, so fetch the module
    # logger explicitly instead of relying on an undefined global.
    logging.getLogger(__name__).info("Hello world called!")
    return 'Hello, World!'
def main():
    """Return the module-level Flask application object."""
    return app
The code for make_celery and get_options is:
from celery import Celery
def make_celery(app):
    """Create a Celery application configured from a Flask app.

    :param app: Flask application; ``app.config`` must contain
        ``CELERY_BROKER_URL``.
    :return: Celery instance whose base ``Task`` class runs each task body
        inside ``app.app_context()``.
    """
    celery = Celery(
        app.import_name,
        # NOTE(review): the result backend reuses the broker URL; confirm this
        # is intended rather than a dedicated result-backend setting.
        backend=app.config['CELERY_BROKER_URL'],
        broker=app.config['CELERY_BROKER_URL'],
    )
    celery.conf.update(app.config)

    class ContextTask(celery.Task):
        """Task base class that executes ``run`` within the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
def get_options(app):
    """Build the option dictionary handed to the Celery worker.

    :param app: object exposing a ``config`` mapping that contains
        ``CELERY_BROKER_URL``.
    :return: dict with the broker URL plus fixed ``traceback``, ``loglevel``
        and ``queues`` values.
    :raises KeyError: if ``CELERY_BROKER_URL`` is missing from the config.
    """
    return {
        'broker': app.config['CELERY_BROKER_URL'],
        'traceback': True,
        'loglevel': 'info',
        'queues': 'q1',
    }
I have not set up all the endpoints yet, but there is one very important requirement: the task run_listener needs to work on startup without running the worker outside the app. That means I cannot run celery -A tasks ... separately; I just want the task to run whenever the flask app is run. Is there a way to do this?
I have tried running worker.run(**options) but I get an error, no matter if I use the flask app or the celery app when initializing the worker object. Am I missing something?

Related

How to preserve Flask app context across Celery and SQLAlchemy

I'm trying to learn Flask by building a proof-of-concept Flask app that takes a JSON payload and uses SQLAlchemy to write it to a DB. I'm using celery to manage the write tasks.
The app is structured
|-app.py
|-project
|-__init__.py
|-celery_utils.py
|-config.py
|-users
|-__init__.py
|-models.py
|-tasks.py
app.py builds the flask app and celery instance.
app.py
from project import create_app, ext_celery
# Build the Flask application through the project's factory.
app = create_app()
# Expose the Celery instance held by the FlaskCeleryExt extension.
celery = ext_celery.celery
@app.route("/")
def alive():
    """Liveness route: always answers with the string ``"alive"``."""
    return "alive"
/project/__init__.py is the application factory for the flask app. It instantiates the extensions, links everything together, and registers the blueprints.
/project/__init__.py
import os
from flask import Flask
from flask_celeryext import FlaskCeleryExt
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
from project.celery_utils import make_celery
from project.config import config
# instantiate extensions
# Extensions are created unbound here; create_app() attaches them to the app
# through their init_app() methods.
db = SQLAlchemy()
migrate = Migrate()
# make_celery is passed as the factory that builds the Celery application.
ext_celery = FlaskCeleryExt(create_celery_app=make_celery)
def create_app(config_name=None):
    """Application factory: build and wire up the Flask app.

    :param config_name: key into the ``config`` mapping; when ``None`` the
        ``FLASK_CONFIG`` environment variable is used, falling back to
        ``"development"``.
    :return: the configured Flask application.
    """
    if config_name is None:
        config_name = os.environ.get("FLASK_CONFIG", "development")

    # instantiate the app
    app = Flask(__name__)

    # set config
    app.config.from_object(config[config_name])

    # set up extensions
    db.init_app(app)
    migrate.init_app(app, db)
    ext_celery.init_app(app)

    # register blueprints (imported here, inside the factory, as in the original)
    from project.users import users_blueprint
    app.register_blueprint(users_blueprint)

    # shell context for flask cli
    @app.shell_context_processor
    def ctx():
        return {"app": app, "db": db}

    return app
/project/celery_utils.py manages the creation of the celery instances
/project/celery_utils.py
from celery import current_app as current_celery_app
def make_celery(app):
    """Configure Celery's current app from the Flask config and return it.

    :param app: Flask application whose ``config`` supplies the settings;
        only keys in the ``CELERY`` namespace are read.
    :return: the configured Celery application.
    """
    celery = current_celery_app
    celery.config_from_object(app.config, namespace="CELERY")
    return celery
In the users dir, I'm trying to manage the creation of a basic user with celery task management.
/project/users/__init__.py is where I create the blueprints and routes.
/project/users/__init__.py
from flask import Blueprint, request, jsonify
from .tasks import divide, post_to_db
users_blueprint = Blueprint("users", __name__, url_prefix="/users", template_folder="templates")
from . import models, tasks
@users_blueprint.route('/users', methods=['POST'])
def users():
    """Queue the posted JSON payload for insertion and report the task.

    :return: JSON response holding the queued task's ``id`` and ``status``.
    """
    request_data = request.get_json()
    task = post_to_db.delay(request_data)
    response = {
        "id": task.task_id,
        "status": task.status,
    }
    return jsonify(response)
@users_blueprint.route('/responses', methods=['GET'])
def responses():
    """Look up a task by the ``id`` in the request JSON and return its result.

    :return: JSON response containing whatever ``AsyncResult.get()`` returns.
    """
    # AsyncResult is not imported anywhere in this module; import it here so
    # the view does not fail with a NameError.
    from celery.result import AsyncResult

    request_data = request.get_json()
    result = AsyncResult(id=request_data['id'])
    response = result.get()
    return jsonify(response)
/project/users/models.py is a simple User model - however, it does manage to successfully remain in the context of the flask app if created from the flask app cli.
/project/users/models.py
from project import db
class User(db.Model):
    """model for the user object"""

    __tablename__ = "users"

    # Auto-incrementing integer primary key.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Both username and email are required and unique.
    username = db.Column(db.String(128), unique=True, nullable=False)
    email = db.Column(db.String(128), unique=True, nullable=False)

    def __init__(self, username, email, *args, **kwargs):
        """Store ``username`` and ``email``; extra arguments are ignored."""
        self.username = username
        self.email = email
Finally, /project/users/tasks.py is where I handle the celery tasks for this dir.
/project/users/tasks.py
from celery import shared_task
from .models import User
from project import db
@shared_task()
def post_to_db(payload):
    """Create a ``User`` from ``payload`` and commit it to the database.

    :param payload: mapping expanded into ``User(**payload)``.
    :return: ``True`` once the session has been committed and closed.
    """
    print("made it here")
    user = User(**payload)
    db.session.add(user)
    db.session.commit()
    db.session.close()
    return True
The modules work, but as soon as I wire it all up and hit the endpoint with a JSON payload, I get the error message:
RuntimeError: No application found. Either work inside a view function or push an application context. ...
I have tried to preserve the app context in tasks.py by:
...
from project import db, ext_celery
#ext_celery.shared_task()
def post_to_db(payload):
...
...
from project import db, ext_celery
#ext_celery.task()
def post_to_db(payload):
...
These error with: TypeError: exceptions must derive from BaseException
I've tried pushing the app context
...
from project import db
from app import app
#shared_task()
def post_to_db(payload):
with app.app_context():
...
This also errors with: TypeError: exceptions must derive from BaseException
I've tried importing celery from the app itself
...
from project import db
from app import celery
#celery.task()
def post_to_db(payload):
...
This also errors with: TypeError: exceptions must derive from BaseException
Any suggestions gratefully received. There's a final piece of the puzzle I'm missing, and it's very frustrating.
With thanks to snakecharmerb
I had to add ContextTask to the make_celery() function in /project/celery_utils.py
from celery import current_app as current_celery_app
def make_celery(app):
    """Configure Celery's current app and make its tasks Flask-context aware.

    :param app: Flask application supplying the ``CELERY``-namespaced config
        and the application context that tasks run in.
    :return: the configured Celery application.
    """
    celery = current_celery_app
    celery.config_from_object(app.config, namespace="CELERY")

    class ContextTask(celery.Task):
        """Task base class that executes ``run`` within the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
And then a few tweaks in /project/users/tasks.py
from celery import shared_task
from .models import User
from project import db
@shared_task()
def post_to_db(payload):
    """Create a ``User`` from ``payload`` and commit it to the database.

    :param payload: mapping expanded into ``User(**payload)``.
    :return: ``True`` once the session has been committed and closed.
    """
    user = User(**payload)
    db.session.add(user)
    db.session.commit()
    db.session.close()
    return True
Now I can see the user in the database, and my message queue is progressing as expected.

using celery with flask_restful

I have a simple Flask-RESTful API where I want to execute requests as Celery tasks, since some endpoints need a lot of execution time.
main.py:
from flask import Flask
from flask_restful import Api
from flask_celery import make_celery
app = Flask(__name__)
# No trailing comma after the URL: a trailing comma would store a one-element
# tuple instead of the broker URL string.
app.config['CELERY_BROKER_URL'] = 'redis://localhost:6379/0'
app.config['CELERY_RESULT_BACKEND'] = 'redis://localhost:6379/0'
celery = make_celery(app)
api = Api(app)
# NOTE(review): someResource is not defined or imported in this snippet.
api.add_resource(someResource, '/someendpoint/')

if __name__ == '__main__':
    app.run(debug=True)
with make_celery.py:
from celery import Celery
def make_celery(app):
    """Create a Celery application configured from a Flask app.

    :param app: Flask application; ``app.config`` must contain
        ``CELERY_RESULT_BACKEND`` and ``CELERY_BROKER_URL``.
    :return: Celery instance whose base ``Task`` class runs each task body
        inside ``app.app_context()``.
    """
    celery = Celery(
        app.import_name,
        backend=app.config['CELERY_RESULT_BACKEND'],
        broker=app.config['CELERY_BROKER_URL'],
    )
    celery.conf.update(app.config)

    class ContextTask(celery.Task):
        """Task base class that executes ``run`` within the Flask app context."""

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
I now want to define that the Resource I defined in resource.py is a celery task:
class Cost(Resource):
    """REST resource whose GET handler returns the value of ``some_code``."""

    def get(self):
        # ``some_code`` is the question's placeholder for the slow computation.
        result = some_code
        return result
what is the most convenient way to make the get method a celery task here?
Thanks a lot!

How do I get the application context in a Blueprint, but not in a request?

I am attempting to convert a collection of Flask apps to a single app with several Blueprints.
In one of my apps, I have a task that runs periodically in the background, not related to a request. It looks something like this:
import apscheduler.schedulers.background
import flask
app = flask.Flask(__name__)
# NOTE(review): bare lookup whose result is discarded; presumably a
# placeholder for setting the DATABASE config value -- confirm.
app.config['DATABASE']
# The background scheduler is created and started at import time.
scheduler = apscheduler.schedulers.background.BackgroundScheduler()
scheduler.start()
def db():
    """Return the connection cached on ``flask.g``, creating it on first use.

    The connection is built from ``app.config['DATABASE']`` and stored as
    ``flask.g._db`` so later calls reuse it.
    """
    _db = flask.g.get('_db')
    if _db is None:
        _db = get_db_connection_somehow(app.config['DATABASE'])
        flask.g._db = _db
    return _db
@scheduler.scheduled_job('interval', hours=1)
def do_a_thing():
    """Hourly scheduled job: run ``db().do_a_thing()`` inside an app context."""
    with app.app_context():
        db().do_a_thing()
When I convert this app to a Blueprint, I lose access to the app object and I can't figure out how to create an application context when I need one. This is what I tried:
import apscheduler.schedulers.background
import flask
# Blueprint replacing the former standalone app object.
bp = flask.Blueprint('my_blueprint', __name__)
# The background scheduler is created and started at import time.
scheduler = apscheduler.schedulers.background.BackgroundScheduler()
scheduler.start()
def db():
    """Return the connection cached on ``flask.g``, creating it on first use.

    The connection is built from ``flask.current_app.config['DATABASE']`` and
    stored as ``flask.g._db`` so later calls reuse it.
    """
    _db = flask.g.get('_db')
    if _db is None:
        _db = get_db_connection_somehow(flask.current_app.config['DATABASE'])
        flask.g._db = _db
    return _db
@bp.record
def record(state):
    """Blueprint registration callback: stash the app on ``flask.g``.

    :param state: registration state object exposing the Flask app as
        ``state.app``.
    """
    with state.app.app_context():
        flask.g._app = state.app
@scheduler.scheduled_job('interval', hours=1)
def do_a_thing():
    """Hourly scheduled job: run ``db().do_a_thing()`` inside an app context.

    The context is taken from the app stored as ``flask.g._app``.
    """
    with flask.g._app.app_context():
        db().do_a_thing()
The error I get is:
RuntimeError: Working outside of application context.
So, how can I get the application context in a blueprint but outside a request?
I solved this problem with the following changes. First, I set up a scheduler object on my Flask app:
app = flask.Flask(__name__)
# The scheduler is attached to the app object as ``app.scheduler`` and
# started immediately.
app.scheduler = apscheduler.schedulers.background.BackgroundScheduler()
app.scheduler.start()
Next, I changed the function that runs my background task to accept the app as an argument, so I could read the database connection information from app.config:
def do_a_thing(app: flask.Flask):
    """Background job: open a connection from the app's config and use it.

    :param app: Flask application whose ``config['DATABASE']`` supplies the
        connection information.
    """
    db = get_db_connection_somehow(app.config['DATABASE'])
    db.do_a_thing()
Finally, I set up the scheduled job in Blueprint.record():
@bp.record
def record(state):
    """Blueprint registration callback: schedule the hourly background job.

    :param state: registration state object; ``state.app`` is passed to the
        job as its only argument.
    """
    state.app.scheduler.add_job(
        do_a_thing,
        trigger='interval',
        args=[state.app],
        hours=1,
    )

Correctly managing postgresql connections in celery task for Flask-SQLAlchemy and Celery

I'm using Flask-SQLAlchemy, Celery and uWSGI.
I know that Flask-SQLAlchemy automatically manages the session for you. I'm not sure how this works with Celery workers, but it seems that when I run a task a second time, I get the following error: DatabaseError: (psycopg2.DatabaseError) server closed the connection unexpectedly.
Here's how I create the app context and celery tasks:
def make_celery(app):
    """Create a Celery application configured from a Flask app.

    :param app: Flask application; ``app.config`` must contain
        ``CELERY_BACKEND`` and ``CELERY_BROKER_URL``.
    :return: Celery instance whose base ``Task`` class invokes tasks inside
        ``app.app_context()``.
    """
    celery = Celery(
        app.import_name,
        backend=app.config['CELERY_BACKEND'],
        broker=app.config['CELERY_BROKER_URL'],
    )
    celery.conf.update(app.config)
    TaskBase = celery.Task

    class ContextTask(TaskBase):
        """Abstract task base that delegates to ``TaskBase`` in an app context."""

        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery
It seems that maybe the workers are using the same database connection and after a task completes that connection is not replenished?
It may be related to the following question?
I'm not sure how to correctly setup the workers or celery so that they're using new connections to the database..
Okay. I figured it out, for every process that's using an application context, you must use a new application context. Before, in my app/__init__.py I was simply creating the application globally like so:
from flask import Flask
app = Flask(__name__)
I then changed my app to use create_app like in this pattern
Now, my tasks.py looks like this:
from myapp import create_app
from celery import Celery
def make_celery(app=None):
    """Create a Celery application, building a Flask app when none is given.

    :param app: optional Flask application; when falsy, ``create_app()`` is
        called to obtain one.
    :return: Celery instance whose base ``Task`` class invokes tasks inside
        ``app.app_context()``.
    """
    app = app or create_app()
    celery = Celery(
        app.import_name,
        backend=app.config['CELERY_BACKEND'],
        broker=app.config['CELERY_BROKER_URL'],
    )
    celery.conf.update(app.config)
    TaskBase = celery.Task

    class ContextTask(TaskBase):
        """Abstract task base that delegates to ``TaskBase`` in an app context."""

        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery
celery = make_celery()
Make sure in your create_app you are calling db.init_app(app).

How to use Flask-SQLAlchemy in a Celery task

I recently switched to Celery 3.0. Before that I was using Flask-Celery in order to integrate Celery with Flask. Although it had many issues, like hiding some powerful Celery functionalities, it allowed me to use the full context of the Flask app and especially Flask-SQLAlchemy.
In my background tasks I am processing data and the SQLAlchemy ORM to store the data. The maintainer of Flask-Celery has dropped support of the plugin. The plugin was pickling the Flask instance in the task so I could have full access to SQLAlchemy.
I am trying to replicate this behavior in my tasks.py file but with no success. Do you have any hints on how to achieve this?
Update: We've since started using a better way to handle application teardown and set up on a per-task basis, based on the pattern described in the more recent flask documentation.
extensions.py
import flask
from flask.ext.sqlalchemy import SQLAlchemy
from celery import Celery
class FlaskCelery(Celery):
    """Celery subclass whose tasks run inside a Flask application context."""

    def __init__(self, *args, **kwargs):
        """Initialise Celery, patch the task base class, and bind an app.

        ``init_app`` is called right away when an ``app`` keyword is supplied.
        """
        super(FlaskCelery, self).__init__(*args, **kwargs)
        self.patch_task()

        if 'app' in kwargs:
            self.init_app(kwargs['app'])

    def patch_task(self):
        """Replace ``self.Task`` with a subclass that ensures an app context."""
        TaskBase = self.Task
        _celery = self

        class ContextTask(TaskBase):
            abstract = True

            def __call__(self, *args, **kwargs):
                # Reuse an already-active context; otherwise push the one
                # belonging to the Flask app registered through init_app().
                if flask.has_app_context():
                    return TaskBase.__call__(self, *args, **kwargs)
                else:
                    with _celery.app.app_context():
                        return TaskBase.__call__(self, *args, **kwargs)

        self.Task = ContextTask

    def init_app(self, app):
        """Remember the Flask ``app`` and load Celery config from its config."""
        self.app = app
        self.config_from_object(app.config)
# Module-level extension instances, created without an app; create_app()
# binds them later through init_app().
celery = FlaskCelery()
db = SQLAlchemy()
app.py
from flask import Flask
from extensions import celery, db
def create_app():
    """Application factory: create the Flask app and bind the extensions.

    :return: the Flask application with ``db`` and ``celery`` initialised.
    """
    # Flask requires the import name of the application package/module.
    app = Flask(__name__)
    #configure/initialize all your extensions
    db.init_app(app)
    celery.init_app(app)
    return app
Once you've set up your app this way, you can run and use celery without having to explicitly run it from within an application context, as all your tasks will automatically be run in an application context if necessary, and you don't have to explicitly worry about post-task teardown, which is an important issue to manage (see other responses below).
Troubleshooting
If you keep getting `with _celery.app.app_context(): AttributeError: 'FlaskCelery' object has no attribute 'app'`, make sure to:
Keep the celery import at the app.py file level. Avoid:
app.py
from flask import Flask
def create_app():
    """Application factory that delegates extension setup to a helper.

    :return: the Flask application after ``initiliaze_extensions(app)`` ran.
    """
    # Flask requires the import name of the application package/module.
    app = Flask(__name__)
    initiliaze_extensions(app)
    return app
def initiliaze_extensions(app):
    """Bind ``db`` and ``celery`` to ``app`` (the pattern this answer warns against)."""
    from extensions import celery, db # DOOMED! Keep celery import at the FILE level
    db.init_app(app)
    celery.init_app(app)
Start your celery workers BEFORE you run flask, and use
celery worker -A app:celery -l info -f celery.log
Note the app:celery, i.e. loading from app.py.
You can still import from extensions to decorate tasks, i.e. from extensions import celery.
Old answer below, still works, but not as clean a solution
I prefer to run all of celery within the application context by creating a separate file that invokes celery.start() with the application's context. This means your tasks file doesn't have to be littered with context setup and teardowns. It also lends itself well to the flask 'application factory' pattern.
extensions.py
# NOTE(review): the ``flask.ext`` import namespace is presumably unavailable
# in recent Flask releases (``flask_sqlalchemy`` is the direct module name) --
# confirm against the Flask version in use.
from flask.ext.sqlalchemy import SQLAlchemy
from celery import Celery

# Module-level extension instances shared by app.py and tasks.py.
db = SQLAlchemy()
celery = Celery()
tasks.py
from extensions import celery, db
from flask.globals import current_app
from celery.signals import task_postrun
@celery.task
def do_some_stuff():
    """Example task: log a message through the current Flask app's logger."""
    current_app.logger.info("I have the application context")
    #you can now use the db object from extensions
@task_postrun.connect
def close_session(*args, **kwargs):
    """``task_postrun`` handler: remove the SQLAlchemy session after a task."""
    # Flask SQLAlchemy will automatically create new sessions for you from
    # a scoped session factory, given that we are maintaining the same app
    # context, this ensures tasks have a fresh session (e.g. session errors
    # won't propagate across tasks)
    db.session.remove()
app.py
from flask import Flask

from extensions import celery, db


def create_app():
    """Application factory: create the Flask app and configure the extensions.

    :return: the Flask application; ``db`` is bound to it and ``celery`` is
        configured from its config.
    """
    # Flask requires the import name of the application package/module.
    app = Flask(__name__)
    #configure/initialize all your extensions
    db.init_app(app)
    celery.config_from_object(app.config)
    return app
RunCelery.py
from app import create_app
from extensions import celery
app = create_app()

if __name__ == '__main__':
    # Start celery while the Flask application context is active.
    with app.app_context():
        celery.start()
In your tasks.py file do the following:
from celery import Celery

from main import create_app

app = create_app()
celery = Celery(__name__)
# Celery pulls its default settings from the Flask config via this callable.
celery.add_defaults(lambda: app.config)


@celery.task
def create_facet(project_id, **kwargs):
    """Task skeleton that runs its body inside a Flask test request context."""
    with app.test_request_context():
        # your code
        pass
I used Paul Gibbs' answer with two differences. Instead of task_postrun I used worker_process_init. And instead of .remove() I used db.session.expire_all().
I'm not 100% sure, but from what I understand the way this works is when Celery creates a worker process, all inherited/shared db sessions will be expired, and SQLAlchemy will create new sessions on demand unique to that worker process.
So far it seems to have fixed my problem. With Paul's solution, when one worker finished and removed the session, another worker using the same session was still running its query, so db.session.remove() closed the connection while it was being used, giving me a "Lost connection to MySQL server during query" exception.
Thanks Paul for steering me in the right direction!
Never mind, that didn't work. I ended up adding an argument to my Flask app factory so that it does not run db.init_app(app) when Celery is calling it. Instead, the workers call it after Celery forks them. I now see several connections in my MySQL processlist.
from extensions import db
from celery.signals import worker_process_init
from flask import current_app
@worker_process_init.connect
def celery_worker_init_db(**_):
    """``worker_process_init`` handler: call ``db.init_app`` with ``current_app``."""
    db.init_app(current_app)
from flask import Flask
from werkzeug.utils import import_string
from celery.signals import worker_process_init, celeryd_init
from flask_celery import Celery
from src.app import config_from_env, create_app
celery = Celery()
def get_celery_conf():
    """Collect the upper-case settings from ``src.settings`` into a dict.

    :return: dict of every attribute of the settings module whose name is
        upper-case, plus ``BROKER_URL`` copied from ``CELERY_BROKER_URL``.
    :raises KeyError: if the settings module has no ``CELERY_BROKER_URL``.
    """
    settings = import_string('src.settings')
    config = {k: getattr(settings, k) for k in dir(settings) if k.isupper()}
    config['BROKER_URL'] = config['CELERY_BROKER_URL']
    return config
@celeryd_init.connect
def init_celeryd(conf=None, **kwargs):
    """``celeryd_init`` handler: merge the project settings into ``conf``."""
    conf.update(get_celery_conf())
@worker_process_init.connect
def init_celery_flask_app(**kwargs):
    """``worker_process_init`` handler: build a Flask app and push its context."""
    app = create_app()
    app.app_context().push()
Update celery config at celeryd init
Use your flask app factory to initialize all flask extensions, including the SQLAlchemy extension.
By doing this, we are able to maintain database connection per-worker.
If you want to run your task under flask context, you can subclass Task.__call__:
class SmartTask(Task):
    """Abstract task that runs inside Flask app and test-request contexts."""

    abstract = True

    def __call__(self, *_args, **_kwargs):
        """Invoke the parent ``__call__`` within both contexts and return its result."""
        with self.app.flask_app.app_context():
            with self.app.flask_app.test_request_context():
                result = super(SmartTask, self).__call__(*_args, **_kwargs)
        return result
class SmartCelery(Celery):
    """Celery subclass that installs ``SmartTask`` as its task base class."""

    def init_app(self, app):
        """Run the parent ``init_app`` and then set ``self.Task`` to ``SmartTask``."""
        super(SmartCelery, self).init_app(app)
        self.Task = SmartTask

Categories