Celery Worker not Inheriting Settings from Django App - python

I am developing a django app that has to process large spreadsheets that users upload - so naturally I turned to celery and rabbitmq. I successfully setup the environment and the task completes in the background successfully except for one issue: I use several environment variables (defined in my apache vhost file and passed to django by mod_wsgi) in a db router class to determine which database to use (i.e. production vs staging vs dev):
class DbRouter(object):
    """Pick a database alias from the model's app label and the deployment environment.

    Models belonging to the ``auth``, ``admin`` and ``contenttypes`` apps are
    always routed to the ``'auth'`` alias. Every other model is routed to the
    alias named by the ``BH_ENVIRONMENT_NAME`` environment variable when that
    value is ``"staging"`` or ``"production"``.
    """

    # App labels that are always routed to the 'auth' alias.
    _AUTH_APP_LABELS = frozenset(('auth', 'admin', 'contenttypes'))
    # Environment names that are used verbatim as the database alias.
    _ENVIRONMENT_ALIASES = frozenset(('staging', 'production'))

    def _route(self, model):
        """Return the alias shared by reads and writes for ``model``.

        :raises KeyError: if ``BH_ENVIRONMENT_NAME`` is not set in the process
            environment and the model is not one of the auth-routed apps.
        :returns: ``'auth'``, ``'staging'``, ``'production'`` or ``None`` when
            the environment name matches neither known value.
        """
        if model._meta.app_label in self._AUTH_APP_LABELS:
            return 'auth'
        # Read at call time, so the value must exist in the environment of
        # whichever process executes the query.
        environment = os.environ['BH_ENVIRONMENT_NAME']
        if environment in self._ENVIRONMENT_ALIASES:
            return environment
        return None

    def db_for_read(self, model, **hints):
        """Return the alias to read ``model`` from (see ``_route``)."""
        return self._route(model)

    def db_for_write(self, model, **hints):
        """Return the alias to write ``model`` to (see ``_route``)."""
        return self._route(model)

    def allow_migrate(self, db, app_label, model_name, **hints):
        """Allow every migration on every database."""
        return True
which works just fine in the regular django thread; however, when the background task attempts to use the db router I get the following key error (which seems to imply that the os.environ dictionary is not available to the worker thread?):
[2018-02-16 14:23:01,516: ERROR/ForkPoolWorker-2] Task
exposures.tasks.process_exposure_file[27d7e651-e73e-49f1-bd20-8ab80b90d13a]
raised unexpected: KeyError('BH_ENVIRONMENT_NAME',)
Traceback (most recent call last):
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/celery/app/trace.py", line 374, in trace_task
R = retval = fun(*args, **kwargs)
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/celery/app/trace.py", line 629, in __protected_call__
return self.run(*args, **kwargs)
File "/home/bryan/PycharmProjects/BeachHouse/exposures/tasks.py", line 10,
in process_exposure_file
exposure = Exposure.objects.get(pk=exposure_id)
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/manager.py", line 82, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/query.py", line 397, in get
num = len(clone)
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/query.py", line 254, in __len__
self._fetch_all()
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/query.py", line 1179, in _fetch_all
self._result_cache = list(self._iterable_class(self))
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/query.py", line 50, in __iter__
db = queryset.db
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/models/query.py", line 1109, in db
return self._db or router.db_for_read(self.model, **self._hints)
File "/var/www/BeachHouse/bhvenv/lib/python3.6/site-
packages/django/db/utils.py", line 258, in _route_db
chosen_db = method(model, **hints)
File "/home/bryan/PycharmProjects/BeachHouse/BeachHouse/db_routers.py", line
16, in db_for_read
environment = os.environ['BH_ENVIRONMENT_NAME']
File "/var/www/BeachHouse/bhvenv/lib/python3.6/os.py", line 669, in
__getitem__
raise KeyError(key) from None
KeyError: 'BH_ENVIRONMENT_NAME'
I attempted to fix this by adding the following to my settings.py file:
ENVIRONMENT_NAME = os.environ.get('BH_ENVIRONMENT_NAME')
and updating the db router to use settings.ENVIRONMENT_NAME - however, when I print this out in the celery worker terminal it's blank. My celery.py file is as follows (not sure if that's relevant or not?):
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
# Point Django at the project's settings module for the 'celery' program.
# This is a plain assignment, so it overwrites any value that was already
# present in the environment of the process importing this module.
os.environ['DJANGO_SETTINGS_MODULE'] = 'BeachHouse.settings'
# The Celery application instance for the project.
app = Celery('BeachHouse')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('BeachHouse.settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
so the question is: how do I pass the apache vhost variables to the celery workers - or alternatively, how do I get my django settings to this worker thread (that successfully load the apache variables)? I would like to stick with using the apache virtual host approach to define the variables if possible as there are many settings getting pulled from there.

Related

Integrating Celery with Flask using the application factory pattern: maximum recursion depth error

I am working from the cookiecutter Flask template, which uses the application factory pattern. I had Celery working for tasks that did not use the application context, but one of my tasks does need to know it; it makes a database query and updates a database object. Right now I am getting not a circular import error (though I've had them with other attempts) but a maximum recursion depth error.
I consulted this blog post about how to use Celery with the application factory pattern, and I'm trying to follow this Stack Overflow answer closely, since it has a structure apparently also derived from cookiecutter Flask.
Relevant portions of my project structure:
cookiecutter_mbam
│ celeryconfig.py
│
└───cookiecutter_mbam
| __init__.py
│ app.py
│ run_celery.py
│
└───utility
| celery_utils.py
|
└───derivation
| tasks.py
|
└───storage
| tasks.py
|
└───xnat
tasks.py
__init__.py:
"""Main application package."""
from celery import Celery
celery = Celery('cookiecutter_mbam', config_source='cookiecutter_mbam.celeryconfig')
Relevant portion of app.py:
from cookiecutter_mbam import celery
def create_app(config_object='cookiecutter_mbam.settings'):
    """Build the Flask application using the application-factory pattern.

    See http://flask.pocoo.org/docs/patterns/appfactories/.

    :param config_object: The configuration object to use.
    """
    # The Flask app is named after the top-level package of this module.
    package_name = __name__.split('.')[0]
    app = Flask(package_name)
    app.config.from_object(config_object)
    # Bind the shared Celery instance to this app before extensions load.
    init_celery(app, celery=celery)
    register_extensions(app)
    # ...
    return app
run_celery.py:
from cookiecutter_mbam.app import create_app
from cookiecutter_mbam import celery
from cookiecutter_mbam.utility.celery_utils import init_celery
# Build the Flask app through the factory so the worker has a configured app.
app = create_app(config_object='cookiecutter_mbam.settings')
# Attach the Flask app context to the shared Celery instance.
# NOTE(review): create_app() in app.py already calls init_celery(app, celery=celery),
# so this applies the ContextTask wrapper a second time — confirm that is intended.
init_celery(app, celery)
celeryconfig.py:
# Message broker and result store: both set to a Redis URL on localhost:6379.
broker_url = 'redis://localhost:6379'
result_backend = 'redis://localhost:6379'
# Tasks and results are serialized as JSON, and only JSON content is accepted.
task_serializer = 'json'
result_serializer = 'json'
accept_content = ['json']
enable_utc = True
# Task modules listed for import.
# NOTE(review): this is a set literal; confirm the Celery version in use accepts
# a set for `imports`, otherwise use a list or tuple.
imports = {'cookiecutter_mbam.xnat.tasks', 'cookiecutter_mbam.storage.tasks', 'cookiecutter_mbam.derivation.tasks'}
Relevant portion of celery_utils.py:
def init_celery(app, celery):
    """Make every task of ``celery`` execute inside ``app``'s application context.

    Replaces ``celery.Task`` with a subclass whose ``__call__`` enters
    ``app.app_context()`` and then delegates to the previous task class's
    ``__call__``. Delegating (rather than calling ``self.run`` directly) keeps
    whatever the base class does around ``run``.

    :param app: object exposing ``app_context()`` that returns a context manager.
    :param celery: object exposing a ``Task`` class attribute; it is mutated.
    :returns: the same ``celery`` object, for chaining.
    """
    # Capture the current base so repeated calls wrap the latest Task class.
    task_base = celery.Task

    class ContextTask(task_base):
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return task_base.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery
When I try to start the worker using celery -A cookiecutter_mbam.run_celery:celery worker I get a RecursionError: maximum recursion depth exceeded while calling a Python object error. (I also have tried several other ways to invoke the worker, all with the same error.) Here's an excerpt from the stack trace:
Traceback (most recent call last):
File "/Users/katie/anaconda/bin/celery", line 11, in <module>
sys.exit(main())
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/__main__.py", line 16, in main
_main()
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/celery.py", line 322, in main
cmd.execute_from_commandline(argv)
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/celery.py", line 496, in execute_from_commandline
super(CeleryCommand, self).execute_from_commandline(argv)))
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/base.py", line 275, in execute_from_commandline
return self.handle_argv(self.prog_name, argv[1:])
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/celery.py", line 488, in handle_argv
return self.execute(command, argv)
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/celery.py", line 420, in execute
).run_from_argv(self.prog_name, argv[1:], command=argv[0])
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/worker.py", line 221, in run_from_argv
*self.parse_options(prog_name, argv, command))
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/base.py", line 398, in parse_options
self.parser = self.create_parser(prog_name, command)
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/base.py", line 414, in create_parser
self.add_arguments(parser)
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/bin/worker.py", line 277, in add_arguments
default=conf.worker_state_db,
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/utils/collections.py", line 126, in __getattr__
return self[k]
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/utils/collections.py", line 429, in __getitem__
return getitem(k)
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/utils/collections.py", line 278, in __getitem__
return mapping[_key]
File "/Users/katie/anaconda/lib/python3.6/collections/__init__.py", line 989, in __getitem__
if key in self.data:
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/utils/collections.py", line 126, in __getattr__
return self[k]
File "/Users/katie/anaconda/lib/python3.6/collections/__init__.py", line 989, in __getitem__
if key in self.data:
File "/Users/katie/anaconda/lib/python3.6/site-packages/celery/utils/collections.py", line 126, in __getattr__
return self[k]
I understand the basic sense of this error -- something is calling itself infinitely. Maybe create_app. But I can't see why, and I don't know how to go about debugging this.
I'm also getting this when I try to load my site:
File "~/cookiecutter_mbam/cookiecutter_mbam/xnat/tasks.py", line 14, in <module>
@celery.task
AttributeError: module 'cookiecutter_mbam.celery' has no attribute 'task'
I did not have this problem when I was using the make_celery method described here, but that method creates circular import problems when you need your tasks to access the application context. Pointers on how to do this correctly with the Cookiecutter Flask template would be much appreciated.
I'm suspicious of that bit of code that's making the Flask app available to celery. It's skipping over some essential code by going directly to run(). (See https://github.com/celery/celery/blob/master/celery/app/task.py#L387)
Try calling the inherited __call__. Here's a snippet from one of my (working) apps.
# Arrange for tasks to have access to the Flask app
TaskBase = celery.Task


class ContextTask(TaskBase):
    """Task class that runs each invocation inside the Flask app context."""

    def __call__(self, *task_args, **task_kwargs):
        with app.app_context():
            # Delegate to the inherited __call__ instead of run().
            outcome = TaskBase.__call__(self, *task_args, **task_kwargs)
        return outcome


celery.Task = ContextTask
I also don't see where you're creating an instance of Celery and configuring it. I assume you have
celery = Celery(__name__)
and then need to
celery.config_from_object(...)
from somewhere within init_celery()
This is solved. I had my celeryconfig.py in the wrong place. I needed to move it to the package directory, not the parent repo directory. It is incredibly unintuitive/uninformative that a misplaced config file, rather than causing an "I can't find that file"-type error, causes an infinite recursion. But at least I finally saw it and corrected it.

Celery outside of flask application context

Getting the following error running a celery task, even with a Flask application context:
raised unexpected: RuntimeError('Working outside of application context.\n\nThis typically means that you attempted to use functionality that needed\nto interface with the current application object in some way. To solve\nthis, set up an application context with app.app_context(). See the\ndocumentation for more information.',)
Traceback (most recent call last):
File "/usr/lib/python3.6/site-packages/celery/app/trace.py", line 382, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/lib/python3.6/site-packages/celery/app/trace.py", line 641, in __protected_call__
return self.run(*args, **kwargs)
File "/app/example.py", line 172, in start_push_task
}, data=data)
File "/app/push.py", line 65, in push
if user and not g.get('in_celery_task') and 'got_user' not in g:
File "/usr/lib/python3.6/site-packages/werkzeug/local.py", line 347, in __getattr__
return getattr(self._get_current_object(), name)
File "/usr/lib/python3.6/site-packages/werkzeug/local.py", line 306, in _get_current_object
return self.__local()
File "/usr/lib/python3.6/site-packages/flask/globals.py", line 44, in _lookup_app_object
raise RuntimeError(_app_ctx_err_msg)
RuntimeError: Working outside of application context.
This typically means that you attempted to use functionality that needed
to interface with the current application object in some way. To solve
this, set up an application context with app.app_context(). See the
documentation for more information.
Any way to fix this?
For me, the issue was that I had import celery instead of from app import celery.
Here's some more of my setup code for anyone who stumbles across here in the future:
app.py
def make_celery(app):
    """Create a Celery instance whose tasks run inside a Flask test request context.

    Writes the broker and result-backend URLs into ``app.config``, builds the
    Celery app from them, and installs a task base class that sets
    ``g.in_celery_task`` before running the task body.

    :param app: the Flask application.
    :returns: the configured Celery instance.
    """
    # Credentials are separated from the host by '@' (user:password@host:port).
    app.config['broker_url'] = 'amqp://rabbitmq:rabbitmq@rabbit:5672/'
    app.config['result_backend'] = 'rpc://rabbitmq:rabbitmq@rabbit:5672/'
    celery = Celery(
        app.import_name,
        backend=app.config['result_backend'],
        broker=app.config['broker_url'],
    )
    celery.conf.update(app.config)

    class ContextTask(Task):
        abstract = True

        def __call__(self, *args, **kwargs):
            with app.test_request_context():
                # Flag read elsewhere via g.get('in_celery_task').
                g.in_celery_task = True
                res = self.run(*args, **kwargs)
                return res

    celery.Task = ContextTask
    celery.config_from_object(__name__)
    celery.conf.timezone = 'UTC'
    return celery
celery = make_celery(app)
In the other file:
from app import celery

Passing application context to custom converter using the Application Factory pattern

I am currently building an application that uses the Application Factory pattern. In this application, I have a custom URL converter, that takes an integer and returns an SQLAlchemy model instance with that ID, if it exists. This works fine when I'm not using the Application Factory pattern, but with it, I get this error when accessing any route that uses the converter:
RuntimeError: application not registered on db instance and no application bound to current context
My application structure looks like this:
app/__init__.py
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from config import config
db = SQLAlchemy()
def create_app(config_name):
    """Application factory: build a Flask app for the named configuration.

    :param config_name: key into the ``config`` mapping.
    :returns: the configured Flask application.
    """
    flask_app = Flask(__name__)
    flask_app.config.from_object(config[config_name])
    db.init_app(flask_app)

    # Imported here, after db is initialised, as in the original layout.
    from app.converters import CustomConverter
    flask_app.url_map.converters["custom"] = CustomConverter

    from app.views.main import main
    flask_app.register_blueprint(main)
    return flask_app
app/converters.py
from werkzeug.routing import ValidationError, IntegerConverter
from app.models import SomeModel
class CustomConverter(IntegerConverter):
    """ Converts a valid SomeModel ID into a SomeModel object. """

    def to_python(self, value):
        """Look up the SomeModel for ``value``; raise ValidationError if none exists."""
        found = SomeModel.query.get(value)
        if found is not None:
            return found
        raise ValidationError()
app/views/main.py
from flask import Blueprint
main = Blueprint("main", __name__)
# This causes the aforementioned error.
@main.route("/<custom:some_model>")
def get_some_model(some_model):
    """Return the name of the model resolved by the ``custom`` URL converter."""
    return some_model.name
Is there any way to somehow pass the application context to the CustomConverter? I have tried wrapping the contents of the to_python method with with current_app.app_context(), but all that does is reduce the error to RuntimeError: working outside of application context.
Here is the full traceback:
File "c:\Python34\lib\site-packages\flask\app.py", line 1836, in __call__
return self.wsgi_app(environ, start_response)
File "c:\Python34\lib\site-packages\flask\app.py", line 1812, in wsgi_app
ctx = self.request_context(environ)
File "c:\Python34\lib\site-packages\flask\app.py", line 1773, in request_context
return RequestContext(self, environ)
File "c:\Python34\lib\site-packages\flask\ctx.py", line 247, in __init__
self.match_request()
File "c:\Python34\lib\site-packages\flask\ctx.py", line 286, in match_request
self.url_adapter.match(return_rule=True)
File "c:\Python34\lib\site-packages\werkzeug\routing.py", line 1440, in match
rv = rule.match(path)
File "c:\Python34\lib\site-packages\werkzeug\routing.py", line 715, in match
value = self._converters[name].to_python(value)
File "c:\Users\Encrylize\Desktop\Testing\Flask\app\converters.py", line 8, in to_python
some_model = SomeModel.query.get(value)
File "c:\Python34\lib\site-packages\flask_sqlalchemy\__init__.py", line 428, in __get__
return type.query_class(mapper, session=self.sa.session())
File "c:\Python34\lib\site-packages\sqlalchemy\orm\scoping.py", line 71, in __call__
return self.registry()
File "c:\Python34\lib\site-packages\sqlalchemy\util\_collections.py", line 988, in __call__
return self.registry.setdefault(key, self.createfunc())
File "c:\Python34\lib\site-packages\flask_sqlalchemy\__init__.py", line 136, in __init__
self.app = db.get_app()
File "c:\Python34\lib\site-packages\flask_sqlalchemy\__init__.py", line 809, in get_app
raise RuntimeError('application not registered on db '
RuntimeError: application not registered on db instance and no application bound to current context
I just had the same problem. I'm not sure what the 'correct' way to solve it is, since this seems to be a rather obvious thing to do and should just work, but I solved it with the generic workaround that works for most problems with the application factory pattern: save the app object in a closure and inject it from outside. For your example:
def converters(app):
    """Build the URL converters that need ``app``, captured in a closure.

    :param app: the Flask application whose context is entered during lookup.
    :returns: mapping of converter name to converter class.
    """

    class CustomConverter(IntegerConverter):
        """ Converts a valid SomeModel ID into a SomeModel object. """

        def to_python(self, value):
            with app.app_context():
                found = SomeModel.query.get(value)
                if found is not None:
                    return found
                raise ValidationError()

    registry = {"custom": CustomConverter}
    return registry
def create_app(config_name):
    """Application factory that registers the app-aware URL converters.

    :param config_name: key into the ``config`` mapping.
    :returns: the configured Flask application.
    """
    flask_app = Flask(__name__)
    flask_app.config.from_object(config[config_name])
    db.init_app(flask_app)

    # The converters close over the app, so they are built per application.
    app_converters = converters(flask_app)
    flask_app.url_map.converters.update(app_converters)

    from app.views.main import main
    flask_app.register_blueprint(main)
    return flask_app
Obviously this is rather less than elegant or optimal: A temporary app context is created during URL parsing and then discarded immediately.
EDIT: Major Gotcha: This does not work for non-trivial cases. The object returned will not be connected to a live session (the session is cleaned up when the temporary app context is closed). Modification and lazy loading will break.
The other solution is nice but (as mentioned) presents a lot of problems. A more robust solution is to take a different approach and use a decorator-
def swap_model(func):
    """Decorator that replaces the ``some_model`` keyword argument with a model instance.

    The wrapped view receives the first ``SomeModel`` whose ``name`` equals the
    incoming ``some_model`` value (``None`` if there is no match).

    :param func: the view function to wrap.
    :returns: the wrapping function, carrying ``func``'s name and docstring.
    """
    from functools import wraps

    # wraps() keeps func.__name__, so each decorated view keeps its own name.
    @wraps(func)
    def decorated_function(*args, **kwargs):
        kwargs['some_model'] = SomeModel.query.filter(SomeModel.name == kwargs['some_model']).first()
        return func(*args, **kwargs)

    return decorated_function
Then for your route-
# URL rules must start with a leading slash.
@main.route("/<some_model>")
@swap_model
def get_some_model(some_model):
    """Return the name of the model that ``swap_model`` resolved from the URL."""
    return some_model.name
You can even expand that by adding 404 errors when the model isn't present-
def swap_model(func):
    """Decorator that resolves ``some_model`` to a model instance or aborts with 404.

    Looks up the first ``SomeModel`` whose ``name`` equals the incoming
    ``some_model`` keyword argument; calls ``abort(404)`` when none is found,
    otherwise passes the instance to the wrapped view.

    :param func: the view function to wrap.
    :returns: the wrapping function, carrying ``func``'s name and docstring.
    """
    from functools import wraps

    # wraps() keeps func.__name__, so each decorated view keeps its own name.
    @wraps(func)
    def decorated_function(*args, **kwargs):
        some_model = SomeModel.query.filter(SomeModel.name == kwargs['some_model']).first()
        if not some_model:
            abort(404)
        kwargs['some_model'] = some_model
        return func(*args, **kwargs)

    return decorated_function

How am I getting 'InternalError: table "dev~guestbook!!Entities" already exists' when I just created datastore?

I'm trying to figure out how to setup Test Driven Development for GAE.
I start the tests with:
nosetests -v --with-gae
I keep getting the error:
InternalError: table "dev~guestbook!!Entities" already exists
The datastore doesn't exist until I create it in the setUp(), but I'm still getting an error that the entities already exists?
I'm using the code from the GAE tutorial.
Here is my testing code in functional_tests.py:
import sys, os, subprocess, time, unittest, shlex
sys.path.append("/usr/local/google_appengine")
sys.path.append("/usr/local/google_appengine/lib/yaml/lib")
sys.path.append("/usr/local/google_appengine/lib/webapp2-2.5.2")
sys.path.append("/usr/local/google_appengine/lib/django-1.5")
sys.path.append("/usr/local/google_appengine/lib/cherrypy")
sys.path.append("/usr/local/google_appengine/lib/concurrent")
sys.path.append("/usr/local/google_appengine/lib/docker")
sys.path.append("/usr/local/google_appengine/lib/requests")
sys.path.append("/usr/local/google_appengine/lib/websocket")
sys.path.append("/usr/local/google_appengine/lib/fancy_urllib")
sys.path.append("/usr/local/google_appengine/lib/antlr3")
from selenium import webdriver
from google.appengine.api import memcache, apiproxy_stub, apiproxy_stub_map
from google.appengine.ext import db
from google.appengine.ext import testbed
from google.appengine.datastore import datastore_stub_util
from google.appengine.tools.devappserver2 import devappserver2
class NewVisitorTest(unittest.TestCase):
    """Selenium functional test for the guestbook app served by a local dev_appserver."""

    def setUp(self):
        """Start dev_appserver, activate the testbed stubs, seed an Author, open Firefox."""
        # Local import: logging is not in the visible module-level import block.
        import logging

        # Start the dev server
        cmd = "/usr/local/bin/dev_appserver.py /Users/Bryan/work/GoogleAppEngine/guestbook/app.yaml --port 8080 --storage_path /tmp/datastore --clear_datastore --skip_sdk_update_check"
        self.dev_appserver = subprocess.Popen(shlex.split(cmd),
                                              stdout=subprocess.PIPE)
        time.sleep(2)  # Important, let dev_appserver start up

        self.testbed = testbed.Testbed()
        self.testbed.setup_env(app_id='dermal')
        self.testbed.activate()
        self.testbed.init_user_stub()
        # Create a consistency policy that will simulate the High Replication consistency model.
        # with a probability of 1, the datastore should be available.
        self.policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=1)
        # Initialize the datastore stub with this policy.
        # NOTE(review): datastore_file lives under the same /tmp/datastore directory
        # passed to dev_appserver as --storage_path above — confirm the two
        # processes are not meant to share one SQLite file.
        self.testbed.init_datastore_v3_stub(datastore_file="/tmp/datastore/datastore.db", use_sqlite=True, consistency_policy=self.policy)
        self.testbed.init_memcache_stub()
        self.datastore_stub = apiproxy_stub_map.apiproxy.GetStub('datastore_v3')

        # setup client to make sure
        from guestbook import Author, Greeting
        admin_email = "bryan@mail.com"
        if not Author.query(Author.email == admin_email).get():
            logging.info("create Admin")
            Author(email=admin_email).put()
        # The seeded Author must be readable back before the browser test runs.
        self.assertIsNotNone(Author.query(Author.email == admin_email).get())

        self.browser = webdriver.Firefox()
        self.browser.implicitly_wait(3)

    def tearDown(self):
        """Close the browser, deactivate the testbed and stop the dev server."""
        self.browser.quit()
        self.testbed.deactivate()
        self.dev_appserver.terminate()

    def test_submit_anon_greeting(self):
        """Posting a greeting anonymously shows its text on the resulting page."""
        self.browser.get('http://localhost:8080')
        self.browser.find_element_by_name('content').send_keys('Anonymous test post')
        self.browser.find_element_by_name('submit').submit()
        self.assertIn('Anonymous test post', self.browser.page_source)
Here is the traceback:
test_submit_anon_greeting (functional_tests.NewVisitorTest) ... INFO 2015-05-11 14:41:40,516 devappserver2.py:745] Skipping SDK update check.
INFO 2015-05-11 14:41:40,594 api_server.py:190] Starting API server at: http://localhost:59656
INFO 2015-05-11 14:41:40,598 dispatcher.py:192] Starting module "default" running at: http://localhost:8080
INFO 2015-05-11 14:41:40,600 admin_server.py:118] Starting admin server at: http://localhost:8000
WARNING 2015-05-11 14:41:45,008 tasklets.py:409] suspended generator _run_to_list(query.py:964) raised InternalError(table "dev~guestbook!!Entities" already exists)
ERROR 2015-05-11 14:41:45,009 webapp2.py:1552] table "dev~guestbook!!Entities" already exists
Traceback (most recent call last):
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1102, in __call__
return handler.dispatch()
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/Users/Bryan/work/GoogleAppEngine/guestbook/guestbook.py", line 50, in get
greetings = greetings_query.fetch(10)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/utils.py", line 142, in positional_wrapper
return wrapped(*args, **kwds)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/query.py", line 1187, in fetch
return self.fetch_async(limit, **q_options).get_result()
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/tasklets.py", line 325, in get_result
self.check_success()
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/tasklets.py", line 368, in _help_tasklet_along
value = gen.throw(exc.__class__, exc, tb)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/query.py", line 964, in _run_to_list
batch = yield rpc
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/ext/ndb/tasklets.py", line 454, in _on_rpc_completion
result = rpc.get_result()
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/api/apiproxy_stub_map.py", line 613, in get_result
return self.__get_result_hook(self)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/datastore/datastore_query.py", line 2870, in __query_result_hook
self._batch_shared.conn.check_rpc_success(rpc)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/google/appengine/datastore/datastore_rpc.py", line 1342, in check_rpc_success
raise _ToDatastoreError(err)
InternalError: table "dev~guestbook!!Entities" already exists
It looks like there are a couple of things happening here.
First, it looks like you are using NoseGAE --with-gae. The plugin handles setting up and tearing down your testbed so you don't have to. This means that you do not need any of the self.testbed code and actually it can cause conflicts internally. Either switch to doing it the NoseGAE way, or don't use the --with-gae flag. If you stick with NoseGAE, it has an option --gae-datastore that lets you set the path to the datastore that it will use for your tests. Then inside your test class, set the property nosegae_datastore_v3 = True to have it set up for you:
class NewVisitorTest(unittest.TestCase):
# enable the datastore stub
nosegae_datastore_v3 = True
Second, the way that dev_appserver / sqlite work together, the appserver loads the sqlite db file into memory and works with it there. When the app server exits, it flushes the database contents back to disk. Since you are using the same datastore for your tests as the dev_appserver.py process you are opening for selenium, they may or may not see the fixture data you set up inside your test.
Here is an example from https://github.com/Trii/NoseGAE/blob/master/nosegae.py#L124-L140
class MyTest(unittest.TestCase):
nosegae_datastore_v3 = True
nosegae_datastore_v3_kwargs = {
'datastore_file': '/tmp/nosegae.sqlite3',
'use_sqlite': True
}
def test_something(self):
entity = MyModel(name='NoseGAE')
entity.put()
self.assertNotNone(entity.key.id())
I guess this line might be the faulty one:
self.testbed.init_datastore_v3_stub(datastore_file="/tmp/datastore/datastore.db", use_sqlite=True, consistency_policy=self.policy)
Setting datastore_file="/tmp/datastore/datastore.db" indicates that you want to reuse this existing datastore in your tests.
The python code documentation says:
The 'datastore_file' argument can be the path to an existing
datastore file, or None (default) to use an in-memory datastore
that is initially empty.
Personally I use these in my tests:
def setUp(self):
    """Activate a testbed with datastore and memcache stubs."""
    bed = testbed.Testbed()
    self.testbed = bed
    bed.activate()
    # No datastore_file is given to the stub.
    hr_policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=0)
    bed.init_datastore_v3_stub(consistency_policy=hr_policy)
    bed.init_memcache_stub()
def tearDown(self):
    """Deactivate the testbed activated in setUp."""
    active_testbed = self.testbed
    active_testbed.deactivate()

Connection is closed when a SQLAlchemy event triggers a Celery task

When one of my unit tests deletes a SQLAlchemy object, the object triggers an after_delete event which triggers a Celery task to delete a file from the drive.
The task is CELERY_ALWAYS_EAGER = True when testing.
gist to reproduce the issue easily
The example has two tests. One triggers the task in the event, the other outside the event. Only the one in the event closes the connection.
To quickly reproduce the error you can run:
git clone https://gist.github.com/5762792fc1d628843697.git
cd 5762792fc1d628843697
virtualenv venv
. venv/bin/activate
pip install -r requirements.txt
python test.py
The stack:
$ python test.py
E
======================================================================
ERROR: test_delete_task (__main__.CeleryTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "test.py", line 73, in test_delete_task
db.session.commit()
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/scoping.py", line 150, in do
return getattr(self.registry(), name)(*args, **kwargs)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 776, in commit
self.transaction.commit()
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 377, in commit
self._prepare_impl()
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 357, in _prepare_impl
self.session.flush()
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 1919, in flush
self._flush(objects)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 2037, in _flush
transaction.rollback(_capture_exception=True)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/util/langhelpers.py", line 63, in __exit__
compat.reraise(type_, value, traceback)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 2037, in _flush
transaction.rollback(_capture_exception=True)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 393, in rollback
self._assert_active(prepared_ok=True, rollback_ok=True)
File "/home/brice/Code/5762792fc1d628843697/venv/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py", line 223, in _assert_active
raise sa_exc.ResourceClosedError(closed_msg)
ResourceClosedError: This transaction is closed
----------------------------------------------------------------------
Ran 1 test in 0.014s
FAILED (errors=1)
I think I found the problem - it's in how you set up your Celery task. If you remove the app context call from your celery setup, everything runs fine:
class ContextTask(TaskBase):
    """Task base that calls straight through to TaskBase with no app context."""

    abstract = True

    def __call__(self, *task_args, **task_kwargs):
        # deleted --> with app.app_context():
        outcome = TaskBase.__call__(self, *task_args, **task_kwargs)
        return outcome
There's a big warning in the SQLAlchemy docs about never modifying the session during after_delete events: http://docs.sqlalchemy.org/en/latest/orm/events.html#sqlalchemy.orm.events.MapperEvents.after_delete
So I suspect the with app.app_context(): is being called during the delete, trying to attach to and/or modify the session that Flask-SQLAlchemy stores in the app object, and therefore the whole thing is bombing.
Flask-SQLAlchemy does a lot of magic behind the scenes for you, but you can bypass this and use SQLAlchemy directly. If you need to talk to the database during the delete event, you can create a new session to the db:
@celery.task()
def my_task():
    """Add a User row through a freshly created scoped session and commit it."""
    # obviously here I create a new object
    session = db.create_scoped_session()
    session.add(User(id=13, value="random string"))
    session.commit()
But it sounds like you don't need this, you're just trying to delete an image path. In that case, I would just change your task so it takes a path:
# instance will call the task
@event.listens_for(User, "after_delete")
def after_delete(mapper, connection, target):
    """Queue ``my_task`` with the deleted row's ``value``."""
    # Only the plain value is handed to the task, not the ORM object.
    my_task.delay(target.value)
@celery.task()
def my_task(image_path):
    """Delete the file at ``image_path`` from disk.

    :param image_path: path of the file to remove.
    :raises OSError: propagated from ``os.remove`` if the file cannot be removed.
    """
    os.remove(image_path)
Hopefully that's helpful - let me know if any of that doesn't work for you. Thanks for the very detailed setup, it really helped in debugging.
Similar to the answer suggested by deBrice, but using the approach similar to Rachel.
class ContextTask(TaskBase):
    """Task base that enters the app context only when none is already active."""

    abstract = True

    def __call__(self, *args, **kwargs):
        import flask

        if not flask.current_app:
            # actual workers need to enter worker app context
            with app.app_context():
                return TaskBase.__call__(self, *args, **kwargs)
        # tests will be run in unittest app context
        return TaskBase.__call__(self, *args, **kwargs)
Ask, the creator of celery, suggested that solution on github
from celery import signals
def make_celery(app):
    """Attach signal handlers that push and pop a Flask app context around each task.

    The body that builds the Celery instance is elided (``...``) in this excerpt.

    :param app: the Flask application whose context is entered for each task.
    """
    ...

    # NOTE(review): Celery connects signal receivers by weak reference by
    # default — confirm these nested functions stay alive after make_celery
    # returns, or connect them with weak=False.
    @signals.task_prerun.connect
    def add_task_flask_context(sender, **kwargs):
        # Eager tasks are skipped; only non-eager tasks get a new context.
        if not sender.request.is_eager:
            sender.request.flask_context = app.app_context().__enter__()

    @signals.task_postrun.connect
    def cleanup_task_flask_context(sender, **kwargs):
        # Only exit a context that the prerun handler actually entered.
        flask_context = getattr(sender.request, 'flask_context', None)
        if flask_context is not None:
            flask_context.__exit__(None, None, None)

Categories