Testing TaskResult celery tasks fails - python

I am trying to use django_celery_results in my project. I created a task add and a function add_new(x, y) in tasks.py:
from celery import shared_task
from django_celery_results.models import TaskResult
@shared_task(bind=True)
def add(self, x, y):
    """Return the sum of ``x`` and ``y``.

    Declared with ``bind=True``, so ``self`` is the task instance supplied by
    Celery; it is not used here.
    """
    # The accompanying test expects add(4, 4) to yield 8, i.e. a sum rather
    # than the (x, y) tuple.
    return x + y
def add_new(x, y):
    """Queue ``add(x, y)`` and return a ``TaskResult`` dict for the queued task id."""
    task = add.delay(x, y)
    # NOTE(review): this builds a new, unsaved TaskResult in memory instead of
    # loading the stored row, so ``result`` presumably stays at its default.
    # That would explain the ``None != 8`` failure; fetching with
    # TaskResult.objects.get(task_id=task.task_id) is likely what was intended.
    # Confirm against django_celery_results.
    task_result = TaskResult(task_id=task.task_id)
    return task_result.as_dict()
I am testing my functions in test_tasks.py like this:
from tasks import add, add_new
from nose.tools import eq_
from django.test import override_settings
def test_add_task():
result = add.apply(args=(4, 4,)).get()
eq_(result, 8)
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
def test_add_new_task():
    """Check that the dict returned by ``add_new(4, 4)`` carries the result 8."""
    result = add_new(4, 4)
    eq_(result['result'], 8)
The first test passes, but the second one fails with:
AssertionError: None != 8
Looking at the list of TaskResult objects, the latest task is not present. Repeating the test, the task_id from the test before is there, but again the latest one is missing and the test fails. So I have the impression that the DB is not updated, even after the task has finished successfully. When I execute the functions add and add_new in a shell, they work perfectly fine. Only running the tests with nose does not work. What am I missing?

Related

Mocking code run inside an rq SimpleWorker

I have code which uses Python requests to kick off a task which runs in a worker that is started with rq. (Actually, the GET request results in one task which itself starts a second task. But this complexity shouldn't affect things, so I've left that out of the code below.) I already have a test which uses rq's SimpleWorker class to cause the code to run synchronously. This works fine. But now I'm adding requests_ratelimiter to the second task, and I want to be sure it's behaving correctly. I think I need to somehow mock the time.sleep() function used by the rate limiter, and I can't figure out how to patch it.
routes.py
#app.route("/do_work/", methods=["POST"])
def do_work():
rq_job = my_queue.enqueue(f"my_app.worker.do_work", job_timeout=3600, *args, **kwargs)
worker.py
from requests_ratelimiter import LimiterSession
#job('my_queue', connection=redis_conn, timeout=3600, result_ttl=24 * 60 * 60)
def do_work():
session = LimiterSession(per_second=1)
r = session.get(WORK_URL)
test.py
import requests_mock
def test_get(client):
# call the Flask function to kick off the task
client.get("/do_work/")
with requests_mock.Mocker() as m:
# mock the return value of the requests.get() call in the worker
response_success = {"result": "All good"}
m.get(WORK_URL, json=response_success)
worker = SimpleWorker([my_queue], connection=redis_conn)
worker.work(burst=True) # Work until the queue is empty
A test in requests_ratelimiter patches the sleep function using a target path of 'pyrate_limiter.limit_context_decorator.sleep', but that doesn't work for me because I'm not declaring pyrate_limiter at all. I've tried mocking the time function and then passing that into the LimiterSession, and that sort of works:
worker.py
from requests_ratelimiter import LimiterSession
from time import time
#job('my_queue', connection=redis_conn, timeout=3600, result_ttl=24 * 60 * 60)
def do_work():
session = LimiterSession(per_second=1, time_function=time)
r = session.get(WORK_URL)
test.py
import requests_mock
def test_get(client):
# call the Flask function to kick off the task
client.get("/do_work/")
with patch("my_app.worker.time", return_value=None) as mock_time:
with requests_mock.Mocker() as m:
response_success = {"result": "All good"}
m.get(URL, json=response_success)
worker = SimpleWorker([my_queue], connection=redis_conn)
worker.work(burst=True) # Work until the queue is empty
assert mock_time.call_count == 1
However, then I see time called many more times than sleep would be, so I don't get the info I need from it. And patching my_app.worker.time.sleep results in the error:
AttributeError: does not have the attribute 'sleep'
I have also tried patching the pyrate_limiter as the requests_ratelimiter testing code does:
with patch(
"my_app.worker.requests_ratelimiter.pyrate_limiter.limit_context_decorator.sleep", return_value=None
) as mock_sleep:
But this fails with:
ModuleNotFoundError: No module named 'my_app.worker.requests_ratelimiter'; 'my_app.worker' is not a package
How can I test and make sure the rate limiter is engaging properly?
The solution was indeed to use 'pyrate_limiter.limit_context_decorator.sleep', despite the fact that I wasn't importing it.
When I did that and made the mock return None, I discovered that sleep() was being called tens of thousands of times because it's in a while loop.
So in the end, I also needed to use freezegun and a side effect on my mock_sleep to get the behavior I wanted. Now time is frozen, but sleep() jumps the test clock forward synchronously and instantly by the amount of seconds passed as an argument.
from datetime import timedelta
from unittest.mock import patch
import requests_mock
from freezegun import freeze_time
from rq import SimpleWorker
def test_get(client):
with patch("pyrate_limiter.limit_context_decorator.sleep") as mock_sleep:
with freeze_time() as frozen_time:
# Make sleep operate on the frozen time
# See: https://github.com/spulec/freezegun/issues/47#issuecomment-324442679
mock_sleep.side_effect = lambda seconds: frozen_time.tick(timedelta(seconds=seconds))
with requests_mock.Mocker() as m:
m.get(URL, json=response_success)
worker = SimpleWorker([my_queue], connection=redis_conn)
worker.work(burst=True) # Work until the queue is empty
# The worker will do enough to get rate limited once
assert mock_sleep.call_count == 1

Python Redis Queue (rq) - how to avoid preloading ML model for each job?

I want to queue my ml predictions using rq. Example code (pseudo-ish):
predict.py:
import tensorflow as tf
def predict_stuff(foo):
model = tf.load_model()
result = model.predict(foo)
return result
app.py:
from rq import Queue
from redis import Redis
from predict import predict_stuff
q = Queue(connection=Redis())
for foo in baz:
job = q.enqueue(predict_stuff, foo)
worker.py:
import sys
from rq import Connection, Worker
# Preload libraries
import tensorflow as tf
with Connection():
qs = sys.argv[1:] or ['default']
w = Worker(qs)
w.work()
I've read rq docs explaining that you can preload libraries to avoid importing them every time a job is run (so in example code I import tensorflow in the worker code). However, I also want to move model loading from predict_stuff to avoid loading the model every time the worker runs a job. How can I go about that?
I'm not sure if this is something that can help but, following the example here:
https://github.com/rq/rq/issues/720
Instead of sharing a connection pool, you can share the model.
pseudo code:
import tensorflow as tf
from rq import Worker as _Worker
from rq.local import LocalStack
_model_stack = LocalStack()
def get_model():
    """Return the model on top of the shared model stack.

    Raises:
        RuntimeError: if nothing has been pushed onto the stack, i.e. the
            caller is not running inside a worker that loaded the model.
    """
    model = _model_stack.top
    # Raise a real exception: raising a bare string is itself a TypeError, and
    # an assert would be stripped under ``python -O``.
    if model is None:
        raise RuntimeError('Run outside of worker context')
    return model
class Worker(_Worker):
    """rq worker that loads the model once and publishes it on the model stack."""

    def work(self, burst=False, logging_level='WARN'):
        """Push a freshly loaded model, then run the regular work loop.

        Args:
            burst: forwarded unchanged to the base worker's ``work``.
            logging_level: forwarded unchanged to the base worker's ``work``.

        Returns:
            Whatever the base worker's ``work`` returns.
        """
        _model_stack.push(tf.load_model())
        try:
            return super().work(burst, logging_level)
        finally:
            # Remove the model once the work loop ends so it does not stay on
            # the stack after this worker has stopped.
            _model_stack.pop()
def predict_stuff_job(foo):
model = get_model()
result = model.predict(foo)
return result
I use something similar to this for a "global" file reader I wrote. Load up the instance into the LocalStack and have the workers read off the stack.
In the end I haven't figured out how to do it with python-rq. I moved to celery where I did it like this:
app.py
from tasks import predict_stuff
for foo in baz:
task = predict_stuff.delay(foo)
tasks.py
import tensorflow as tf
from celery import Celery
from celery.signals import worker_process_init
cel_app = Celery('tasks')
model = None
@worker_process_init.connect()
def on_worker_init(**_):
    """Handle ``worker_process_init`` by loading the model into the module-level ``model``.

    The keyword arguments passed with the signal are accepted and ignored.
    """
    global model
    model = tf.load_model()
@cel_app.task(name='predict_stuff')
def predict_stuff(foo):
    """Return the module-level model's prediction for ``foo``.

    Relies on ``model`` having been set by ``on_worker_init`` beforehand.
    """
    result = model.predict(foo)
    return result

How to test celery periodic_task in Django?

I have a simple periodic task:
from celery.decorators import periodic_task
from celery.task.schedules import crontab
from .models import Subscription
@periodic_task(run_every=crontab(minute=0, hour=0))
def deactivate_subscriptions():
    """Deactivate every subscription whose ``is_expired`` flag is set.

    Each matching subscription is printed, has ``is_active`` and
    ``can_activate`` cleared, and is saved individually.
    """
    for subscription in Subscription.objects.filter(is_expired=True):
        print(subscription)
        subscription.is_active = False
        subscription.can_activate = False
        subscription.save()
And I want to cover it with tests.
I found information about how to test simple tasks, like @shared_task, but nowhere can I find an example of testing @periodic_task.
When having a periodic task defined with a decorator you can access the crontab configuration the following way:
tasks.py
@periodic_task(
    run_every=(crontab(minute="*/1")),
)
def my_task():
    """Placeholder periodic task; only its ``run_every`` schedule matters here."""
    pass
some file where you need to access it
from .tasks import my_task
crontab = my_task.run_every
hours_when_it_will_run = crontab.hour
minutes_when_it_will_run = crontab.minute
day_of_the_week_when_it_will_run = crontab.day_of_week
day_of_the_month_when_it_will_run = crontab.day_of_month
month_of_year_when_it_will_run = crontab.month_of_year
This way you can access when the task will be executed, and you can check in your test that the expected schedule is there.
Use the function apply(). Documentation for apply().

Python mock function called in subprocess

Having following file handler.py
import job
def worker():
return job()
And following test test_handler.py
import subprocess
def test_worker():
subprocess.Popen(['./handler.py'], stderr=subprocess.PIPE)
How can I mock job function, taking into account that handler.py is called in subprocess
This isn't the best approach to test this feature.
Since it is a unit test, you should test the worker and check its return value.
handler.py
import job
def worker():
return job()
test_handler.py
import unittest
from handler import worker
class TestHandlerCase(unittest.TestCase):
def test_worker(self):
"""Test call worker should succeed."""
self.assertEqual(worker(), True)
if __name__ == '__main__':
unittest.main()
Check if the result of method is equal to the expected. And so, adjust the method/test.
Anyway, this is an example, based on the code in your question.

How do you unit test a Celery task?

The Celery documentation mentions testing Celery within Django but doesn't explain how to test a Celery task if you are not using Django. How do you do this?
It is possible to test tasks synchronously using any unittest lib out there. I normally do 2 different test sessions when working with celery tasks. The first one (as I'm suggesting below) is completely synchronous and should be the one that makes sure the algorithm does what it should do. The second session uses the whole system (including the broker) and makes sure I'm not having serialization issues or any other distribution or communication problem.
So:
from celery import Celery
celery = Celery()
@celery.task
def add(x, y):
    """Return the sum of ``x`` and ``y``."""
    return x + y
And your test:
from nose.tools import eq_
def test_add_task():
rst = add.apply(args=(4, 4)).get()
eq_(rst, 8)
Here is an update to my seven years old answer:
You can run a worker in a separate thread via a pytest fixture:
https://docs.celeryq.dev/en/v5.2.6/userguide/testing.html#celery-worker-embed-live-worker
According to the docs, you should not use "always_eager" (see the top of the page of the above link).
Old answer:
I use this:
with mock.patch('celeryconfig.CELERY_ALWAYS_EAGER', True, create=True):
...
Docs: https://docs.celeryq.dev/en/3.1/configuration.html#celery-always-eager
CELERY_ALWAYS_EAGER lets you run your task synchronously, and you don't need a celery server.
Depends on what exactly you want to be testing.
Test the task code directly. Don't call "task.delay(...)" just call "task(...)" from your unit tests.
Use CELERY_ALWAYS_EAGER. This will cause your tasks to be called immediately at the point you say "task.delay(...)", so you can test the whole path (but not any asynchronous behavior).
For those on Celery 4 it's:
#override_settings(CELERY_TASK_ALWAYS_EAGER=True)
Because the settings names have been changed and need updating if you choose to upgrade, see
https://docs.celeryproject.org/en/latest/history/whatsnew-4.0.html?highlight=what%20is%20new#lowercase-setting-names
unittest
import unittest
from myproject.myapp import celeryapp
class TestMyCeleryWorker(unittest.TestCase):
def setUp(self):
celeryapp.conf.update(CELERY_ALWAYS_EAGER=True)
py.test fixtures
# conftest.py
import pytest

from myproject.myapp import celeryapp


@pytest.fixture(scope='module')
def celery_app(request):
    """Module-scoped fixture returning the project's Celery app with eager mode on.

    Sets ``CELERY_ALWAYS_EAGER=True`` on the app configuration before
    returning it; ``request`` is accepted but not used.
    """
    celeryapp.conf.update(CELERY_ALWAYS_EAGER=True)
    return celeryapp
# test_tasks.py
def test_some_task(celery_app):
...
Addendum: make send_task respect eager
from celery import current_app
def send_task(name, args=(), kwargs=None, **opts):
    """Look up the task registered as ``name`` and run it through ``apply``.

    Args:
        name: key of the task in ``current_app.tasks``.
        args: positional arguments for the task.
        kwargs: keyword arguments for the task; ``None`` means no keyword
            arguments (avoids sharing one mutable default dict across calls).
        **opts: extra options forwarded to ``task.apply``.

    Returns:
        Whatever ``task.apply`` returns.

    Raises:
        KeyError: presumably, if no task is registered under ``name``.
    """
    # https://github.com/celery/celery/issues/581
    task = current_app.tasks[name]
    return task.apply(args, {} if kwargs is None else kwargs, **opts)
current_app.send_task = send_task
As of Celery 3.0, one way to set CELERY_ALWAYS_EAGER in Django is:
from django.test import TestCase, override_settings
from .foo import foo_celery_task
class MyTest(TestCase):
    """Django test case exercising ``foo_celery_task`` with eager mode enabled."""

    @override_settings(CELERY_ALWAYS_EAGER=True)
    def test_foo(self):
        """``delay()`` should return a truthy value when the setting is overridden."""
        self.assertTrue(foo_celery_task.delay())
Since Celery v4.0, py.test fixtures are provided to start a celery worker just for the test and are shut down when done:
def test_myfunc_is_executed(celery_session_worker):
# celery_session_worker: <Worker: gen93553#mymachine.local (running)>
assert myfunc.delay().wait(3)
Among other fixtures described on http://docs.celeryproject.org/en/latest/userguide/testing.html#py-test, you can change the celery default options by redefining the celery_config fixture this way:
@pytest.fixture(scope='session')
def celery_config():
    """Session-scoped fixture overriding the Celery options used by the test app.

    Accepts JSON and pickle content and serializes results with pickle.
    """
    return {
        'accept_content': ['json', 'pickle'],
        'result_serializer': 'pickle',
    }
By default, the test worker uses an in-memory broker and result backend. No need to use a local Redis or RabbitMQ if not testing specific features.
reference
using pytest.
def test_add(celery_worker):
mytask.delay()
if you use flask, set the app config
CELERY_BROKER_URL = 'memory://'
CELERY_RESULT_BACKEND = 'cache+memory://'
and in conftest.py
@pytest.fixture
def app():
    """Fixture placeholder: yield your actual Flask application here."""
    yield app  # Your actual Flask application
@pytest.fixture
def celery_app(app):
    """Fixture placeholder: yield your actual Flask-Celery application here."""
    # Imported for its side effects; the original author marks it as required.
    from celery.contrib.testing import tasks  # need it
    yield celery_app  # Your actual Flask-Celery application
In my case (and I assume many others), all I wanted was to test the inner logic of a task using pytest.
TL;DR; ended up mocking everything away (OPTION 2)
Example Use Case:
proj/tasks.py
@shared_task(bind=True)
def add_task(self, a, b):
    """Return ``a + b``; ``self`` is the bound task instance and is unused."""
    return a + b
tests/test_tasks.py
from proj import add_task
def test_add():
assert add_task(1, 2) == 3, '1 + 2 should equal 3'
but, since the shared_task decorator does a lot of celery internal logic, it isn't really a unit test.
So, for me, there were 2 options:
OPTION 1: Separate internal logic
proj/tasks_logic.py
def internal_add(a, b):
    """Return ``a + b``; the task's logic kept free of any Celery machinery."""
    return a + b
proj/tasks.py
from .tasks_logic import internal_add
@shared_task(bind=True)
def add_task(self, a, b):
    """Celery entry point that delegates the computation to ``internal_add``."""
    return internal_add(a, b)
This looks very odd, and other than making it less readable, it requires manually extracting and passing attributes that are part of the request, for instance the task_id in case you need it, which makes the logic less pure.
OPTION 2: mocks
mocking away celery internals
tests/__init__.py
# noinspection PyUnresolvedReferences
from celery import shared_task
from mock import patch
def mock_signature(*args, **kwargs):
    """Stand-in for a task's signature helpers: ignore all arguments, return ``{}``."""
    return {}


def mocked_shared_task(*decorator_args, **decorator_kwargs):
    """Replacement for ``celery.shared_task`` that leaves the function as-is.

    The decorated function is returned unchanged apart from gaining
    ``signature``, ``si`` and ``s`` attributes that all point at
    ``mock_signature``. Works both as ``@shared_task(...)`` and as a bare
    ``@shared_task``.
    """
    def mocked_shared_decorator(func):
        func.signature = func.si = func.s = mock_signature
        return func

    # Bare ``@shared_task`` usage: the decorated function arrives as the only
    # positional argument, so decorate it directly.
    if len(decorator_args) == 1 and not decorator_kwargs and callable(decorator_args[0]):
        return mocked_shared_decorator(decorator_args[0])
    return mocked_shared_decorator
patch('celery.shared_task', mocked_shared_task).start()
which then allows me to mock the request object (again, in case you need things from the request, like the id or the retries counter).
tests/test_tasks.py
from proj import add_task
class MockedRequest:
    """Minimal stand-in for a task's request object, exposing only ``id``."""

    def __init__(self, id=None):
        # Fall back to 1 only when no id was given, so falsy ids such as 0
        # are kept as passed.
        self.id = 1 if id is None else id
class MockedTask:
    """Minimal stand-in for a bound task: carries a ``request`` with the given id."""

    def __init__(self, id=None):
        self.request = MockedRequest(id=id)
def test_add():
mocked_task = MockedTask(id=3)
assert add_task(mocked_task, 1, 2) == 3, '1 + 2 should equal 3'
This solution is much more manual, but, it gives me the control I need to actually unit test, without repeating myself, and without losing the celery scope.
I see a lot of CELERY_ALWAYS_EAGER = True in unit test methods offered as a solution for unit tests, but since version 5.0.5 became available there have been a lot of changes which make most of the old answers deprecated and, for me, time-consuming nonsense. So for everyone here searching for a solution: go to the docs and read the well-documented unit test examples for the new version:
https://docs.celeryproject.org/en/stable/userguide/testing.html
As for eager mode in unit tests, here is a quote from the current docs:
Eager mode
The eager mode enabled by the task_always_eager setting is by
definition not suitable for unit tests.
When testing with eager mode you are only testing an emulation of what
happens in a worker, and there are many discrepancies between the
emulation and what happens in reality.
Another option is to mock the task if you do not need the side effects of running it.
from unittest import mock
@mock.patch('module.module.task')
def test_name(self, mock_task): ...

Categories