Execute some code when a celery chain has completed - python

I have the following code that starts a Celery chain when a URL is visited. The chain arguments are passed through a query parameter, e.g. /process_pipeline/?pipeline=task_a|task_c|task_b. To avoid launching several similar chained tasks (if, for instance, someone refreshes the page) I use a simple cache-based locking system.
The cache entry has a timeout, but what I'm missing here is a way to release the lock when the chain has completed.
Any idea?
tasks.py
from __future__ import absolute_import
from celery import shared_task

registry = {}

def register(fn):
    registry[fn.__name__] = fn

@shared_task
def task_a(*args, **kwargs):
    print('task a')

@shared_task
def task_b(*args, **kwargs):
    print('task b')

@shared_task
def task_c(*args, **kwargs):
    print('task c')

register(task_a)
register(task_b)
register(task_c)
views.py
from __future__ import absolute_import
from django.core.cache import cache as memcache
from django.shortcuts import redirect
from django.utils.hashcompat import md5_constructor as md5
from celery import chain
from .tasks import registry

LOCK_EXPIRE = 60 * 5  # Lock expires in 5 minutes

def process_pipeline(request):
    pipeline = request.GET.get('pipeline')
    hexdigest = md5(pipeline).hexdigest()
    lock_id = 'lock-{0}'.format(hexdigest)
    # cache.add fails if the key already exists
    acquire_lock = lambda: memcache.add(lock_id, None, LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: memcache.delete(lock_id)
    if acquire_lock():
        args = [registry[p].s() for p in pipeline.split('|')]
        task = chain(*args).apply_async()
        memcache.set(lock_id, task.id)
        return redirect('celery-task_status', task_id=task.id)
    else:
        task_id = memcache.get(lock_id)
        return redirect('celery-task_status', task_id=task_id)
urls.py
from django.conf.urls import patterns, url

urlpatterns = patterns('aafilters.views',
    url(r'^process_pipeline/$', 'process_pipeline', name="process_pipeline"),
)

I have never used it, but I think you should take a look at Celery Canvas. It seems to be what you want.
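Building on that, one way to release the lock when the chain finishes is to append a small cleanup task as the last link of the chain and also attach it as an error callback, so the lock is removed whether the pipeline succeeds or fails. A rough, untested sketch (release_pipeline_lock is a hypothetical task added just for illustration):
# tasks.py
from django.core.cache import cache as memcache

@shared_task
def release_pipeline_lock(lock_id, *args, **kwargs):
    # Runs as the final link of the chain (and as the error callback),
    # so the lock disappears as soon as the pipeline is done.
    memcache.delete(lock_id)

# views.py, inside process_pipeline()
if acquire_lock():
    args = [registry[p].s() for p in pipeline.split('|')]
    cleanup = release_pipeline_lock.si(lock_id)
    task = (chain(*args) | cleanup).apply_async(link_error=cleanup)
    memcache.set(lock_id, task.id)
    return redirect('celery-task_status', task_id=task.id)
Since the cleanup signature is immutable (si), it ignores the result of the previous task and only receives the lock id.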

Related

Irregular behaviour of celery

I am very new to Celery and I am trying to use it to schedule a function, but it doesn't seem to be working properly.
Here is my settings.py (along with the default settings given by Django):
CELERY_BROKER_URL = 'amqp://guest:guest@localhost'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_RESULT_BACKEND = 'db+sqlite:///results.sqlite'
CELERY_TASK_SERIALIZER = 'json'
celery.py:
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from celery.schedules import crontab
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mera_project.settings')
app = Celery('mera_project')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()
__init__.py:
from __future__ import absolute_import, unicode_literals
from .celery import app as celery_app
__all__ = ['celery_app']
tasks_notes/tasks.py:(tasks_notes is my app name)
from celery.decorators import periodic_task
from celery.task.schedules import crontab
from tasks_notes.models import BudgetInfo

@periodic_task(run_every=(crontab(minute='*/15')))
def monthly_starting_value():
    print("hi")
    return 0
views.py:
from .tasks import monthly_starting_value

def app_view(request):
    abcd = monthly_starting_value.delay()
    print("new" + str(abcd))
I expected to see the value zero and "hi" in my terminal, but instead I got what looks like a random identifier, e.g. new 42bf83ef-850f-4b34-af78-da696d2ee0f2, and it changes every 15 minutes.
In the terminal tab where celery beat is running, I am getting something like:
WARNING/ForkPoolWorker-9] hi
Task tasks_notes.tasks.monthly_starting_value[42bf83ef-850f-4b34-af78-da696d2ee0f2] succeeded in 0.0009442089994990965s: 0
every 15 minutes.
I have even tried app.conf.beat_schedule in celery.py, and also tried running it via the admin, but it's not working as expected.
Where am I going wrong?
Any help is highly appreciated.
It is definitely not irregular - it behaves exactly as it should.
If you wanted to grab the result of a task, then you should have something like:
abcd = monthly_starting_value.delay().get()
delay() returns an instance of the AsyncResult class.
Finally, do not call print() inside a task. Use the Celery logger.
Example:
import os
from celery.utils.log import get_task_logger
from worker import app

logger = get_task_logger(__name__)

@app.task()
def add(x, y):
    result = x + y
    logger.info(f'Add: {x} + {y} = {result}')
    return result

django celery beat arguments with imported class functions from another library

I'm trying to get a function working in my Django project with celery beat; it imports a class-based function from a wrapper library. I've read that Celery doesn't work with classes too easily. My function login_mb doesn't take an argument, but when I register and call this task I get the error: Couldn't apply scheduled task login_mb: login_mb() takes 0 positional arguments but 1 was given
Is this because of self in the imported wrapper class?
What could I do to get this to work with celerybeat?
settings.py
CELERY_BEAT_SCHEDULE = {
    'login_mb': {
        'task': 'backend.tasks.login_mb',
        'schedule': timedelta(minutes=30),
    },
}
tasks.py
from matchbook.apiclient import APIClient
import logging
from celery import shared_task

log = logging.getLogger(__name__)

@shared_task(bind=True)
def login_mb():
    mb = APIClient('abc', '123')
    mb.login()
    mb.keep_alive()
apiclient.py (wrapper library)
from matchbook.baseclient import BaseClient
from matchbook import endpoints

class APIClient(BaseClient):
    def __init__(self, username, password=None):
        super(APIClient, self).__init__(username, password)
        self.login = endpoints.Login(self)
        self.keep_alive = endpoints.KeepAlive(self)
        self.logout = endpoints.Logout(self)
        self.betting = endpoints.Betting(self)
        self.account = endpoints.Account(self)
        self.market_data = endpoints.MarketData(self)
        self.reference_data = endpoints.ReferenceData(self)
        self.reporting = endpoints.Reporting(self)

    def __repr__(self):
        return '<APIClient [%s]>' % self.username

    def __str__(self):
        return 'APIClient'
The error is not related to your wrapper library; there is nothing wrong with your task itself.
The problem arises because you've defined your task with bind=True. When you do so, Celery automatically injects a parameter into the method containing information about the current task. So you can either remove bind=True, or add a parameter to your task method like so:
@shared_task(bind=True)
def login_mb(self):
    mb = APIClient('abc', '123')
    mb.login()
    mb.keep_alive()
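Or, as the first option mentioned above, simply drop bind=True and keep the task signature unchanged:
@shared_task
def login_mb():
    mb = APIClient('abc', '123')
    mb.login()
    mb.keep_alive()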

Python Redis Queue (rq) - how to avoid preloading ML model for each job?

I want to queue my ML predictions using rq. Example code (pseudo-ish):
predict.py:
import tensorflow as tf

def predict_stuff(foo):
    model = tf.load_model()
    result = model.predict(foo)
    return result
app.py:
from rq import Queue
from redis import Redis
from predict import predict_stuff

q = Queue(connection=Redis())
for foo in baz:
    job = q.enqueue(predict_stuff, foo)
worker.py:
import sys
from rq import Connection, Worker

# Preload libraries
import tensorflow as tf

with Connection():
    qs = sys.argv[1:] or ['default']
    w = Worker(qs)
    w.work()
I've read the rq docs explaining that you can preload libraries to avoid importing them every time a job is run (which is why the example code imports tensorflow in the worker). However, I also want to move the model loading out of predict_stuff, to avoid loading the model every time the worker runs a job. How can I go about that?
I'm not sure if this can help, but following the example here:
https://github.com/rq/rq/issues/720
Instead of sharing a connection pool, you can share the model.
pseudo code:
import tensorflow as tf
from rq import Worker as _Worker
from rq.local import LocalStack

_model_stack = LocalStack()

def get_model():
    """Get Model."""
    m = _model_stack.top
    try:
        assert m
    except AssertionError:
        raise RuntimeError('Run outside of worker context')
    return m

class Worker(_Worker):
    """Worker Class."""
    def work(self, burst=False, logging_level='WARN'):
        """Work."""
        _model_stack.push(tf.load_model())
        return super().work(burst, logging_level)

def predict_stuff_job(foo):
    model = get_model()
    result = model.predict(foo)
    return result
I use something similar to this for a "global" file reader I wrote. Load up the instance into the LocalStack and have the workers read off the stack.
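For completeness, a sketch of the matching worker.py: assuming the subclass above lives in a module named custom_worker.py (a made-up name), you instantiate it instead of rq's stock Worker:
import sys
from rq import Connection
from custom_worker import Worker  # the subclass that pushes the model onto the LocalStack

with Connection():
    qs = sys.argv[1:] or ['default']
    w = Worker(qs)
    w.work()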
In the end I couldn't figure out how to do it with python-rq, so I moved to Celery, where I did it like this:
app.py
from tasks import predict_stuff

for foo in baz:
    task = predict_stuff.delay(foo)
tasks.py
import tensorflow as tf
from celery import Celery
from celery.signals import worker_process_init

cel_app = Celery('tasks')
model = None

@worker_process_init.connect()
def on_worker_init(**_):
    global model
    model = tf.load_model()

@cel_app.task(name='predict_stuff')
def predict_stuff(foo):
    result = model.predict(foo)
    return result

Celery tasks through view (Django) returned as pending, but okay via Terminal

This issue has been discussed before, but looking over numerous posts I have so far been unable to find a solution. I'm new to Celery, so my learning curve is still fairly steep. Below are my current scripts:
myapp.__init__.py
from __future__ import absolute_import, unicode_literals
from .celery_main import app as celery_app # Ensures app is always imported when Django starts so that shared_task will use this app.
__all__ = ['celery_app']
myapp.celery_main.py
from __future__ import absolute_import
from celery import Celery
from django.apps import apps
# Initialise the app
app = Celery()
app.config_from_object('myapp.celeryconfig') # WORKS WHEN CALLED THROUGH VIEW/DJANGO: Tell Celery instance to use celeryconfig module
#app.config_from_object('celeryconfig') # WORKS WHEN CALLED THROUGH TERMINAL
# Load task modules from all registered Django app configs.
app.autodiscover_tasks(lambda: [n.name for n in apps.get_app_configs()])
myapp.celeryconfig.py
from __future__ import absolute_import, unicode_literals
from datetime import timedelta
## List of modules to import when celery starts.
CELERY_IMPORTS = ('celery_tasks',)
## Message Broker (RabbitMQ) settings.
BROKER_URL = 'amqp://'
BROKER_PORT = 5672
## Result store settings.
CELERY_RESULT_BACKEND = 'rpc://'
## Misc
#CELERY_IGNORE_RESULT = False
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT=['json']
CELERY_TIMEZONE = 'Europe/Berlin'
CELERY_ENABLE_UTC = True
CELERYBEAT_SCHEDULE = {
    'doctor-every-10-seconds': {
        'task': 'celery_tasks.fav_doctor',
        'schedule': timedelta(seconds=3),
    },
}
myapp.celery_tasks.py
from __future__ import absolute_import
from celery.task import task

suf = lambda n: "%d%s" % (n, {1: "st", 2: "nd", 3: "rd"}.get(n if n < 20 else n % 10, "th"))

@task
def fav_doctor():
    # Stuff happens here
    pass

@task
def reverse(string):
    return string[::-1]

@task
def send_email(user_id):
    # Stuff happens here
    pass

@task
def add(x, y):
    return x + y
anotherapp.settings.py
INSTALLED_APPS = [
    ...
    'kombu.transport.django',
]
myapp.views.admin_scripts.py
from celery.result import AsyncResult
from myapp.celery_tasks import fav_doctor, reverse, send_email, add
from myapp.celery_main import app

@login_required
def admin_script_dashboard(request):
    if request.method == 'POST':
        form = Admin_Script(request.POST)
        if form.is_valid():
            # Results
            async_result = add.delay(2, 5)
            task_id = async_result.task_id
            res = AsyncResult(async_result)
            res_1 = add.AsyncResult(async_result)
            res_2 = add.AsyncResult(async_result.id)
            print("async_result: {0}\ntask_id: {1}\nres: {2}\nres_1: {3}\nres_2: {4}".format(async_result, task_id, res, res_1, res_2))
            # Backend: Make sure the client is configured with the right backend
            print("Backend check: {0}".format(async_result.backend))
            # States/statuses
            task_state = res.state
            A = async_result.status
            B = res.status
            print("task_state: {0}\nA: {1}\nB: {2}".format(task_state, A, B))
The results when triggering the Celery task through my Django application (related to the print statements in myapp.views.admin_scripts.py):
async_result: 00d7ec84-ebdb-4968-9ea6-f20ca2a793b7
task_id: 00d7ec84-ebdb-4968-9ea6-f20ca2a793b7
res: 00d7ec84-ebdb-4968-9ea6-f20ca2a793b7
res_1: 00d7ec84-ebdb-4968-9ea6-f20ca2a793b7
res_2: 00d7ec84-ebdb-4968-9ea6-f20ca2a793b7
Backend check: <celery.backends.rpc.RPCBackend object at 0x106e308d0>
task_state: PENDING
A: PENDING
B: PENDING
Output triggered in the worker's terminal:
[2018-07-15 21:41:47,015: ERROR/MainProcess] Received unregistered task of type 'MyApp.celery_tasks.add'.
The message has been ignored and discarded.
Did you remember to import the module containing this task?
Or maybe you are using relative imports?
Please see <link> for more information.
The full contents of the message body was:
{'task': 'MyApp.celery_tasks.add', 'id': 'b21ffa43-d1f1-4767-9ab8-e58afec3ea0f', 'args': [2, 5], 'kwargs': {}, 'retries': 0, 'eta': None, 'expires': None, 'utc': True, 'callbacks': None, 'errbacks': None, 'timelimit': [None, None], 'taskset': None, 'chord': None} (266b)
Traceback (most recent call last):
File "/Users/My_MBP/anaconda3/lib/python3.6/site-packages/celery/worker/consumer.py", line 465, in on_task_received
strategies[type_](message, body,
KeyError: 'MyApp.celery_tasks.add'
I have several questions:
1. I can trigger the expected results by using commands in Terminal:
celery -A celery_tasks worker -l info
Then in the Python shell:
from celery_tasks import *
add.delay(2,3)
Which succeeds:
[2018-07-13 10:12:14,943: INFO/MainProcess] Received task: celery_tasks.add[c100ad91-2f94-40b1-bb0e-9bc2990ff3bc]
[2018-07-13 10:12:14,961: INFO/MainProcess] Task celery_tasks.add[c100ad91-2f94-40b1-bb0e-9bc2990ff3bc] succeeded in 0.017578680999577045s: 54
So executing the tasks in the terminal works, but not in my views.py in Django. Why not?
2. Perhaps related to 1.: I have to, annoyingly, change app.config_from_object in myapp.celery_main.py depending on whether I want to test via Django or via the terminal. You can see that I set the celeryconfig module either with the myapp prefix or without it; otherwise an error is thrown. I suspect some kind of import loop is causing an issue here (though I could be wrong), but I don't know why or where. How can I overcome this?
3. In my settings.py file (not celeryconfig.py) I have configured 'kombu.transport.django' in INSTALLED_APPS. Is this necessary? I'm using celery 3.1.26.post2 (Cipater).
4. In all my files I have at the top:
from __future__ import absolute_import, unicode_literals
What exactly is this for, and is it required for 3.1.26?
5. I read here that you need to ensure the client is configured with the right backend, but I'm not sure exactly what this means. My printout (as per myapp.views.admin_scripts.py) is:
Backend check: <celery.backends.rpc.RPCBackend object at 0x106e308d0>
If there are any abnormalities in my code you recognise, please feel free to let me know.
I'm still trying to figure out the answer to my question 2; meanwhile I've figured out how to retrieve the required results: I have async_result = add.delay(2, 5), but after this I need to call async_result.get(), followed by task_output = async_result.result. The result status/state (async_result.state or async_result.status) is then set to SUCCESS.
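In code, that workaround looks roughly like this (note that get() blocks the view until the worker has finished, so it only makes sense for quick tasks):
async_result = add.delay(2, 5)
async_result.get()                  # block until the worker returns
task_output = async_result.result   # 7
task_state = async_result.state     # 'SUCCESS'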
1. Celery tasks should have proper names. When running from Django, the task name is MyApp.celery_tasks.add, which is why the Celery worker is not able to run it. But from the terminal, where you imported it using from celery_tasks import *, the task name is celery_tasks.add, which is why it works correctly.
2. You can change the config based on an environment variable (see the sketch after this answer).
3. Adding kombu.transport.django is not necessary.
4. This is related to Python 2/3. See the docs for more info.
5. If you want task results after the task is completed, they have to be stored somewhere, so a result backend is needed for this. If you don't need to retrieve results, you don't need it.
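For point 2, a minimal sketch of switching the config module through an environment variable (the variable name MYAPP_CELERY_CONFIG is made up for this example):
# myapp/celery_main.py
import os
from celery import Celery
from django.apps import apps

app = Celery()

# Default to the package-qualified module used when running under Django;
# export MYAPP_CELERY_CONFIG=celeryconfig before starting the worker from
# the terminal to use the bare module name instead.
app.config_from_object(os.environ.get('MYAPP_CELERY_CONFIG', 'myapp.celeryconfig'))

app.autodiscover_tasks(lambda: [n.name for n in apps.get_app_configs()])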

How to test celery periodic_task in Django?

I have a simple periodic task:
from celery.decorators import periodic_task
from celery.task.schedules import crontab
from .models import Subscription

@periodic_task(run_every=crontab(minute=0, hour=0))
def deactivate_subscriptions():
    for subscription in Subscription.objects.filter(is_expired=True):
        print(subscription)
        subscription.is_active = False
        subscription.can_activate = False
        subscription.save()
And I want to cover it with tests.
I found information about how to test simple tasks, like @shared_task, but nowhere can I find an example of testing @periodic_task.
When a periodic task is defined with a decorator, you can access its crontab configuration the following way:
tasks.py
@periodic_task(
    run_every=(crontab(minute="*/1")),
)
def my_task():
    pass
some file where you need to access it
from .tasks import my_task
crontab = my_task.run_every
hours_when_it_will_run = crontab.hour
minutes_when_it_will_run = crontab.minute
day_of_the_week_when_it_will_run = crontab.day_of_week
day_of_the_month_when_it_will_run = crontab.day_of_month
month_of_year_when_it_will_run = crontab.month_of_year
This way you can access when the task will be executed, and in your test you can check whether the expected time is there.
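For example, a test for the task from the question could assert the configured run time without executing the task (a sketch; crontab(minute=0, hour=0) expands minute and hour into sets of integers):
from .tasks import deactivate_subscriptions

def test_deactivate_subscriptions_schedule():
    schedule = deactivate_subscriptions.run_every
    assert 0 in schedule.minute  # runs at minute 0...
    assert 0 in schedule.hour    # ...of hour 0, i.e. midnight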
Use the function apply(). Documentation for apply().
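A minimal sketch of that approach, assuming is_expired, is_active and can_activate are plain model fields you can set directly in a test; apply() runs the task body synchronously in the test process, so no broker or worker is needed:
from django.test import TestCase
from .models import Subscription
from .tasks import deactivate_subscriptions

class DeactivateSubscriptionsTest(TestCase):
    def test_expired_subscriptions_are_deactivated(self):
        sub = Subscription.objects.create(is_expired=True, is_active=True, can_activate=True)
        deactivate_subscriptions.apply()  # runs synchronously and returns an EagerResult
        sub.refresh_from_db()
        self.assertFalse(sub.is_active)
        self.assertFalse(sub.can_activate)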
