celery task_success with sender filter - python

I am trying to get the sender filter working e.g.
#celery.task
def run_timer(crawl_start_time):
return crawl_start_time
#task_success.connect
def run_timer_success_handler(sender, result, **kwargs):
print '##################################'
print 'in run_timer_success_handler'
The above works fine, but if I try to filter by sender, it never works:
#task_success.connect(sender='tasks.run_timer')
def run_timer_success_handler(sender, result, **kwargs):
print '##################################'
print 'in run_timer_success_handler'
I also tried:
#task_success.connect(sender='run_timer')
#task_success.connect(sender=run_timer)
#task_success.connect(sender=globals()['run_timer'])
None of them work.
How do I effectively use the sender filter to ensure that by callback is called on for the run_timer task and not the others.

It's better to filter sender inside function in this case now. Like:
#task_success.connect
def ...
if sender == '...':
...
Because current celery signals implementation has issue when task sender and worker are different python processes.
Because it converts your sender into the identifier and uses it for filtering, but celery sends task by string name. Here is the problem code (celery.utils.dispatch.signals):
def _make_id(target): # pragma: no cover
if hasattr(target, 'im_func'):
return (id(target.im_self), id(target.im_func))
return id(target)
And id('tasks.run_timer') is not the same as id('tasks.run_timer') of a worker process. If you want you may hack it and relace id by hash function

http://docs.celeryproject.org/en/latest/userguide/signals.html#task-success
...
Sender is the task object executed. (not the same as after_task_publish.sender)
...
So, you should
#task_success.connect(sender=run_timer)
def ...
It works for me. Good Luck.

Related

trigger a celery job via django singnals

I would like to use Django signals to trigger a celery task like so:
def delete_content(sender, instance, **kwargs):
task_id = uuid()
task = delete_libera_contents.apply_async(kwargs={"instance": instance}, task_id=task_id)
task.wait(timeout=300, interval=2)
But I'm always running into kombu.exceptions.EncodeError: Object of type MusicTracks is not JSON serializable
Now I'm not sure how to tread MusicTracks instance as it's a model class instance. How can I properly pass such instances to my task?
At my tasks.py I have the following:
#app.task(name="Delete Libera Contents", queue='high_priority_tasks')
def delete_libera_contents(instance, **kwargs):
libera_backend = instance.file.libera_backend
...
Never send instance in celery task, you only should send variables for example instanse primary key and then inside of the celery task via this pk find this instance and then do your logic
your code should be like this:
views.py
def delete_content(sender, **kwargs):
task_id = uuid()
task = delete_libera_contents.apply_async(kwargs={"instance_pk": sender.pk}, task_id=task_id)
task.wait(timeout=300, interval=2)
task.py
#app.task(name="Delete Libera Contents", queue='high_priority_tasks')
def delete_libera_contents(instance_pk, **kwargs):
instance = Instance.ojbects.get(pk = instance_pk)
libera_backend = instance.file.libera_backend
...
you can find this rule in celery documentation (can't find link), one of
reasons imagine situation:
you send your instance to celery tasks (it is delayed for any reason for 5 min)
then your project makes logic with this instance, before your task finished
then celery's task time come and it uses this instance old version, and this instance become corrupted
(this is the reason as I think it is, not from the documentation)
First off, sorry for making the question a bit confusing, especially for the people that have already written an answer.
In my case, the delete_content signal can be trigger from three different models, so it actually looks like this:
#receiver(pre_delete, sender=MusicTracks)
#receiver(pre_delete, sender=Movies)
#receiver(pre_delete, sender=TvShowEpisodes)
def delete_content(sender, instance, **kwargs):
delete_libera_contents.delay(instance_pk=instance.pk)
So every time one of these models triggers a delete action, this signal will also trigger a celery task to actually delete the stuff in the background (all stored on S3).
As I cannot and should not pass instances around directly as pointed out by #oruchkin, I pass the instance.pk to the celery task which I then have to find in the celery task as I don't know in the celery task what model has triggered the delete action:
#app.task(name="Delete Libera Contents", queue='high_priority_tasks')
def delete_libera_contents(instance_pk, **kwargs):
if Movies.objects.filter(pk=instance_pk).exists():
instance = Movies.objects.get(pk=instance_pk)
elif MusicTracks.objects.filter(pk=instance_pk).exists():
instance = MusicTracks.objects.get(pk=instance_pk)
elif TvShowEpisodes.objects.filter(pk=instance_pk).exists():
instance = TvShowEpisodes.objects.get(pk=instance_pk)
else:
raise logger.exception("Task: 'Delete Libera Contents', reports: No instance found (code: JFN4LK) - Warning")
libera_backend = instance.file.libera_backend
You might ask why do you not simply pass the sender from the signal to the celery task. I also tried this and again, as already pointed out, I cannot pass instances and I fail with:
kombu.exceptions.EncodeError: Object of type ModelBase is not JSON serializable
So it really seems I have to hard obtain the instance using the if-elif-else clauses at the celery task.

Celery AsyncResult of task_method

I'm trying to get the state of a task as follow :
__init__.py
celery = Celery(app.name,backend='amqp',broker=app.config['CELERY_BROKER_URL'])
celery.conf.update(app.config)
foo.py
class Foo(object):
def bar(self):
task = self._bar_async.apply_async()
return task.id
#celery.task(filter=task_method,bind=True)
def _bar_async(task,self):
for i in range(0,100):
task.update_state(state='PROGRESS',meta={'progress':i})
time.sleep(2)
taskstatus.py
def taskstatus(task_id):
task = celery.AsyncResult(id=task_id)
Is it the recommended way to use update_state with bind ?
Also when I try to get the state of the task using taskstatus, I always get NoneType for task. What is the problem ?
There are two issues in your code
Firstly, add an argument self to apply_async method
def bar(self):
task = self._bar_async.apply_async([self])
This change will fix the get NoneType for task issue. The reason is the task will be failed in worker, so you could not get the result.
Secondly, should use app.backend.get_result in taskstatus() to see the progress instead of AsyncResult since AsyncResult.get() will block until the task status become ready.
from apps import celery
app = celery.app
r = app.backend.get_result(task_id)
print r

If I send a python 'Signal' object from a function, what should the "sender" argument be?

If I send a Signal from a module function (a django view function as it happens), that is not inside a Class, it's not obvious (to me) what the sender should be - if anything? Is sender=None appropriate in this case?
Alternatively, the function is invoked by an HTTP request, which I currently pass in as a separate argument - should I pass that instead?
Option A:
from django.dispatch import Signal
my_signal = Signal(
providing_args=['my_arg', 'request']
)
# this is a view function
def do_something(request):
# ... do useful stuff
my_signal.send(
sender=None,
my_arg="Hello",
request=request
)
Option B:
from django.dispatch import Signal
my_signal = Signal(
providing_args=['my_arg']
)
# this is a view function
def do_something(request):
# ... do useful stuff
my_signal.send(
sender=request,
my_arg="Hello",
)
[UPDATE]
Option A has it. There's nothing useful that the receiver can do with the sender in this case (i.e. it's not an object), so set it to None.
The django.dispatch.Dispatcher source simply says it should be
"...[t]he sender of the signal. Either a specific object or None."
which then ties in with the receiver via connect(), for which the sender's significance is:
"The sender to which the receiver should respond. Must either be
of type Signal, or None to receive events from any sender"
which, I admit, isn't particularly clear, but in your case, I would say to use sender=None because there's nothing concrete to hook to, as the request is transient.
A function is an object in Python, so you can just set the sender to be a reference your function, like this:
def my_func():
my_signal.send(sender=my_func, my_arg="Hello")

Signal m2m_changed never triggered

class Lab(Model):
pass
class School(Model):
labs = ManyToManyField(Lab, related_name='schools')
def m2m_changed_labs(*args, **kwargs):
pass
m2m_changed.connect(m2m_changed_labs, sender=Lab.schools)
The m2m_changed signal is never triggered, therefore the m2m_changed_labs function is never called. I want the m2m_changed_labs function to be called each time a school is added to or removed from a lab.
Have you tried sender=School.labs.through?
and with your receiver method like this:
def m2m_changed_labs(sender, **kwargs): # notice sender instead of *args
print "signal received"
That's the sender used in the example from the docs.

How do I mock a django signal handler?

I have a signal_handler connected through a decorator, something like this very simple one:
#receiver(post_save, sender=User,
dispatch_uid='myfile.signal_handler_post_save_user')
def signal_handler_post_save_user(sender, *args, **kwargs):
# do stuff
What I want to do is to mock it with the mock library http://www.voidspace.org.uk/python/mock/ in a test, to check how many times django calls it. My code at the moment is something like:
def test_cache():
with mock.patch('myapp.myfile.signal_handler_post_save_user') as mocked_handler:
# do stuff that will call the post_save of User
self.assert_equal(mocked_handler.call_count, 1)
The problem here is that the original signal handler is called even if mocked, most likely because the #receiver decorator is storing a copy of the signal handler somewhere, so I'm mocking the wrong code.
So the question: how do I mock my signal handler to make my test work?
Note that if I change my signal handler to:
def _support_function(*args, **kwargs):
# do stuff
#receiver(post_save, sender=User,
dispatch_uid='myfile.signal_handler_post_save_user')
def signal_handler_post_save_user(sender, *args, **kwargs):
_support_function(*args, **kwargs)
and I mock _support_function instead, everything works as expected.
Possibly a better idea is to mock out the functionality inside the signal handler rather than the handler itself. Using the OP's code:
#receiver(post_save, sender=User, dispatch_uid='myfile.signal_handler_post_save_user')
def signal_handler_post_save_user(sender, *args, **kwargs):
do_stuff() # <-- mock this
def do_stuff():
... do stuff in here
Then mock do_stuff:
with mock.patch('myapp.myfile.do_stuff') as mocked_handler:
self.assert_equal(mocked_handler.call_count, 1)
So, I ended up with a kind-of solution: mocking a signal handler simply means to connect the mock itself to the signal, so this exactly is what I did:
def test_cache():
with mock.patch('myapp.myfile.signal_handler_post_save_user', autospec=True) as mocked_handler:
post_save.connect(mocked_handler, sender=User, dispatch_uid='test_cache_mocked_handler')
# do stuff that will call the post_save of User
self.assertEquals(mocked_handler.call_count, 1) # standard django
# self.assert_equal(mocked_handler.call_count, 1) # when using django-nose
Notice that autospec=True in mock.patch is required in order to make post_save.connect to correctly work on a MagicMock, otherwise django will raise some exceptions and the connection will fail.
You can mock a django signal by mocking the ModelSignal class at django.db.models.signals.py like this:
#patch("django.db.models.signals.ModelSignal.send")
def test_overwhelming(self, mocker_signal):
obj = Object()
That should do the trick. Note that this will mock ALL signals no matter which object you are using.
If by any chance you use the mocker library instead, it can be done like this:
from mocker import Mocker, ARGS, KWARGS
def test_overwhelming(self):
mocker = Mocker()
# mock the post save signal
msave = mocker.replace("django.db.models.signals")
msave.post_save.send(KWARGS)
mocker.count(0, None)
with mocker:
obj = Object()
It's more lines but it works pretty well too :)
take a look at mock_django . It has support for signals
https://github.com/dcramer/mock-django/blob/master/tests/mock_django/signals/tests.py
In django 1.9 you can mock all receivers with something like this
# replace actual receivers with mocks
mocked_receivers = []
for i, receiver in enumerate(your_signal.receivers):
mock_receiver = Mock()
your_signal.receivers[i] = (receiver[0], mock_receiver)
mocked_receivers.append(mock_receiver)
... # whatever your test does
# ensure that mocked receivers have been called as expected
for mocked_receiver in mocked_receivers:
assert mocked_receiver.call_count == 1
mocked_receiver.assert_called_with(*your_args, sender="your_sender", signal=your_signal, **your_kwargs)
This replaces all receivers with mocks, eg ones you've registered, ones pluggable apps have registered and ones that django itself has registered. Don't be suprised if you use this on post_save and things start breaking.
You may want to inspect the receiver to determine if you actually want to mock it.
There is a way to mock django signals with a small class.
You should keep in mind that this would only mock the function as a django signal handler and not the original function; for example, if a m2mchange trigers a call to a function that calls your handler directly, mock.call_count would not be incremented. You would need a separate mock to keep track of those calls.
Here is the class in question:
class LocalDjangoSignalsMock():
def __init__(self, to_mock):
"""
Replaces registered django signals with MagicMocks
:param to_mock: list of signal handlers to mock
"""
self.mocks = {handler:MagicMock() for handler in to_mock}
self.reverse_mocks = {magicmock:mocked
for mocked,magicmock in self.mocks.items()}
django_signals = [signals.post_save, signals.m2m_changed]
self.registered_receivers = [signal.receivers
for signal in django_signals]
def _apply_mocks(self):
for receivers in self.registered_receivers:
for receiver_index in xrange(len(receivers)):
handler = receivers[receiver_index]
handler_function = handler[1]()
if handler_function in self.mocks:
receivers[receiver_index] = (
handler[0], self.mocks[handler_function])
def _reverse_mocks(self):
for receivers in self.registered_receivers:
for receiver_index in xrange(len(receivers)):
handler = receivers[receiver_index]
handler_function = handler[1]
if not isinstance(handler_function, MagicMock):
continue
receivers[receiver_index] = (
handler[0], weakref.ref(self.reverse_mocks[handler_function]))
def __enter__(self):
self._apply_mocks()
return self.mocks
def __exit__(self, *args):
self._reverse_mocks()
Example usage
to_mock = [my_handler]
with LocalDjangoSignalsMock(to_mock) as mocks:
my_trigger()
for mocked in to_mock:
assert(mocks[mocked].call_count)
# 'function {0} was called {1}'.format(
# mocked, mocked.call_count)
As you mentioned,
mock.patch('myapp.myfile._support_function') is correct but mock.patch('myapp.myfile.signal_handler_post_save_user') is wrong.
I think the reason is:
When init you test, some file import the signal's realization python file, then #receive decorator create a new signal connection.
In the test, mock.patch('myapp.myfile._support_function') will create another signal connection, so the original signal handler is called even if mocked.
Try to disconnect the signal connection before mock.patch('myapp.myfile._support_function'), like
post_save.disconnect(signal_handler_post_save_user)
with mock.patch("review.signals. signal_handler_post_save_user", autospec=True) as handler:
#do stuff

Categories