Debounce Celery tasks? - python

Is there a standard method for debouncing Celery tasks?
For example, so that a task can be "started" multiple times, but will only be run once after some delay:
def debounce_task(task):
    if task_is_queued(task):
        return
    task.apply_async(countdown=30)

Here's how we do it with Redis counters. All of this can probably be generalized in a decorator but we only use it for a specific task (webhooks)
Your public-facing task is what you call from other functions. It'll need to increment a key in Redis. The key is formed by the arguments of your function, whatever they may be (this ensures the counter is unique amongst individual tasks)
@task
def your_public_task(*args, **kwargs):
    cache_key = make_public_task_cache_key(*args, **kwargs)
    get_redis().incr(cache_key)
    _your_task.apply_async(args=args, kwargs=kwargs, countdown=settings.QUEUE_DELAY)
Note the cache key functions are shared (you want the same cache key in each function), and the countdown setting.
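The answer never shows the key helper itself; here is a minimal sketch of what make_public_task_cache_key might look like (the hashing scheme and key prefix are assumptions, not part of the original answer):

import hashlib
import json

def make_public_task_cache_key(*args, **kwargs):
    # Deterministically serialise the task arguments so every distinct
    # argument combination maps to its own Redis counter (assumed scheme).
    payload = json.dumps([args, kwargs], sort_keys=True, default=str)
    digest = hashlib.md5(payload.encode('utf-8')).hexdigest()
    return 'debounce:your_public_task:' + digest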
Then, the actual task executing the code does the following:
@task
def _your_task(*args, **kwargs):
    cache_key = make_public_task_cache_key(*args, **kwargs)
    counter = get_redis().getset(cache_key, 0)
    # redis makes the zero a string.
    if counter == '0':
        return
    # ... execute your actual task code.
This lets you hit your_public_task.delay(..) as many times as you want, within your QUEUE_DELAY, and it'll only fire off once.

bartek has the right idea: use Redis counters, which are atomic (and should be easily available if your broker is Redis). Although his solution is throttling, not debouncing. The difference is minor though (getset vs decr).
Queue up the task:
conn = get_redis()
conn.incr(key)
task.apply_async(args=args, kwargs=kwargs, countdown=countdown)
Then in the task:
conn = get_redis()
counter = conn.decr(key)
if counter > 0:
    # task is still queued
    return
# continue on to rest of task
It's hard to make it a decorator since you need to decorate both the task and the call site of the task. So you would need a decorator before the Celery @task decorator and one after it.
For now I've just made some functions that help me call the task, and one that checks at the start of the task.
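For example, the helper pair might look something like this (a sketch only; the names and signatures are made up, and get_redis() is the same helper as in the answer above):

def queue_debounced(task, key, args=None, kwargs=None, countdown=30):
    # Call this instead of task.delay(): bump the counter, then schedule.
    conn = get_redis()
    conn.incr(key)
    task.apply_async(args=args, kwargs=kwargs, countdown=countdown)

def should_run(key):
    # Call this at the top of the task body; only the last queued call
    # (the one that drives the counter to zero) actually does the work.
    conn = get_redis()
    return conn.decr(key) <= 0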

Here's how you can do it with Mongo.
NOTE: I had to make the design a little more forgiving, as Celery tasks aren't guaranteed to execute the exact moment the eta is met or the countdown runs out.
Also, Mongo's expiring indexes are only cleaned up every minute or so, so you can't base the design around records being deleted the moment the eta is up.
Anyhow, the flow is something like this:
Client code calls my_task.
preflight increments a call counter, and returns it as flight_id
_my_task is set to be executed after TTL seconds.
When _my_task runs, it checks if its flight_id is still current. If it's not, it aborts.
... sometime later... mongo cleans up stale entries in the collection, via an expiring index.
@celery.task(track_started=False, ignore_result=True)
def my_task(my_arg):
    flight_id = preflight(inflight_collection, 'my_task', HASH(my_arg), TTL)
    _my_task.apply_async((my_arg,), {'flight_id': flight_id}, countdown=TTL)

@celery.task(track_started=False, ignore_result=True)
def _my_task(my_arg, flight_id=None):
    if not check_for_takeoff(inflight_collection, 'my_task', HASH(my_arg), flight_id):
        return
    # ... actual work ... #
Library code:
TTL = 5 * 60      # Run tasks after 5 minutes
EXPIRY = 6 * TTL  # This needs to be much larger than TTL.

# We need to store a list of task-executions currently pending
inflight_collection = db['celery_In_Flight']
inflight_collection.create_index([('fn', pymongo.ASCENDING,),
                                  ('key', pymongo.ASCENDING,)])
inflight_collection.create_index('eta', expireAfterSeconds=EXPIRY)

def preflight(collection, fn, key, ttl):
    eta = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
    result = collection.find_one_and_update({
        'fn': fn,
        'key': key,
    }, {
        '$set': {
            'eta': eta
        },
        '$inc': {
            'flightId': 1
        }
    }, upsert=True, return_document=pymongo.ReturnDocument.AFTER)
    print 'Preflight[{}][{}] = {}'.format(fn, key, result['flightId'])
    return result['flightId']

def check_for_takeoff(collection, fn, key, flight_id):
    result = collection.find_one({
        'fn': fn,
        'key': key
    })
    ready = result is None or result['flightId'] == flight_id
    print 'Check[{}][{}] = {}, {}'.format(fn, key, result['flightId'], ready)
    return ready

Here's the solution I came up with: https://gist.github.com/wolever/3cf2305613052f3810a271e09d42e35c
And copied here, for posterity:
import time
import redis
def get_redis_connection():
return redis.Redis()
class TaskDebouncer(object):
""" A simple Celery task debouncer.
Usage::
def debounce_process_corpus(corpus):
# Only one task with ``key`` will be allowed to execute at a
# time. For example, if the task was resizing an image, the key
# might be the image's URL.
key = "process_corpus:%s" %(corpus.id, )
TaskDebouncer.delay(
key, process_corpus, args=[corpus.id], countdown=0,
)
@task(bind=True)
def process_corpus(self, corpus_id, debounce_key=None):
debounce = TaskDebouncer(debounce_key, keepalive=30)
corpus = Corpus.load(corpus_id)
try:
for item in corpus:
item.process()
# If ``debounce.keepalive()`` isn't called every
# ``keepalive`` interval (the ``keepalive=30`` in the
# call to ``TaskDebouncer(...)``) the task will be
# considered dead and another one will be allowed to
# start.
debounce.keepalive()
finally:
# ``finalize()`` will mark the task as complete and allow
# subsequent tasks to execute. If it returns true, there
# was another attempt to start a task with the same key
# while this task was running. Depending on your business
# logic, this might indicate that the task should be
# retried.
needs_retry = debounce.finalize()
if needs_retry:
raise self.retry(max_retries=None)
"""
def __init__(self, key, keepalive=60):
if key:
self.key = key.partition("!")[0]
self.run_key = key
else:
self.key = None
self.run_key = None
self._keepalive = keepalive
self.cxn = get_redis_connection()
self.init()
self.keepalive()
@classmethod
def delay(cls, key, task, args=None, kwargs=None, countdown=30):
cxn = get_redis_connection()
now = int(time.time())
first = cxn.set(key, now, nx=True, ex=countdown + 10)
if not first:
now = cxn.get(key)
run_key = "%s!%s" %(key, now)
if first:
kwargs = dict(kwargs or {})
kwargs["debounce_key"] = run_key
task.apply_async(args=args, kwargs=kwargs, countdown=countdown)
return (first, run_key)
def init(self):
self.initial = self.key and self.cxn.get(self.key)
def keepalive(self, expire=None):
if self.key is None:
return
expire = expire if expire is not None else self._keepalive
self.cxn.expire(self.key, expire)
def is_out_of_date(self):
if self.key is None:
return False
return self.cxn.get(self.key) != self.initial
def finalize(self):
if self.key is None:
return False
with self.cxn.pipeline() as pipe:
while True:
try:
pipe.watch(self.key)
if pipe.get(self.key) != self.initial:
return True
pipe.multi()
pipe.delete(self.key)
pipe.execute()
break
except redis.WatchError:
continue
return False

Here's a more filled out solution based off https://stackoverflow.com/a/28157498/4391298 but turned into a decorator and reaching into the Kombu connection pool to reuse your Redis counter.
import logging
from functools import wraps
# Not strictly required
from django.core.exceptions import ImproperlyConfigured
from django.core.cache.utils import make_template_fragment_key
from celery.utils import gen_task_name
LOGGER = logging.getLogger(__name__)
def debounced_task(**options):
"""Debounced task decorator."""
try:
countdown = options.pop('countdown')
except KeyError:
raise ImproperlyConfigured("Debounced tasks require a countdown")
def factory(func):
"""Decorator factory."""
try:
name = options.pop('name')
except KeyError:
name = gen_task_name(app, func.__name__, func.__module__)
@wraps(func)
def inner(*args, **kwargs):
"""Decorated function."""
key = make_template_fragment_key(name, [args, kwargs])
with app.pool.acquire_channel(block=True) as (_, channel):
depth = channel.client.decr(key)
if depth <= 0:
try:
func(*args, **kwargs)
except:
# The task failed (or is going to retry), set the
# count back to where it was
channel.client.set(key, depth)
raise
else:
LOGGER.debug("%s calls pending to %s",
depth, name)
task = app._task_from_fun(inner, **options, name=name + '__debounced')
@wraps(func)
def debouncer(*args, **kwargs):
"""
Debouncer that calls the real task.
This is the task we are scheduling."""
key = make_template_fragment_key(name, [args, kwargs])
with app.pool.acquire_channel(block=True) as (_, channel):
# Mark this key to expire after the countdown, in case our
# task never runs or runs too many times, we want to clean
# up our Redis to eventually resolve the issue.
channel.client.expire(key, countdown + 10)
depth = channel.client.incr(key)
LOGGER.debug("Requesting %s in %i seconds (depth=%s)",
name, countdown, depth)
task.si(*args, **kwargs).apply_async(countdown=countdown)
return app._task_from_fun(debouncer, **options, name=name)
return factory


gevent/threading causes some deadlock

I have this code, whose purpose is to dedupe requests.
def dedup_requests(f):
pending = {}
@functools.wraps(f)
def wrapped(*args, **kwargs):
key = _make_call_key(args, kwargs)
if key not in pending:
pending[key] = gevent.spawn(f, *args, **kwargs)
result = pending[key].get()
if key in pending:
del pending[key]
return result
return wrapped
I suspect it is causing a deadlock somehow (this happens once in awhile, and I can't reproduce it).
It happens both when using threading and gevent.
Is the recurring use of get allowed?
Can this code even produce a deadlock when threading is not involved?
Note that it runs under other gevent tasks, so spawned tasks might spawn additional tasks, in case that's an issue.
Though I still don't exactly understand the source of the deadlock (my best guess is that get doesn't really work as expected when called more than once), this seems to work:
from gevent import lock
def queue_identical_calls(f, max_size=100):
pending = {}
@functools.wraps(f)
def wrapped(*args, **kwargs):
key = _make_call_key(args, kwargs)
if key not in pending:
pending[key] = lock.BoundedSemaphore(1)
lock_for_current_call = pending[key]
lock_for_current_call.acquire()
result = f(*args, **kwargs)
lock_for_current_call.release()
if len(pending) > max_size:
pending.clear()
return result
return wrapped
Your problem is that your code is not async. You need to have the function itself handle the key update and then in a while loop test for your values. This is an example of async working. You can prove it by noticing that the last element sometimes shows up first in the list.
import gevent
import random
pending = {}
def dedup_requests(key, *args, **kwargs):
global pending
if key not in pending:
gevent.spawn(ftest, key, *args, **kwargs)
def ftest(key, *args, **kwargs):
global pending
z = random.randint(1,7)
gevent.sleep(z)
pending[key] = z
return z
l = ['test','test2','test3']
for i in l:
dedup_requests(i)
while 1:
if set(pending.keys()) != set(l):
print(pending)
else:
print(pending)
break
gevent.sleep(1)

Multi-threading recursive functions in Python 3

Background:
I am working on a Telecoms Network discovery script that is run by crontab on Linux. It uses a seed file of initial network nodes; it connects to them, gets all their neighbors, then connects to those neighbors, and so on. Typical recursion.
To speed up the whole thing I was using multi-threading with a semaphore, so only a certain number of threads were running while a huge number of started threads sat waiting. At a certain point I hit the maximum thread limit of Linux, so the script was not able to start new threads.
Problem:
In pursuit of a design that would allow multi-threading of this recursion, it seemed to me to be a hybrid producer/consumer scenario in which multiple consumers are also producers.
A consumer takes an item from the queue, consumes it, and if there are any results, puts each result back into the queue.
To make it really nice I would like to create a design pattern that is usable for any type of recursive function, in other words with any args and kwargs.
What I expect from such a function is that I pass it any combination of variables (args, kwargs) that it needs, and I get back a list of arguments that I can pass to it again in further recursions.
Questions:
Is there any better way to get args and kwargs out of the function's return value than the one I used? I basically created a tuple (args, kwargs), i.e. (tuple(), dict()), that the function returns and that the worker splits back into args and kwargs afterwards. Ideally I would not need to create that tuple at all.
Would you have any other improvement tips on this design?
Thank you sincerely!
Current Code:
#!/usr/bin/env python3
from queue import Queue, Empty
from threading import Thread
from time import sleep
from random import choice, random
class RecursiveWorkerThread(Thread):
def __init__(self, name, pool):
Thread.__init__(self)
self.name = name
self.pool = pool
self.tasks = pool.tasks
self.POISON = pool.POISON
self.daemon = False
self.result = None
self.start()
def run(self):
print(f'WORKER {self.name} - is awake.')
while True:
if not self.tasks.empty():
# take task from queue
try:
func, f_args, f_kwargs = self.tasks.get(timeout=1)
# check for POISON
if func is self.POISON:
print(f'WORKER {self.name} - POISON found. Sending it back to queue. Dying...')
self.pool.add_task(self.POISON)
break
# try to perform the task on arguments and get result
try:
self.result = func(*f_args, **f_kwargs)
except Exception as e:
print(e)
# recursive part, add results to queue
print(f'WORKER {self.name} - FUNC: ({func.__name__}) IN: (args: {f_args}, kwargs: {f_kwargs}) OUT: ({self.result}).')
for n_args, n_kwargs in self.result:
self.pool.add_task(func, *n_args, **n_kwargs)
# mark one task done in queue
self.tasks.task_done()
except Empty:
pass
sleep(random())
class RecursiveThreadPool:
def __init__(self, num_threads):
self.tasks = Queue()
self.POISON = object()
print('\nTHREAD_POOL - initialized.\nTHREAD_POOL - waking up WORKERS.')
self.workers = [RecursiveWorkerThread(name=str(num), pool=self) for num in range(num_threads)]
def add_task(self, func, *args, **kwargs):
if func is not self.POISON:
print(f'THREAD_POOL - task received: [func: ({func.__name__}), args: ({args}), kwargs:({kwargs})]')
else:
print('THREAD_POOL - task received: POISON.')
self.tasks.put((func, args, kwargs))
def wait_for_completion(self):
print('\nTHREAD_POOL - waiting for all tasks to be completed.')
self.tasks.join()
print('\nTHREAD_POOL - all tasks have been completed.\nTHREAD_POOL - sending POISON to queue.')
self.add_task(self.POISON)
print('THREAD_POOL - waiting for WORKERS to die.')
for worker in self.workers:
worker.join()
print('\nTHREAD_POOL - all WORKERS are dead.\nTHREAD_POOL - FINISHED.')
# Test part
if __name__ == '__main__':
percentage = [True] * 2 + [False] * 8
# example function
def get_subnodes(node):
maximum_subnodes = 2
sleep(5 * random())
result_list = list()
for i in range(maximum_subnodes):
# apply chance on every possible subnode
if choice(percentage):
new_node = node + '.' + str(i)
# create single result
args = tuple()
kwargs = dict({'node': new_node})
# append it to the result list
result_list.append((args, kwargs))
return result_list
# 1) Init a Thread pool with the desired number of worker threads
THREAD_POOL = RecursiveThreadPool(10)
# 2) Put initial data into queue
initial_nodes = 10
for root_node in [str(i) for i in range(initial_nodes)]:
THREAD_POOL.add_task(get_subnodes, node=root_node)
# 3) Wait for completion
THREAD_POOL.wait_for_completion()

How to override an async NDB method and write your own tasklet

I am trying to grasp the async operations introduced with NDB; I would like to use @ndb.tasklet to make some of my work async.
The simple example would be string_id generation in the overridden get_or_insert_async
Is this a correct way to do things? What can be improved here?
@classmethod
@ndb.tasklet
def get_or_insert_async(cls, *args):
    id = cls.make_string_id(*args)
    model = yield super(MyModel, cls).get_or_insert_async(id)
    raise ndb.Return(model)
Another example would be doing stuff in a loop in fan-out kinda way. Is this correct?
@classmethod
@ndb.tasklet
def do_stuff(cls, some_collection):
    @ndb.tasklet
    def internal_tasklet(data):
        do_some_long_taking_stuff(data)
        id = make_stuff_needed_for_id(data)
        model = yield cls.get_or_insert_async(id)
        model.long_processing(data)
        yield model.put_async()
        raise ndb.Return(None)

    for data in some_collection:
        # will it parallelise internal_tasklet execution?
        yield internal_tasklet(data)
    raise ndb.Return(None)
EDIT:
As I understand the whole concept, the yields are here to provide Future objects, which are then collected in parallel (where possible) and executed asynchronously. Am I correct?
After Nick's hint (is it what you meant?):
@classmethod
@ndb.tasklet
def do_stuff(cls, some_collection):
    @ndb.tasklet
    def internal_tasklet(data):
        do_some_long_taking_stuff(data)
        id = make_stuff_needed_for_id(data)
        model = yield cls.get_or_insert_async(id)
        model.long_processing(data)
        raise ndb.Return(model)  # change here

    models = []
    for data in some_collection:
        # will it parallelise internal_tasklet execution?
        m = yield internal_tasklet(data)  # change here
        models.append(m)  # change here
    keys = yield ndb.put_multi_async(models)  # change here
    raise ndb.Return(keys)  # change here
EDIT:
New revised version…
@classmethod
@ndb.tasklet
def do_stuff(cls, some_collection):
    @ndb.tasklet
    def internal_tasklet(data):
        do_some_long_taking_stuff(data)
        id = make_stuff_needed_for_id(data)
        model = yield cls.get_or_insert_async(id)
        model.long_processing(data)
        raise ndb.Return(model)

    futures = []
    for data in some_collection:
        # tasklets won't run in parallel, but while one is waiting
        # on a yield (and the RPC underneath) the other will advance
        # its execution up to its next yield or return
        fut = internal_tasklet(data)  # change here
        futures.append(fut)  # change here

    Future.wait_all(futures)  # change here
    models = [fut.get_result() for fut in futures]
    keys = yield ndb.put_multi_async(models)  # change here
    raise ndb.Return(keys)  # change here
You don't need to use tasklets if all you want to do is call something async with different arguments - just return the wrapped function's return value, like this:
def get_or_insert_async(cls, *args):
    id = cls.make_string_id(*args)
    return super(MyModel, cls).get_or_insert_async(id)
I'd be cautious about this for several reasons, though: you're changing the meaning of a built-in function, which is usually a bad idea; you're changing the signature (positional arguments but no keyword arguments); and you're not passing extra arguments through to the original function. A pass-through sketch is shown below.
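For instance, a variant that forwards keyword arguments might look like this (a sketch, not the answerer's code; it still derives the string id from the positional arguments only):

@classmethod
def get_or_insert_async(cls, *args, **kwargs):
    id = cls.make_string_id(*args)
    # Forward any keyword arguments (e.g. context options) unchanged.
    return super(MyModel, cls).get_or_insert_async(id, **kwargs)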
For your second example, yielding things one at a time will force NDB to wait on their completion - 'yield' is synonymous with 'wait'. Instead, execute the tasklet function for each element in the collection, then wait on them all (by calling yield on the list) at the same time.
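In other words, something along these lines (a sketch that assumes the internal_tasklet from the question's second revision, i.e. one that returns the model):

@classmethod
@ndb.tasklet
def do_stuff(cls, some_collection):
    # Start every tasklet first; internal_tasklet is as in the question.
    futures = [internal_tasklet(data) for data in some_collection]
    # Yielding the whole list makes NDB wait on all the futures together.
    models = yield futures
    keys = yield ndb.put_multi_async(models)
    raise ndb.Return(keys)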

Python Equivalent of setInterval()?

Does Python have a function similar to JavaScript's setInterval()?
I would like to have:
def set_interval(func, interval):
...
That will call func every interval time units.
This might be the correct snippet you were looking for:
import threading

def set_interval(func, sec):
    def func_wrapper():
        set_interval(func, sec)
        func()
    t = threading.Timer(sec, func_wrapper)
    t.start()
    return t
This is a version where you can start and stop it.
It is not blocking.
There is also no drift, since the execution-time error does not accumulate (important for long runs with a very short interval, as with audio, for example).
import time, threading
StartTime=time.time()
def action() :
print('action ! -> time : {:.1f}s'.format(time.time()-StartTime))
class setInterval :
def __init__(self,interval,action) :
self.interval=interval
self.action=action
self.stopEvent=threading.Event()
thread=threading.Thread(target=self.__setInterval)
thread.start()
def __setInterval(self) :
nextTime=time.time()+self.interval
while not self.stopEvent.wait(nextTime-time.time()) :
nextTime+=self.interval
self.action()
def cancel(self) :
self.stopEvent.set()
# start action every 0.6s
inter=setInterval(0.6,action)
print('just after setInterval -> time : {:.1f}s'.format(time.time()-StartTime))
# will stop interval in 5s
t=threading.Timer(5,inter.cancel)
t.start()
Output is :
just after setInterval -> time : 0.0s
action ! -> time : 0.6s
action ! -> time : 1.2s
action ! -> time : 1.8s
action ! -> time : 2.4s
action ! -> time : 3.0s
action ! -> time : 3.6s
action ! -> time : 4.2s
action ! -> time : 4.8s
Just keep it nice and simple.
import threading
def setInterval(func, time):
    e = threading.Event()
    while not e.wait(time):
        func()

def foo():
    print "hello"
# using
setInterval(foo,5)
# output:
hello
hello
.
.
.
EDIT : This code is non-blocking
import threading
class ThreadJob(threading.Thread):
def __init__(self,callback,event,interval):
'''runs the callback function after interval seconds
:param callback: callback function to invoke
:param event: external event for controlling the update operation
:param interval: time in seconds after which are required to fire the callback
:type callback: function
:type interval: int
'''
self.callback = callback
self.event = event
self.interval = interval
super(ThreadJob,self).__init__()
def run(self):
while not self.event.wait(self.interval):
self.callback()
event = threading.Event()
def foo():
print "hello"
k = ThreadJob(foo,event,2)
k.start()
print "It is non-blocking"
Change Nailxx's answer a bit and you got the answer!
from threading import Timer

def hello():
    print "hello, world"
    Timer(30.0, hello).start()

Timer(30.0, hello).start() # after 30 seconds, "hello, world" will be printed
The sched module provides these abilities for general Python code. However, as its documentation suggests, if your code is multithreaded it might make more sense to use the threading.Timer class instead.
I think this is what you're after:
#timertest.py
import sched, time

def dostuff():
    print "stuff is being done!"
    s.enter(3, 1, dostuff, ())

s = sched.scheduler(time.time, time.sleep)
s.enter(3, 1, dostuff, ())
s.run()
If you add another entry to the scheduler at the end of the repeating method, it'll just keep going.
I use sched to create setInterval function gist
import functools
import sched, time
s = sched.scheduler(time.time, time.sleep)
def setInterval(sec):
def decorator(func):
@functools.wraps(func)
def wrapper(*argv, **kw):
setInterval(sec)(func)
func(*argv, **kw)
s.enter(sec, 1, wrapper, ())
return wrapper
s.run()
return decorator
@setInterval(sec=3)
def testInterval():
print ("test Interval ")
testInterval()
Simple setInterval utils
from threading import Timer

def setInterval(timer, task):
    isStop = task()
    if not isStop:
        Timer(timer, setInterval, [timer, task]).start()

def hello():
    print "do something"
    return False # return True if you want to stop

if __name__ == "__main__":
    setInterval(2.0, hello) # every 2 seconds, "do something" will be printed
The above method didn't quite do it for me as I needed to be able to cancel the interval. I turned the function into a class and came up with the following:
import threading

class setInterval():
    def __init__(self, func, sec):
        def func_wrapper():
            self.t = threading.Timer(sec, func_wrapper)
            self.t.start()
            func()
        self.t = threading.Timer(sec, func_wrapper)
        self.t.start()

    def cancel(self):
        self.t.cancel()
Most of the answers above do not shut down the thread properly. While using a Jupyter notebook I noticed that when an explicit interrupt was sent, the threads were still running and, worse, they would keep multiplying: starting at 1 running thread, then 2, 4, etc. My method below is based on the answer by @doom but cleanly handles interrupts by running an infinite loop in the main thread to listen for SIGINT and SIGTERM events.
No drift
Cancelable
Handles SIGINT and SIGTERM very well
Doesn't make a new thread for every run
Feel free to suggest improvements
import time
import threading
import signal
# Record the time for the purposes of demonstration
start_time=time.time()
class ProgramKilled(Exception):
"""
An instance of this custom exception class will be thrown everytime we get an SIGTERM or SIGINT
"""
pass
# Raise the custom exception whenever SIGINT or SIGTERM is triggered
def signal_handler(signum, frame):
raise ProgramKilled
# This function serves as the callback triggered on every run of our IntervalThread
def action() :
print('action ! -> time : {:.1f}s'.format(time.time()-start_time))
# https://stackoverflow.com/questions/2697039/python-equivalent-of-setinterval
class IntervalThread(threading.Thread) :
def __init__(self,interval,action, *args, **kwargs) :
super(IntervalThread, self).__init__()
self.interval=interval
self.action=action
self.stopEvent=threading.Event()
self.start()
def run(self) :
nextTime=time.time()+self.interval
while not self.stopEvent.wait(nextTime-time.time()) :
nextTime+=self.interval
self.action()
def cancel(self) :
self.stopEvent.set()
def main():
# Handle SIGINT and SIGTERM with the help of the callback function
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
# start action every 1s
inter=IntervalThread(1,action)
print('just after setInterval -> time : {:.1f}s'.format(time.time()-start_time))
# will stop interval in 500s
t=threading.Timer(500,inter.cancel)
t.start()
# https://www.g-loaded.eu/2016/11/24/how-to-terminate-running-python-threads-using-signals/
while True:
try:
time.sleep(1)
except ProgramKilled:
print("Program killed: running cleanup code")
inter.cancel()
break
if __name__ == "__main__":
main()
In the above solutions, if the program is shut down there is no guarantee that it will shut down gracefully; it's always recommended to end a program via a soft kill. Most of them also lack a way to stop the interval. I found a nice article on Medium written by Sankalp that solves both of these issues (run periodic tasks in python); refer to the attached link for a deeper insight.
In the sample below, the signal library is used to track whether the kill is a soft kill or a hard kill:
import threading, time, signal
from datetime import timedelta
WAIT_TIME_SECONDS = 1
class ProgramKilled(Exception):
pass
def foo():
print time.ctime()
def signal_handler(signum, frame):
raise ProgramKilled
class Job(threading.Thread):
def __init__(self, interval, execute, *args, **kwargs):
threading.Thread.__init__(self)
self.daemon = False
self.stopped = threading.Event()
self.interval = interval
self.execute = execute
self.args = args
self.kwargs = kwargs
def stop(self):
self.stopped.set()
self.join()
def run(self):
while not self.stopped.wait(self.interval.total_seconds()):
self.execute(*self.args, **self.kwargs)
if __name__ == "__main__":
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
job = Job(interval=timedelta(seconds=WAIT_TIME_SECONDS), execute=foo)
job.start()
while True:
try:
time.sleep(1)
except ProgramKilled:
print "Program killed: running cleanup code"
job.stop()
break
#output
#Tue Oct 16 17:47:51 2018
#Tue Oct 16 17:47:52 2018
#Tue Oct 16 17:47:53 2018
#^CProgram killed: running cleanup code
setInterval should run on a separate thread and not freeze the rest of the program while its loop is running.
Here is my runtime package that supports multithreading:
setTimeout(F,ms) : fire a function after a delay, on an independent thread.
delayF(F,ms) : similar to setTimeout(F,ms).
setInterval(F,ms) : asynchronous loop
.pause, .resume : pause and resume the interval
clearInterval(interval) : clear the interval
It's short and simple. Note that Python needs a lambda if you pass the function inline, but a lambda does not support a block of statements, so you should define the function body before passing it to setInterval.
### DEMO PYTHON MULTITHREAD ASYNCHRONOUS LOOP ###
import time;
import threading;
import random;
def delay(ms):time.sleep(ms/1000); # Control loop speed
def setTimeout(R,delayMS):
t=threading.Timer(delayMS/1000,R)
t.start();
return t;
def delayF(R,delayMS):
t=threading.Timer(delayMS/1000,R)
t.start();
return t;
class THREAD:
def __init__(this):
this.R_onRun=None;
this.thread=None;
def run(this):
this.thread=threading.Thread(target=this.R_onRun);
this.thread.start();
def isRun(this): return this.thread.isAlive();
class setInterval :
def __init__(this,R_onRun,msInterval) :
this.ms=msInterval;
this.R_onRun=R_onRun;
this.kStop=False;
this.thread=THREAD();
this.thread.R_onRun=this.Clock;
this.thread.run();
def Clock(this) :
while not this.kStop :
this.R_onRun();
delay(this.ms);
def pause(this) :
this.kStop=True;
def stop(this) :
this.kStop=True;
def resume(this) :
if (this.kStop) :
this.kStop=False;
this.thread.run();
def clearInterval(Timer): Timer.stop();
# EXAMPLE
def p():print(random.random());
tm=setInterval(p,20);
tm2=setInterval(lambda:print("AAAAA"),20);
delayF(tm.pause,1000);
delayF(tm.resume,2000);
delayF(lambda:clearInterval(tm),3000);
Save it to a .py file and run it. You will see it print both random numbers and the string "AAAAA". The number-printing thread will pause after 1 second, resume printing for another second, and then stop, while the string-printing thread keeps printing without corruption.
If you use OpenCV for graphics animation with these setInterval calls to boost the animation speed, you must keep one main thread to call waitKey; otherwise the window will freeze no matter how long the delay is or whether you call waitKey in a sub-thread:
def p(): ... # Your drawing task
setInterval(p,1); # Subthread1 running draw
setInterval(p,1); # Subthread2 running draw
setInterval(p,1); # Subthread3 running draw
while True: cv2.waitKey(10); # Main thread which waitKey have effect
You can also try out this method:
import time
while True:
time.sleep(5)
print("5 seconds has passed")
So it will print "5 seconds has passed" every 5 seconds.
The function sleep() suspends execution for the given number of seconds. The argument may be a floating point number to indicate a more precise sleep time.
Recently I had the same issue as you, and I found these solutions:
1. you can use the library threading.Timer (introduced above)
2. you can use the library sched (also introduced above)
3. you can use the library Advanced Python Scheduler (recommended; a sketch follows below)
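For option 3, a minimal sketch with APScheduler 3.x (assuming the package is installed; the job function and timings here are placeholders):

import time
from apscheduler.schedulers.background import BackgroundScheduler

def tick():
    print('tick')

scheduler = BackgroundScheduler()
scheduler.add_job(tick, 'interval', seconds=5)  # run tick() every 5 seconds
scheduler.start()
try:
    time.sleep(20)        # let it fire a few times
finally:
    scheduler.shutdown()  # stop the scheduler's threads cleanly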
Some answers above that use func_wrapper and threading.Timer do work, except that they spawn a new thread every time an interval fires, which causes memory problems.
The basic example below roughly implements a similar mechanism by putting the interval on a separate thread. It sleeps at the given interval. Before jumping into the code, here are some of the limitations that you need to be aware of:
JavaScript is single-threaded, so when the function inside setInterval fires, nothing else is running at the same time (excluding worker threads, but let's stick to the general use case of setInterval), so threading is safe there. In this implementation, however, you may encounter race conditions unless you use a threading.RLock.
The implementation below uses time.sleep to simulate intervals, but once the execution time of func is added, the total time for an interval may be greater than you expect. So depending on the use case, you may want to "sleep less" (subtract the time taken by calling func).
I only roughly tested this, and you should definitely not use global variables the way I did; feel free to tweak it so that it fits your system.
Enough talking, here is the code:
# Python 2.7
import threading
import time
class Interval(object):
def __init__(self):
self.daemon_alive = True
self.thread = None # keep a reference to the thread so that we can "join"
def ticktock(self, interval, func):
while self.daemon_alive:
time.sleep(interval)
func()
num = 0
def print_num():
global num
num += 1
print 'num + 1 = ', num
def print_negative_num():
global num
print '-num = ', num * -1
intervals = {} # keep track of intervals
g_id_counter = 0 # roughly generate ids for intervals
def set_interval(interval, func):
global g_id_counter
interval_obj = Interval()
# Put this interval on a new thread
t = threading.Thread(target=interval_obj.ticktock, args=(interval, func))
t.setDaemon(True)
interval_obj.thread = t
t.start()
# Register this interval so that we can clear it later
# using roughly generated id
interval_id = g_id_counter
g_id_counter += 1
intervals[interval_id] = interval_obj
# return interval id like it does in JavaScript
return interval_id
def clear_interval(interval_id):
# terminate this interval's while loop
intervals[interval_id].daemon_alive = False
# kill the thread
intervals[interval_id].thread.join()
# pop out the interval from registry for reusing
intervals.pop(interval_id)
if __name__ == '__main__':
num_interval = set_interval(1, print_num)
neg_interval = set_interval(3, print_negative_num)
time.sleep(10) # Sleep 10 seconds on main thread to let interval run
clear_interval(num_interval)
clear_interval(neg_interval)
print "- Are intervals all cleared?"
time.sleep(3) # check if both intervals are stopped (not printing)
print "- Yup, time to get beers"
Expected output:
num + 1 = 1
num + 1 = 2
-num = -2
num + 1 = 3
num + 1 = 4
num + 1 = 5
-num = -5
num + 1 = 6
num + 1 = 7
num + 1 = 8
-num = -8
num + 1 = 9
num + 1 = 10
-num = -10
Are intervals all cleared?
Yup, time to get beers
My Python 3 module jsinterval.py will be helpful! Here it is:
"""
Threaded intervals and timeouts from JavaScript
"""
import threading, sys
__all__ = ['TIMEOUTS', 'INTERVALS', 'setInterval', 'clearInterval', 'setTimeout', 'clearTimeout']
TIMEOUTS = {}
INTERVALS = {}
last_timeout_id = 0
last_interval_id = 0
class Timeout:
"""Class for all timeouts."""
def __init__(self, func, timeout):
global last_timeout_id
last_timeout_id += 1
self.timeout_id = last_timeout_id
TIMEOUTS[str(self.timeout_id)] = self
self.func = func
self.timeout = timeout
self.threadname = 'Timeout #%s' %self.timeout_id
def run(self):
func = self.func
delx = self.__del__
def func_wrapper():
func()
delx()
self.t = threading.Timer(self.timeout/1000, func_wrapper)
self.t.name = self.threadname
self.t.start()
def __repr__(self):
return '<JS Timeout set for %s seconds, launching function %s on timeout reached>' %(self.timeout, repr(self.func))
def __del__(self):
self.t.cancel()
class Interval:
"""Class for all intervals."""
def __init__(self, func, interval):
global last_interval_id
self.interval_id = last_interval_id
INTERVALS[str(self.interval_id)] = self
last_interval_id += 1
self.func = func
self.interval = interval
self.threadname = 'Interval #%s' %self.interval_id
def run(self):
func = self.func
interval = self.interval
def func_wrapper():
timeout = Timeout(func_wrapper, interval)
self.timeout = timeout
timeout.run()
func()
self.t = threading.Timer(self.interval/1000, func_wrapper)
self.t.name = self.threadname
self.t.run()
def __repr__(self):
return '<JS Interval, repeating function %s with interval %s>' %(repr(self.func), self.interval)
def __del__(self):
self.timeout.__del__()
def setInterval(func, interval):
"""
Create a JS Interval: func is the function to repeat, interval is the interval (in ms)
of executing the function.
"""
temp = Interval(func, interval)
temp.run()
idx = int(temp.interval_id)
del temp
return idx
def clearInterval(interval_id):
try:
INTERVALS[str(interval_id)].__del__()
del INTERVALS[str(interval_id)]
except KeyError:
sys.stderr.write('No such interval "Interval #%s"\n' %interval_id)
def setTimeout(func, timeout):
"""
Create a JS Timeout: func is the function to timeout, timeout is the timeout (in ms)
of executing the function.
"""
temp = Timeout(func, timeout)
temp.run()
idx = int(temp.timeout_id)
del temp
return idx
def clearTimeout(timeout_id):
try:
TIMEOUTS[str(timeout_id)].__del__()
del TIMEOUTS[str(timeout_id)]
except KeyError:
sys.stderr.write('No such timeout "Timeout #%s"\n' %timeout_id)
CODE EDIT:
Fixed the memory leak (spotted by @benjaminz). Now ALL threads are cleaned up upon end. Why does this leak happen? It happens because of the implicit (or even explicit) references. In my case, TIMEOUTS and INTERVALS. Timeouts self-clean automatically (after this patch) because they use a function wrapper which calls the function and then self-kills. But how does this happen? Objects can't be deleted from memory unless all references are deleted too or the gc module is used. Explaining: there's no way to create (in my code) unwanted references to timeouts/intervals. They have only ONE referrer: the TIMEOUTS/INTERVALS dicts. And, when interrupted or finished (only timeouts can finish uninterrupted) they delete the only existing reference to themselves: their corresponding dict element. Classes are perfectly encapsulated using __all__, so no space for memory leaks.
Here is a low time drift solution that uses a thread to periodically signal an Event object. The thread's run() does almost nothing while waiting for a timeout; hence the low time drift.
# Example of low drift (time) periodic execution of a function.
import threading
import time
# Thread that sets 'flag' after 'timeout'
class timerThread (threading.Thread):
def __init__(self , timeout , flag):
threading.Thread.__init__(self)
self.timeout = timeout
self.stopFlag = False
self.event = threading.Event()
self.flag = flag
# Low drift run(); there is only the 'if'
# and 'set' methods between waits.
def run(self):
while not self.event.wait(self.timeout):
if self.stopFlag:
break
self.flag.set()
def stop(self):
self.stopFlag = True
self.event.set()
# Data.
printCnt = 0
# Flag to print.
printFlag = threading.Event()
# Create and start the timer thread.
printThread = timerThread(3 , printFlag)
printThread.start()
# Loop to wait for flag and print time.
while True:
global printCnt
# Wait for flag.
printFlag.wait()
# Flag must be manually cleared.
printFlag.clear()
print(time.time())
printCnt += 1
if printCnt == 3:
break;
# Stop the thread and exit.
printThread.stop()
printThread.join()
print('Done')
Fall asleep until the start of the next interval of the given length in seconds (not concurrent):
import time

def sleep_until_next_interval(seconds):
    now = time.time()
    fall_asleep = seconds - now % seconds
    time.sleep(fall_asleep)

while True:
    sleep_until_next_interval(10) # 10 seconds - worktime
    # work here
simple and no drift.
I have written my code to make a very very flexible setInterval in python. Here you are:
import threading
class AlreadyRunning(Exception):
pass
class IntervalNotValid(Exception):
pass
class setInterval():
def __init__(this, func=None, sec=None, args=[]):
this.running = False
this.func = func # the function to be run
this.sec = sec # interval in second
this.Return = None # The returned data
this.args = args
this.runOnce = None # associated with run_once() method
this.runOnceArgs = None # associated with run_once() method
if (func is not None and sec is not None):
this.running = True
if (not callable(func)):
raise TypeError("non-callable object is given")
if (not isinstance(sec, int) and not isinstance(sec, float)):
raise TypeError("A non-numeric object is given")
this.TIMER = threading.Timer(this.sec, this.loop)
this.TIMER.start()
def start(this):
if (not this.running):
if (not this.isValid()):
raise IntervalNotValid("The function and/or the " +
"interval hasn't provided or invalid.")
this.running = True
this.TIMER = threading.Timer(this.sec, this.loop)
this.TIMER.start()
else:
raise AlreadyRunning("Tried to run an already run interval")
def stop(this):
this.running = False
def isValid(this):
if (not callable(this.func)):
return False
cond1 = not isinstance(this.sec, int)
cond2 = not isinstance(this.sec, float)
if (cond1 and cond2):
return False
return True
def loop(this):
if (this.running):
this.TIMER = threading.Timer(this.sec, this.loop)
this.TIMER.start()
function_, Args_ = this.func, this.args
if (this.runOnce is not None): # someone has provide the run_once
runOnce, this.runOnce = this.runOnce, None
result = runOnce(*(this.runOnceArgs))
this.runOnceArgs = None
# if and only if the result is False. not accept "None"
# nor zero.
if (result is False):
return # cancel the interval right now
this.Return = function_(*Args_)
def change_interval(this, sec):
cond1 = not isinstance(sec, int)
cond2 = not isinstance(sec, float)
if (cond1 and cond2):
raise TypeError("A non-numeric object is given")
# prevent error when providing interval to a blueprint
if (this.running):
this.TIMER.cancel()
this.sec = sec
# prevent error when providing interval to a blueprint
# if the function hasn't provided yet
if (this.running):
this.TIMER = threading.Timer(this.sec, this.loop)
this.TIMER.start()
def change_next_interval(this, sec):
if (not isinstance(sec, int) and not isinstance(sec, float)):
raise TypeError("A non-numeric object is given")
this.sec = sec
def change_func(this, func, args=[]):
if (not callable(func)):
raise TypeError("non-callable object is given")
this.func = func
this.args = args
def run_once(this, func, args=[]):
this.runOnce = func
this.runOnceArgs = args
def get_return(this):
return this.Return
You can get many features and flexibility. Running this code won't freeze your code, you can change the interval at run time, you can change the function at run time, you can pass arguments, you can get the returned object from your function, and many more. You can make your tricks too!
here's a very simple and basic example to use it:
import time
def interval(name="world"):
print(f"Hello {name}!")
# function named interval will be called every two seconds
# output: "Hello world!"
interval1 = setInterval(interval, 2)
# function named interval will be called every 1.5 seconds
# output: "Hello Jane!"
interval2 = setInterval(interval, 1.5, ["Jane"])
time.sleep(5) #stop all intervals after 5 seconds
interval1.stop()
interval2.stop()
Check out my Github project to see more examples and follow next updates :D
https://github.com/Hzzkygcs/setInterval-python
Here's something easy peazy:
import time
delay = 10 # Seconds
def setInterval():
print('I print in intervals!')
time.sleep(delay)
setInterval()
Things work differently in Python: you need to either sleep() (if you want to block the current thread) or start a new thread. See http://docs.python.org/library/threading.html
From Python Documentation:
from threading import Timer
def hello():
print "hello, world"
t = Timer(30.0, hello)
t.start() # after 30 seconds, "hello, world" will be printed

Python Observer Pattern: Examples, Tips? [closed]

Are there any exemplary examples of the GoF Observer implemented in Python? I have a bit of code which currently has bits of debugging code laced through the key class (currently generating messages to stderr if a magic env is set). Additionally, the class has an interface for incrementally returning results as well as storing them (in memory) for post-processing. (The class itself is a job manager for concurrently executing commands on remote machines over ssh.)
Currently the usage of the class looks something like:
job = SSHJobMan(hostlist, cmd)
job.start()
while not job.done():
for each in job.poll():
incrementally_process(job.results[each])
time.sleep(0.2) # or other more useful work
post_process(job.results)
An alternative usage model is:
job = SSHJobMan(hostlist, cmd)
job.wait() # implicitly performs a start()
process(job.results)
This all works fine for the current utility. However, it does lack flexibility. For example, I currently support a brief output format or a progress bar as incremental results; I also support brief, complete and "merged message" outputs for the post_process() function.
However, I'd like to support multiple results/output streams (progress bar to the terminal, debugging and warnings to a log file, outputs from successful jobs to one file/directory, error messages and other results from non-successful jobs to another, etc).
This sounds like a situation that calls for Observer ... have instances of my class accept registration from other objects and call them back with specific types of events as they occur.
I'm looking at PyPubSub since I saw several references to that in SO related questions. I'm not sure I'm ready to add the external dependency to my utility but I could see value in using their interface as a model for mine if that's going to make it easier for others to use. (The project is intended as both a standalone command line utility and a class for writing other scripts/utilities).
In short I know how to do what I want ... but there are numerous ways to accomplish it. I want suggestions on what's most likely to work for other users of the code in the long run.
The code itself is at: classh.
However it does lack flexibility.
Well... actually, this looks like a good design to me if an asynchronous API is what you want. It usually is. Maybe all you need is to switch from stderr to Python's logging module, which has a sort of publish/subscribe model of its own, what with Logger.addHandler() and so on.
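For instance, a short sketch of that publish/subscribe flavour, with one logger as the publisher and two handlers as subscribers (the logger name and destinations here are illustrative only):

import logging, sys

log = logging.getLogger('sshjobman')                # the "publisher"
log.setLevel(logging.DEBUG)
log.addHandler(logging.StreamHandler(sys.stderr))   # subscriber 1: progress to the terminal
log.addHandler(logging.FileHandler('jobs.log'))     # subscriber 2: everything to a log file

log.info('job on %s finished', 'host-01')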
If you do want to support observers, my advice is to keep it simple. You really only need a few lines of code.
class Event(object):
    pass

class Observable(object):
    def __init__(self):
        self.callbacks = []

    def subscribe(self, callback):
        self.callbacks.append(callback)

    def fire(self, **attrs):
        e = Event()
        e.source = self
        for k, v in attrs.items():
            setattr(e, k, v)
        for fn in self.callbacks:
            fn(e)
Your Job class can subclass Observable. When something of interest happens, call self.fire(type="progress", percent=50) or the like.
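A hedged sketch of that usage (the Job subclass and the progress event here are made up for illustration):

class Job(Observable):
    def run(self):
        for pct in (25, 50, 75, 100):
            # ... do a chunk of work, then publish progress ...
            self.fire(type="progress", percent=pct)

def show_progress(e):
    print("%d%% done" % e.percent)

job = Job()
job.subscribe(show_progress)
job.run()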
I think people in the other answers overdo it. You can easily achieve events in Python with less than 15 lines of code.
You simply have two classes: Event and Observer. Any class that wants to listen for an event needs to inherit from Observer and set itself to listen (observe) for a specific event. When an Event is instantiated and fired, all observers listening to that event will run the specified callback functions.
class Observer():
_observers = []
def __init__(self):
self._observers.append(self)
self._observables = {}
def observe(self, event_name, callback):
self._observables[event_name] = callback
class Event():
def __init__(self, name, data, autofire = True):
self.name = name
self.data = data
if autofire:
self.fire()
def fire(self):
for observer in Observer._observers:
if self.name in observer._observables:
observer._observables[self.name](self.data)
Example:
class Room(Observer):
def __init__(self):
print("Room is ready.")
Observer.__init__(self) # Observer's init needs to be called
def someone_arrived(self, who):
print(who + " has arrived!")
room = Room()
room.observe('someone arrived', room.someone_arrived)
Event('someone arrived', 'Lenard')
Output:
Room is ready.
Lenard has arrived!
A few more approaches...
Example: the logging module
Maybe all you need is to switch from stderr to Python's logging module, which has a powerful publish/subscribe model.
It's easy to get started producing log records.
# producer
import logging
log = logging.getLogger("myjobs") # that's all the setup you need
class MyJob(object):
def run(self):
log.info("starting job")
n = 10
for i in range(n):
log.info("%.1f%% done" % (100.0 * i / n))
log.info("work complete")
On the consumer side there's a bit more work. Unfortunately configuring logger output takes, like, 7 whole lines of code to do. ;)
# consumer
import myjobs, sys, logging
if user_wants_log_output:
ch = logging.StreamHandler(sys.stderr)
ch.setLevel(logging.INFO)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
myjobs.log.addHandler(ch)
myjobs.log.setLevel(logging.INFO)
myjobs.MyJob().run()
On the other hand there's an amazing amount of stuff in the logging package. If you ever need to send log data to a rotating set of files, an email address, and the Windows Event Log, you're covered.
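For example, routing the same records into a size-rotated file is just one more handler (a sketch; the file name and limits are arbitrary):

import logging
from logging.handlers import RotatingFileHandler

log = logging.getLogger("myjobs")
rotating = RotatingFileHandler("myjobs.log", maxBytes=1000000, backupCount=5)
rotating.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
log.addHandler(rotating)
log.setLevel(logging.INFO)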
Example: simplest possible observer
But you don't need to use any library at all. An extremely simple way to support observers is to call a method that does nothing.
# producer
class MyJob(object):
def on_progress(self, pct):
"""Called when progress is made. pct is the percent complete.
By default this does nothing. The user may override this method
or even just assign to it."""
pass
def run(self):
n = 10
for i in range(n):
self.on_progress(100.0 * i / n)
self.on_progress(100.0)
# consumer
import sys, myjobs
job = myjobs.MyJob()
job.on_progress = lambda pct: sys.stdout.write("%.1f%% done\n" % pct)
job.run()
Sometimes instead of writing a lambda, you can just say job.on_progress = progressBar.update, which is nice.
This is about as simple as it gets. One drawback is that it doesn't naturally support multiple listeners subscribing to the same events.
Example: C#-like events
With a bit of support code, you can get C#-like events in Python. Here's the code:
# glue code
class event(object):
def __init__(self, func):
self.__doc__ = func.__doc__
self._key = ' ' + func.__name__
def __get__(self, obj, cls):
try:
return obj.__dict__[self._key]
except KeyError, exc:
be = obj.__dict__[self._key] = boundevent()
return be
class boundevent(object):
def __init__(self):
self._fns = []
def __iadd__(self, fn):
self._fns.append(fn)
return self
def __isub__(self, fn):
self._fns.remove(fn)
return self
def __call__(self, *args, **kwargs):
for f in self._fns[:]:
f(*args, **kwargs)
The producer declares the event using a decorator:
# producer
class MyJob(object):
@event
def progress(pct):
"""Called when progress is made. pct is the percent complete."""
def run(self):
n = 10
for i in range(n+1):
self.progress(100.0 * i / n)
#consumer
import sys, myjobs
job = myjobs.MyJob()
job.progress += lambda pct: sys.stdout.write("%.1f%% done\n" % pct)
job.run()
This works exactly like the "simple observer" code above, but you can add as many listeners as you like using +=. (Unlike C#, there are no event handler types, you don't have to new EventHandler(foo.bar) when subscribing to an event, and you don't have to check for null before firing the event. Like C#, events do not squelch exceptions.)
How to choose
If logging does everything you need, use that. Otherwise do the simplest thing that works for you. The key thing to note is that you don't need to take on a big external dependency.
How about an implementation where objects aren't kept alive just because they're observing something? Below please find an implementation of the observer pattern with the following features:
Usage is pythonic. To add an observer to a bound method .bar of instance foo, just do foo.bar.addObserver(observer).
Observers are not kept alive by virtue of being observers. In other words, the observer code uses no strong references.
No sub-classing necessary (descriptors ftw).
Can be used with unhashable types.
Can be used as many times you want in a single class.
(bonus) As of today the code exists in a proper downloadable, installable package on github.
Here's the code (the github package or PyPI package have the most up to date implementation):
import weakref
import functools
class ObservableMethod(object):
"""
A proxy for a bound method which can be observed.
I behave like a bound method, but other bound methods can subscribe to be
called whenever I am called.
"""
def __init__(self, obj, func):
self.func = func
functools.update_wrapper(self, func)
self.objectWeakRef = weakref.ref(obj)
self.callbacks = {} #observing object ID -> weak ref, methodNames
def addObserver(self, boundMethod):
"""
Register a bound method to observe this ObservableMethod.
The observing method will be called whenever this ObservableMethod is
called, and with the same arguments and keyword arguments. If a
boundMethod has already been registered to as a callback, trying to add
it again does nothing. In other words, there is no way to sign up an
observer to be called back multiple times.
"""
obj = boundMethod.__self__
ID = id(obj)
if ID in self.callbacks:
s = self.callbacks[ID][1]
else:
wr = weakref.ref(obj, Cleanup(ID, self.callbacks))
s = set()
self.callbacks[ID] = (wr, s)
s.add(boundMethod.__name__)
def discardObserver(self, boundMethod):
"""
Un-register a bound method.
"""
obj = boundMethod.__self__
if id(obj) in self.callbacks:
self.callbacks[id(obj)][1].discard(boundMethod.__name__)
def __call__(self, *arg, **kw):
"""
Invoke the method which I proxy, and all of it's callbacks.
The callbacks are called with the same *args and **kw as the main
method.
"""
result = self.func(self.objectWeakRef(), *arg, **kw)
for ID in self.callbacks:
wr, methodNames = self.callbacks[ID]
obj = wr()
for methodName in methodNames:
getattr(obj, methodName)(*arg, **kw)
return result
@property
def __self__(self):
"""
Get a strong reference to the object owning this ObservableMethod
This is needed so that ObservableMethod instances can observe other
ObservableMethod instances.
"""
return self.objectWeakRef()
class ObservableMethodDescriptor(object):
def __init__(self, func):
"""
To each instance of the class using this descriptor, I associate an
ObservableMethod.
"""
self.instances = {} # Instance id -> (weak ref, Observablemethod)
self._func = func
def __get__(self, inst, cls):
if inst is None:
return self
ID = id(inst)
if ID in self.instances:
wr, om = self.instances[ID]
if not wr():
msg = "Object id %d should have been cleaned up"%(ID,)
raise RuntimeError(msg)
else:
wr = weakref.ref(inst, Cleanup(ID, self.instances))
om = ObservableMethod(inst, self._func)
self.instances[ID] = (wr, om)
return om
def __set__(self, inst, val):
raise RuntimeError("Assigning to ObservableMethod not supported")
def event(func):
return ObservableMethodDescriptor(func)
class Cleanup(object):
"""
I remove elements from a dict whenever I'm called.
Use me as a weakref.ref callback to remove an object's id from a dict
when that object is garbage collected.
"""
def __init__(self, key, d):
self.key = key
self.d = d
def __call__(self, wr):
del self.d[self.key]
To use this we just decorate methods we want to make observable with #event. Here's an example
class Foo(object):
def __init__(self, name):
self.name = name
@event
def bar(self):
print("%s called bar"%(self.name,))
def baz(self):
print("%s called baz"%(self.name,))
a = Foo('a')
b = Foo('b')
a.bar.addObserver(b.bar)
a.bar()
From wikipedia:
from collections import defaultdict
class Observable (defaultdict):
def __init__ (self):
defaultdict.__init__(self, object)
def emit (self, *args):
'''Pass parameters to all observers and update states.'''
for subscriber in self:
response = subscriber(*args)
self[subscriber] = response
def subscribe (self, subscriber):
'''Add a new subscriber to self.'''
self[subscriber]
def stat (self):
'''Return a tuple containing the state of each observer.'''
return tuple(self.values())
The Observable is used like this.
myObservable = Observable ()
# subscribe some inlined functions.
# myObservable[lambda x, y: x * y] would also work here.
myObservable.subscribe(lambda x, y: x * y)
myObservable.subscribe(lambda x, y: float(x) / y)
myObservable.subscribe(lambda x, y: x + y)
myObservable.subscribe(lambda x, y: x - y)
# emit parameters to each observer
myObservable.emit(6, 2)
# get updated values
myObservable.stat() # returns: (8, 3.0, 4, 12)
Based on Jason's answer, I implemented the C#-like events example as a fully-fledged python module including documentation and tests. I love fancy pythonic stuff :)
So, if you want some ready-to-use solution, you can just use the code on github.
Example: twisted log observers
To register an observer yourCallable() (a callable that accepts a dictionary) to receive all log events (in addition to any other observers):
twisted.python.log.addObserver(yourCallable)
Example: complete producer/consumer example
From Twisted-Python mailing list:
#!/usr/bin/env python
"""Serve as a sample implementation of a twisted producer/consumer
system, with a simple TCP server which asks the user how many random
integers they want, and it sends the result set back to the user, one
result per line."""
import random
from zope.interface import implements
from twisted.internet import interfaces, reactor
from twisted.internet.protocol import Factory
from twisted.protocols.basic import LineReceiver
class Producer:
"""Send back the requested number of random integers to the client."""
implements(interfaces.IPushProducer)
def __init__(self, proto, cnt):
self._proto = proto
self._goal = cnt
self._produced = 0
self._paused = False
def pauseProducing(self):
"""When we've produced data too fast, pauseProducing() will be
called (reentrantly from within resumeProducing's transport.write
method, most likely), so set a flag that causes production to pause
temporarily."""
self._paused = True
print('pausing connection from %s' % (self._proto.transport.getPeer()))
def resumeProducing(self):
self._paused = False
while not self._paused and self._produced < self._goal:
next_int = random.randint(0, 10000)
self._proto.transport.write('%d\r\n' % (next_int))
self._produced += 1
if self._produced == self._goal:
self._proto.transport.unregisterProducer()
self._proto.transport.loseConnection()
def stopProducing(self):
pass
class ServeRandom(LineReceiver):
"""Serve up random data."""
def connectionMade(self):
print('connection made from %s' % (self.transport.getPeer()))
self.transport.write('how many random integers do you want?\r\n')
def lineReceived(self, line):
cnt = int(line.strip())
producer = Producer(self, cnt)
self.transport.registerProducer(producer, True)
producer.resumeProducing()
def connectionLost(self, reason):
print('connection lost from %s' % (self.transport.getPeer()))
factory = Factory()
factory.protocol = ServeRandom
reactor.listenTCP(1234, factory)
print('listening on 1234...')
reactor.run()
OP asks "Are there any exemplary examples of the GoF Observer implemented in Python?"
This is an example in Python 3.7. This Observable class meets the requirement of creating a relationship between one observable and many observers while remaining independent of their structure.
from functools import partial
from dataclasses import dataclass, field
import sys
from typing import List, Callable
@dataclass
class Observable:
observers: List[Callable] = field(default_factory=list)
def register(self, observer: Callable):
self.observers.append(observer)
def deregister(self, observer: Callable):
self.observers.remove(observer)
def notify(self, *args, **kwargs):
for observer in self.observers:
observer(*args, **kwargs)
def usage_demo():
observable = Observable()
# Register two anonymous observers using lambda.
observable.register(
lambda *args, **kwargs: print(f'Observer 1 called with args={args}, kwargs={kwargs}'))
observable.register(
lambda *args, **kwargs: print(f'Observer 2 called with args={args}, kwargs={kwargs}'))
# Create an observer function, register it, then deregister it.
def callable_3():
print('Observer 3 NOT called.')
observable.register(callable_3)
observable.deregister(callable_3)
# Create a general purpose observer function and register four observers.
def callable_x(*args, **kwargs):
print(f'{args[0]} observer called with args={args}, kwargs={kwargs}')
for gui_field in ['Form field 4', 'Form field 5', 'Form field 6', 'Form field 7']:
observable.register(partial(callable_x, gui_field))
observable.notify('test')
if __name__ == '__main__':
sys.exit(usage_demo())
A functional approach to observer design:
def add_listener(obj, method_name, listener):
# Get any existing listeners
listener_attr = method_name + '_listeners'
listeners = getattr(obj, listener_attr, None)
# If this is the first listener, then set up the method wrapper
if not listeners:
listeners = [listener]
setattr(obj, listener_attr, listeners)
# Get the object's method
method = getattr(obj, method_name)
@wraps(method)
def method_wrapper(*args, **kwags):
method(*args, **kwags)
for l in listeners:
l(obj, *args, **kwags) # Listener also has object argument
# Replace the original method with the wrapper
setattr(obj, method_name, method_wrapper)
else:
# Event is already set up, so just add another listener
listeners.append(listener)
def remove_listener(obj, method_name, listener):
# Get any existing listeners
listener_attr = method_name + '_listeners'
listeners = getattr(obj, listener_attr, None)
if listeners:
# Remove the listener
next((listeners.pop(i)
for i, l in enumerate(listeners)
if l == listener),
None)
# If this was the last listener, then remove the method wrapper
if not listeners:
method = getattr(obj, method_name)
delattr(obj, listener_attr)
setattr(obj, method_name, method.__wrapped__)
These methods can then be used to add a listener to any class method. For example:
class MyClass(object):
def __init__(self, prop):
self.prop = prop
def some_method(self, num, string):
print('method:', num, string)
def listener_method(obj, num, string):
print('listener:', num, string, obj.prop)
my = MyClass('my_prop')
add_listener(my, 'some_method', listener_method)
my.some_method(42, 'with listener')
remove_listener(my, 'some_method', listener_method)
my.some_method(42, 'without listener')
And the output is:
method: 42 with listener
listener: 42 with listener my_prop
method: 42 without listener
