Use contextmanager to trap instructions for later execution - python

I want to achieve a pseudo-db-like transaction using context manager.
Take for example:
class Transactor:
def a(): pass
def b(d, b): pass
def c(i): pass
#contextmanager
def get_session(self):
txs = []
yield self # accumulate method calls
for tx in tx:
tx() # somehow pass the arguments
def main():
t = Transactor()
with t.get_session() as session:
session.a() # inserts `a` into `txs`
... more code ...
session.c(value) # inserts `c` and `(value)` into `txs`
session.b(value1, value2) # inserts `b` and `(value1, value2)` into `txs`
... more code ...
# non-transator related code
f = open('file.txt') # If this throws an exception,
# break out of the context manager,
# and discard previous transactor calls.
... more code ...
session.a() # inserts `a` into `txs`
session.b(x, y) # inserts `b` and `(x, y)` into `txs`
# Now is outside of context manager.
# The following calls should execute immediately
t.a()
t.b(x, y)
t.c(k)
If something goes wrong such as an exception, discard txs (rollback). If it makes it to the end of the context, execute each instruction in order of insertion and pass in the appropriate arguments.
How can I trap the method calls for later execution?
And one extra caveat:
If get_session is not called, I want to execute the instructions immediately.

It's not pretty, but to follow the structure you're looking for you'd have to build a temporary transaction class that holds your function queues and execute it after the context manager exits. You'll need to use functools.partial, but there are some restrictions though:
All the queued up calls must be methods based on your "session" instance. Anything else gets executed right away.
I don't know how you want to handle non-callable session attributes, so for now I assume it'll just retrieve the value.
Having said that, here's my take on it:
from contextlib import contextmanager
from functools import partial
class TempTrans:
    """Proxy that records method calls on an object instead of running them.

    Every public callable attribute of the wrapped object is exposed under the
    same name, but calling it only appends the call (function plus arguments)
    to an internal queue.  ``_execute`` later replays the queue in insertion
    order.  Public non-callable attributes are copied over as plain values.
    """

    # pass in the object instance to mimic
    def __init__(self, obj):
        """Mirror the public attributes of *obj* onto this proxy.

        Args:
            obj: the instance whose methods should be queued.
        """
        self._queue = []
        # Walk the class namespace first and the instance namespace second so
        # that instance attributes win, as they do in normal lookup.  Only the
        # names are collected: attribute values may be unhashable (lists,
        # dicts), so they must never be put into a set.
        names = list(type(obj).__dict__) + list(getattr(obj, '__dict__', ()))
        for attr in names:
            if attr.startswith('_'):
                continue
            # Resolve through getattr so functions become bound methods and
            # descriptors yield the value the caller would actually see.
            val = getattr(obj, attr)
            if callable(val):
                setattr(self, attr, partial(self._add, val))
            else:
                # placeholder to handle non-callable attributes
                setattr(self, attr, val)

    # function to add to queue
    def _add(self, func, *args, **kwargs):
        """Queue ``func(*args, **kwargs)`` for later execution."""
        self._queue.append(partial(func, *args, **kwargs))

    # function to execute the queue
    def _execute(self):
        """Run the queued calls in order, stopping at the first failure.

        Calls that completed are removed from the queue; the failing call and
        everything after it stay queued so the caller can inspect them.
        """
        completed = 0
        for func in self._queue:
            try:
                func()
            except Exception:
                print('some error occured')
                break
            completed += 1
        # remove the functions that were successfully ran
        del self._queue[:completed]
Now onto the context manager (it will be outside your class, you can place it in as a class method if you wish):
@contextmanager
def temp_session(obj):
    """Context manager that queues calls on *obj* and runs them on clean exit.

    Yields a ``TempTrans`` proxy for *obj*.  If the ``with`` body finishes
    without raising, the queued calls are executed; if it raises an ordinary
    exception, the queue is left untouched and the exception is suppressed
    after printing a message.  The remaining queue is printed either way.

    Args:
        obj: the instance whose method calls should be deferred.
    """
    t = TempTrans(obj)
    try:
        yield t
        t._execute()
        print('Transactions successfully ran')
    except Exception:
        # Only ordinary errors trigger the "rollback" path; KeyboardInterrupt
        # and SystemExit must keep propagating instead of being swallowed.
        print('Encountered errors, queue was not executed')
    finally:
        print(t._queue)  # debug to see what's left of the queue
Usage:
f = Foo()
with temp_session(f) as session:
session.a('hello')
session.b(1, 2, 3)
# a hello
# b 1 2 3
# Transactions successfully ran
# []
with temp_session(f) as session:
session.a('hello')
session.b(1, 2, 3)
session.attrdoesnotexist # expect an error
# Encountered errors, queue was not executed
# [
# functools.partial(<bound method Foo.a of <__main__.Foo object at 0x0417D3B0>>, 'hello'),
# functools.partial(<bound method Foo.b of <__main__.Foo object at 0x0417D3B0>>, 1, 2, 3)
# ]
This solution is a bit contrived because of the way you wanted it structured, but if you don't need a context manager and don't need the session to look like a direct function call, it's trivial to just use partial:
my_queue = []
# some session
my_queue.append(partial(f, a))
my_queue.append(partial(f, b))
for func in my_queue:
func()

Related

Alternative to contextlib.nested with variable number of context managers

We have code that invokes a variable number of context managers depending on runtime parameters:
from contextlib import nested, contextmanager
#contextmanager
def my_context(arg):
print("entering", arg)
try:
yield arg
finally:
print("exiting", arg)
def my_fn(items):
with nested(*(my_context(arg) for arg in items)) as managers:
print("processing under", managers)
my_fn(range(3))
However, contextlib.nested is deprecated since Python 2.7:
DeprecationWarning: With-statements now directly support multiple context managers
The answers to Multiple variables in Python 'with' statement indicate that contextlib.nested has some "confusing error prone quirks", but the suggested alternative of using the multiple-manager with statement won't work for a variable number of context managers (and also breaks backward compatibility).
Are there any alternatives to contextlib.nested that aren't deprecated and (preferably) don't have the same bugs?
Or should I continue to use contextlib.nested and ignore the warning? If so, should I plan for contextlib.nested to be removed at some time in the future?
The new Python 3 contextlib.ExitStack class was added as a replacement for contextlib.nested() (see issue 13585).
It is coded in such a way you can use it in Python 2 directly:
import sys
from collections import deque
class ExitStack(object):
"""Context manager for dynamic management of a stack of exit callbacks
For example:
with ExitStack() as stack:
files = [stack.enter_context(open(fname)) for fname in filenames]
# All opened files will automatically be closed at the end of
# the with statement, even if attempts to open files later
# in the list raise an exception
"""
def __init__(self):
self._exit_callbacks = deque()
def pop_all(self):
"""Preserve the context stack by transferring it to a new instance"""
new_stack = type(self)()
new_stack._exit_callbacks = self._exit_callbacks
self._exit_callbacks = deque()
return new_stack
def _push_cm_exit(self, cm, cm_exit):
"""Helper to correctly register callbacks to __exit__ methods"""
def _exit_wrapper(*exc_details):
return cm_exit(cm, *exc_details)
_exit_wrapper.__self__ = cm
self.push(_exit_wrapper)
def push(self, exit):
"""Registers a callback with the standard __exit__ method signature
Can suppress exceptions the same way __exit__ methods can.
Also accepts any object with an __exit__ method (registering a call
to the method instead of the object itself)
"""
# We use an unbound method rather than a bound method to follow
# the standard lookup behaviour for special methods
_cb_type = type(exit)
try:
exit_method = _cb_type.__exit__
except AttributeError:
# Not a context manager, so assume its a callable
self._exit_callbacks.append(exit)
else:
self._push_cm_exit(exit, exit_method)
return exit # Allow use as a decorator
def callback(self, callback, *args, **kwds):
"""Registers an arbitrary callback and arguments.
Cannot suppress exceptions.
"""
def _exit_wrapper(exc_type, exc, tb):
callback(*args, **kwds)
# We changed the signature, so using #wraps is not appropriate, but
# setting __wrapped__ may still help with introspection
_exit_wrapper.__wrapped__ = callback
self.push(_exit_wrapper)
return callback # Allow use as a decorator
def enter_context(self, cm):
"""Enters the supplied context manager
If successful, also pushes its __exit__ method as a callback and
returns the result of the __enter__ method.
"""
# We look up the special methods on the type to match the with statement
_cm_type = type(cm)
_exit = _cm_type.__exit__
result = _cm_type.__enter__(cm)
self._push_cm_exit(cm, _exit)
return result
def close(self):
"""Immediately unwind the context stack"""
self.__exit__(None, None, None)
def __enter__(self):
return self
def __exit__(self, *exc_details):
# We manipulate the exception state so it behaves as though
# we were actually nesting multiple with statements
frame_exc = sys.exc_info()[1]
def _fix_exception_context(new_exc, old_exc):
while 1:
exc_context = new_exc.__context__
if exc_context in (None, frame_exc):
break
new_exc = exc_context
new_exc.__context__ = old_exc
# Callbacks are invoked in LIFO order to match the behaviour of
# nested context managers
suppressed_exc = False
while self._exit_callbacks:
cb = self._exit_callbacks.pop()
try:
if cb(*exc_details):
suppressed_exc = True
exc_details = (None, None, None)
except:
new_exc_details = sys.exc_info()
# simulate the stack of exceptions by setting the context
_fix_exception_context(new_exc_details[1], exc_details[1])
if not self._exit_callbacks:
raise
exc_details = new_exc_details
return suppressed_exc
Use this as your context manager, then add nested context managers at will:
with ExitStack() as stack:
managers = [stack.enter_context(my_context(arg)) for arg in items]
print("processing under", managers)
For your example context manager, this prints:
>>> my_fn(range(3))
('entering', 0)
('entering', 1)
('entering', 2)
('processing under', [0, 1, 2])
('exiting', 2)
('exiting', 1)
('exiting', 0)
You can also install the contextlib2 module; it includes ExitStack as a backport.
It's a little vexing that the python3 maintainers chose to break backwards compatibility, since implementing nested in terms of ExitStack is pretty straightforward:
try:
    from contextlib import nested  # Python 2
except ImportError:
    from contextlib import ExitStack, contextmanager

    @contextmanager
    def nested(*contexts):
        """
        Reimplementation of nested in python 3.

        Enters every context manager in order and yields a list holding the
        value each ``__enter__`` returned, so ``with nested(a, b) as (x, y)``
        binds the same objects as ``with a as x, b as y``.  The managers are
        exited in reverse order when the block ends.
        """
        with ExitStack() as stack:
            # Yield the entered values, not the manager objects themselves.
            yield [stack.enter_context(ctx) for ctx in contexts]
import sys
import contextlib
class nodeA(object):
def __init__(self):
print( '__init__ nodeA')
def __enter__(self):
print( '__enter__ nodeA')
def __exit__(self, a, b, c):
print( '__exit__ nodeA')
class nodeB(object):
def __init__(self):
print( '__init__ nodeB')
def __enter__(self):
print( '__enter__ nodeB')
def __exit__(self, a, b, c):
print( '__exit__ nodeB')
class nodeC(object):
def __init__(self):
print( '__init__ nodeC')
def __enter__(self):
print( '__enter__ nodeC')
def __exit__(self, a, b, c):
print( '__exit__ nodeC')
print( 'Start...')
a = nodeA()
b = nodeB()
c = nodeC()
print( 'Python version: %s' % (sys.version))
if sys.version.startswith('2'):
print('Use python 2!')
with contextlib.nested(a, b, c):
print('hallo?')
if sys.version.startswith('3'):
print('Use python 3!')
with contextlib.ExitStack() as stack:
[stack.enter_context(arg) for arg in [a,b,c]]
print('...end!')

Turn functions with a callback into Python generators?

The Scipy minimization function (just to use as an example), has the option of adding a callback function at each step. So I can do something like,
def my_callback(x):
print x
scipy.optimize.fmin(func, x0, callback=my_callback)
Is there a way to use the callback function to create a generator version of fmin, so that I could do,
for x in my_fmin(func,x0):
print x
It seems like it might be possible with some combination of yields and sends, but I can't quite think of anything.
As pointed in the comments, you could do it in a new thread, using Queue. The drawback is that you'd still need some way to access the final result (what fmin returns at the end). My example below uses an optional callback to do something with it (another option would be to just yield it also, though your calling code would have to differentiate between iteration results and final results):
from thread import start_new_thread
from Queue import Queue
def my_fmin(func, x0, end_callback=(lambda x:x), timeout=None):
    """Generator version of ``scipy.optimize.fmin``.

    Runs ``fmin`` in a background thread and yields every value that ``fmin``
    passes to its per-iteration callback.

    Args:
        func: objective function handed to ``fmin``.
        x0: initial guess handed to ``fmin``.
        end_callback: called in the worker thread with the value ``fmin``
            returned, once the optimisation finished successfully.
        timeout: maximum number of seconds to wait for each item; ``None``
            waits indefinitely.

    Yields:
        Each value reported through ``fmin``'s callback, in order.
    """
    q = Queue()  # fmin produces, the generator consumes
    job_done = object()  # signals the processing is done

    # Producer
    def my_callback(x):
        q.put(x)

    def task():
        try:
            ret = scipy.optimize.fmin(func, x0, callback=my_callback)
        finally:
            # Always signal completion, even when fmin (or func) raises;
            # otherwise the consumer below would block forever.
            q.put(job_done)
        end_callback(ret)  # "Returns" the result of the main call

    # Starts fmin in a new thread
    start_new_thread(task, ())

    # Consumer
    while True:
        next_item = q.get(True, timeout)  # Blocks until an input is available
        if next_item is job_done:
            break
        yield next_item
Update: to block the execution of the next iteration until the consumer has finished processing the last one, it's also necessary to use task_done and join.
# Producer
def my_callback(x):
q.put(x)
q.join() # Blocks until task_done is called
# Consumer
while True:
next_item = q.get(True,timeout) # Blocks until an input is available
if next_item is job_done:
break
yield next_item
q.task_done() # Unblocks the producer, so a new iteration can start
Note that maxsize=1 is not necessary, since no new item will be added to the queue until the last one is consumed.
Update 2: Also note that, unless all items are eventually retrieved by this generator, the created thread will deadlock (it will block forever and its resources will never be released). The producer is waiting on the queue, and since it stores a reference to that queue, it will never be reclaimed by the gc even if the consumer is. The queue will then become unreachable, so nobody will be able to release the lock.
A clean solution for that is unknown, if possible at all (since it would depend on the particular function used in the place of fmin). A workaround could be made using timeout, having the producer raises an exception if put blocks for too long:
q = Queue(maxsize=1)
# Producer
def my_callback(x):
q.put(x)
q.put("dummy",True,timeout) # Blocks until the first result is retrieved
q.join() # Blocks again until task_done is called
# Consumer
while True:
next_item = q.get(True,timeout) # Blocks until an input is available
q.task_done() # (one "task_done" per "get")
if next_item is job_done:
break
yield next_item
q.get() # Retrieves the "dummy" object (must be after yield)
q.task_done() # Unblocks the producer, so a new iteration can start
Generator as coroutine (no threading)
Let's have FakeFtp with retrbinary function using callback being called with each successful read of chunk of data:
class FakeFtp(object):
    """Minimal stand-in for an FTP client that feeds chunks to a callback."""

    def __init__(self):
        # A one-shot iterator: once the chunks are consumed, later
        # retrbinary calls deliver nothing.
        chunks = ["aaa", "bbb", "ccc", "ddd"]
        self.data = iter(chunks)

    def login(self, user, password):
        """Remember the credentials; no connection is made."""
        self.user, self.password = user, password

    def retrbinary(self, cmd, cb):
        """Call *cb* once per remaining chunk; *cmd* is accepted but unused."""
        for piece in self.data:
            cb(piece)
Using simple callback function has disadvantage, that it is called repeatedly and the callback
function cannot easily keep context between calls.
Following code defines process_chunks generator, which will be able receiving chunks of data one
by one and processing them. In contrast to simple callback, here we are able to keep all the
processing within one function without losing context.
from contextlib import closing
from itertools import count
def main():
    """Feed the chunks of a FakeFtp transfer into a coroutine and print the results.

    The coroutine keeps its loop counter and the shared ``processed`` list
    across callbacks, which a plain callback function could not do as easily.
    """
    processed = []

    def process_chunks():
        """Coroutine: receive chunks via ``send`` until it is closed."""
        for i in count():
            try:
                # (repeatedly) get the chunk to process
                chunk = yield
            except GeneratorExit:
                # finish_up
                print("Finishing up.")
                return
            else:
                # Here process the chunk as you like
                print("inside coroutine, processing chunk:", i, chunk)
                product = "processed({i}): {chunk}".format(i=i, chunk=chunk)
                processed.append(product)

    with closing(process_chunks()) as coroutine:
        # Get the coroutine to the first yield.  The builtin next() works on
        # Python 2.6+ and Python 3, unlike the Python-2-only .next() method.
        next(coroutine)
        ftp = FakeFtp()
        # next line repeatedly calls `coroutine.send(data)`
        ftp.retrbinary("RETR binary", cb=coroutine.send)
        # each callback "jumps" to `yield` line in `process_chunks`

    print("processed result", processed)
    print("DONE")
To see the code in action, put the FakeFtp class, the code shown above and following line:
main()
into one file and call it:
$ python headsandtails.py
('inside coroutine, processing chunk:', 0, 'aaa')
('inside coroutine, processing chunk:', 1, 'bbb')
('inside coroutine, processing chunk:', 2, 'ccc')
('inside coroutine, processing chunk:', 3, 'ddd')
Finishing up.
('processed result', ['processed(0): aaa', 'processed(1): bbb', 'processed(2): ccc', 'processed(3): ddd'])
DONE
How it works
processed = [] is here just to show, the generator process_chunks shall have no problems to
cooperate with its external context. All is wrapped into def main(): to prove, there is no need to
use global variables.
def process_chunks() is the core of the solution. It might have one shot input parameters (not
used here), but main point, where it receives input is each yield line returning what anyone sends
via .send(data) into the instance of this generator. One can call coroutine.send(chunk) directly, but in this example it is done via the callback, which refers to that bound method (cb=coroutine.send).
Note, that in real solution there is no problem to have multiple yields in the code, they are
processed one by one. This might be used e.g. to read (and ignore) header of CSV file and then
continue processing records with data.
We could instantiate and use the generator as follows:
coroutine = process_chunks()
# Get the coroutine to the first yield
coroutine.next()
ftp = FakeFtp()
# next line repeatedly calls `coroutine.send(data)`
ftp.retrbinary("RETR binary", cb=coroutine.send)
# each callback "jumps" to `yield` line in `process_chunks`
# close the coroutine (will throw the `GeneratorExit` exception into the
# `process_chunks` coroutine).
coroutine.close()
Real code is using contextlib closing context manager to ensure, the coroutine.close() is
always called.
Conclusions
This solution is not providing sort of iterator to consume data from in traditional style "from
outside". On the other hand, we are able to:
use the generator "from inside"
keep all iterative processing within one function without being interrupted between callbacks
optionally use external context
provide usable results to outside
all this can be done without using threading
Credits: The solution is heavily inspired by SO answer Python FTP “chunk” iterator (without loading entire file into memory)
written by user2357112
Concept Use a blocking queue with maxsize=1 and a producer/consumer model.
The callback produces, then the next call to the callback will block on the full queue.
The consumer then yields the value from the queue, tries to get another value, and blocks on read.
The producer is then allowed to push to the queue; rinse and repeat.
Usage:
def dummy(func, arg, callback=None):
for i in range(100):
callback(func(arg+i))
# Dummy example:
for i in Iteratorize(dummy, lambda x: x+1, 0):
print(i)
# example with scipy:
for i in Iteratorize(scipy.optimize.fmin, func, x0):
print(i)
Can be used as expected for an iterator:
for i in take(5, Iteratorize(dummy, lambda x: x+1, 0)):
print(i)
Iteratorize class:
from thread import start_new_thread
from Queue import Queue
class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    The wrapped function runs in a background thread as
    ``func(ifunc, arg, callback=...)``; each value it passes to the callback
    becomes one item of the iteration.  The queue holds a single item, so the
    wrapped function blocks in its callback until the previous value has been
    consumed.
    """

    def __init__(self, func, ifunc, arg, callback=None):
        """Start the wrapped function in a new thread.

        Args:
            func: function accepting ``(ifunc, arg, callback=...)``.
            ifunc: first positional argument forwarded to *func*.
            arg: second positional argument forwarded to *func*.
            callback: optional; called in the worker thread with the return
                value of *func* after it finished successfully.
        """
        self.mfunc = func
        self.ifunc = ifunc
        self.c_callback = callback
        self.q = Queue(maxsize=1)
        self.stored_arg = arg
        self.sentinel = object()

        def _callback(val):
            self.q.put(val)

        def gentask():
            try:
                ret = self.mfunc(self.ifunc, self.stored_arg, callback=_callback)
            finally:
                # Always terminate the iteration, even if the wrapped function
                # raised; otherwise the consumer would block forever in next().
                self.q.put(self.sentinel)
            if self.c_callback:
                self.c_callback(ret)

        start_new_thread(gentask, ())

    def __iter__(self):
        return self

    def next(self):
        """Return the next produced value; raise StopIteration when done."""
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        return obj

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next
Can probably do with some cleaning up to accept *args and **kwargs for the function being wrapped and/or the final result callback.
How about
data = []
scipy.optimize.fmin(func,x0,callback=data.append)
for line in data:
print line
If not, what exactly do you want to do with the generator's data?
A variant of Frits' answer, that:
Supports send to choose a return value for the callback
Supports throw to choose an exception for the callback
Supports close to gracefully shut down
Does not compute a queue item until it is requested
The complete code with tests can be found on github
import queue
import threading
import collections.abc
class generator_from_callback(collections.abc.Generator):
    """Generator driven by a function that reports values through a callback.

    ``expr`` runs in a worker thread.  Every time it invokes the callback, the
    value is handed to the consumer and the worker blocks until the consumer
    asks for the next item, so no value is computed before it is requested.
    ``send`` chooses the callback's return value, ``throw`` makes the callback
    raise, and ``close`` makes it raise ``GeneratorExit``.
    """

    def __init__(self, expr):
        """
        expr: a function that takes a callback
        """
        self._expr = expr
        self._ready_queue = queue.Queue(1)
        self._done_queue = queue.Queue(1)
        self._done_holder = [False]

        # local to avoid reference cycles
        ready_queue = self._ready_queue
        done_queue = self._done_queue
        done_holder = self._done_holder

        def callback(value):
            # Publish the value, then wait for the consumer's next command.
            done_queue.put((False, value))
            cmd, *args = ready_queue.get()
            if cmd == 'close':
                raise GeneratorExit
            elif cmd == 'send':
                return args[0]
            elif cmd == 'throw':
                raise args[0]

        def thread_func():
            try:
                # The first command arrives before expr has started, mirroring
                # the rules for a just-started generator.
                cmd, *args = ready_queue.get()
                if cmd == 'close':
                    raise GeneratorExit
                elif cmd == 'send':
                    if args[0] is not None:
                        raise TypeError("can't send non-None value to a just-started generator")
                elif cmd == 'throw':
                    raise args[0]
                ret = expr(callback)
                raise StopIteration(ret)
            except BaseException as e:
                # Any exit from expr ends the generator; the exception is
                # delivered to whoever is waiting on the done queue.
                done_holder[0] = True
                done_queue.put((True, e))

        self._thread = threading.Thread(target=thread_func)
        self._thread.start()

    def _exchange(self, command):
        """Send one command to the worker and return (or raise) its reply."""
        if self._done_holder[0]:
            raise StopIteration
        self._ready_queue.put(command)
        is_exception, val = self._done_queue.get()
        if is_exception:
            raise val
        return val

    def __next__(self):
        return self.send(None)

    def send(self, value):
        """Resume the worker; *value* becomes the callback's return value."""
        return self._exchange(('send', value))

    def throw(self, exc, val=None, tb=None):
        """Make the pending callback raise inside the worker.

        Accepts an exception instance or class, and also the
        ``(type, value, traceback)`` form of the generator protocol.
        """
        if val is not None:
            exc = val if isinstance(val, BaseException) else exc(val)
        elif isinstance(exc, type):
            exc = exc()
        if tb is not None:
            exc = exc.with_traceback(tb)
        return self._exchange(('throw', exc))

    def close(self):
        """Ask the worker to stop (if still running) and wait for it."""
        if not self._done_holder[0]:
            self._ready_queue.put(('close',))
        self._thread.join()

    def __del__(self):
        # __init__ may have failed before the thread was created.
        if getattr(self, '_thread', None) is not None:
            self.close()
Which works as:
In [3]: def callback(f):
...: ret = f(1)
...: print("gave 1, got {}".format(ret))
...: f(2)
...: print("gave 2")
...: f(3)
...:
In [4]: i = generator_from_callback(callback)
In [5]: next(i)
Out[5]: 1
In [6]: i.send(4)
gave 1, got 4
Out[6]: 2
In [7]: next(i)
gave 2, got None
Out[7]: 3
In [8]: next(i)
StopIteration
For scipy.optimize.fmin, you would use generator_from_callback(lambda c: scipy.optimize.fmin(func, x0, callback=c))
Solution to handle non-blocking callbacks
The solution using threading and queue is pretty good, of high-performance and cross-platform, probably the best one.
Here I provide this not-too-bad solution, which is mainly for handling non-blocking callbacks, e.g. called from the parent function through threading.Thread(target=callback).start(), or other non-blocking ways.
import pickle
import select
import subprocess
def my_fmin(func, x0):
# open a process to use as a pipeline
proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
def my_callback(x):
# x might be any object, not only str, so we use pickle to dump it
proc.stdin.write(pickle.dumps(x).replace(b'\n', b'\\n') + b'\n')
proc.stdin.flush()
from scipy import optimize
optimize.fmin(func, x0, callback=my_callback)
# this is meant to handle non-blocking callbacks, e.g. called somewhere
# through `threading.Thread(target=callback).start()`
while select.select([proc.stdout], [], [], 0)[0]:
yield pickle.loads(proc.stdout.readline()[:-1].replace(b'\\n', b'\n'))
# close the process
proc.communicate()
Then you can use the function like this:
# unfortunately, `scipy.optimize.fmin`'s callback is blocking.
# so this example is just for showing how-to.
for x in my_fmin(lambda x: x**2, 3):
print(x)
Although this solution seems quite simple and readable, it's not as high-performance as the threading and queue solution, because:
Processes are much heavier than threads.
Passing data through pipe instead of memory is much slower.
Besides, it doesn't work on Windows, because the select module on Windows can only handle sockets, not pipes and other file descriptors.
For a super simple approach...
def callback_to_generator():
data = []
method_with_callback(blah, foo, callback=data.append)
for item in data:
yield item
Yes, this isn't good for large data
Yes, this blocks on all items being processed first
But it still might be useful for some use cases :)
Also thanks to #winston-ewert as this is just a small variant on his answer :)

How to override an async NDB method and write your own tasklet

I am trying to grasp async operations introduced with NDB, I would like to use #ndb.tasklet to async some of my work.
The simple example would be string_id generation in the overridden get_or_insert_async
Is this a correct way to do things? What can be improved here?
#classmethod
#ndb.tasklet
def get_or_insert_async(cls, *args):
id = cls.make_string_id(*args)
model = yield super(MyModel, cls).get_or_insert_async(id)
raise ndb.Return(model)
Another example would be doing stuff in a loop in fan-out kinda way. Is this correct?
#classmethod
#ndb.tasklet
def do_stuff(cls, some_collection):
#ndb.tasklet
def internal_tasklet(data):
do_some_long_taking_stuff(data)
id = make_stuff_needed_for_id(data)
model = yield cls.get_or_insert_async(id)
model.long_processing(data)
yield model.put_async()
raise ndb.Return(None)
for data in some_collection:
# will it parallelise internal_tasklet execution?
yield internal_tasklet(data)
raise ndb.Return(None)
EDIT:
As I understand the whole concept, yields are here to provide Future objects, which are then collected in parallel (where possible) and executed asynchronously. Am I correct?
After Nick's hint (is it what you meant?):
#classmethod
#ndb.tasklet
def do_stuff(cls, some_collection):
#ndb.tasklet
def internal_tasklet(data):
do_some_long_taking_stuff(data)
id = make_stuff_needed_for_id(data)
model = yield cls.get_or_insert_async(id)
model.long_processing(data)
raise ndb.Return(model) # change here
models = []
for data in some_collection:
# will it parallelise internal_tasklet execution?
m = yield internal_tasklet(data) # change here
models.appedn(m) # change here
keys = yield ndb.put_multi_async(models) # change here
raise ndb.Return(keys) # change here
EDIT:
New revised version…
@classmethod
@ndb.tasklet
def do_stuff(cls, some_collection):
    """Process every item of *some_collection* concurrently and store the results.

    One inner tasklet is started per item; the resulting models are then
    written with a single ``put_multi_async`` call.

    Returns (via ``ndb.Return``) the value produced by ``ndb.put_multi_async``.
    """

    @ndb.tasklet
    def internal_tasklet(data):
        do_some_long_taking_stuff(data)
        string_id = make_stuff_needed_for_id(data)
        model = yield cls.get_or_insert_async(string_id)
        model.long_processing(data)
        raise ndb.Return(model)

    # tasklets won't run in parallel but while
    # one is waiting on a yield (and RPC underneath)
    # the other will advance it's execution
    # up to a next yield or return
    futures = [internal_tasklet(data) for data in some_collection]
    # Yielding the whole list waits on all of the futures at the same time
    # and gives back their results in order.
    models = yield futures
    keys = yield ndb.put_multi_async(models)
    raise ndb.Return(keys)
You don't need to use tasklets if all you want to do is call something async with different arguments - just return the wrapped function's return value, like this:
def get_or_insert_async(cls, *args):
id = cls.make_string_id(*args)
return super(MyModel, cls).get_or_insert_async(id)
I'd be cautious about this for several reasons, though: You're changing the meaning of a built in function, which is usually a bad idea, you're changing the signature (positional arguments but no keyword arguments), and you're not passing extra arguments through to the original function.
For your second example, yielding things one at a time will force NDB to wait on their completion - 'yield' is synonymous with 'wait'. Instead, execute the tasklet function for each element in the collection, then wait on them all (by calling yield on the list) at the same time.

python function fails to return unless the last statement is slow

I'm working on a subclass of threading.Thread which allows its methods to be called and run in the thread represented by the object that they are called on as opposed to the usual behavior. I do this by using decorators on the target method that place the call to the method in a collections.deque and using the run method to process the deque.
the run method uses a while not self.__stop: statement and a threading.Condition object to wait for a call to be placed in the deque and then call self.__process_calls. The else part of the while loop makes a final call to __process_calls. if self.__stop, an exception is raised on any attempts to call one of the 'callable' methods from another thread.
The problem is that __process_calls fails to return unless the last statement is a print which I discovered during debugging. I've tried a = 1 and an explicit return but neither work. with any print statement as the final statement of the function though, it returns and the thread doesn't hang. Any ideas what's going on?
EDIT: It was pointed out by David Zaslavsky that the print works because it takes a while
and I've confirmed that
The code's a little long but hopefully, my explanation above is clear enough to help understand it.
import threading
import collections
class BrokenPromise(Exception): pass
class CallableThreadError(Exception): pass
class CallToNonRunningThreadError(CallableThreadError): pass
class Promise(object):
    """Handle for a value that another thread will deliver later.

    The producer appends the value to ``deque`` and notifies ``condition``
    while holding it; the consumer collects the value with ``read``.
    """

    def __init__(self, deque, condition):
        self._condition = condition
        self._deque = deque

    def read(self, timeout=None):
        """Wait for the value and return it.

        Args:
            timeout: seconds to wait; a falsy value (``None`` or ``0``) waits
                without a time limit.

        Raises:
            BrokenPromise: if the wait ended and no value was delivered.
        """
        # The emptiness test and the wait must happen under the same lock the
        # producer holds when it appends and notifies.  Testing first and
        # locking afterwards lets the notify slip in between, after which the
        # wait would never be woken.
        with self._condition:
            if not self._deque:
                if timeout:
                    self._condition.wait(timeout)
                else:
                    self._condition.wait()
            if self._deque:
                value = self._deque.popleft()
                # A promise is single-use: drop the references once consumed.
                del self._deque
                del self._condition
                return value
        raise BrokenPromise

    def ready(self):
        """Return True if a value is already waiting to be read."""
        return bool(self._deque)
class CallableThread(threading.Thread):
def __init__(self, *args, **kwargs):
# _enqueued_calls is used to store tuples that encode a function call.
# It is processed by the run method
self.__enqueued_calls = collections.deque()
# _enqueue_call_permission is for callers to signal that they have
# placed something on the queue
self.__enqueue_call_permission = threading.Condition()
self.__stop = False
super(CallableThread, self).__init__(*args, **kwargs)
#staticmethod
def blocking_method(f):
u"""A decorator function to implement a blocking method on a thread"""
# the returned function enqueues the decorated function and blocks
# until the decorated function# is called and returns. It then returns
# the value unmodified. The code in register runs in the calling thread
# and the decorated method runs in thread that it is called on
f = CallableThread.nonblocking_method_with_promise(f)
def register(self, *args, **kwargs):
p = f(self, *args, **kwargs)
return p.read()
return register
#staticmethod
def nonblocking_method_with_promise(f):
u"""A decorator function to implement a non-blocking method on a
thread
"""
# the returned function enqueues the decorated function and returns a
# Promise object.N The code in register runs in the calling thread
# and the decorated method runs in thread that it is called on.
def register(self, *args, **kwargs):
call_complete = threading.Condition()
response_deque = collections.deque()
self.__push_call(f, args, kwargs, response_deque, call_complete)
return Promise(response_deque, call_complete)
return register
#staticmethod
def nonblocking_method(f):
def register(self, *args, **kwargs):
self.__push_call(f, args, kwargs)
return register
def run(self):
while not self.__stop: # while we've not been killed
with self.__enqueue_call_permission:
# get the condition so that we can wait on it if we need too.
if not self.__enqueued_calls:
self.__enqueue_call_permission.wait()
self.__process_calls()
else:
# if we exit because self._run == False, finish processing
# the pending calls if there are any
self.__process_calls()
def stop(self):
u""" Signal the thread to stop"""
with self.__enqueue_call_permission:
# we do this in case the run method is stuck waiting on an update
self.__stop = True
self.__enqueue_call_permission.notify()
def __process_calls(self):
print "processing calls"
while self.__enqueued_calls:
((f, args, kwargs),
response_deque, call_complete) = self.__enqueued_calls.popleft()
if call_complete:
with call_complete:
response_deque.append(f(self, *args, **kwargs))
call_complete.notify()
else:
f(self, *args, **kwargs)
# this is where you place the print statement if you want to see the
# behavior
def __push_call(self, f, args, kwargs, response_deque=None,
call_complete=None):
if self.__stop:
raise CallToNonRunningThreadError(
"This thread is no longer accepting calls")
with self.__enqueue_call_permission:
self.__enqueued_calls.append(((f, args, kwargs),
response_deque, call_complete))
self.__enqueue_call_permission.notify()
#if __name__=='__main__': i lost the indent on the following code in copying but
#it doesn't matter in this context
class TestThread(CallableThread):
    u"""Increment a counter on each call and return the new value"""
    counter = 0

    @CallableThread.nonblocking_method_with_promise
    def increment(self):
        self.counter += 1
        return self.counter


class LogThread(CallableThread):
    u"""Print each logged message from its own thread"""

    @CallableThread.nonblocking_method
    def log(self, message):
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(message)


# Demo: start a logger thread and a counter thread, queue one increment,
# read its promise, then stop and join both threads.
l = LogThread()
l.start()
l.log("logger started")
t = TestThread()
t.start()
l.log("test thread started")
p = t.increment()
l.log("promise aquired")
v = p.read()
l.log("promise read")
l.log("{0} read from promise".format(v))
l.stop()
t.stop()
l.join()
t.join()
__process_calls is modifying __enqueued_calls without owning the lock. This may be creating a race condition.
Edit: deque may be "threadsafe" (ie not corrupted by thread accesses), but the checking of its state still should be locked.
The stop condition is also not safe.
Comments inline:
def run(self):
# Worker loop quoted from the question, annotated with the race condition.
while not self.__stop: # while we've not been killed
with self.__enqueue_call_permission:
# get the condition so that we can wait on it if we need to.
### __stop should be checked again here: stop() may have set it (and sent
### its notify) after the loop test above but before this lock was taken,
### in which case the wait() below is not woken by that notify.
if not self.__enqueued_calls:
self.__enqueue_call_permission.wait()
self.__process_calls()
else:
# if we exit because the loop condition became false (stop requested),
# finish processing the pending calls if there are any
self.__process_calls()

Python Observer Pattern: Examples, Tips? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 4 years ago.
Improve this question
Are there any exemplary examples of the GoF Observer implemented in Python? I have a bit of code which currently has bits of debugging code laced through the key class (currently generating messages to stderr if a magic env is set). Additionally, the class has an interface for incrementally returning results as well as storing them (in memory) for post processing. (The class itself is a job manager for concurrently executing commands on remote machines over ssh).
Currently the usage of the class looks something like:
# Polling usage: start the job, then handle results as they become available.
job = SSHJobMan(hostlist, cmd)
job.start()
while not job.done():
    for finished in job.poll():
        incrementally_process(job.results[finished])
    time.sleep(0.2) # or other more useful work
# Everything has completed: process the full result set.
post_process(job.results)
An alternative usage model is:
# Blocking usage: no polling loop, results are handled once after wait().
job = SSHJobMan(hostlist, cmd)
job.wait() # implicitly performs a start()
process(job.results)
This all works fine for the current utility. However it does lack flexibility. For example I currently support a brief output format or a progress bar as incremental results, I also support
brief, complete and "merged message" outputs for the post_process() function.
However, I'd like to support multiple results/output streams (progress bar to the terminal, debugging and warnings to a log file, outputs from successful jobs to one file/directory, error messages and other results from non-successful jobs to another, etc).
This sounds like a situation that calls for Observer ... have instances of my class accept registration from other objects and call them back with specific types of events as they occur.
I'm looking at PyPubSub since I saw several references to that in SO related questions. I'm not sure I'm ready to add the external dependency to my utility but I could see value in using their interface as a model for mine if that's going to make it easier for others to use. (The project is intended as both a standalone command line utility and a class for writing other scripts/utilities).
In short I know how to do what I want ... but there are numerous ways to accomplish it. I want suggestions on what's most likely to work for other users of the code in the long run.
The code itself is at: classh.
However it does lack flexibility.
Well... actually, this looks like a good design to me if an asynchronous API is what you want. It usually is. Maybe all you need is to switch from stderr to Python's logging module, which has a sort of publish/subscribe model of its own, what with Logger.addHandler() and so on.
If you do want to support observers, my advice is to keep it simple. You really only need a few lines of code.
class Event(object):
    """Plain attribute bag describing one occurrence fired by an Observable."""


class Observable(object):
    """Minimal publish/subscribe mix-in.

    Callbacks registered with ``subscribe`` are called, in registration
    order, with a single ``Event`` argument each time ``fire`` is called.
    """

    def __init__(self):
        self.callbacks = []

    def subscribe(self, callback):
        """Register ``callback`` to be called with every fired Event."""
        self.callbacks.append(callback)

    def fire(self, **attrs):
        """Build an Event carrying ``attrs`` and deliver it to all callbacks.

        The event's ``source`` attribute is set to this object first, then
        every keyword argument is copied onto the event as an attribute.
        """
        e = Event()
        e.source = self
        for k, v in attrs.items():
            setattr(e, k, v)
        # Iterate over a snapshot so a callback may subscribe further
        # callbacks without affecting the dispatch that is in progress.
        for fn in list(self.callbacks):
            fn(e)
Your Job class can subclass Observable. When something of interest happens, call self.fire(type="progress", percent=50) or the like.
I think people in the other answers overdo it. You can easily achieve events in Python with less than 15 lines of code.
You simply have two classes: Event and Observer. Any class that wants to listen for an event needs to inherit Observer and set itself to listen (observe) for a specific event. When an Event is instantiated and fired, all observers listening to that event will run the specified callback functions.
class Observer():
    """Base class for objects that react to named events.

    Every instance is recorded in the class-wide ``_observers`` list and
    keeps its own mapping of event name -> callback.
    """
    _observers = []

    def __init__(self):
        self._observables = {}
        self._observers.append(self)

    def observe(self, event_name, callback):
        """Call ``callback`` whenever an event named ``event_name`` fires."""
        self._observables[event_name] = callback


class Event():
    """A named event with a payload, fired on creation unless told otherwise."""

    def __init__(self, name, data, autofire = True):
        self.name, self.data = name, data
        if autofire:
            self.fire()

    def fire(self):
        """Pass ``data`` to the callback of every observer of this name."""
        for listener in Observer._observers:
            registry = listener._observables
            if self.name not in registry:
                continue
            registry[self.name](self.data)
Example:
class Room(Observer):
    """Example observer that announces arrivals."""

    def __init__(self):
        print("Room is ready.")
        Observer.__init__(self) # Observer's init needs to be called

    def someone_arrived(self, who):
        """Callback for the 'someone arrived' event; ``who`` is its payload."""
        print(who + " has arrived!")


# Register the bound method for the event, then create (and auto-fire) it.
room = Room()
room.observe('someone arrived', room.someone_arrived)
Event('someone arrived', 'Lenard')
Output:
Room is ready.
Lenard has arrived!
A few more approaches...
Example: the logging module
Maybe all you need is to switch from stderr to Python's logging module, which has a powerful publish/subscribe model.
It's easy to get started producing log records.
# producer
import logging

log = logging.getLogger("myjobs") # that's all the setup you need


class MyJob(object):
    """Example job that reports its progress through the module logger."""

    def run(self):
        """Log a start message, ten progress messages and a completion message."""
        log.info("starting job")
        n = 10
        for i in range(n):
            # Pass the value as a logging argument so the message is only
            # formatted when a handler actually emits the record.
            log.info("%.1f%% done", 100.0 * i / n)
        log.info("work complete")
On the consumer side there's a bit more work. Unfortunately configuring logger output takes, like, 7 whole lines of code to do. ;)
# consumer
import myjobs
import sys
import logging

if user_wants_log_output:
    # Send the job's INFO-and-above records to stderr with a timestamped format.
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(logging.INFO)
    stderr_handler.setFormatter(logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    myjobs.log.addHandler(stderr_handler)
    myjobs.log.setLevel(logging.INFO)

myjobs.MyJob().run()
On the other hand there's an amazing amount of stuff in the logging package. If you ever need to send log data to a rotating set of files, an email address, and the Windows Event Log, you're covered.
Example: simplest possible observer
But you don't need to use any library at all. An extremely simple way to support observers is to call a method that does nothing.
# producer
class MyJob(object):
    """Example job whose progress hook is a method that does nothing."""

    def on_progress(self, pct):
        """Hook invoked with the percent complete.

        The default implementation is a no-op; override it in a subclass
        or assign a callable to the instance attribute.
        """
        pass

    def run(self):
        """Report progress for ten steps, then report 100 percent."""
        total_steps = 10
        step = 0
        while step < total_steps:
            self.on_progress(100.0 * step / total_steps)
            step += 1
        self.on_progress(100.0)
# consumer
import sys
import myjobs


def report_progress(pct):
    """Write one progress line to stdout."""
    return sys.stdout.write("%.1f%% done\n" % pct)


job = myjobs.MyJob()
# Replace the no-op hook on this one instance.
job.on_progress = report_progress
job.run()
Sometimes instead of writing a lambda, you can just say job.on_progress = progressBar.update, which is nice.
This is about as simple as it gets. One drawback is that it doesn't naturally support multiple listeners subscribing to the same events.
Example: C#-like events
With a bit of support code, you can get C#-like events in Python. Here's the code:
# glue code
class event(object):
    """Descriptor that gives every instance its own ``boundevent``.

    Used as a decorator on a placeholder method; only the method's name and
    docstring are kept.  The per-instance ``boundevent`` is created on first
    access and stored in the instance ``__dict__`` under a key that starts
    with a space, so it cannot clash with a normal attribute name.
    """

    def __init__(self, func):
        self.__doc__ = func.__doc__
        self._key = ' ' + func.__name__

    def __get__(self, obj, cls):
        # Accessed on the class itself: there is no instance to bind to.
        if obj is None:
            return self
        try:
            return obj.__dict__[self._key]
        except KeyError:
            be = obj.__dict__[self._key] = boundevent()
            return be


class boundevent(object):
    """List of handlers supporting ``+=``, ``-=`` and calling."""

    def __init__(self):
        self._fns = []

    def __iadd__(self, fn):
        """Subscribe ``fn``."""
        self._fns.append(fn)
        return self

    def __isub__(self, fn):
        """Unsubscribe ``fn``; raises ValueError if it was not subscribed."""
        self._fns.remove(fn)
        return self

    def __call__(self, *args, **kwargs):
        """Call every handler with the given arguments."""
        # Iterate over a copy so handlers may (un)subscribe while firing.
        for f in self._fns[:]:
            f(*args, **kwargs)
The producer declares the event using a decorator:
# producer
class MyJob(object):
    """Example job exposing a ``progress`` event."""

    @event
    def progress(pct):
        """Called when progress is made. pct is the percent complete."""

    def run(self):
        """Fire ``progress`` for 0 to 100 percent in ten equal steps."""
        n = 10
        for i in range(n+1):
            self.progress(100.0 * i / n)
#consumer
import sys
import myjobs


def report_progress(pct):
    """Write one progress line to stdout."""
    return sys.stdout.write("%.1f%% done\n" % pct)


job = myjobs.MyJob()
# += adds a listener; more listeners can be added the same way.
job.progress += report_progress
job.run()
This works exactly like the "simple observer" code above, but you can add as many listeners as you like using +=. (Unlike C#, there are no event handler types, you don't have to new EventHandler(foo.bar) when subscribing to an event, and you don't have to check for null before firing the event. Like C#, events do not squelch exceptions.)
How to choose
If logging does everything you need, use that. Otherwise do the simplest thing that works for you. The key thing to note is that you don't need to take on a big external dependency.
How about an implementation where objects aren't kept alive just because they're observing something? Below please find an implementation of the observer pattern with the following features:
Usage is pythonic. To add an observer to a bound method .bar of instance foo, just do foo.bar.addObserver(observer).
Observers are not kept alive by virtue of being observers. In other words, the observer code uses no strong references.
No sub-classing necessary (descriptors ftw).
Can be used with unhashable types.
Can be used as many times you want in a single class.
(bonus) As of today the code exists in a proper downloadable, installable package on github.
Here's the code (the github package or PyPI package have the most up to date implementation):
import weakref
import functools
class ObservableMethod(object):
    """
    A proxy for a bound method which can be observed.

    I behave like a bound method, but other bound methods can subscribe to be
    called whenever I am called.
    """

    def __init__(self, obj, func):
        self.func = func
        functools.update_wrapper(self, func)
        # Only a weak reference to the owner is kept.
        self.objectWeakRef = weakref.ref(obj)
        self.callbacks = {}  # observing object ID -> weak ref, methodNames

    def addObserver(self, boundMethod):
        """
        Register a bound method to observe this ObservableMethod.

        The observing method will be called whenever this ObservableMethod is
        called, and with the same arguments and keyword arguments. If a
        boundMethod has already been registered as a callback, trying to add
        it again does nothing. In other words, there is no way to sign up an
        observer to be called back multiple times.
        """
        obj = boundMethod.__self__
        ID = id(obj)
        if ID in self.callbacks:
            s = self.callbacks[ID][1]
        else:
            # The weakref callback drops the entry once the observer dies.
            wr = weakref.ref(obj, Cleanup(ID, self.callbacks))
            s = set()
            self.callbacks[ID] = (wr, s)
        s.add(boundMethod.__name__)

    def discardObserver(self, boundMethod):
        """
        Un-register a bound method.
        """
        obj = boundMethod.__self__
        if id(obj) in self.callbacks:
            self.callbacks[id(obj)][1].discard(boundMethod.__name__)

    def __call__(self, *arg, **kw):
        """
        Invoke the method which I proxy, and all of its callbacks.

        The callbacks are called with the same *args and **kw as the main
        method.
        """
        result = self.func(self.objectWeakRef(), *arg, **kw)
        # Work on snapshots: an observer may be collected (its Cleanup then
        # shrinks the dict) or may add/discard observers while we iterate.
        for wr, methodNames in list(self.callbacks.values()):
            obj = wr()
            if obj is None:
                # Observer already gone; its Cleanup removes the entry.
                continue
            for methodName in list(methodNames):
                getattr(obj, methodName)(*arg, **kw)
        return result

    @property
    def __self__(self):
        """
        Get a strong reference to the object owning this ObservableMethod

        This is needed so that ObservableMethod instances can observe other
        ObservableMethod instances.
        """
        return self.objectWeakRef()


class ObservableMethodDescriptor(object):

    def __init__(self, func):
        """
        To each instance of the class using this descriptor, I associate an
        ObservableMethod.
        """
        self.instances = {}  # Instance id -> (weak ref, Observablemethod)
        self._func = func

    def __get__(self, inst, cls):
        """Return the ObservableMethod of ``inst``, creating it on first use."""
        if inst is None:
            return self
        ID = id(inst)
        if ID in self.instances:
            wr, om = self.instances[ID]
            # Compare with None: a live instance may itself be falsy.
            if wr() is None:
                msg = "Object id %d should have been cleaned up"%(ID,)
                raise RuntimeError(msg)
        else:
            wr = weakref.ref(inst, Cleanup(ID, self.instances))
            om = ObservableMethod(inst, self._func)
            self.instances[ID] = (wr, om)
        return om

    def __set__(self, inst, val):
        raise RuntimeError("Assigning to ObservableMethod not supported")


def event(func):
    """Decorator turning ``func`` into an observable method."""
    return ObservableMethodDescriptor(func)


class Cleanup(object):
    """
    I remove an element from a dict whenever I'm called.

    Use me as a weakref.ref callback to remove an object's id from a dict
    when that object is garbage collected.
    """

    def __init__(self, key, d):
        self.key = key
        self.d = d

    def __call__(self, wr):
        # pop() instead of del: the key may already have been removed.
        self.d.pop(self.key, None)
To use this we just decorate methods we want to make observable with @event. Here's an example
class Foo(object):
    """Example class with one observable method (bar) and one plain method."""

    def __init__(self, name):
        self.name = name

    @event
    def bar(self):
        print("%s called bar"%(self.name,))

    def baz(self):
        print("%s called baz"%(self.name,))


a = Foo('a')
b = Foo('b')
# Make b.bar run whenever a.bar is called.
a.bar.addObserver(b.bar)
a.bar()
From wikipedia:
from collections import defaultdict
class Observable (defaultdict):
    '''Mapping of subscriber callable -> the value it returned on the last emit.'''

    def __init__ (self):
        # Unknown keys get a fresh ``object()`` as their initial state.
        super(Observable, self).__init__(object)

    def emit (self, *args):
        '''Pass parameters to all observers and update states.'''
        for observer in self:
            self[observer] = observer(*args)

    def subscribe (self, subscriber):
        '''Add a new subscriber to self.'''
        # Looking the key up is enough: defaultdict inserts missing keys.
        self[subscriber]

    def stat (self):
        '''Return a tuple containing the state of each observer.'''
        return tuple(self[observer] for observer in self)
The Observable is used like this.
myObservable = Observable ()
# subscribe some inlined functions.
# myObservable[lambda x, y: x * y] would also work here.
for fn in (lambda x, y: x * y,
           lambda x, y: float(x) / y,
           lambda x, y: x + y,
           lambda x, y: x - y):
    myObservable.subscribe(fn)
# emit parameters to each observer
myObservable.emit(6, 2)
# get updated values
myObservable.stat() # returns the values 12, 3.0, 8 and 4, in the dict's iteration order
Based on Jason's answer, I implemented the C#-like events example as a fully-fledged python module including documentation and tests. I love fancy pythonic stuff :)
So, if you want some ready-to-use solution, you can just use the code on github.
Example: twisted log observers
To register an observer yourCallable() (a callable that accepts a dictionary) to receive all log events (in addition to any other observers):
twisted.python.log.addObserver(yourCallable)
Example: complete producer/consumer example
From Twisted-Python mailing list:
#!/usr/bin/env python
"""Serve as a sample implementation of a twisted producer/consumer
system, with a simple TCP server which asks the user how many random
integers they want, and it sends the result set back to the user, one
result per line."""
import random
from zope.interface import implements
from twisted.internet import interfaces, reactor
from twisted.internet.protocol import Factory
from twisted.protocols.basic import LineReceiver
class Producer:
    """Send back the requested number of random integers to the client."""
    implements(interfaces.IPushProducer)

    def __init__(self, proto, cnt):
        self._paused = False
        self._produced = 0
        self._goal = cnt
        self._proto = proto

    def pauseProducing(self):
        """When we've produced data too fast, pauseProducing() will be
        called (reentrantly from within resumeProducing's transport.write
        method, most likely), so set a flag that causes production to pause
        temporarily."""
        self._paused = True
        peer = self._proto.transport.getPeer()
        print('pausing connection from %s' % (peer))

    def resumeProducing(self):
        """Write random integers, one per line, until paused or done.

        Once the requested count has been written, unregister from the
        transport and close the connection.
        """
        self._paused = False
        while True:
            if self._paused or not self._produced < self._goal:
                break
            next_int = random.randint(0, 10000)
            self._proto.transport.write('%d\r\n' % (next_int))
            self._produced += 1
        if self._produced == self._goal:
            self._proto.transport.unregisterProducer()
            self._proto.transport.loseConnection()

    def stopProducing(self):
        """Nothing to clean up."""
        pass
class ServeRandom(LineReceiver):
    """Serve up random data."""

    def connectionMade(self):
        print('connection made from %s' % (self.transport.getPeer()))
        self.transport.write('how many random integers do you want?\r\n')

    def lineReceived(self, line):
        """Start producing the number of integers requested on ``line``.

        The line comes from the remote client, so it is validated: anything
        that is not a non-negative integer gets an error reply and the
        client may try again.  (A negative count would never reach the
        producer's goal, leaving the connection open forever.)
        """
        try:
            cnt = int(line.strip())
        except ValueError:
            cnt = -1
        if cnt < 0:
            self.transport.write('please send a non-negative integer\r\n')
            return
        producer = Producer(self, cnt)
        self.transport.registerProducer(producer, True)
        producer.resumeProducing()

    def connectionLost(self, reason):
        print('connection lost from %s' % (self.transport.getPeer()))


# Serve the protocol on TCP port 1234 and run the reactor.
factory = Factory()
factory.protocol = ServeRandom
reactor.listenTCP(1234, factory)
print('listening on 1234...')
reactor.run()
OP asks "Are there any exemplary examples of the GoF Observer implemented in Python?"
This is an example in Python 3.7. This Observable class meets the requirement of creating a relationship between one observable and many observers while remaining independent of their structure.
from functools import partial
from dataclasses import dataclass, field
import sys
from typing import List, Callable
@dataclass
class Observable:
    """One observable that notifies any number of registered callables."""

    # Callables invoked by notify(), in registration order.
    observers: List[Callable] = field(default_factory=list)

    def register(self, observer: Callable):
        """Add ``observer`` to the list of callables to notify."""
        self.observers.append(observer)

    def deregister(self, observer: Callable):
        """Remove ``observer``; raises ValueError if it is not registered."""
        self.observers.remove(observer)

    def notify(self, *args, **kwargs):
        """Call every registered observer with the given arguments."""
        # Iterate over a snapshot so an observer may deregister itself (or
        # register others) without disturbing this round of notifications.
        for observer in tuple(self.observers):
            observer(*args, **kwargs)
def usage_demo():
    """Register several observers on one Observable and notify them once."""
    observable = Observable()

    def numbered_observer(number):
        # Build an observer that reports its own number and what it received.
        def report(*args, **kwargs):
            print(f'Observer {number} called with args={args}, kwargs={kwargs}')
        return report

    # Register two simple observers.
    observable.register(numbered_observer(1))
    observable.register(numbered_observer(2))

    # Create an observer function, register it, then deregister it.
    def callable_3():
        print('Observer 3 NOT called.')

    observable.register(callable_3)
    observable.deregister(callable_3)

    # Create a general purpose observer function and register four observers.
    def callable_x(*args, **kwargs):
        print(f'{args[0]} observer called with args={args}, kwargs={kwargs}')

    gui_fields = ('Form field 4', 'Form field 5', 'Form field 6', 'Form field 7')
    for gui_field in gui_fields:
        observable.register(partial(callable_x, gui_field))

    observable.notify('test')


if __name__ == '__main__':
    sys.exit(usage_demo())
A functional approach to observer design:
def add_listener(obj, method_name, listener):
    """Call ``listener`` after every call of ``obj.<method_name>``.

    The listeners are stored on ``obj`` in a list attribute named
    ``<method_name>_listeners``.  The first listener causes the method to be
    replaced, on this object only, by a wrapper that calls the original
    method and then each listener as ``listener(obj, *args, **kwargs)``.
    The wrapper returns whatever the original method returned.
    """
    # Imported here so the function does not depend on a module-level import.
    from functools import wraps

    # Get any existing listeners
    listener_attr = method_name + '_listeners'
    listeners = getattr(obj, listener_attr, None)
    if listeners:
        # Event is already set up, so just add another listener
        listeners.append(listener)
        return

    # This is the first listener, so set up the method wrapper
    listeners = [listener]
    setattr(obj, listener_attr, listeners)
    # Get the object's method
    method = getattr(obj, method_name)

    # wraps() records the original as ``__wrapped__``, which is what
    # remove_listener uses to restore it.
    @wraps(method)
    def method_wrapper(*args, **kwargs):
        result = method(*args, **kwargs)
        # Snapshot: a listener may add or remove listeners while running.
        for notify in list(listeners):
            notify(obj, *args, **kwargs)  # Listener also has object argument
        return result

    # Replace the original method with the wrapper
    setattr(obj, method_name, method_wrapper)
def remove_listener(obj, method_name, listener):
    """Stop calling ``listener`` after ``obj.<method_name>``.

    Removes the first occurrence of ``listener`` from the object's
    ``<method_name>_listeners`` list; an unknown listener is ignored.  When
    the last listener is removed, the list attribute is deleted and the
    method is set back to the wrapper's ``__wrapped__`` original.
    """
    # Get any existing listeners
    listener_attr = method_name + '_listeners'
    listeners = getattr(obj, listener_attr, None)
    if not listeners:
        return

    # Remove the listener
    if listener in listeners:
        listeners.remove(listener)

    # If this was the last listener, then remove the method wrapper
    if not listeners:
        method = getattr(obj, method_name)
        delattr(obj, listener_attr)
        setattr(obj, method_name, method.__wrapped__)
These methods can then be used to add a listener to any class method. For example:
class MyClass(object):
    """Example class whose method gets a listener attached."""

    def __init__(self, prop):
        self.prop = prop

    def some_method(self, num, string):
        print('method:', num, string)


def listener_method(obj, num, string):
    """Listener: receives the object followed by the method's arguments."""
    print('listener:', num, string, obj.prop)


my = MyClass('my_prop')
# With the listener attached, the method call is followed by the listener.
add_listener(my, 'some_method', listener_method)
my.some_method(42, 'with listener')
# After removal only the method itself runs.
remove_listener(my, 'some_method', listener_method)
my.some_method(42, 'without listener')
And the output is:
method: 42 with listener
listener: 42 with listener my_prop
method: 42 without listener

Categories