How to override an async NDB method and write your own tasklet

How to override an async NDB method and write your own tasklet - python

I am trying to grasp async operations introduced with NDB, I would like to use #ndb.tasklet to async some of my work.
The simple example would be string_id generation in the overridden get_or_insert_async
Is this a correct way to to things? What can be improved here?
#classmethod
#ndb.tasklet
def get_or_insert_async(cls, *args):
id = cls.make_string_id(*args)
model = yield super(MyModel, cls).get_or_insert_async(id)
raise ndb.Return(model)
Another example would be doing stuff in a loop in fan-out kinda way. Is this correct?
#classmethod
#ndb.tasklet
def do_stuff(cls, some_collection):
#ndb.tasklet
def internal_tasklet(data):
do_some_long_taking_stuff(data)
id = make_stuff_needed_for_id(data)
model = yield cls.get_or_insert_async(id)
model.long_processing(data)
yield model.put_async()
raise ndb.Return(None)
for data in some_collection:
# will it parallelise internal_tasklet execution?
yield internal_tasklet(data)
raise ndb.Return(None)
EDIT:
As understood the whole concept, yields are here to provide a Future objects which are then collected in parallel (where possible) and executed asynchronously. Am I correct?
After Nick's hint (is it what you meant?):
#classmethod
#ndb.tasklet
def do_stuff(cls, some_collection):
#ndb.tasklet
def internal_tasklet(data):
do_some_long_taking_stuff(data)
id = make_stuff_needed_for_id(data)
model = yield cls.get_or_insert_async(id)
model.long_processing(data)
raise ndb.Return(model) # change here
models = []
for data in some_collection:
# will it parallelise internal_tasklet execution?
m = yield internal_tasklet(data) # change here
models.appedn(m) # change here
keys = yield ndb.put_multi_async(models) # change here
raise ndb.Return(keys) # change here
EDIT:
New revised version…
#classmethod
#ndb.tasklet
def do_stuff(cls, some_collection):
#ndb.tasklet
def internal_tasklet(data):
do_some_long_taking_stuff(data)
id = make_stuff_needed_for_id(data)
model = yield cls.get_or_insert_async(id)
model.long_processing(data)
raise ndb.Return(model)
futures = []
for data in some_collection:
# tasklets won't run in parallel but while
# one is waiting on a yield (and RPC underneath)
# the other will advance it's execution
# up to a next yield or return
fut = internal_tasklet(data)) # change here
futures.append(fut) # change here
Future.wait_all(futures) # change here
models = [fut.get_result() for fut in futures]
keys = yield ndb.put_multi_async(models) # change here
raise ndb.Return(keys) # change here

You don't need to use tasklets if all you want to do is call something async with different arguments - just return the wrapped function's return value, like this:
def get_or_insert_async(cls, *args):
id = cls.make_string_id(*args)
return super(MyModel, cls).get_or_insert_async(id)
I'd be cautious about this for several reasons, though: You're changing the meaning of a built in function, which is usually a bad idea, you're changing the signature (positional arguments but no keyword arguments), and you're not passing extra arguments through to the original function.
For your second example, yielding things one at a time will force NDB to wait on their completion - 'yield' is synonymous with 'wait'. Instead, execute the tasklet function for each element in the collection, then wait on them all (by calling yield on the list) at the same time.

Related

Queue objects should only be shared between processes through inheritance even when mp.Manager.Queue() is used

as shown in the below posted code, i use map_async as shown in method startProcessing(self).when the execution of the code enters the if-statement in the code in section labeled code-1 the getResults method should be called
and blocks waiting for all processes to finish. the problem i am facing is that despite run(self,params) method returns list, an invocation to proc.get() causes the app to crash and generates the error message posted below.
please let me know why i am getting this error message and how to solve it
code1-instantiation of an object
NDVIsPer10mX10mForNoneKeyWindowQueue = multiprocessing.Manager().Queue()
areasOfCoveragePerNoneKeyWindowQueue = multiprocessing.Manager().Queue()
noneKeyGridCellsProcessingPool = NoneKeyGridCellsProcessingPool(
NDVIsPer10mX10mForNoneKeyWindowQueue,
areasOfCoveragePerNoneKeyWindowQueue,
ndviTIFFDetails.getNDVIValuePer10mX10m(),
pixelsValuesDoNotSatisfyThresholdInTIFFImageDatasetCnt,
fourCornersOfWindowInEPSG25832,
[]
)
noneKeyGridCellsProcessingPool.startProcessing()
if (noneKeyWindowCnt > 0):
'''close pools: do not accept or allow any new tasks/jobs'''
resultsForNoneKeyGridCellsProcessingPool = NoneKeyGridCellsProcessingPool.getResults() #<====generates the error message posted below
NoneKeyGridCellsProcessingPool.closePool()
code in class NoneKeyGridCellsProcessingPool
#staticmethod
def getResults():
for proc in NoneKeyGridCellsProcessingPool.procs:
proc.get() #<====generates the error message posted below
last = MiscUtils.getElementFromArrayForIndex(NoneKeyGridCellsProcessingPool.procs,len(NoneKeyGridCellsProcessingPool.procs) - 1)
return last.get()
def __init__(
self,
NDVIsPer10mX10mForNoneKeyWindowQueue:Queue,
areasOfCoveragePerNoneKeyWindowQueue:Queue,
NDVIValuePer10mX10m,
pixelsValuesDoNotSatisfyThresholdInTIFFImageDatasetCnt,
fourCornersOfWindowInEPSG25832,
fourCornersOfNoneKeyWindowInEPSG4326
):
super().__init__()
self.params = (
NDVIsPer10mX10mForNoneKeyWindowQueue,
areasOfCoveragePerNoneKeyWindowQueue,
NDVIValuePer10mX10m,
pixelsValuesDoNotSatisfyThresholdInTIFFImageDatasetCnt,
fourCornersOfWindowInEPSG25832,
fourCornersOfNoneKeyWindowInEPSG4326
)
self.res = None
def run(self,params):
self.NDVIsPer10mX10mForNoneKeyWindowQueue = params[0]
self.areasOfCoveragePerNoneKeyWindowQueue = params[1]
NDVIValuePer10mX10m = params[2]
pixelsValuesDoNotSatisfyThresholdInTIFFImageDatasetCnt = params[3]
fourCornersOfWindowInEPSG25832 = params[4]
fourCornersOfNoneKeyWindowInEPSG4326 = params[5]
....
....
....
return self.NDVIsPer10mX10mForNoneKeyWindowQueue,self.areasOfCoveragePerNoneKeyWindowQueue
def startProcessing(self):
self.res = NoneKeyGridCellsProcessingPool.pool.map_async(self.run, [self.params])
NoneKeyGridCellsProcessingPool.procs.append(self.res)
error msg:
File "C:\Python310\lib\multiprocessing\context.py", line 359, in assert_spawning
raise RuntimeError(
RuntimeError: Queue objects should only be shared between processes through inheritance
note:
i am returning a list as shown in the code in the class NoneKeyGridCellsProcessingPool, but .get() method as in `proc.get()` seems sees the returned values as a queue??!!
1st_update
to solve this issue, i commented the return statment in class NoneKeyGridCellsProcessingPool out, and replaced it with return 'ok', however, i got the same error message

You cannot pass or return a multiprocessing.Queue to or from a child process. Either the queue should already be available in the parent process, in which case you can simply read from it. Otherwise you must pass an actual list or tuple rather than a multiprocessing queue.
The following is an example that produces a similar stack trace. Given you have said that the types of the queues you are not returning are not multiprocessing.Queue. It is reasonable to assume one of the parameters contains a multiprocessing.Queue (or is one). You'll need to figure out which. Maybe try dummy jobs that just takes and return its argument, once for each separate parameter. NB. self is passed as an implicit parameter to when you do pool.map_async(self.run, ...)
import multiprocessing
def main():
with multiprocessing.Pool() as pool:
queue = multiprocessing.Queue()
# The given queue is not allowed to be passed as an argument,
# but no error is raised by the following line.
future = pool.apply_async(dummy_job, [queue])
# Instead, the error is raised here. If the queue is changed to
# an instance of Manager.Queue then no error will be raised.
result = future.get()
print(result)
def dummy_job(arg):
return 'foo'
if __name__ == '__main__':
main()
Thought

Use contextmanager to trap instructions for later execution

I want to achieve a pseudo-db-like transaction using context manager.
Take for example:
class Transactor:
def a(): pass
def b(d, b): pass
def c(i): pass
#contextmanager
def get_session(self):
txs = []
yield self # accumulate method calls
for tx in tx:
tx() # somehow pass the arguments
def main():
t = Transactor()
with t.get_session() as session:
session.a() # inserts `a` into `txs`
... more code ...
session.c(value) # inserts `c` and `(value)` into `txs`
session.b(value1, value2) # inserts `b` and `(value1, value2)` into `txs`
... more code ...
# non-transator related code
f = open('file.txt') # If this throws an exception,
# break out of the context manager,
# and discard previous transactor calls.
... more code ...
session.a() # inserts `a` into `txs`
session.b(x, y) # inserts `b` and `(x, y)` into `txs`
# Now is outside of context manager.
# The following calls should execute immediately
t.a()
t.b(x, y)
t.c(k)
If something goes wrong such as an exception, discard txs (rollback). If it makes it to the end of the context, execute each instruction in order of insertion and pass in the appropriate arguments.
How can to trap the method call for later execution?
And one extra caveat:
If get_session is not called, I want to execute the instructions immediately.

It's not pretty, but to follow the structure you're looking for you'd have to build a temporary transaction class that holds your function queues and execute it after the context manager exits. You'll need to use functools.partial, but there are some restrictions though:
All the queued up calls must be methods based on your "session" instance. Anything else gets executed right away.
I don't know how you want to handle non-callable session attributes, so for now I assume it'll just retrieve the value.
Having said that, here's my take on it:
from functools import partial
class TempTrans:
# pass in the object instance to mimic
def __init__(self, obj):
self._queue = []
# iterate through the attributes and methods within the object and its class
for attr, val in type(obj).__dict__.items() ^ obj.__dict__.items():
if not attr.startswith('_'):
if callable(val):
setattr(self, attr, partial(self._add, getattr(obj, attr)))
else:
# placeholder to handle non-callable attributes
setattr(self, attr, val)
# function to add to queue
def _add(self, func, *args, **kwargs):
self._queue.append(partial(func, *args, **kwargs))
# function to execute the queue
def _execute(self):
_remove = []
# iterate through the queue to call the functions.
# I suggest catching errors here in case your functions falls through
for func in self._queue:
try:
func()
_remove.append(func)
except Exception as e:
print('some error occured')
break
# remove the functions that were successfully ran
for func in _remove:
self._queue.remove(func)
Now onto the context manager (it will be outside your class, you can place it in as a class method if you wish):
#contextmanager
def temp_session(obj):
t = TempTrans(obj)
try:
yield t
t._execute()
print('Transactions successfully ran')
except:
print('Encountered errors, queue was not executed')
finally:
print(t._queue) # debug to see what's left of the queue
Usage:
f = Foo()
with temp_session(f) as session:
session.a('hello')
session.b(1, 2, 3)
# a hello
# b 1 2 3
# Transactions successfully ran
# []
with temp_session(f) as session:
session.a('hello')
session.b(1, 2, 3)
session.attrdoesnotexist # expect an error
# Encountered errors, queue was not executed
# [
# functools.partial(<bound method Foo.a of <__main__.Foo object at 0x0417D3B0>>, 'hello'),
# functools.partial(<bound method Foo.b of <__main__.Foo object at 0x0417D3B0>>, 1, 2, 3)
# ]
This solution was a bit contrived because of the way you wanted it structured, but if you didn't need a context manager and doesn't need the session to look like a direct function call, it's trivial to just use partial:
my_queue = []
# some session
my_queue.append(partial(f, a))
my_queue.append(partial(f, b))
for func in my_queue:
func()

How to create generic method for methods that have the same structure?

I have the following method in CommentsService class:
async def background_job_auto_approve(self):
while True:
new = get_comments_by_status(CommentStatus.NEW.value)
pending = get_comments_by_status(CommentStatus.PENDING.value)
all = new + pending
for comment in all:
if check_it_auto_approve(item=comment):
await self.auto_approve(comment_id=comment['comment_id'],
alert_id=comment['alert_id'])
yield comment
await asyncio.sleep(self.check_expire_seconds)
But I have exactly the same method in my AlertsService:
async def background_job_auto_approve(self):
while True:
new = get_alerts_by_status(AlertStatus.NEW.value)
pending = get_alerts_by_status(AlertStatus.PENDING.value)
all = new + pending
for alert in all:
if check_it_auto_approve(item=alert):
await self.auto_approve(alert_id=alert['alert_id'])
yield alert
await asyncio.sleep(self.check_expire_seconds)
How to avoid code duplication? I have the same problem for other methods in those classes.

It's difficult to come up with a truly generic way to do this without the rest of the code base / context of it's purpose. However, if you can solidify the pattern you seem to be using (which is [objectname]Status, [objectname]_id, get_[objectname]s_by_status), this will work fine. I'd recommend adding an additional class for methods such as auto_approve
class AutoApprovalLoop(object):
def __init__(self, method_ptr=None, cls_ptr=None, approval_key=None):
self.method_ptr = method_ptr # one of get_comments_by_status or get_alerts_by_status
self.cls_ptr = cls_ptr # one of AlertStatus or CommentStatus
self.approval_key = approval_key # one of 'comment_id' or 'alert_id'
def loop(self):
while True:
new = self.method_ptr(self.cls_ptr.NEW.value)
pending = self.method_ptr(self.cls_ptr.PENDING.value)
all = new + pending
for item in all:
if check_it_auto_approve(item=item)
await self.auto_approve(**{self.approval_key: item[self.approval_key]})
yield item
await asyncio.sleep(self.check_expire_seconds)

Bit of warning. I am not on Python 3 and I am not entirely sure how the asyncio and async keywords behave when you forward calls to another function.
First thing first: pass in as variables anything that is different between your first and second function. That is: which function returns your awaiting-approvals, and their status criteria object.
Then, use prepared operator.itemgetter to generically retrieve the keys from each obj. If itemgetter has several fields to look up it returns a tuple, otherwise a scalar - that's what the isinstance(tu, tuple) tries to normalize. Use *tu to unpack that back for the auto_approve call.
Call check_it_auto_approve and self.auto_approve with positionals, using
async def generic_background_job_auto_approve(self, get_obj_by_status, ObjStatus, myitemgetter):
while True:
new = get_obj_by_status(ObjStatus.NEW.value)
pending = get_obj_by_status(ObjStatus.PENDING.value)
all = new + pending
for obj in all:
#call with positional if possible.
if check_it_auto_approve(obj):
tu = myitemgetter(obj)
#itemgetters return a tuple if multiple keys, a scalar if only one
tu = tu if isinstance(tu, tuple) else (tu,)
#assuming your auto_approve does ok with positional variables, doesn't need named ones
await self.auto_approve(*tu)
yield obj
await asyncio.sleep(self.check_expire_seconds)
ok, now you are ready to call your generic code
import operator
async def call_for_comment(self):
return self.generic_background_job_auto_approve(
get_obj_by_status=get_comments_by_status
,ObjStatus=CommentStatus
,myitemgetter=operator.itemgetter('comment_id','alert_id')
)
async def call_for_alert(self):
return self.generic_background_job_auto_approve(
get_obj_by_status=get_alerts_by_status
,ObjStatus=AlertStatus
,myitemgetter=operator.itemgetter('alert_id')
)
If you needed to call with named parameters, you could pass in a function that return a dict of the required variables and **mydict to call instead.

Turn functions with a callback into Python generators?

The Scipy minimization function (just to use as an example), has the option of adding a callback function at each step. So I can do something like,
def my_callback(x):
print x
scipy.optimize.fmin(func, x0, callback=my_callback)
Is there a way to use the callback function to create a generator version of fmin, so that I could do,
for x in my_fmin(func,x0):
print x
It seems like it might be possible with some combination of yields and sends, but I can quite think of anything.

As pointed in the comments, you could do it in a new thread, using Queue. The drawback is that you'd still need some way to access the final result (what fmin returns at the end). My example below uses an optional callback to do something with it (another option would be to just yield it also, though your calling code would have to differentiate between iteration results and final results):
from thread import start_new_thread
from Queue import Queue
def my_fmin(func, x0, end_callback=(lambda x:x), timeout=None):
q = Queue() # fmin produces, the generator consumes
job_done = object() # signals the processing is done
# Producer
def my_callback(x):
q.put(x)
def task():
ret = scipy.optimize.fmin(func,x0,callback=my_callback)
q.put(job_done)
end_callback(ret) # "Returns" the result of the main call
# Starts fmin in a new thread
start_new_thread(task,())
# Consumer
while True:
next_item = q.get(True,timeout) # Blocks until an input is available
if next_item is job_done:
break
yield next_item
Update: to block the execution of the next iteration until the consumer has finished processing the last one, it's also necessary to use task_done and join.
# Producer
def my_callback(x):
q.put(x)
q.join() # Blocks until task_done is called
# Consumer
while True:
next_item = q.get(True,timeout) # Blocks until an input is available
if next_item is job_done:
break
yield next_item
q.task_done() # Unblocks the producer, so a new iteration can start
Note that maxsize=1 is not necessary, since no new item will be added to the queue until the last one is consumed.
Update 2: Also note that, unless all items are eventually retrieved by this generator, the created thread will deadlock (it will block forever and its resources will never be released). The producer is waiting on the queue, and since it stores a reference to that queue, it will never be reclaimed by the gc even if the consumer is. The queue will then become unreachable, so nobody will be able to release the lock.
A clean solution for that is unknown, if possible at all (since it would depend on the particular function used in the place of fmin). A workaround could be made using timeout, having the producer raises an exception if put blocks for too long:
q = Queue(maxsize=1)
# Producer
def my_callback(x):
q.put(x)
q.put("dummy",True,timeout) # Blocks until the first result is retrieved
q.join() # Blocks again until task_done is called
# Consumer
while True:
next_item = q.get(True,timeout) # Blocks until an input is available
q.task_done() # (one "task_done" per "get")
if next_item is job_done:
break
yield next_item
q.get() # Retrieves the "dummy" object (must be after yield)
q.task_done() # Unblocks the producer, so a new iteration can start

Generator as coroutine (no threading)
Let's have FakeFtp with retrbinary function using callback being called with each successful read of chunk of data:
class FakeFtp(object):
def __init__(self):
self.data = iter(["aaa", "bbb", "ccc", "ddd"])
def login(self, user, password):
self.user = user
self.password = password
def retrbinary(self, cmd, cb):
for chunk in self.data:
cb(chunk)
Using simple callback function has disadvantage, that it is called repeatedly and the callback
function cannot easily keep context between calls.
Following code defines process_chunks generator, which will be able receiving chunks of data one
by one and processing them. In contrast to simple callback, here we are able to keep all the
processing within one function without losing context.
from contextlib import closing
from itertools import count
def main():
processed = []
def process_chunks():
for i in count():
try:
# (repeatedly) get the chunk to process
chunk = yield
except GeneratorExit:
# finish_up
print("Finishing up.")
return
else:
# Here process the chunk as you like
print("inside coroutine, processing chunk:", i, chunk)
product = "processed({i}): {chunk}".format(i=i, chunk=chunk)
processed.append(product)
with closing(process_chunks()) as coroutine:
# Get the coroutine to the first yield
coroutine.next()
ftp = FakeFtp()
# next line repeatedly calls `coroutine.send(data)`
ftp.retrbinary("RETR binary", cb=coroutine.send)
# each callback "jumps" to `yield` line in `process_chunks`
print("processed result", processed)
print("DONE")
To see the code in action, put the FakeFtp class, the code shown above and following line:
main()
into one file and call it:
$ python headsandtails.py
('inside coroutine, processing chunk:', 0, 'aaa')
('inside coroutine, processing chunk:', 1, 'bbb')
('inside coroutine, processing chunk:', 2, 'ccc')
('inside coroutine, processing chunk:', 3, 'ddd')
Finishing up.
('processed result', ['processed(0): aaa', 'processed(1): bbb', 'processed(2): ccc', 'processed(3): ddd'])
DONE
How it works
processed = [] is here just to show, the generator process_chunks shall have no problems to
cooperate with its external context. All is wrapped into def main(): to prove, there is no need to
use global variables.
def process_chunks() is the core of the solution. It might have one shot input parameters (not
used here), but main point, where it receives input is each yield line returning what anyone sends
via .send(data) into instance of this generator. One can coroutine.send(chunk) but in this example it is done via callback refering to this function callback.send.
Note, that in real solution there is no problem to have multiple yields in the code, they are
processed one by one. This might be used e.g. to read (and ignore) header of CSV file and then
continue processing records with data.
We could instantiate and use the generator as follows:
coroutine = process_chunks()
# Get the coroutine to the first yield
coroutine.next()
ftp = FakeFtp()
# next line repeatedly calls `coroutine.send(data)`
ftp.retrbinary("RETR binary", cb=coroutine.send)
# each callback "jumps" to `yield` line in `process_chunks`
# close the coroutine (will throw the `GeneratorExit` exception into the
# `process_chunks` coroutine).
coroutine.close()
Real code is using contextlib closing context manager to ensure, the coroutine.close() is
always called.
Conclusions
This solution is not providing sort of iterator to consume data from in traditional style "from
outside". On the other hand, we are able to:
use the generator "from inside"
keep all iterative processing within one function without being interrupted between callbacks
optionally use external context
provide usable results to outside
all this can be done without using threading
Credits: The solution is heavily inspired by SO answer Python FTP “chunk” iterator (without loading entire file into memory)
written by user2357112

Concept Use a blocking queue with maxsize=1 and a producer/consumer model.
The callback produces, then the next call to the callback will block on the full queue.
The consumer then yields the value from the queue, tries to get another value, and blocks on read.
The producer is the allowed to push to the queue, rinse and repeat.
Usage:
def dummy(func, arg, callback=None):
for i in range(100):
callback(func(arg+i))
# Dummy example:
for i in Iteratorize(dummy, lambda x: x+1, 0):
print(i)
# example with scipy:
for i in Iteratorize(scipy.optimize.fmin, func, x0):
print(i)
Can be used as expected for an iterator:
for i in take(5, Iteratorize(dummy, lambda x: x+1, 0)):
print(i)
Iteratorize class:
from thread import start_new_thread
from Queue import Queue
class Iteratorize:
"""
Transforms a function that takes a callback
into a lazy iterator (generator).
"""
def __init__(self, func, ifunc, arg, callback=None):
self.mfunc=func
self.ifunc=ifunc
self.c_callback=callback
self.q = Queue(maxsize=1)
self.stored_arg=arg
self.sentinel = object()
def _callback(val):
self.q.put(val)
def gentask():
ret = self.mfunc(self.ifunc, self.stored_arg, callback=_callback)
self.q.put(self.sentinel)
if self.c_callback:
self.c_callback(ret)
start_new_thread(gentask, ())
def __iter__(self):
return self
def next(self):
obj = self.q.get(True,None)
if obj is self.sentinel:
raise StopIteration
else:
return obj
Can probably do with some cleaning up to accept *args and **kwargs for the function being wrapped and/or the final result callback.

How about
data = []
scipy.optimize.fmin(func,x0,callback=data.append)
for line in data:
print line
If not, what exactly do you want to do with the generator's data?

A variant of Frits' answer, that:
Supports send to choose a return value for the callback
Supports throw to choose an exception for the callback
Supports close to gracefully shut down
Does not compute a queue item until it is requested
The complete code with tests can be found on github
import queue
import threading
import collections.abc
class generator_from_callback(collections.abc.Generator):
def __init__(self, expr):
"""
expr: a function that takes a callback
"""
self._expr = expr
self._done = False
self._ready_queue = queue.Queue(1)
self._done_queue = queue.Queue(1)
self._done_holder = [False]
# local to avoid reference cycles
ready_queue = self._ready_queue
done_queue = self._done_queue
done_holder = self._done_holder
def callback(value):
done_queue.put((False, value))
cmd, *args = ready_queue.get()
if cmd == 'close':
raise GeneratorExit
elif cmd == 'send':
return args[0]
elif cmd == 'throw':
raise args[0]
def thread_func():
try:
cmd, *args = ready_queue.get()
if cmd == 'close':
raise GeneratorExit
elif cmd == 'send':
if args[0] is not None:
raise TypeError("can't send non-None value to a just-started generator")
elif cmd == 'throw':
raise args[0]
ret = expr(callback)
raise StopIteration(ret)
except BaseException as e:
done_holder[0] = True
done_queue.put((True, e))
self._thread = threading.Thread(target=thread_func)
self._thread.start()
def __next__(self):
return self.send(None)
def send(self, value):
if self._done_holder[0]:
raise StopIteration
self._ready_queue.put(('send', value))
is_exception, val = self._done_queue.get()
if is_exception:
raise val
else:
return val
def throw(self, exc):
if self._done_holder[0]:
raise StopIteration
self._ready_queue.put(('throw', exc))
is_exception, val = self._done_queue.get()
if is_exception:
raise val
else:
return val
def close(self):
if not self._done_holder[0]:
self._ready_queue.put(('close',))
self._thread.join()
def __del__(self):
self.close()
Which works as:
In [3]: def callback(f):
...: ret = f(1)
...: print("gave 1, got {}".format(ret))
...: f(2)
...: print("gave 2")
...: f(3)
...:
In [4]: i = generator_from_callback(callback)
In [5]: next(i)
Out[5]: 1
In [6]: i.send(4)
gave 1, got 4
Out[6]: 2
In [7]: next(i)
gave 2, got None
Out[7]: 3
In [8]: next(i)
StopIteration
For scipy.optimize.fmin, you would use generator_from_callback(lambda c: scipy.optimize.fmin(func, x0, callback=c))

Solution to handle non-blocking callbacks
The solution using threading and queue is pretty good, of high-performance and cross-platform, probably the best one.
Here I provide this not-too-bad solution, which is mainly for handling non-blocking callbacks, e.g. called from the parent function through threading.Thread(target=callback).start(), or other non-blocking ways.
import pickle
import select
import subprocess
def my_fmin(func, x0):
# open a process to use as a pipeline
proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
def my_callback(x):
# x might be any object, not only str, so we use pickle to dump it
proc.stdin.write(pickle.dumps(x).replace(b'\n', b'\\n') + b'\n')
proc.stdin.flush()
from scipy import optimize
optimize.fmin(func, x0, callback=my_callback)
# this is meant to handle non-blocking callbacks, e.g. called somewhere
# through `threading.Thread(target=callback).start()`
while select.select([proc.stdout], [], [], 0)[0]:
yield pickle.loads(proc.stdout.readline()[:-1].replace(b'\\n', b'\n'))
# close the process
proc.communicate()
Then you can use the function like this:
# unfortunately, `scipy.optimize.fmin`'s callback is blocking.
# so this example is just for showing how-to.
for x in my_fmin(lambda x: x**2, 3):
print(x)
Although This solution seems quite simple and readable, it's not as high-performance as the threading and queue solution, because:
Processes are much heavier than threadings.
Passing data through pipe instead of memory is much slower.
Besides, it doesn't work on Windows, because the select module on Windows can only handle sockets, not pipes and other file descriptors.

For a super simple approach...
def callback_to_generator():
data = []
method_with_callback(blah, foo, callback=data.append)
for item in data:
yield item
Yes, this isn't good for large data
Yes, this blocks on all items being processed first
But it still might be useful for some use cases :)
Also thanks to #winston-ewert as this is just a small variant on his answer :)

Python Observer Pattern: Examples, Tips? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 4 years ago.
Improve this question
Are there any exemplary examples of the GoF Observer implemented in Python? I have a bit code which currently has bits of debugging code laced through the key class (currently generating messages to stderr if a magic env is set). Additionally, the class has an interface for incrementally return results as well as storing them (in memory) for post processing. (The class itself is a job manager for concurrently executing commands on remote machines over ssh).
Currently the usage of the class looks something like:
job = SSHJobMan(hostlist, cmd)
job.start()
while not job.done():
for each in job.poll():
incrementally_process(job.results[each])
time.sleep(0.2) # or other more useful work
post_process(job.results)
An alernative usage model is:
job = SSHJobMan(hostlist, cmd)
job.wait() # implicitly performs a start()
process(job.results)
This all works fine for the current utility. However it does lack flexibility. For example I currently support a brief output format or a progress bar as incremental results, I also support
brief, complete and "merged message" outputs for the post_process() function.
However, I'd like to support multiple results/output streams (progress bar to the terminal, debugging and warnings to a log file, outputs from successful jobs to one file/directory, error messages and other results from non-successful jobs to another, etc).
This sounds like a situation that calls for Observer ... have instances of my class accept registration from other objects and call them back with specific types of events as they occur.
I'm looking at PyPubSub since I saw several references to that in SO related questions. I'm not sure I'm ready to add the external dependency to my utility but I could see value in using their interface as a model for mine if that's going to make it easier for others to use. (The project is intended as both a standalone command line utility and a class for writing other scripts/utilities).
In short I know how to do what I want ... but there are numerous ways to accomplish it. I want suggestions on what's most likely to work for other users of the code in the long run.
The code itself is at: classh.

However it does lack flexibility.
Well... actually, this looks like a good design to me if an asynchronous API is what you want. It usually is. Maybe all you need is to switch from stderr to Python's logging module, which has a sort of publish/subscribe model of its own, what with Logger.addHandler() and so on.
If you do want to support observers, my advice is to keep it simple. You really only need a few lines of code.
class Event(object):
pass
class Observable(object):
def __init__(self):
self.callbacks = []
def subscribe(self, callback):
self.callbacks.append(callback)
def fire(self, **attrs):
e = Event()
e.source = self
for k, v in attrs.items():
setattr(e, k, v)
for fn in self.callbacks:
fn(e)
Your Job class can subclass Observable. When something of interest happens, call self.fire(type="progress", percent=50) or the like.

I think people in the other answers overdo it. You can easily achieve events in Python with less than 15 lines of code.
You simple have two classes: Event and Observer. Any class that wants to listen for an event, needs to inherit Observer and set to listen (observe) for a specific event. When an Event is instantiated and fired, all observers listening to that event will run the specified callback functions.
class Observer():
_observers = []
def __init__(self):
self._observers.append(self)
self._observables = {}
def observe(self, event_name, callback):
self._observables[event_name] = callback
class Event():
def __init__(self, name, data, autofire = True):
self.name = name
self.data = data
if autofire:
self.fire()
def fire(self):
for observer in Observer._observers:
if self.name in observer._observables:
observer._observables[self.name](self.data)
Example:
class Room(Observer):
def __init__(self):
print("Room is ready.")
Observer.__init__(self) # Observer's init needs to be called
def someone_arrived(self, who):
print(who + " has arrived!")
room = Room()
room.observe('someone arrived', room.someone_arrived)
Event('someone arrived', 'Lenard')
Output:
Room is ready.
Lenard has arrived!

A few more approaches...
Example: the logging module
Maybe all you need is to switch from stderr to Python's logging module, which has a powerful publish/subscribe model.
It's easy to get started producing log records.
# producer
import logging
log = logging.getLogger("myjobs") # that's all the setup you need
class MyJob(object):
def run(self):
log.info("starting job")
n = 10
for i in range(n):
log.info("%.1f%% done" % (100.0 * i / n))
log.info("work complete")
On the consumer side there's a bit more work. Unfortunately configuring logger output takes, like, 7 whole lines of code to do. ;)
# consumer
import myjobs, sys, logging
if user_wants_log_output:
ch = logging.StreamHandler(sys.stderr)
ch.setLevel(logging.INFO)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
myjobs.log.addHandler(ch)
myjobs.log.setLevel(logging.INFO)
myjobs.MyJob().run()
On the other hand there's an amazing amount of stuff in the logging package. If you ever need to send log data to a rotating set of files, an email address, and the Windows Event Log, you're covered.
Example: simplest possible observer
But you don't need to use any library at all. An extremely simple way to support observers is to call a method that does nothing.
# producer
class MyJob(object):
def on_progress(self, pct):
"""Called when progress is made. pct is the percent complete.
By default this does nothing. The user may override this method
or even just assign to it."""
pass
def run(self):
n = 10
for i in range(n):
self.on_progress(100.0 * i / n)
self.on_progress(100.0)
# consumer
import sys, myjobs
job = myjobs.MyJob()
job.on_progress = lambda pct: sys.stdout.write("%.1f%% done\n" % pct)
job.run()
Sometimes instead of writing a lambda, you can just say job.on_progress = progressBar.update, which is nice.
This is about as simple as it gets. One drawback is that it doesn't naturally support multiple listeners subscribing to the same events.
Example: C#-like events
With a bit of support code, you can get C#-like events in Python. Here's the code:
# glue code
class event(object):
def __init__(self, func):
self.__doc__ = func.__doc__
self._key = ' ' + func.__name__
def __get__(self, obj, cls):
try:
return obj.__dict__[self._key]
except KeyError, exc:
be = obj.__dict__[self._key] = boundevent()
return be
class boundevent(object):
def __init__(self):
self._fns = []
def __iadd__(self, fn):
self._fns.append(fn)
return self
def __isub__(self, fn):
self._fns.remove(fn)
return self
def __call__(self, *args, **kwargs):
for f in self._fns[:]:
f(*args, **kwargs)
The producer declares the event using a decorator:
# producer
class MyJob(object):
#event
def progress(pct):
"""Called when progress is made. pct is the percent complete."""
def run(self):
n = 10
for i in range(n+1):
self.progress(100.0 * i / n)
#consumer
import sys, myjobs
job = myjobs.MyJob()
job.progress += lambda pct: sys.stdout.write("%.1f%% done\n" % pct)
job.run()
This works exactly like the "simple observer" code above, but you can add as many listeners as you like using +=. (Unlike C#, there are no event handler types, you don't have to new EventHandler(foo.bar) when subscribing to an event, and you don't have to check for null before firing the event. Like C#, events do not squelch exceptions.)
How to choose
If logging does everything you need, use that. Otherwise do the simplest thing that works for you. The key thing to note is that you don't need to take on a big external dependency.

How about an implementation where objects aren't kept alive just because they're observing something? Below please find an implementation of the observer pattern with the following features:
Usage is pythonic. To add an observer to a bound method .bar of instance foo, just do foo.bar.addObserver(observer).
Observers are not kept alive by virtue of being observers. In other words, the observer code uses no strong references.
No sub-classing necessary (descriptors ftw).
Can be used with unhashable types.
Can be used as many times you want in a single class.
(bonus) As of today the code exists in a proper downloadable, installable package on github.
Here's the code (the github package or PyPI package have the most up to date implementation):
import weakref
import functools
class ObservableMethod(object):
"""
A proxy for a bound method which can be observed.
I behave like a bound method, but other bound methods can subscribe to be
called whenever I am called.
"""
def __init__(self, obj, func):
self.func = func
functools.update_wrapper(self, func)
self.objectWeakRef = weakref.ref(obj)
self.callbacks = {} #observing object ID -> weak ref, methodNames
def addObserver(self, boundMethod):
"""
Register a bound method to observe this ObservableMethod.
The observing method will be called whenever this ObservableMethod is
called, and with the same arguments and keyword arguments. If a
boundMethod has already been registered to as a callback, trying to add
it again does nothing. In other words, there is no way to sign up an
observer to be called back multiple times.
"""
obj = boundMethod.__self__
ID = id(obj)
if ID in self.callbacks:
s = self.callbacks[ID][1]
else:
wr = weakref.ref(obj, Cleanup(ID, self.callbacks))
s = set()
self.callbacks[ID] = (wr, s)
s.add(boundMethod.__name__)
def discardObserver(self, boundMethod):
"""
Un-register a bound method.
"""
obj = boundMethod.__self__
if id(obj) in self.callbacks:
self.callbacks[id(obj)][1].discard(boundMethod.__name__)
def __call__(self, *arg, **kw):
"""
Invoke the method which I proxy, and all of it's callbacks.
The callbacks are called with the same *args and **kw as the main
method.
"""
result = self.func(self.objectWeakRef(), *arg, **kw)
for ID in self.callbacks:
wr, methodNames = self.callbacks[ID]
obj = wr()
for methodName in methodNames:
getattr(obj, methodName)(*arg, **kw)
return result
#property
def __self__(self):
"""
Get a strong reference to the object owning this ObservableMethod
This is needed so that ObservableMethod instances can observe other
ObservableMethod instances.
"""
return self.objectWeakRef()
class ObservableMethodDescriptor(object):
def __init__(self, func):
"""
To each instance of the class using this descriptor, I associate an
ObservableMethod.
"""
self.instances = {} # Instance id -> (weak ref, Observablemethod)
self._func = func
def __get__(self, inst, cls):
if inst is None:
return self
ID = id(inst)
if ID in self.instances:
wr, om = self.instances[ID]
if not wr():
msg = "Object id %d should have been cleaned up"%(ID,)
raise RuntimeError(msg)
else:
wr = weakref.ref(inst, Cleanup(ID, self.instances))
om = ObservableMethod(inst, self._func)
self.instances[ID] = (wr, om)
return om
def __set__(self, inst, val):
raise RuntimeError("Assigning to ObservableMethod not supported")
def event(func):
return ObservableMethodDescriptor(func)
class Cleanup(object):
"""
I manage remove elements from a dict whenever I'm called.
Use me as a weakref.ref callback to remove an object's id from a dict
when that object is garbage collected.
"""
def __init__(self, key, d):
self.key = key
self.d = d
def __call__(self, wr):
del self.d[self.key]
To use this we just decorate methods we want to make observable with #event. Here's an example
class Foo(object):
def __init__(self, name):
self.name = name
#event
def bar(self):
print("%s called bar"%(self.name,))
def baz(self):
print("%s called baz"%(self.name,))
a = Foo('a')
b = Foo('b')
a.bar.addObserver(b.bar)
a.bar()

From wikipedia:
from collections import defaultdict
class Observable (defaultdict):
def __init__ (self):
defaultdict.__init__(self, object)
def emit (self, *args):
'''Pass parameters to all observers and update states.'''
for subscriber in self:
response = subscriber(*args)
self[subscriber] = response
def subscribe (self, subscriber):
'''Add a new subscriber to self.'''
self[subscriber]
def stat (self):
'''Return a tuple containing the state of each observer.'''
return tuple(self.values())
The Observable is used like this.
myObservable = Observable ()
# subscribe some inlined functions.
# myObservable[lambda x, y: x * y] would also work here.
myObservable.subscribe(lambda x, y: x * y)
myObservable.subscribe(lambda x, y: float(x) / y)
myObservable.subscribe(lambda x, y: x + y)
myObservable.subscribe(lambda x, y: x - y)
# emit parameters to each observer
myObservable.emit(6, 2)
# get updated values
myObservable.stat() # returns: (8, 3.0, 4, 12)

Based on Jason's answer, I implemented the C#-like events example as a fully-fledged python module including documentation and tests. I love fancy pythonic stuff :)
So, if you want some ready-to-use solution, you can just use the code on github.

Example: twisted log observers
To register an observer yourCallable() (a callable that accepts a dictionary) to receive all log events (in addition to any other observers):
twisted.python.log.addObserver(yourCallable)
Example: complete producer/consumer example
From Twisted-Python mailing list:
#!/usr/bin/env python
"""Serve as a sample implementation of a twisted producer/consumer
system, with a simple TCP server which asks the user how many random
integers they want, and it sends the result set back to the user, one
result per line."""
import random
from zope.interface import implements
from twisted.internet import interfaces, reactor
from twisted.internet.protocol import Factory
from twisted.protocols.basic import LineReceiver
class Producer:
"""Send back the requested number of random integers to the client."""
implements(interfaces.IPushProducer)
def __init__(self, proto, cnt):
self._proto = proto
self._goal = cnt
self._produced = 0
self._paused = False
def pauseProducing(self):
"""When we've produced data too fast, pauseProducing() will be
called (reentrantly from within resumeProducing's transport.write
method, most likely), so set a flag that causes production to pause
temporarily."""
self._paused = True
print('pausing connection from %s' % (self._proto.transport.getPeer()))
def resumeProducing(self):
self._paused = False
while not self._paused and self._produced < self._goal:
next_int = random.randint(0, 10000)
self._proto.transport.write('%d\r\n' % (next_int))
self._produced += 1
if self._produced == self._goal:
self._proto.transport.unregisterProducer()
self._proto.transport.loseConnection()
def stopProducing(self):
pass
class ServeRandom(LineReceiver):
"""Serve up random data."""
def connectionMade(self):
print('connection made from %s' % (self.transport.getPeer()))
self.transport.write('how many random integers do you want?\r\n')
def lineReceived(self, line):
cnt = int(line.strip())
producer = Producer(self, cnt)
self.transport.registerProducer(producer, True)
producer.resumeProducing()
def connectionLost(self, reason):
print('connection lost from %s' % (self.transport.getPeer()))
factory = Factory()
factory.protocol = ServeRandom
reactor.listenTCP(1234, factory)
print('listening on 1234...')
reactor.run()

OP asks "Are there any exemplary examples of the GoF Observer implemented in Python?"
This is an example in Python 3.7. This Observable class meets the requirement of creating a relationship between one observable and many observers while remaining independent of their structure.
from functools import partial
from dataclasses import dataclass, field
import sys
from typing import List, Callable
#dataclass
class Observable:
observers: List[Callable] = field(default_factory=list)
def register(self, observer: Callable):
self.observers.append(observer)
def deregister(self, observer: Callable):
self.observers.remove(observer)
def notify(self, *args, **kwargs):
for observer in self.observers:
observer(*args, **kwargs)
def usage_demo():
observable = Observable()
# Register two anonymous observers using lambda.
observable.register(
lambda *args, **kwargs: print(f'Observer 1 called with args={args}, kwargs={kwargs}'))
observable.register(
lambda *args, **kwargs: print(f'Observer 2 called with args={args}, kwargs={kwargs}'))
# Create an observer function, register it, then deregister it.
def callable_3():
print('Observer 3 NOT called.')
observable.register(callable_3)
observable.deregister(callable_3)
# Create a general purpose observer function and register four observers.
def callable_x(*args, **kwargs):
print(f'{args[0]} observer called with args={args}, kwargs={kwargs}')
for gui_field in ['Form field 4', 'Form field 5', 'Form field 6', 'Form field 7']:
observable.register(partial(callable_x, gui_field))
observable.notify('test')
if __name__ == '__main__':
sys.exit(usage_demo())

A functional approach to observer design:
def add_listener(obj, method_name, listener):
# Get any existing listeners
listener_attr = method_name + '_listeners'
listeners = getattr(obj, listener_attr, None)
# If this is the first listener, then set up the method wrapper
if not listeners:
listeners = [listener]
setattr(obj, listener_attr, listeners)
# Get the object's method
method = getattr(obj, method_name)
#wraps(method)
def method_wrapper(*args, **kwags):
method(*args, **kwags)
for l in listeners:
l(obj, *args, **kwags) # Listener also has object argument
# Replace the original method with the wrapper
setattr(obj, method_name, method_wrapper)
else:
# Event is already set up, so just add another listener
listeners.append(listener)
def remove_listener(obj, method_name, listener):
# Get any existing listeners
listener_attr = method_name + '_listeners'
listeners = getattr(obj, listener_attr, None)
if listeners:
# Remove the listener
next((listeners.pop(i)
for i, l in enumerate(listeners)
if l == listener),
None)
# If this was the last listener, then remove the method wrapper
if not listeners:
method = getattr(obj, method_name)
delattr(obj, listener_attr)
setattr(obj, method_name, method.__wrapped__)
These methods can then be used to add a listener to any class method. For example:
class MyClass(object):
def __init__(self, prop):
self.prop = prop
def some_method(self, num, string):
print('method:', num, string)
def listener_method(obj, num, string):
print('listener:', num, string, obj.prop)
my = MyClass('my_prop')
add_listener(my, 'some_method', listener_method)
my.some_method(42, 'with listener')
remove_listener(my, 'some_method', listener_method)
my.some_method(42, 'without listener')
And the output is:
method: 42 with listener
listener: 42 with listener my_prop
method: 42 without listener

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to override an async NDB method and write your own tasklet - python

Related

Queue objects should only be shared between processes through inheritance even when mp.Manager.Queue() is used

Use contextmanager to trap instructions for later execution

How to create generic method for methods that have the same structure?

Turn functions with a callback into Python generators?

Python Observer Pattern: Examples, Tips? [closed]

Categories

Resources