Persist and fetch data in a with block - Python

I have a situation - I'm using the asyncio package with Python 3.x, and persisting data in a with block, something like this:
test_repo = TestRepository()

with (yield from test_repo):
    res = yield from test_repo.get_by_lim_off(
        page_size=int(length),
        offset=start,
        customer_name=customer_name,
        customer_phone=customer_phone,
        return_type=return_type
    )
I need to get the res data inside the with block, but the persistence and data fetching should happen when I exit the with block. How can I achieve this?

This behavior is only supported in Python 3.5+, via asynchronous context managers (__aenter__/__aexit__), and async with, both of which were added in PEP 492:
class TestRepository:
    # All your normal methods go here

    async def __aenter__(self):
        # You can call coroutines here
        await self.some_init()

    async def __aexit__(self, exc_type, exc, tb):
        # You can call coroutines here
        await self.do_persistence()
        await self.fetch_data()

async def do_work():
    test_repo = TestRepository()

    async with test_repo:
        res = await test_repo.get_by_lim_off(
            page_size=int(length),
            offset=start,
            customer_name=customer_name,
            customer_phone=customer_phone,
            return_type=return_type
        )

asyncio.get_event_loop().run_until_complete(do_work())
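As a side note (not part of the original answer): on Python 3.7+ the usual entry point is asyncio.run, which creates and closes the event loop for you:

import asyncio

asyncio.run(do_work())  # equivalent to the run_until_complete line above on Python 3.7+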
Prior to 3.5, you have to use a try/finally block with explicit calls to the init/cleanup coroutines, unfortunately:
@asyncio.coroutine
def do_work():
    test_repo = TestRepository()

    yield from test_repo.some_init()
    try:
        res = yield from test_repo.get_by_lim_off(
            page_size=int(length),
            offset=start,
            customer_name=customer_name,
            customer_phone=customer_phone,
            return_type=return_type
        )
    finally:
        yield from test_repo.do_persistence()
        yield from test_repo.fetch_data()

Related

Python: await the generator end

Current versions of Python (as of Dec 2022) still allow using the @coroutine decorator, and a generator can be awaited like this:
import asyncio

asyncify = asyncio.coroutine

data_ready = False  # Status of a pipe, just to test

def gen():
    global data_ready
    while not data_ready:
        print("not ready")
        data_ready = True  # Just to test
        yield
    return "done"

async def main():
    result = await asyncify(gen)()
    print(result)

loop = asyncio.new_event_loop()
loop.create_task(main())
loop.run_forever()
However, newer Python versions (3.8+) deprecate the @coroutine decorator (the asyncify alias above), so how do I wait for (await) a generator to end, as above?
I tried to use async def as suggested by the deprecation warning, but it does not work:
import asyncio

asyncify = asyncio.coroutine

data_ready = False  # Just to test

async def gen():
    global data_ready
    while not data_ready:
        print("not ready")
        data_ready = True  # Just to test
        yield
    yield "done"
    return

async def main():
    # this has error: TypeError: object async_generator can't be used in 'await' expression
    result = await gen()
    print(result)

loop = asyncio.new_event_loop()
loop.create_task(main())
loop.run_forever()
Asynchronous generators implement the asynchronous iterator protocol and are intended for asynchronous iteration. You cannot directly await them like regular coroutines.
With that in mind, returning to your experimental case and your question "how to wait for (await) a generator to end?": to get the final yielded value, perform asynchronous iteration:
import asyncio

data_ready = False  # Just to test

async def gen():
    global data_ready
    while not data_ready:
        print("not ready")
        data_ready = True  # Just to test
        yield "processing"
    yield "done"
    return

async def main():
    a_gen = gen()
    async for result in a_gen:  # assign to result on each async iteration
        pass
    print('result:', result)

asyncio.run(main())
Prints:
not ready
result: done
Naturally, you can also advance the async generator in steps with anext:
a_gen = gen()
val_1 = await anext(a_gen)
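Note that the anext() builtin was only added in Python 3.10; on earlier versions you can call the corresponding dunder method directly:

val_1 = await a_gen.__anext__()  # equivalent to await anext(a_gen) on Python 3.10+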
Summing up, follow the guidelines in PEP 525 – Asynchronous Generators and try not to mix old, deprecated constructs with current ones.

Automatic conversion of standard function into asynchronous function in Python

In most of the asynchronous coroutines I write, I only need to replace the function definition def func() -> async def func() and the sleep time.sleep(s) -> await asyncio.sleep(s).
Is it possible to convert a standard Python function into an async function where every time.sleep(s) is converted to await asyncio.sleep(s)?
Example
Performance during task
Measure performance during a task
import asyncio
import random

async def performance_during_task(task):
    stop_event = asyncio.Event()
    target_task = asyncio.create_task(task(stop_event))
    perf_task = asyncio.create_task(measure_performance(stop_event))
    await target_task
    await perf_task

async def measure_performance(event):
    while not event.is_set():
        print('Performance: ', random.random())
        await asyncio.sleep(.2)

if __name__ == "__main__":
    asyncio.run(
        performance_during_task(task)
    )
Task
The task has to be defined with async def and await asyncio.sleep(s)
async def task(event):
    for i in range(10):
        print('Step: ', i)
        await asyncio.sleep(.2)
    event.set()
into ->
Easy task definition
So that others don't have to worry about async etc., I want them to be able to define a task normally (e.g. with a decorator?):
@as_async
def easy_task(event):
    for i in range(10):
        print('Step: ', i)
        time.sleep(.2)
    event.set()
So that it can be used as an async function with e.g. performance_during_task()
I think I found a solution similar to the interesting GitHub example mentioned in the comments and a similar post here.
We can write a decorator like
import asyncio
from functools import wraps, partial

def to_async(func):
    @wraps(func)  # Makes sure the wrapped function keeps its attributes, e.g. func.__name__
    async def run(*args, loop=None, executor=None, **kwargs):
        if loop is None:
            loop = asyncio.get_event_loop()  # Make an event loop if none exists
        pfunc = partial(func, *args, **kwargs)  # Return function with variables (event) filled in
        return await loop.run_in_executor(executor, pfunc)
    return run
Such that easy_task becomes
@to_async
def easy_task(event):
    for i in range(10):
        print('Step: ', i)
        time.sleep(.2)
    event.set()
Where wraps makes sure we can still access the attributes of the original function (explained here).
And partial fills in the variables as explained here.
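For completeness, a minimal usage sketch (not part of the original answer), assuming the to_async decorator, the decorated easy_task and the performance_during_task helper from the question above:

import asyncio

# easy_task(event) now returns an awaitable, so it can be scheduled like any coroutine
asyncio.run(performance_during_task(easy_task))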

How to run async coroutines from __init__ and wait until they are complete

I am connecting to aioredis from __init__ (I do not want to move it out, since that would mean some extra major changes). How can I wait for the aioredis connection task in the __init__ example code below and have it print the self.sub and self.pub objects? Currently it gives an error saying
abc.py:42> exception=AttributeError("'S' object has no attribute 'pub'")
I do see the redis connections being created and the create_connection coroutine completing.
Is there a way to make blocking asyncio calls from __init__? If I replace asyncio.wait with loop.run_until_complete I get an error that roughly says
loop is already running.
import sys, json
from addict import Dict
import asyncio
import aioredis

class S():
    def __init__(self, opts):
        print(asyncio.Task.all_tasks())
        task = asyncio.wait(asyncio.create_task(self.create_connection()), return_when="ALL_COMPLETED")
        print(asyncio.Task.all_tasks())
        print(task)
        print(self.pub, self.sub)

    async def receive_message(self, channel):
        while await channel.wait_message():
            message = await channel.get_json()
            await asyncio.create_task(self.callback_loop(Dict(json.loads(message))))

    async def run_s(self):
        asyncio.create_task(self.listen())

        async def callback_loop(msg):
            print(msg)

        self.callback_loop = callback_loop

    async def create_connection(self):
        self.pub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
        self.sub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
        self.db = await aioredis.create_redis("redis://c8:7070/0", password="abc")
        self.listener = await self.sub.subscribe(f"abc")

    async def listen(self):
        self.tsk = asyncio.ensure_future(self.receive_message(self.listener[0]))
        await self.tsk

async def periodic():  # test function to show current tasks
    number = 5
    while True:
        await asyncio.sleep(number)
        print(asyncio.Task.all_tasks())

async def main(opts):
    loop.create_task(periodic())
    s = S(opts)
    print(s.pub, s.sub)
    loop.create_task(s.run_s())

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    main_task = loop.create_task(main(sys.argv[1:]))
    loop.run_forever()  # I DONT WANT TO MOVE THIS UNLESS IT IS NECESSARY
I think what you want to do is to make sure the function create_connections runs to completion BEFORE the S constructor. A way to do that is to rearrange your code a little bit. Move the create_connections function outside the class:
async def create_connections():
    pub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
    sub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
    db = await aioredis.create_redis("redis://c8:7070/0", password="abc")
    listener = await sub.subscribe("abc")
    return pub, sub, db, listener
Now await that function before constructing S. So your main function becomes:
async def main(opts):
    loop.create_task(periodic())
    x = await create_connections()
    s = S(opts, x)  # pass the result of create_connections to S
    print(s.pub, s.sub)
    loop.create_task(s.run_s())
Now modify the S constructor to receive the objects created:
def __init__(self, opts, x):
    self.pub, self.sub, self.db, self.listener = x
I'm not sure what you're trying to do with the return_when argument and the call to asyncio.wait. The create_connections function doesn't launch a set of parallel tasks, but rather awaits each of the calls before moving on to the next one. Perhaps you could improve performance by running the four calls in parallel but that's a different question.
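If you did want to try that, a rough sketch (not part of the original answer) using asyncio.gather to open the three connections concurrently could look like this:

async def create_connections():
    # open the three redis connections concurrently instead of one after another
    pub, sub, db = await asyncio.gather(
        aioredis.create_redis("redis://c8:7070/0", password="abc"),
        aioredis.create_redis("redis://c8:7070/0", password="abc"),
        aioredis.create_redis("redis://c8:7070/0", password="abc"),
    )
    listener = await sub.subscribe("abc")  # subscribing still has to wait for sub
    return pub, sub, db, listener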

Python parallelising "async for"

I have the following method in my Tornado handler:
async def get(self):
    url = 'url here'
    try:
        async for batch in downloader.fetch(url):
            self.write(batch)
            await self.flush()
    except Exception as e:
        logger.warning(e)
This is the code for downloader.fetch():
async def fetch(url, **kwargs):
    timeout = kwargs.get('timeout', aiohttp.ClientTimeout(total=12))
    response_validator = kwargs.get('response_validator', json_response_validator)
    extractor = kwargs.get('extractor', json_extractor)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as resp:
                response_validator(resp)
                async for batch in extractor(resp):
                    yield batch
    except aiohttp.client_exceptions.ClientConnectorError:
        logger.warning("bad request")
        raise
    except asyncio.TimeoutError:
        logger.warning("server timeout")
        raise
I would like to yield the "batch" object from multiple downloaders in parallel.
I want the first available batch from any downloader, and so on, until all downloaders have finished. Something like this (this is not working code):
async for batch in [downloader.fetch(url1), downloader.fetch(url2)]:
    ....
Is this possible? How can I modify what I am doing in order to be able to yield from multiple coroutines in parallel?
How can I modify what I am doing in order to be able to yield from multiple coroutines in parallel?
You need a function that merges two async sequences into one, iterating over both in parallel and yielding elements from one or the other, as they become available. While such a function is not included in the current standard library, you can find one in the aiostream package.
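For example, a rough sketch using aiostream's stream.merge (assuming the downloader.fetch, url1 and url2 names from your snippet; untested against your handler) could look like this:

from aiostream import stream

async def get(self):
    merged = stream.merge(downloader.fetch(url1), downloader.fetch(url2))
    async with merged.stream() as streamer:  # yields batches as soon as either fetch produces one
        async for batch in streamer:
            self.write(batch)
            await self.flush()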
You can also write your own merge function, as shown in this answer:
async def merge(*iterables):
    iter_next = {it.__aiter__(): None for it in iterables}
    while iter_next:
        for it, it_next in iter_next.items():
            if it_next is None:
                fut = asyncio.ensure_future(it.__anext__())
                fut._orig_iter = it
                iter_next[it] = fut
        done, _ = await asyncio.wait(iter_next.values(),
                                     return_when=asyncio.FIRST_COMPLETED)
        for fut in done:
            iter_next[fut._orig_iter] = None
            try:
                ret = fut.result()
            except StopAsyncIteration:
                del iter_next[fut._orig_iter]
                continue
            yield ret
Using that function, the loop would look like this:
async for batch in merge(downloader.fetch(url1), downloader.fetch(url2)):
    ....
Edit:
As mentioned in the comments, the method below does not execute the given coroutines in parallel.
Check out the aitertools library.
import asyncio
import aitertools

async def f1():
    await asyncio.sleep(5)
    yield 1

async def f2():
    await asyncio.sleep(6)
    yield 2

async def iter_funcs():
    async for x in aitertools.chain(f2(), f1()):
        print(x)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(iter_funcs())
It seems that the functions being iterated must be coroutines.

How to make an asyncio pool cancelable?

I have a pool_map function that can be used to limit the number of simultaneously executing functions.
The idea is to have a coroutine function accepting a single parameter that is mapped over a list of possible parameters, but to also wrap all function calls in a semaphore acquisition, so that only a limited number run at once:
from typing import Callable, Awaitable, Iterable, Iterator, Generator, TypeVar
from asyncio import Semaphore

A = TypeVar('A')
V = TypeVar('V')

async def pool_map(
    func: Callable[[A], Awaitable[V]],
    arg_it: Iterable[A],
    size: int = 10
) -> Generator[Awaitable[V], None, None]:
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    return map(sub, arg_it)
I modified the above code for the sake of an example and didn't test it, but my variant works well. E.g. you can use it like this:
from asyncio import get_event_loop, coroutine, as_completed
from contextlib import closing

URLS = [...]

async def run_all(awaitables):
    for a in as_completed(awaitables):
        result = await a
        print('got result', result)

async def download(url): ...

if __name__ == '__main__':
    pool = pool_map(download, URLS)

    with closing(get_event_loop()) as loop:
        loop.run_until_complete(run_all(pool))
But a problem arises if there is an exception thrown while awaiting a future. I can't see how to cancel all scheduled or still-running tasks, nor the ones still waiting for the semaphore to be acquired.
Is there a library or an elegant building block for this that I don't know of, or do I have to build all the parts myself? (i.e. a Semaphore with access to its waiters, an as_finished that provides access to its running task queue, …)
Use ensure_future to get a Task instead of a coroutine:
import asyncio
from contextlib import closing

def pool_map(func, args, size=10):
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = asyncio.Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    tasks = [asyncio.ensure_future(sub(x)) for x in args]
    return tasks

async def f(n):
    print(">>> start", n)
    if n == 7:
        raise Exception("boom!")
    await asyncio.sleep(n / 10)
    print("<<< end", n)
    return n

async def run_all(tasks):
    exc = None
    for a in asyncio.as_completed(tasks):
        try:
            result = await a
            print('=== result', result)
        except asyncio.CancelledError as e:
            print("!!! cancel", e)
        except Exception as e:
            print("Exception in task, cancelling!")
            for t in tasks:
                t.cancel()
            exc = e
    if exc:
        raise exc

pool = pool_map(f, range(1, 20), 3)

with closing(asyncio.get_event_loop()) as loop:
    loop.run_until_complete(run_all(pool))
Here's a naive solution, based on the fact that cancel is a no-op if the task is already finished:
async def run_all(awaitables):
    futures = [asyncio.ensure_future(a) for a in awaitables]
    try:
        for fut in as_completed(futures):
            result = await fut
            print('got result', result)
    except:
        for future in futures:
            future.cancel()
        await asyncio.wait(futures)
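As a side note (not part of either answer): on Python 3.11+ the "cancel the rest when one task fails" behaviour is built into asyncio.TaskGroup, so a sketch of the pool using it (the pool_map_tg name is just illustrative) might look like this:

import asyncio

async def pool_map_tg(func, args, size=10):
    # TaskGroup cancels all sibling tasks automatically as soon as one of them raises
    semaphore = asyncio.Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(sub(x)) for x in args]
    return [t.result() for t in tasks]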
