Python asyncio task list generation without executing the function

While working in asyncio, I'm trying to use a list comprehension to build my task list. The basic form of the function is as follows:
import asyncio
import urllib.request as req

@asyncio.coroutine
def coro(term):
    print(term)
    google = "https://www.google.com/search?q=" + term.replace(" ", "+") + "&num=100&start=0"
    request = req.Request(google, None, headers)  # headers defined elsewhere
    # (some beautiful soup stuff)
My goal is to use a list of terms to create my task list:
terms = ["pie", "chicken" ,"things" ,"stuff"]
tasks=[
coro("pie"),
coro("chicken"),
coro("things"),
coro("stuff")]
My initial thought was:
loop = asyncio.get_event_loop()
tasks = [coro(term) for term in terms]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
This doesn't create the task list; it runs the function during the list comprehension. Is there a shortcut for creating the task list without writing out every task?

Your HTTP client does not support asyncio, and you will not get the expected results. Try this to see that asyncio.wait() does work as you expect:
import asyncio
import random

@asyncio.coroutine
def my_coroutine(term):
    print("start", term)
    yield from asyncio.sleep(random.uniform(1, 3))
    print("end", term)

terms = ["pie", "chicken", "things", "stuff"]

loop = asyncio.get_event_loop()
tasks = [my_coroutine(term) for term in terms]
print("Here we go!")
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
If you use asyncio.gather() you get one future encapsulating all your tasks, which can be easily canceled with .cancel(), here demonstrated with the Python 3.5+ async def/await syntax (but it works the same with @asyncio.coroutine and yield from):
import asyncio
import random

async def my_coroutine(term):
    print("start", term)
    n = random.uniform(0.2, 1.5)
    await asyncio.sleep(n)
    print("end", term)
    return "Term {} slept for {:.2f} seconds".format(term, n)

async def stop_all():
    """Cancels all still running tasks after one second"""
    await asyncio.sleep(1)
    print("stopping")
    fut.cancel()
    return ":-)"

loop = asyncio.get_event_loop()
terms = ["pie", "chicken", "things", "stuff"]
tasks = (my_coroutine(term) for term in terms)
fut = asyncio.gather(stop_all(), *tasks, return_exceptions=True)
print("Here we go!")
loop.run_until_complete(fut)

for task_result in fut.result():
    if not isinstance(task_result, Exception):
        print("OK", task_result)
    else:
        print("Failed", task_result)

loop.close()
And finally, if you want to use an async HTTP client, try aiohttp. First install it with:
pip install aiohttp
then try this example, which uses asyncio.as_completed:
import asyncio
import aiohttp

async def fetch(session, url):
    print("Getting {}...".format(url))
    async with session.get(url) as resp:
        text = await resp.text()
    return "{}: Got {} bytes".format(url, len(text))

async def fetch_all():
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, "http://httpbin.org/delay/{}".format(delay))
                 for delay in (1, 1, 2, 3, 3)]
        for task in asyncio.as_completed(tasks):
            print(await task)
    return "Done."

loop = asyncio.get_event_loop()
resp = loop.run_until_complete(fetch_all())
print(resp)
loop.close()

This works in Python 3.5 (using the new async/await syntax):
import asyncio

async def coro(term):
    for i in range(3):
        await asyncio.sleep(len(term))  # just sleep
        print("cor1", i, term)

terms = ["pie", "chicken", "things", "stuff"]
tasks = [coro(term) for term in terms]

loop = asyncio.get_event_loop()
cors = asyncio.wait(tasks)
loop.run_until_complete(cors)
Shouldn't your version yield from req.Request(google, None, headers)? And what library is that? Is it even made for use with asyncio?
(Here is the same code with the Python <= 3.4 syntax; the missing parts are the same as above:)
@asyncio.coroutine
def coro(term):
    for i in range(3):
        yield from asyncio.sleep(len(term))  # just sleep
        print("cor1", i, term)

Create the task list and run the event loop:

async def main():
    tasks = []
    # build one task per term, using the question's coro()
    while terms:
        tasks.append(asyncio.create_task(coro(terms.pop())))
    await asyncio.gather(*tasks, return_exceptions=True)

asyncio.run(main())

Related

How can I make Async IO work on a non-async function?

I have a complex function Vehicle.set_data, which has many nested functions, API calls, DB calls, etc. For the sake of this example, I will simplify it.
I am trying to use Async IO to run Vehicle.set_data on multiple vehicles at once. Here is my Vehicle model:
class Vehicle:
    def __init__(self, token):
        self.token = token

    # Works async
    async def set_data(self):
        await asyncio.sleep(random.random() * 10)

    # Does not work async
    # def set_data(self):
    #     time.sleep(random.random() * 10)
And here is my Async IO routine:
async def set_vehicle_data(vehicle):
    # sleep for T seconds on average
    await vehicle.set_data()

def get_random_string():
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(5))

async def producer(queue):
    count = 0
    while True:
        count += 1
        # produce a token and send it to a consumer
        token = get_random_string()
        vehicle = Vehicle(token)
        print(f'produced {vehicle.token}')
        await queue.put(vehicle)
        if count > 3:
            break

async def consumer(queue):
    while True:
        vehicle = await queue.get()
        # process the token received from a producer
        print(f'Starting consumption for vehicle {vehicle.token}')
        await set_vehicle_data(vehicle)
        queue.task_done()
        print(f'Ending consumption for vehicle {vehicle.token}')

async def main():
    queue = asyncio.Queue()
    # todo: do I need multiple producers?
    producers = [asyncio.create_task(producer(queue))
                 for _ in range(3)]
    consumers = [asyncio.create_task(consumer(queue))
                 for _ in range(3)]
    # with both producers and consumers running, wait for
    # the producers to finish
    await asyncio.gather(*producers)
    print('---- done producing')
    # wait for the remaining tasks to be processed
    await queue.join()
    # cancel the consumers, which are now idle
    for c in consumers:
        c.cancel()

asyncio.run(main())
In the example above, this commented-out section of code does not allow multiple vehicles to be processed at once:

# Does not work async
# def set_data(self):
#     time.sleep(random.random() * 10)
Because this is such a complex query in our actual codebase, it would be a tremendous refactor to go flag every single nested function with async and await. Is there any way I can make this function work async without marking up my whole codebase with async?
You can run the function in a separate thread with asyncio.to_thread:

await asyncio.to_thread(self.set_data)

If you're using Python < 3.9, use loop.run_in_executor:

loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self.set_data)
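Applied to the Vehicle example above, a minimal sketch (assuming Python 3.9+ and the blocking time.sleep variant of set_data; set_data_async is an illustrative name, not part of the original code):

import asyncio
import random
import time

class Vehicle:
    def __init__(self, token):
        self.token = token

    # blocking, non-async implementation
    def set_data(self):
        time.sleep(random.random() * 2)

    # async wrapper: offload the blocking call to a worker thread
    async def set_data_async(self):
        await asyncio.to_thread(self.set_data)

async def main():
    vehicles = [Vehicle(str(i)) for i in range(3)]
    # the blocking calls now run concurrently, one worker thread each
    await asyncio.gather(*(v.set_data_async() for v in vehicles))

asyncio.run(main())

Note that threads help with blocking I/O; CPU-bound work still contends for the GIL, in which case a ProcessPoolExecutor passed to run_in_executor is the usual alternative.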

asyncio task was destroyed but it is pending

I am working on a sample program that reads from a data source (csv or rdbms) in chunks, makes some transformations, and sends the data via socket to a server.
But because the csv is very large, for testing purposes I want to stop reading after a few chunks.
Unfortunately something goes wrong and I do not know what or how to fix it. Probably I have to do some cancellation, but I am not sure where and how. I get the following error:
Task was destroyed but it is pending!
task: <Task pending coro=<<async_generator_athrow without __name__>()>>
The sample code is:
import asyncio
import json

async def readChunks():
    # this is basically a dummy alternative for reading csv in chunks
    df = [{"chunk_" + str(x): [r for r in range(10)]} for x in range(10)]
    for chunk in df:
        await asyncio.sleep(0.001)
        yield chunk

async def send(row):
    j = json.dumps(row)
    print(f"to be sent: {j}")
    await asyncio.sleep(0.001)

async def main():
    i = 0
    async for chunk in readChunks():
        for k, v in chunk.items():
            await asyncio.gather(send({k: v}))
        i += 1
        if i > 5:
            break
        # print(f"item in main via async generator is {chunk}")

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
Many async resources, such as generators, need to be cleaned up with the help of an event loop. When an async for loop stops iterating an async generator via break, the generator is cleaned up by the garbage collector only. This means the task is pending (waits for the event loop) but gets destroyed (by the garbage collector).
The most straightforward fix is to aclose the generator explicitly:
async def main():
    i = 0
    aiter = readChunks()        # name the iterator in order to ...
    try:
        async for chunk in aiter:
            ...
            i += 1
            if i > 5:
                break
    finally:
        await aiter.aclose()    # ... clean it up when done
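On Python 3.10+, the standard library offers contextlib.aclosing, which packages the same try/finally pattern as a context manager; a sketch under that assumption:

from contextlib import aclosing

async def main():
    i = 0
    # aclosing() awaits aclose() on exit, even on break or exception
    async with aclosing(readChunks()) as chunks:
        async for chunk in chunks:
            ...
            i += 1
            if i > 5:
                break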
These patterns can be simplified using asyncstdlib (disclaimer: I maintain this library). asyncstdlib.islice lets you take a fixed number of items before cleanly closing the generator:
import asyncstdlib as a

async def main():
    async for chunk in a.islice(readChunks(), 5):
        ...
If the break condition is dynamic, scoping the iterator guarantees cleanup in any case:
import asyncstdlib as a

async def main():
    async with a.scoped_iter(readChunks()) as aiter:
        async for idx, chunk in a.enumerate(aiter):
            ...
            if idx >= 5:
                break
This works...
import asyncio
import json
import logging

logging.basicConfig(format='%(asctime)s.%(msecs)03d %(message)s',
                    datefmt='%S')
root = logging.getLogger()
root.setLevel(logging.INFO)

async def readChunks():
    # this is basically a dummy alternative for reading csv in chunks
    df = [{"chunk_" + str(x): [r for r in range(10)]} for x in range(10)]
    for chunk in df:
        await asyncio.sleep(0.002)
        root.info('readChunks: next chunk coming')
        yield chunk

async def send(row):
    j = json.dumps(row)
    root.info(f"to be sent: {j}")
    await asyncio.sleep(0.002)

async def main():
    i = 0
    root.info('main: starting to read chunks')
    async for chunk in readChunks():
        for k, v in chunk.items():
            root.info('main: sending an item')
            # await asyncio.gather(send({k: v}))
            stuff = await send({k: v})
        i += 1
        if i > 5:
            break
        # print(f"item in main via async generator is {chunk}")

## loop = asyncio.get_event_loop()
## loop.run_until_complete(main())
## loop.close()

if __name__ == '__main__':
    asyncio.run(main())
... At least it runs and finishes.
The issue with stopping an async generator by breaking out of an async for loop is described in bugs.python.org/issue38013, and it looks like it was fixed in 3.7.5.
However, using
loop = asyncio.get_event_loop()
loop.set_debug(True)
loop.run_until_complete(main())
loop.close()
I get a debug error but no Exception in Python 3.8.
Task was destroyed but it is pending!
task: <Task pending name='Task-8' coro=<<async_generator_athrow without __name__>()>>
Using the higher-level API asyncio.run(main()) with debugging ON, I do not get the debug message. If you are going to try upgrading to Python 3.7.5+, you probably should still use asyncio.run().
The problem is simple: you exit the loop early, but the async generator is not exhausted yet (it is pending):

...
if i > 5:
    break
...
Your readChunks is running asynchronously alongside your loop, and you break out of the loop before it completes.
That's why it gives "asyncio task was destroyed but it is pending".
In short, the async task was still doing its work in the background, but you killed it by breaking the loop (stopping the program).

Python parallelising "async for"

I have the following method in my Tornado handler:
async def get(self):
    url = 'url here'
    try:
        async for batch in downloader.fetch(url):
            self.write(batch)
            await self.flush()
    except Exception as e:
        logger.warning(e)
This is the code for downloader.fetch():
async def fetch(url, **kwargs):
    timeout = kwargs.get('timeout', aiohttp.ClientTimeout(total=12))
    response_validator = kwargs.get('response_validator', json_response_validator)
    extractor = kwargs.get('extractor', json_extractor)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as resp:
                response_validator(resp)
                async for batch in extractor(resp):
                    yield batch
    except aiohttp.client_exceptions.ClientConnectorError:
        logger.warning("bad request")
        raise
    except asyncio.TimeoutError:
        logger.warning("server timeout")
        raise
I would like to yield the "batch" object from multiple downloaders in parallel.
I want the first available batch from whichever downloader produces it, and so on until all downloaders have finished. Something like this (this is not working code):
async for batch in [downloader.fetch(url1), downloader.fetch(url2)]:
    ....
Is this possible? How can I modify what I am doing in order to be able to yield from multiple coroutines in parallel?
How can I modify what I am doing in order to be able to yield from multiple coroutines in parallel?
You need a function that merges two async sequences into one, iterating over both in parallel and yielding elements from one or the other, as they become available. While such a function is not included in the current standard library, you can find one in the aiostream package.
You can also write your own merge function, as shown in this answer:
async def merge(*iterables):
    iter_next = {it.__aiter__(): None for it in iterables}
    while iter_next:
        for it, it_next in iter_next.items():
            if it_next is None:
                fut = asyncio.ensure_future(it.__anext__())
                fut._orig_iter = it
                iter_next[it] = fut
        done, _ = await asyncio.wait(iter_next.values(),
                                     return_when=asyncio.FIRST_COMPLETED)
        for fut in done:
            iter_next[fut._orig_iter] = None
            try:
                ret = fut.result()
            except StopAsyncIteration:
                del iter_next[fut._orig_iter]
                continue
            yield ret
Using that function, the loop would look like this:
async for batch in merge(downloader.fetch(url1), downloader.fetch(url2)):
    ....
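For comparison, a sketch of the aiostream equivalent inside the handler (assuming pip install aiostream; url1 and url2 as in the question):

from aiostream import stream

async def get(self):
    combined = stream.merge(downloader.fetch(url1), downloader.fetch(url2))
    # the stream() context manager ensures the generators are closed properly
    async with combined.stream() as streamer:
        async for batch in streamer:
            self.write(batch)
            await self.flush()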
Edit:
As mentioned in the comments, the method below does not execute the given routines in parallel; aitertools.chain exhausts one iterator before starting the next.
Check out the aitertools library.
import asyncio
import aitertools

async def f1():
    await asyncio.sleep(5)
    yield 1

async def f2():
    await asyncio.sleep(6)
    yield 2

async def iter_funcs():
    async for x in aitertools.chain(f2(), f1()):
        print(x)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(iter_funcs())
It seems that the functions being iterated must be coroutines.

asyncio as_yielded from async generators

I'm looking to be able to yield from a number of async coroutines. Asyncio's as_completed is kind of close to what I'm looking for (i.e. I want any of the coroutines to be able to yield at any time back to the caller and then continue), but that only seems to allow regular coroutines with a single return.
Here's what I have so far:
import asyncio

async def test(id_):
    print(f'{id_} sleeping')
    await asyncio.sleep(id_)
    return id_

async def test_gen(id_):
    count = 0
    while True:
        print(f'{id_} sleeping')
        await asyncio.sleep(id_)
        yield id_
        count += 1
        if count > 5:
            return

async def main():
    runs = [test(i) for i in range(3)]
    for i in asyncio.as_completed(runs):
        i = await i
        print(f'{i} yielded')

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
Replacing runs = [test(i) for i in range(3)] with runs = [test_gen(i) for i in range(3)], and having for i in asyncio.as_completed(runs) iterate on each yield, is what I'm after.
Is this possible to express in Python, and are there any third-party libraries that give you more options than the standard library for coroutine process flow?
Thanks
You can use aiostream.stream.merge:
from aiostream import stream

async def main():
    runs = [test_gen(i) for i in range(3)]
    async for x in stream.merge(*runs):
        print(f'{x} yielded')
Run it in a safe context to make sure the generators are cleaned up properly after the iteration:
async def main():
    runs = [test_gen(i) for i in range(3)]
    merged = stream.merge(*runs)
    async with merged.stream() as streamer:
        async for x in streamer:
            print(f'{x} yielded')
Or make it more compact using pipes:
from aiostream import stream, pipe

async def main():
    runs = [test_gen(i) for i in range(3)]
    await (stream.merge(*runs) | pipe.print('{} yielded'))
More examples in the documentation.
Addressing @nirvana-msu's comment
It is possible to identify the generator that yielded a given value by preparing sources accordingly:
async def main():
    runs = [test_gen(i) for i in range(3)]
    # bind i via a default argument (i=i) so each lambda keeps its own index
    sources = [stream.map(xs, lambda x, i=i: (i, x))
               for i, xs in enumerate(runs)]
    async for i, x in stream.merge(*sources):
        print(f'ID {i}: {x}')

Execute future only when accessed

I would like to do something like the following:
import asyncio

async def g():
    print('called g')
    return 'somevalue'

async def f():
    x = g()

loop = asyncio.get_event_loop()
loop.run_until_complete(f())
loop.close()
There is no output. Notice that I did not await g(). This generates a "coroutine 'g' was never awaited" warning, but I'm looking for behaviour where g most definitely did not run.
This is useful for me where I have a long running operation with complex setup, but I only need its result in certain situations, so why bother running it when it is not needed. Kind of an 'on demand' situation.
How can I do this?
One option is to use simple flags to signal tasks:
import asyncio
import random

async def g(info):
    print('> called g')
    if not info['skip']:
        print('* running g', info['id'])
        await asyncio.sleep(random.uniform(1, 3))
    else:
        print('- skipping g', info['id'])
    print('< done g', info['id'])
    return info['id']

async def main():
    data = [{
        'id': i,
        'skip': False
    } for i in range(10)]

    # schedule 10 tasks to run later
    tasks = [asyncio.ensure_future(g(info)) for info in data]

    # tell some tasks to skip processing
    data[2]['skip'] = True
    data[5]['skip'] = True
    data[6]['skip'] = True

    # wait for all results
    results = await asyncio.gather(*tasks)
    print(results)
    print("Done!")

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
A different option would be using task.cancel:
import asyncio

async def coro(x):
    print('coro', x)
    return x

async def main():
    task1 = asyncio.ensure_future(coro(1))
    task2 = asyncio.ensure_future(coro(2))
    task3 = asyncio.ensure_future(coro(3))

    task2.cancel()

    for task in asyncio.as_completed([task1, task2, task3]):
        try:
            result = await task
            print("success", result)
        except asyncio.CancelledError as e:
            print("cancelled", e)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
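One more observation worth noting (a sketch, not from the answers above): a coroutine object created by calling g() does not run until it is awaited, so "on demand" execution falls out naturally; the only wrinkle is the "never awaited" warning, which coroutine.close() avoids:

import asyncio

async def g():
    print('called g')
    return 'somevalue'

async def f(need_result):
    x = g()             # creates the coroutine object; g's body has NOT run yet
    if need_result:
        return await x  # g runs only here, on demand
    x.close()           # discard without running; avoids the "never awaited" warning

asyncio.run(f(False))   # prints nothing: g never ran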
