I'm looking to be able to yield from a number of async coroutines. Asyncio's as_completed is kind of close to what I'm looking for (i.e. I want any of the coroutines to be able to yield at any time back to the caller and then continue), but that only seems to allow regular coroutines with a single return.
Here's what I have so far:
import asyncio

async def test(id_):
    print(f'{id_} sleeping')
    await asyncio.sleep(id_)
    return id_

async def test_gen(id_):
    count = 0
    while True:
        print(f'{id_} sleeping')
        await asyncio.sleep(id_)
        yield id_
        count += 1
        if count > 5:
            return
async def main():
    runs = [test(i) for i in range(3)]
    for i in asyncio.as_completed(runs):
        i = await i
        print(f'{i} yielded')

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
Replacing runs = [test(i) for i in range(3)] with runs = [test_gen(i) for i in range(3)], and having for i in asyncio.as_completed(runs) iterate on each yield, is what I'm after.
Is this possible to express in Python, and are there any third-party libraries that give you more options than the standard library for coroutine process flow?
Thanks
You can use aiostream.stream.merge:
from aiostream import stream

async def main():
    runs = [test_gen(i) for i in range(3)]
    async for x in stream.merge(*runs):
        print(f'{x} yielded')
Run it in a safe context to make sure the generators are cleaned up properly after the iteration:
async def main():
    runs = [test_gen(i) for i in range(3)]
    merged = stream.merge(*runs)
    async with merged.stream() as streamer:
        async for x in streamer:
            print(f'{x} yielded')
Or make it more compact using pipes:
from aiostream import stream, pipe

async def main():
    runs = [test_gen(i) for i in range(3)]
    await (stream.merge(*runs) | pipe.print('{} yielded'))
More examples in the documentation.
Addressing @nirvana-msu's comment
It is possible to identify the generator that yielded a given value by preparing sources accordingly:
async def main():
    runs = [test_gen(i) for i in range(3)]
    # Bind i as a default argument so each lambda keeps its own index
    # (otherwise all lambdas would see the final value of i)
    sources = [stream.map(xs, lambda x, i=i: (i, x)) for i, xs in enumerate(runs)]
    async for i, x in stream.merge(*sources):
        print(f'ID {i}: {x}')
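If you prefer to stay in the standard library, a rough equivalent of merge can be sketched with one task per generator feeding a shared asyncio.Queue. This is a minimal sketch (mine, not aiostream's implementation), with a simplified sentinel-based shutdown:

import asyncio

async def merge(*gens):
    """Yield items from several async generators as they arrive."""
    queue = asyncio.Queue()
    done = object()  # sentinel marking one generator as finished

    async def drain(gen):
        async for item in gen:
            await queue.put(item)
        await queue.put(done)

    tasks = [asyncio.create_task(drain(g)) for g in gens]
    remaining = len(tasks)
    try:
        while remaining:
            item = await queue.get()
            if item is done:
                remaining -= 1
            else:
                yield item
    finally:
        # make sure the feeder tasks don't outlive the consumer
        for t in tasks:
            t.cancel()

async def main():
    runs = [test_gen(i) for i in range(3)]
    async for x in merge(*runs):
        print(f'{x} yielded')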
Related
I am trying to connect to and receive messages from multiple websockets concurrently.
For this purpose I built it with asyncio, and it prints messages correctly. But the problem is that I can only print them, not return them.
The simplified pseudo code example I am struggling with is below:
import asyncio
import websockets
import json

symbols_id = [1, 2]

## LOOP RUNNING EXAMPLE OF ASYNCIO
async def get_connect(symbols_id):
    tasks = []
    for _id in symbols_id:
        print('connection to', _id)
        if _id == 1:
            a = 0
        elif _id == 2:
            a = 200
        tasks.append(asyncio.create_task(_loop(a)))
    return tasks

async def _loop(a):
    while True:
        print(a)
        a += 1
        await asyncio.sleep(2.5)

async def ping_func():
    while True:
        print('------ ping')
        await asyncio.sleep(5)

async def main():
    tasks = await get_connect(symbols_id)
    asyncio.create_task(ping_func())
    await asyncio.gather(*tasks)

asyncio.run(main())
As you can see from the code above, I used print(a) to print a in each loop.
I tried return a instead of print(a), but it was not helpful.
thanks
yield a? return a will exit the function and the loop; yield is usually what you want in asyncio for looped tasks.
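To illustrate the comment's point (a hedged sketch, not code from the thread): return ends the coroutine after a single value, while yield in an async generator hands a value back and then resumes the loop:

import asyncio

async def one_shot():
    return 1                      # coroutine ends after a single value

async def looped():
    n = 0
    while True:
        yield n                   # hand a value back, then continue
        n += 1
        await asyncio.sleep(1)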
Finally I found a way to use yield and async for to read the data in each loop.
It works correctly after changing the code to the following:
import asyncio
import websockets
import json

symbols_id = [1, 2]
a = 0

## LOOP RUNNING EXAMPLE OF ASYNCIO
async def get_connect(symbols_id):
    tasks = []
    for _id in symbols_id:
        print('connection to', _id)
        if _id == 1:
            a = 0
        elif _id == 2:
            a = 200
        tasks.append(asyncio.create_task(_loop(a)))
    return tasks

async def _loop(param):
    global a
    a = param
    while True:
        print(a)
        a += 1
        await asyncio.sleep(2.5)

async def ping_func():
    while True:
        print('------ ping')
        await asyncio.sleep(5)

async def get_result():
    global a
    while True:
        yield a
        await asyncio.sleep(1)

async def main():
    tasks = await get_connect(symbols_id)
    asyncio.create_task(ping_func())
    async for x in get_result():
        print(x)
    await asyncio.gather(*tasks)

asyncio.run(main())
I was confused about how to use the data generated in one code snippet inside another snippet. What I found is (see the sketch after this list):
1- The generated data can be made accessible through global variables.
2- By defining a class and a property, it can be made accessible from every part of the code.
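A minimal sketch of the second option (hypothetical names, not from the thread): keep the shared value on an instance that both the producing loop and the consumer hold, instead of a module-level global:

import asyncio

class SharedState:
    def __init__(self):
        self._value = 0

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, v):
        self._value = v

async def producer(state):
    while True:
        state.value += 1          # update the shared state each tick
        await asyncio.sleep(0.5)

async def consumer(state):
    for _ in range(5):
        print(state.value)        # readable anywhere the instance is held
        await asyncio.sleep(1)

async def main():
    state = SharedState()
    task = asyncio.create_task(producer(state))
    await consumer(state)
    task.cancel()

asyncio.run(main())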
In asyncio, is there a way to attach a callback function to a Task, such that this callback function will run after the Task has completed?
So far, the only way I can figure out is to use asyncio.as_completed in a loop, as shown below. But this requires two lists (tasks and cb_tasks) to hold all the tasks/futures.
Is there a better way to do this?
import asyncio
import random

class Foo:
    async def start(self):
        tasks = []
        cb_tasks = []
        # Start ten `do_work` tasks simultaneously
        for i in range(10):
            task = asyncio.create_task(self.do_work(i))
            tasks.append(task)
        # How to run `self.handle_work_done` as soon as this `task` is completed?
        for f in asyncio.as_completed(tasks):
            res = await f
            t = asyncio.create_task(self.work_done_cb(res))
            cb_tasks.append(t)
        await asyncio.wait(tasks + cb_tasks)

    async def do_work(self, i):
        """ Simulate doing some work """
        x = random.randint(1, 10)
        await asyncio.sleep(x)
        print(f"Finished work #{i}")
        return x

    async def work_done_cb(self, x):
        """ Callback after `do_work` has been completed """
        await asyncio.sleep(random.randint(1, 3))
        print(f"Finished additional work {x}")

if __name__ == "__main__":
    foo = Foo()
    asyncio.run(foo.start())
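For reference, one standard-library approach is Task.add_done_callback. A hedged sketch (mine, not from the question): the done callback must be a plain function, so it spawns the coroutine callback as a new task:

import asyncio
import random

async def do_work(i):
    await asyncio.sleep(random.random())
    return i

async def work_done_cb(x):
    print(f"Finished additional work {x}")

async def main():
    cb_tasks = []

    def on_done(task):
        # Called as soon as `task` finishes; schedule the async callback
        cb_tasks.append(asyncio.create_task(work_done_cb(task.result())))

    tasks = [asyncio.create_task(do_work(i)) for i in range(10)]
    for t in tasks:
        t.add_done_callback(on_done)

    # on_done was registered before gather added its own callback,
    # so cb_tasks is fully populated once gather returns
    await asyncio.gather(*tasks)
    await asyncio.gather(*cb_tasks)

asyncio.run(main())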
I have a complex function Vehicle.set_data, which has many nested functions, API calls, DB calls, etc. For the sake of this example, I will simplify it.
I am trying to use Async IO to run Vehicle.set_data on multiple vehicles at once. Here is my Vehicle model:
class Vehicle:
    def __init__(self, token):
        self.token = token

    # Works async
    async def set_data(self):
        await asyncio.sleep(random.random() * 10)

    # Does not work async
    # def set_data(self):
    #     time.sleep(random.random() * 10)
And here is my Async IO routine:
async def set_vehicle_data(vehicle):
    # sleep for T seconds on average
    await vehicle.set_data()

def get_random_string():
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(5))

async def producer(queue):
    count = 0
    while True:
        count += 1
        # produce a token and send it to a consumer
        token = get_random_string()
        vehicle = Vehicle(token)
        print(f'produced {vehicle.token}')
        await queue.put(vehicle)
        if count > 3:
            break

async def consumer(queue):
    while True:
        vehicle = await queue.get()
        # process the token received from a producer
        print(f'Starting consumption for vehicle {vehicle.token}')
        await set_vehicle_data(vehicle)
        queue.task_done()
        print(f'Ending consumption for vehicle {vehicle.token}')

async def main():
    queue = asyncio.Queue()
    # #todo now, do I need multiple producers
    producers = [asyncio.create_task(producer(queue))
                 for _ in range(3)]
    consumers = [asyncio.create_task(consumer(queue))
                 for _ in range(3)]
    # with both producers and consumers running, wait for
    # the producers to finish
    await asyncio.gather(*producers)
    print('---- done producing')
    # wait for the remaining tasks to be processed
    await queue.join()
    # cancel the consumers, which are now idle
    for c in consumers:
        c.cancel()

asyncio.run(main())
In the example above, this commented section of code does not allow multiple vehicles to process at once:
# Does not work async
# def set_data(self):
#     time.sleep(random.random() * 10)
Because this is such a complex query in our actual codebase, it would be a tremendous refactor to go flag every single nested function with async and await. Is there any way I can make this function work async without marking up my whole codebase with async?
You can run the function in a separate thread with asyncio.to_thread (available since Python 3.9):
await asyncio.to_thread(self.set_data)
If you're using Python < 3.9, use loop.run_in_executor:

loop = asyncio.get_running_loop()
await loop.run_in_executor(None, self.set_data)
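A minimal end-to-end sketch of the to_thread approach, reusing names from the question (with shorter sleeps for brevity). Note this helps for blocking I/O; CPU-bound work still holds the GIL and would need a process pool instead:

import asyncio
import random
import time

class Vehicle:
    def __init__(self, token):
        self.token = token

    def set_data(self):                 # blocking, left unchanged
        time.sleep(random.random() * 2)

async def set_vehicle_data(vehicle):
    # The blocking call runs in a worker thread, so vehicles overlap
    await asyncio.to_thread(vehicle.set_data)
    print(f'done {vehicle.token}')

async def main():
    vehicles = [Vehicle(t) for t in ('a', 'b', 'c')]
    await asyncio.gather(*(set_vehicle_data(v) for v in vehicles))

asyncio.run(main())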
Given a regular generator, you can get an iterator from it that can only be consumed once and continues where you left off. Like this -

sync_gen = (i for i in range(10))

def fetch_batch_sync(num_tasks, job_list):
    for i, job in enumerate(job_list):
        yield job
        if i == num_tasks - 1:
            break
>>> sync_gen_iter = sync_gen.__iter__()
>>> for i in fetch_batch_sync(2, sync_gen_iter):
...     print(i)
...
0
1
>>> for i in fetch_batch_sync(3, sync_gen_iter):
...     print(i)
...
2
3
4
Is there a way to do the same with an async generator?
async def fetch_batch_async(num_tasks, job_list_iter):
    async for i, job in enumerate(job_list_iter):
        yield job
        if i == num_tasks - 1:
            break
The only difference between regular and async generators is that async generators' equivalents of __next__ and __iter__ (__anext__ and __aiter__) belong to the asynchronous iteration protocol - __anext__ must be awaited. This is why ordinary for and enumerate fail to recognize them as iterables.
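To make that concrete, here is a small illustration (mine, not from the original answer) of driving an async iterator by hand:

import asyncio

async def gen():
    yield 1
    yield 2

async def main():
    it = gen().__aiter__()        # __aiter__ returns the iterator synchronously
    print(await it.__anext__())   # the async counterpart of next(it) -> 1
    print(await it.__anext__())   # -> 2

asyncio.run(main())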
As with regular generators, it is possible to extract a subset of values out of an async generator, but you need to use the appropriate tools. fetch_batch_async already uses async for, but it should also use an async version of enumerate; for example:
async def aenumerate(aiterable, start=0):
    i = start
    async for obj in aiterable:
        yield i, obj
        i += 1
fetch_batch_async would use it exactly like enumerate:
async def fetch_batch_async(num_tasks, job_list_iter):
    async for i, job in aenumerate(job_list_iter):
        yield job
        if i == num_tasks - 1:
            break
Finally, this code uses fetch_batch_async to extract several items out of an infinite async iterator:
import asyncio, time

async def infinite():
    while True:
        yield time.time()
        await asyncio.sleep(.1)

async def main():
    async for received in fetch_batch_async(10, infinite()):
        print(received)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
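And, mirroring the synchronous REPL session above, the same iterator can be passed to fetch_batch_async repeatedly to continue where the previous batch left off (a small usage sketch under the definitions above):

async def demo():
    it = infinite()                     # one iterator, consumed incrementally
    async for v in fetch_batch_async(2, it):
        print('batch 1:', v)
    async for v in fetch_batch_async(3, it):
        print('batch 2:', v)            # resumes after the first two items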
While working in asyncio, I'm trying to use a list comprehension to build my task list. The basic form of the function is as follows:
import asyncio
import urllib.request as req

@asyncio.coroutine
def coro(term):
    print(term)
    google = "https://www.google.com/search?q=" + term.replace(" ", "+") + "&num=100&start=0"
    request = req.Request(google, None, headers)  # headers defined elsewhere
    # (some beautiful soup stuff)
My goal is to use a list of terms to create my task list:
terms = ["pie", "chicken" ,"things" ,"stuff"]
tasks=[
coro("pie"),
coro("chicken"),
coro("things"),
coro("stuff")]
My initial thought was:
loop = asyncio.get_event_loop()
tasks = [my_coroutine(term) for term in terms]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
This doesn't create the task list; it runs the function during the list comprehension. Is there a way to use a shortcut to create the task list without writing every task?
Your HTTP client does not support asyncio, and you will not get the expected results. Try this to see that .wait() does work as you expected:
import asyncio
import random

@asyncio.coroutine
def my_coroutine(term):
    print("start", term)
    yield from asyncio.sleep(random.uniform(1, 3))
    print("end", term)

terms = ["pie", "chicken", "things", "stuff"]

loop = asyncio.get_event_loop()
tasks = [my_coroutine(term) for term in terms]
print("Here we go!")
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
If you use asyncio.gather() you get one future encapsulating all your tasks, which can easily be cancelled with .cancel(); here demonstrated with the Python 3.5+ async def/await syntax (but it works the same with @asyncio.coroutine and yield from):
import asyncio
import random

async def my_coroutine(term):
    print("start", term)
    n = random.uniform(0.2, 1.5)
    await asyncio.sleep(n)
    print("end", term)
    return "Term {} slept for {:.2f} seconds".format(term, n)

async def stop_all():
    """Cancels all still running tasks after one second"""
    await asyncio.sleep(1)
    print("stopping")
    fut.cancel()
    return ":-)"

loop = asyncio.get_event_loop()
terms = ["pie", "chicken", "things", "stuff"]
tasks = (my_coroutine(term) for term in terms)
fut = asyncio.gather(stop_all(), *tasks, return_exceptions=True)
print("Here we go!")
loop.run_until_complete(fut)

for task_result in fut.result():
    if not isinstance(task_result, Exception):
        print("OK", task_result)
    else:
        print("Failed", task_result)

loop.close()
And finally, if you want to use an async HTTP client, try aiohttp. First install it with:
pip install aiohttp
then try this example, which uses asyncio.as_completed:
import asyncio
import aiohttp

async def fetch(session, url):
    print("Getting {}...".format(url))
    async with session.get(url) as resp:
        text = await resp.text()
        return "{}: Got {} bytes".format(url, len(text))

async def fetch_all():
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, "http://httpbin.org/delay/{}".format(delay))
                 for delay in (1, 1, 2, 3, 3)]
        for task in asyncio.as_completed(tasks):
            print(await task)
    return "Done."

loop = asyncio.get_event_loop()
resp = loop.run_until_complete(fetch_all())
print(resp)
loop.close()
This works in Python 3.5 (using the new async/await syntax):
import asyncio

async def coro(term):
    for i in range(3):
        await asyncio.sleep(int(len(term)))  # just sleep
        print("cor1", i, term)

terms = ["pie", "chicken", "things", "stuff"]
tasks = [coro(term) for term in terms]

loop = asyncio.get_event_loop()
cors = asyncio.wait(tasks)
loop.run_until_complete(cors)
Shouldn't your version yield from req.Request(google, None, headers)? And what library is that - is it even made for use with asyncio?
(here is the same code with the python <= 3.4 syntax; the missing parts are the same as above):
@asyncio.coroutine
def coro(term):
    for i in range(3):
        yield from asyncio.sleep(int(len(term)))  # just sleep
        print("cor1", i, term)
Create the task list and run the event loop:

async def main():
    tasks = []
    while terms:
        # wrap each term in the coroutine before scheduling it
        tasks.append(asyncio.create_task(coro(terms.pop())))
    await asyncio.gather(*tasks, return_exceptions=True)

asyncio.run(main())