asyncio Add Callback after Task is completed, instead of asyncio.as_completed? - python

In asyncio, it there a way to attach a callback function to a Task such that this callback function will run after the Task has been completed?
So far, the only way I can figure out is to use asyncio.completed in a loop, as shown below. But this requires 2 lists (tasks and cb_tasks) to hold all the tasks/futures.
Is there a better way to do this?
import asyncio
import random
class Foo:
async def start(self):
tasks = []
cb_tasks = []
# Start ten `do_work` tasks simultaneously
for i in range(10):
task = asyncio.create_task(self.do_work(i))
tasks.append(task)
# How to run `self.handle_work_done` as soon as this `task` is completed?
for f in asyncio.as_completed(tasks):
res = await f
t = asyncio.create_task(self.work_done_cb(res))
cb_tasks.append(t)
await asyncio.wait(tasks + cb_tasks)
async def do_work(self, i):
""" Simulate doing some work """
x = random.randint(1, 10)
await asyncio.sleep(x)
print(f"Finished work #{i}")
return x
async def work_done_cb(self, x):
""" Callback after `do_work` has been completed """
await asyncio.sleep(random.randint(1, 3))
print(f"Finished additional work {x}")
if __name__ == "__main__":
foo = Foo()
asyncio.run(foo.start())

Related

How can I make Async IO work on a non async function?

I have a complex function Vehicle.set_data, which has many nested functions, API calls, DB calls, etc. For the sake of this example, I will simplify it.
I am trying to use Async IO to run Vehicle.set_data on multiple vehicles at once. Here is my Vehicle model:
class Vehicle:
def __init__(self, token):
self.token = token
# Works async
async def set_data(self):
await asyncio.sleep(random.random() * 10)
# Does not work async
# def set_data(self):
# time.sleep(random.random() * 10)
And here is my Async IO routinue:
async def set_vehicle_data(vehicle):
# sleep for T seconds on average
await vehicle.set_data()
def get_random_string():
return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(5))
async def producer(queue):
count = 0
while True:
count += 1
# produce a token and send it to a consumer
token = get_random_string()
vehicle = Vehicle(token)
print(f'produced {vehicle.token}')
await queue.put(vehicle)
if count > 3:
break
async def consumer(queue):
while True:
vehicle = await queue.get()
# process the token received from a producer
print(f'Starting consumption for vehicle {vehicle.token}')
await set_vehicle_data(vehicle)
queue.task_done()
print(f'Ending consumption for vehicle {vehicle.token}')
async def main():
queue = asyncio.Queue()
# #todo now, do I need multiple producers
producers = [asyncio.create_task(producer(queue))
for _ in range(3)]
consumers = [asyncio.create_task(consumer(queue))
for _ in range(3)]
# with both producers and consumers running, wait for
# the producers to finish
await asyncio.gather(*producers)
print('---- done producing')
# wait for the remaining tasks to be processed
await queue.join()
# cancel the consumers, which are now idle
for c in consumers:
c.cancel()
asyncio.run(main())
In the example above, this commented section of code does not allow multiple vehicles to process at once:
# Does not work async
# def set_data(self):
# time.sleep(random.random() * 10)
Because this is such a complex query in our actual codebase, it would be a tremendous refactor to go flag every single nested function with async and await. Is there any way I can make this function work async without marking up my whole codebase with async?
You can run the function in a separate thread with asyncio.to_thread
await asyncio.to_thread(self.set_data)
If you're using python <3.9 use loop.run_in_executor
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self.set_data)

async / await: Continuously stream data from async method

I'm using python 3.5 to asynchronously return data from one method to another as follows:
async def A():
# Need to get data here from B continuously
val = await B()
async def B():
# Need to get data here from C continuously as they get generated inside while loop of method C
data = await C()
# Modify and process the data and return to A
return await D(data)
async def C():
i = 0
while i < 5:
await asyncio.sleep(1)
# Return this data to method B one by one, Not sure how to do this ??
return i
async def D(val):
# Do some processing of val and return it
return val
I want to continuously stream data from method C and return it to method B, process each item as they are received and return it to method A.
One way is use an asyncio queue and pass it to method B from A, from where it further gets passed on to C.
Method C would keep writing the content in the queue.
Method B would read from queue, process the data and update the queue.
Method A reads the queue at the end for finally processed data.
Can we achieve it using coroutines or async method itself in any other way ? Wish to avoid calls for reading and writing to queues continuously for every request.
import asyncio
from async_generator import async_generator, yield_, yield_from_
async def fun(n):
print("Finding %d-1" % n)
await asyncio.sleep(n/2)
result = n - 1
print("%d - 1 = %d" % (n, result))
return result
#async_generator
async def main(l):
futures = [ fun(n) for n in l ]
for i, future in enumerate(asyncio.as_completed(futures)):
result = await future
print("inside the main..")
print(result)
await yield_(result)
#async_generator
async def dealer():
l = [2, 4, 6]
gen = main(l)
async for item in gen:
print("inside the dealer....")
await yield_(item)
async def dealer1():
gen = dealer()
async for item in gen:
print("inside dealer 1")
print(item)
if __name__ == "__main__":
loop = asyncio.get_event_loop()
#loop.run_until_complete(cc.main())
loop.run_until_complete(dealer1())
loop.close()
You have support for async generators in python3.6. If you are working with python 3.5 you may use async_generator library(https://pypi.python.org/pypi/async_generator/1.5)

run async while loop independently

Is it possible to run an async while loop independently of another one?
Instead of the actual code I isolated the issue I am having in the following example code
import asyncio, time
class Time:
def __init__(self):
self.start_time = 0
async def dates(self):
while True:
t = time.time()
if self.start_time == 0:
self.start_time = t
yield t
await asyncio.sleep(1)
async def printer(self):
while True:
print('looping') # always called
await asyncio.sleep(self.interval)
async def init(self):
async for i in self.dates():
if i == self.start_time:
self.interval = 3
await self.printer()
print(i) # Never Called
loop = asyncio.get_event_loop()
t = Time()
loop.run_until_complete(t.init())
Is there a way to have the print function run independently so print(i) gets called each time?
What it should do is print(i) each second and every 3 seconds call self.printer(i)
Essentially self.printer is a separate task that does not need to be called very often, only every x seconds(in this case 3).
In JavaScript the solution is to do something like so
setInterval(printer, 3000);
EDIT: Ideally self.printer would also be able to be canceled / stopped if a condition or stopping function is called
The asyncio equivalent of JavaScript's setTimeout would be asyncio.ensure_future:
import asyncio
async def looper():
for i in range(1_000_000_000):
print(f'Printing {i}')
await asyncio.sleep(0.5)
async def main():
print('Starting')
future = asyncio.ensure_future(looper())
print('Waiting for a few seconds')
await asyncio.sleep(4)
print('Cancelling')
future.cancel()
print('Waiting again for a few seconds')
await asyncio.sleep(2)
print('Done')
if __name__ == '__main__':
asyncio.get_event_loop().run_until_complete(main())
You'd want to register your self.printer() coroutine as a separate task; pass it to asyncio.ensure_future() rather than await on it directly:
asyncio.ensure_future(self.printer())
By passing the coroutine to asyncio.ensure_future(), you put it on the list of events that the loop switches between as each awaits on further work to be completed.
With that change, your test code outputs:
1516819094.278697
looping
1516819095.283424
1516819096.283742
looping
1516819097.284152
# ... etc.
Tasks are the asyncio equivalent of threads in a multithreading scenario.

asyncio as_yielded from async generators

I'm looking to be able to yield from a number of async coroutines. Asyncio's as_completed is kind of close to what I'm looking for (i.e. I want any of the coroutines to be able to yield at any time back to the caller and then continue), but that only seems to allow regular coroutines with a single return.
Here's what I have so far:
import asyncio
async def test(id_):
print(f'{id_} sleeping')
await asyncio.sleep(id_)
return id_
async def test_gen(id_):
count = 0
while True:
print(f'{id_} sleeping')
await asyncio.sleep(id_)
yield id_
count += 1
if count > 5:
return
async def main():
runs = [test(i) for i in range(3)]
for i in asyncio.as_completed(runs):
i = await i
print(f'{i} yielded')
if __name__ == '__main__':
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
Replacing runs = [test(i) for i in range(3)] with runs = [test_gen(i) for i in range(3)] and for for i in asyncio.as_completed(runs) to iterate on each yield is what I'm after.
Is this possible to express in Python and are there any third party maybe that give you more options then the standard library for coroutine process flow?
Thanks
You can use aiostream.stream.merge:
from aiostream import stream
async def main():
runs = [test_gen(i) for i in range(3)]
async for x in stream.merge(*runs):
print(f'{x} yielded')
Run it in a safe context to make sure the generators are cleaned up properly after the iteration:
async def main():
runs = [test_gen(i) for i in range(3)]
merged = stream.merge(*runs)
async with merged.stream() as streamer:
async for x in streamer:
print(f'{x} yielded')
Or make it more compact using pipes:
from aiostream import stream, pipe
async def main():
runs = [test_gen(i) for i in range(3)]
await (stream.merge(*runs) | pipe.print('{} yielded'))
More examples in the documentation.
Adressing #nirvana-msu comment
It is possible to identify the generator that yielded a given value by preparing sources accordingly:
async def main():
runs = [test_gen(i) for i in range(3)]
sources = [stream.map(xs, lambda x: (i, x)) for i, xs in enumerate(runs)]
async for i, x in stream.merge(*sources):
print(f'ID {i}: {x}')

Execute future only when accessed

I would like to do something like the following:
import asyncio
async def g():
print('called g')
return 'somevalue'
async def f():
x = g()
loop = asyncio.get_event_loop()
loop.run_until_complete(f())
loop.close()
Where there is no output. Notice that I did not await the g(). This will generate a g was not awaited exception, but I'm looking for behaviour where g most definitely did not run.
This is useful for me where I have a long running operation with complex setup, but I only need its result in certain situations, so why bother running it when it is not needed. Kind of an 'on demand' situation.
How can I do this?
One option is to use simple flags to signal tasks:
import asyncio
import random
async def g(info):
print('> called g')
if not info['skip']:
print('* running g', info['id'])
await asyncio.sleep(random.uniform(1, 3))
else:
print('- skiping g', info['id'])
print('< done g', info['id'])
return info['id']
async def main():
data = [{
'id': i,
'skip': False
} for i in range(10)]
# schedule 10 tasks to run later
tasks = [asyncio.ensure_future(g(info)) for info in data]
# tell some tasks to skip processing
data[2]['skip'] = True
data[5]['skip'] = True
data[6]['skip'] = True
# wait for all results
results = await asyncio.gather(*tasks)
print(results)
print("Done!")
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
A different option would be using task.cancel:
import asyncio
async def coro(x):
print('coro', x)
return x
async def main():
task1 = asyncio.ensure_future(coro(1))
task2 = asyncio.ensure_future(coro(2))
task3 = asyncio.ensure_future(coro(3))
task2.cancel()
for task in asyncio.as_completed([task1, task2, task3]):
try:
result = await task
print("success", result)
except asyncio.CancelledError as e:
print("cancelled", e)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()

Categories