How to restart all tasks in gather when one raises an error? - python

I have two tasks. When one task raises an error, I wish to restart them both.
Is the following the appropriate way to catch an exception propagated by one task, and restart the gather for the two tasks?
import asyncio

async def foo():
    while True:
        await asyncio.sleep(1)
        print("foo")

async def bar():
    for _ in range(3):
        await asyncio.sleep(1)
        print("bar")
    raise ValueError

async def main():
    while True:
        footask = asyncio.create_task(foo())
        bartask = asyncio.create_task(bar())
        bothtasks = asyncio.gather(footask, bartask)
        try:
            await bothtasks
        except ValueError:
            print("caught ValueError")
            try:
                footask.cancel()
            except asyncio.CancelledError:
                pass

asyncio.run(main())
Basically asyncio intentionally doesn't cancel the other tasks in a gather when one task raises an error. So, since I can't think of anything better, I manually cancel the other task(s) with task.cancel() and handle the asyncio.CancelledError myself.
I'm just not convinced this is the intended use of the API; insights appreciated.
Edit:
In the asyncio 3.7 docs it reads:
If gather() is cancelled, all submitted awaitables (that have not completed yet) are also cancelled.
But the behaviour I observe when I replace footask.cancel() with bothtasks.cancel() is that on every iteration of the while loop an additional foo is awaited, i.e. foo appears not to be cancelled by cancelling the gather. The output looks something like this:
foo
bar
foo
bar
foo
bar
caught ValueError
foo
foo
bar
foo
foo
bar
foo
foo
bar
caught ValueError
foo
foo
foo
bar
foo
foo
foo
bar
foo
foo
foo
bar
caught ValueError
...

The standard idiom to ensure that the tasks have processed their cancellation is to add a gather(*tasks, return_exceptions=True) following the cancellation. Note that cancelling the gather future itself doesn't help here: by the time the ValueError propagates, the gather future is already done, so bothtasks.cancel() is a no-op, which is why the foos pile up in the output above. For example:
async def main():
    while True:
        footask = asyncio.create_task(foo())
        bartask = asyncio.create_task(bar())
        tasks = (footask, bartask)  # or a list comprehension, etc.
        try:
            await asyncio.gather(*tasks)
        except ValueError:
            print("caught ValueError")
            for t in tasks:
                t.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
Note that you might want to do that for all exceptions, not just ValueError, because otherwise a task completing with a non-ValueError exception will still cause other tasks to continue running.
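For instance, a sketch of that broadened variant (reusing foo and bar from the question, and catching Exception instead of just ValueError):
async def main():
    while True:
        tasks = [asyncio.create_task(foo()), asyncio.create_task(bar())]
        try:
            await asyncio.gather(*tasks)
        except Exception as exc:
            print(f"caught {type(exc).__name__}")
            for t in tasks:
                t.cancel()
            # let the cancellations be processed before restarting
            await asyncio.gather(*tasks, return_exceptions=True)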

When the exception happens, footask is not cancelled because, as you can read in the docs:
If return_exceptions is False (default), the first raised exception is
immediately propagated to the task that awaits on gather(). Other
awaitables in the aws sequence won’t be cancelled and will continue to
run.
So we should manually cancel footask and await until it is actually cancelled:
async def main():
    while True:
        footask = asyncio.create_task(foo())
        bartask = asyncio.create_task(bar())
        bothtasks = asyncio.gather(footask, bartask)
        try:
            await bothtasks
        except ValueError:
            print("caught ValueError")
            footask.cancel()  # cancel() only marks the task for cancellation
            try:
                await footask  # wait until it has actually been cancelled
            except asyncio.CancelledError:
                pass
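As a small variant, contextlib.suppress can replace the try/except around the final await; the helper name cancel_and_wait below is just illustrative:
import asyncio
from contextlib import suppress

async def cancel_and_wait(task):
    # request cancellation, then wait until it is actually processed
    task.cancel()
    with suppress(asyncio.CancelledError):
        await task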
Update:
I wrote advanced_gather, which acts like gather but has an additional kwarg cancel_on_exception to cancel every task when one of them raises. Full code:
import asyncio

async def advanced_gather(
    *aws,
    loop=None,  # note: the explicit loop argument to gather() was removed in Python 3.10
    return_exceptions=False,
    cancel_on_exception=False
):
    tasks = [
        asyncio.ensure_future(aw, loop=loop)
        for aw
        in aws
    ]
    try:
        return await asyncio.gather(
            *tasks,
            loop=loop,
            return_exceptions=return_exceptions
        )
    except Exception:
        if cancel_on_exception:
            for task in tasks:
                if not task.done():
                    task.cancel()
            await asyncio.gather(
                *tasks,
                loop=loop,
                return_exceptions=True
            )
        raise

async def foo():
    while True:
        await asyncio.sleep(1)
        print("foo")

async def bar():
    for _ in range(3):
        await asyncio.sleep(1)
        print("bar")
    raise ValueError

async def main():
    while True:
        try:
            await advanced_gather(
                foo(),
                bar(),
                cancel_on_exception=True
            )
        except ValueError:
            print("caught ValueError")

asyncio.run(main())
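On Python 3.11+, asyncio.TaskGroup gives this cancel-on-exception behaviour out of the box. A sketch (not part of the original answer), reusing foo and bar from above; note the except* syntax, since a TaskGroup wraps failures in an ExceptionGroup:
import asyncio

async def main():
    while True:
        try:
            # a TaskGroup cancels the remaining tasks as soon as one fails
            async with asyncio.TaskGroup() as tg:
                tg.create_task(foo())
                tg.create_task(bar())
        except* ValueError:
            print("caught ValueError")

asyncio.run(main())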
Different cases of what can happen:
import asyncio
from contextlib import asynccontextmanager, suppress

async def test(_id, raise_exc=False):
    if raise_exc:
        print(f'we raise RuntimeError inside {_id}')
        raise RuntimeError('!')
    try:
        await asyncio.sleep(0.2)
    except asyncio.CancelledError:
        print(f'CancelledError was raised inside {_id}')
        raise
    else:
        print(f'everything calm inside {_id}')

@asynccontextmanager
async def prepared_stuff(foo_exc=False):
    foo = asyncio.create_task(test('foo', raise_exc=foo_exc))
    bar = asyncio.create_task(test('bar'))
    gather = asyncio.gather(
        foo,
        bar
    )
    await asyncio.sleep(0)  # make sure everything has started

    yield (foo, bar, gather)

    try:
        await gather
    except Exception as exc:
        print(f'gather raised {type(exc)}')
    finally:
        # make sure both tasks have finished:
        await asyncio.gather(
            foo,
            bar,
            return_exceptions=True
        )
        print('')

# ----------------------------------------------

async def everything_calm():
    async with prepared_stuff() as (foo, bar, gather):
        print('everything_calm:')

async def foo_raises_exception():
    async with prepared_stuff(foo_exc=True) as (foo, bar, gather):
        print('foo_raises_exception:')

async def foo_cancelled():
    async with prepared_stuff() as (foo, bar, gather):
        print('foo_cancelled:')
        foo.cancel()

async def gather_cancelled():
    async with prepared_stuff() as (foo, bar, gather):
        print('gather_cancelled:')
        gather.cancel()

async def main():
    await everything_calm()
    await foo_raises_exception()
    await foo_cancelled()
    await gather_cancelled()

asyncio.run(main())

Python - Cancel task in asyncio?

I have written code for an async pool below. In __aexit__ I'm cancelling the _worker tasks after the tasks get finished. But when I run the code, the worker tasks are not getting cancelled and the code runs forever. This is what the task looks like: <Task pending coro=<AsyncPool._worker() running at \async_pool.py:17> wait_for=<Future cancelled>>. The asyncio.wait_for is getting cancelled, but not the worker tasks.
class AsyncPool:
    def __init__(self, coroutine, no_of_workers, timeout):
        self._loop = asyncio.get_event_loop()
        self._queue = asyncio.Queue()
        self._no_of_workers = no_of_workers
        self._coroutine = coroutine
        self._timeout = timeout
        self._workers = None

    async def _worker(self):
        while True:
            try:
                ret = False
                queue_item = await self._queue.get()
                ret = True
                result = await asyncio.wait_for(self._coroutine(queue_item), timeout=self._timeout, loop=self._loop)
            except Exception as e:
                print(e)
            finally:
                if ret:
                    self._queue.task_done()

    async def push_to_queue(self, item):
        self._queue.put_nowait(item)

    async def __aenter__(self):
        assert self._workers == None
        self._workers = [asyncio.create_task(self._worker()) for _ in range(self._no_of_workers)]
        return self

    async def __aexit__(self, type, value, traceback):
        await self._queue.join()
        for worker in self._workers:
            worker.cancel()
        await asyncio.gather(*self._workers, loop=self._loop, return_exceptions=True)
To use the AsyncPool:
async def something(item):
    print("got", item)
    await asyncio.sleep(item)

async def main():
    async with AsyncPool(something, 5, 2) as pool:
        for i in range(10):
            await pool.push_to_queue(i)

asyncio.run(main())
The output in my terminal ends in a stream of blank lines, and the program never exits.
The problem is that your except Exception clause also catches the cancellation, and ignores it. To add to the confusion, print(e) just prints an empty line in the case of a CancelledError, which is where the empty lines in the output come from. (Changing it to print(type(e)) shows what's going on.)
To correct the issue, change except Exception to something more specific, like except asyncio.TimeoutError. This change is not needed in Python 3.8 where asyncio.CancelledError no longer derives from Exception, but from BaseException, so except Exception doesn't catch it.
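For concreteness, a sketch of the corrected _worker (otherwise unchanged; the deprecated loop argument to wait_for is dropped here, and only the timeout is swallowed):
async def _worker(self):
    while True:
        try:
            ret = False
            queue_item = await self._queue.get()
            ret = True
            result = await asyncio.wait_for(self._coroutine(queue_item),
                                            timeout=self._timeout)
        except asyncio.TimeoutError:
            # CancelledError now propagates and actually stops the worker
            print("task timed out")
        finally:
            if ret:
                self._queue.task_done()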
When you have an asyncio task that was created and then cancelled, the task is still alive and needs to be "reclaimed", so you want to await it. However, once you await such a cancelled task, it will never give you back the expected return value; instead asyncio.CancelledError will be raised, and you need to catch it somewhere.
Because of this behavior, I don't think you should gather them; await each of the cancelled tasks instead, as they are supposed to return right away:
async def __aexit__(self, type, value, traceback):
    await self._queue.join()
    for worker in self._workers:
        worker.cancel()
    for worker in self._workers:
        try:
            await worker
        except asyncio.CancelledError:
            print("worker cancelled:", worker)
This appears to work. The event is a counting timer and when it expires it cancels the tasks.
import asyncio
from datetime import datetime as dt
from datetime import timedelta as td
import random
import time

class Program:
    def __init__(self):
        self.duration_in_seconds = 20
        self.program_start = dt.now()
        self.event_has_expired = False
        self.canceled_success = False

    async def on_start(self):
        print("On Start Event Start! Applying Overrides!!!")
        await asyncio.sleep(random.randint(3, 9))

    async def on_end(self):
        print("On End Releasing All Overrides!")
        await asyncio.sleep(random.randint(3, 9))

    async def get_sensor_readings(self):
        print("getting sensor readings!!!")
        await asyncio.sleep(random.randint(3, 9))

    async def evaluate_data(self):
        print("checking data!!!")
        await asyncio.sleep(random.randint(3, 9))

    async def check_time(self):
        if (dt.now() - self.program_start > td(seconds=self.duration_in_seconds)):
            self.event_has_expired = True
            print("Event is DONE!!!")
        else:
            print("Event is not done! ", dt.now() - self.program_start)

    async def main(self):
        # script starts, do only once self.on_start()
        await self.on_start()
        print("On Start Done!")
        while not self.canceled_success:
            readings = asyncio.ensure_future(self.get_sensor_readings())
            analysis = asyncio.ensure_future(self.evaluate_data())
            checker = asyncio.ensure_future(self.check_time())
            if not self.event_has_expired:
                await readings
                await analysis
                await checker
            else:
                # close other tasks before final shutdown
                readings.cancel()
                analysis.cancel()
                checker.cancel()
                self.canceled_success = True
                print("cancelled hit!")
        # script ends, do only once self.on_end() when the event is done
        await self.on_end()
        print('Done Deal!')

async def main():
    program = Program()
    await program.main()

asyncio.run(main())

Any chance to shield create_task'ed task?

I need to shield a task fired with create_task within an aiohttp handler, like
async def handler(request):
    asyncio.create_task(long_process())
    return {}, 200
So the code is
import asyncio

async def shielded(coro):
    try:
        await asyncio.shield(coro)
    except asyncio.CancelledError:
        await coro
        raise

def create_task(coro) -> asyncio.Task:
    task = asyncio.create_task(shielded(coro))
    return task
But this test
async def test_create_task_cancel():
    async def coro():
        await asyncio.sleep(1)
        return None

    task = create_task(coro())
    await asyncio.sleep(0.1)
    task.cancel()
    await asyncio.sleep(1)
    assert task.done() and task.result() is None
fails with
RuntimeError: coroutine is being awaited already
The aiohttp manual suggests using aiojobs.Scheduler, but it doesn't work as expected:
https://github.com/aio-libs/aiojobs/issues/148
https://github.com/aio-libs/aiojobs/issues/72
Working shield function. The RuntimeError comes from the fact that asyncio.shield() already wraps a bare coroutine in a task internally, so the except branch above then awaits the same coroutine object a second time. Converting the coroutine to a future first avoids this:
async def shielded(coro):
    ft = asyncio.ensure_future(coro)
    shielded_ft = asyncio.shield(ft)
    try:
        await shielded_ft
    except asyncio.CancelledError:
        await ft
        raise
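A sketch of how this behaves under the question's test (assuming Python 3.8+): the wrapper task itself still ends up cancelled, but the inner coroutine runs to completion, so the final assertion is adjusted accordingly:
import asyncio

async def shielded(coro):
    ft = asyncio.ensure_future(coro)
    shielded_ft = asyncio.shield(ft)
    try:
        await shielded_ft
    except asyncio.CancelledError:
        await ft  # the inner future keeps running; wait for it
        raise

def create_task(coro) -> asyncio.Task:
    return asyncio.create_task(shielded(coro))

async def test_create_task_cancel():
    done = []

    async def coro():
        await asyncio.sleep(1)
        done.append(True)

    task = create_task(coro())
    await asyncio.sleep(0.1)
    task.cancel()
    await asyncio.sleep(1.5)
    assert task.cancelled()  # the wrapper task was cancelled...
    assert done              # ...but the shielded work still completed

asyncio.run(test_create_task_cancel())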

Manage asyncio coroutines in dict

I want to manage some coroutines in a dict while running a main coroutine.
Specifically, I want to start endless coroutines, put their handles in a dict, and cancel them via a dict lookup again. In my example I want to start 4 coroutines, and they shall be cancelled one by one again by the coroutine doomsday. I'm using Python 3.6.
import asyncio
import traceback

async def add_to_handler(node, func):
    func_handler[node] = asyncio.ensure_future(func, loop=loop)
    return

async def test_func1():
    while True:
        print("1 - HeNlO")
        await asyncio.sleep(1)

async def test_func2():
    while True:
        print("2 - TestFunc2")
        await asyncio.sleep(2)

async def test_func3():
    while True:
        print("3 - Tukan")
        await asyncio.sleep(3)

async def test_func4():
    while True:
        print("4 - Do Coro!")
        await asyncio.sleep(4)

async def doomsday():
    # cancel a coroutine every 10 seconds
    print("launch doomsday")
    for i in range(len(func_handler)):
        await asyncio.sleep(10)
        print("start cancelling with {}".format(i))
        func_handler[str(i + 1)].cancel()
    return

async def main():
    await add_to_handler("1", test_func1)
    await add_to_handler("2", test_func2)
    await add_to_handler("3", test_func3)
    await add_to_handler("4", test_func4)
    await doomsday()
    while True:
        print("z..z..Z..Z...Z")
        print(func_handler)
        await asyncio.sleep(5)

func_handler = {}
loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main())
except KeyboardInterrupt:
    print("stop loop")
loop.close()
I tried the .call_later method of AbstractEventLoop instead of an endless while loop, but it still doesn't work, and it seems that my coroutines are seen as functions, but I don't know why. Where is my mistake?
Try changing this function:
async def add_to_handler(node, func):
    func_handler[node] = asyncio.ensure_future(func(), loop=loop)
    return None
Pay attention to asyncio.ensure_future(func(), loop=loop): the coroutine function has to be called to produce a coroutine object; passing the bare function is why your coroutines are "seen as functions".
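To illustrate the difference, a minimal sketch (separate from the question's code):
import asyncio

async def tick():
    await asyncio.sleep(0)

async def demo():
    task = asyncio.ensure_future(tick())  # coroutine object: wrapped in a Task
    await task
    try:
        asyncio.ensure_future(tick)  # bare function: not awaitable
    except TypeError as exc:
        print("TypeError:", exc)

asyncio.get_event_loop().run_until_complete(demo())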

two async tasks - one depends on another in python

I need to write code that checks the status of some variable in real time. I decided to use asyncio to create two async functions:
import asyncio

async def one():
    global flag
    flag = True
    while flag == True:
        await asyncio.sleep(0.2)
        print("Doing one")

async def two():
    await asyncio.sleep(2)
    global flag
    flag = False

async def main():
    tasks = []
    tasks.append(one())
    tasks.append(two())
    await asyncio.gather(*tasks)

loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main())
finally:
    loop.close()
    print("Loop ended")
When the loop starts, all tasks are launched, and after 2 seconds two() sets flag = False, which stops one(). That's good, but I want one() to run its while loop without the await asyncio.sleep(0.2) delay, because I don't want the updates to lag behind real time, so I set await asyncio.sleep(0.0).
Is this good practice?
Using a global variable is indeed bad practice. What you are looking for is one of asyncio's synchronization primitives, specifically asyncio.Event. Here is what you are doing, but with asyncio.Event:
import asyncio

async def one(event):
    while not event.is_set():
        await asyncio.sleep(0.5)
        print("Hello World!")

async def two(event):
    await asyncio.sleep(2)
    event.set()

async def main():
    event = asyncio.Event()
    await asyncio.gather(*[one(event), two(event)])

asyncio.run(main())
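As an aside on the sleep(0.0) question: if the loop body only polls for the flag, you can avoid busy polling altogether (a sketch of my own, not from the original answer) by waiting on the event with a timeout, so the task reacts to set() immediately while still doing its periodic work:
import asyncio

async def one(event):
    while not event.is_set():
        print("Hello World!")
        try:
            # wake up either when the event is set or after 0.5 s
            await asyncio.wait_for(event.wait(), timeout=0.5)
        except asyncio.TimeoutError:
            pass  # timeout just means: do another round of work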

How to make an asyncio pool cancelable?

I have a pool_map function that can be used to limit the number of simultaneously executing functions.
The idea is to have a coroutine function accepting a single parameter that is mapped to a list of possible parameters, but to also wrap all function calls into a semaphore acquisition, whereupon only a limited number is running at once:
from typing import Awaitable, Callable, Generator, Iterable, TypeVar
from asyncio import Semaphore

A = TypeVar('A')
V = TypeVar('V')

async def pool_map(
    func: Callable[[A], Awaitable[V]],
    arg_it: Iterable[A],
    size: int = 10
) -> Generator[Awaitable[V], None, None]:
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    return map(sub, arg_it)
I modified the above code for the sake of an example and didn't test it, but my variant works well. E.g. you can use it like this:
from asyncio import get_event_loop, coroutine, as_completed
from contextlib import closing

URLS = [...]

async def run_all(awaitables):
    for a in as_completed(awaitables):
        result = await a
        print('got result', result)

async def download(url): ...

if __name__ == '__main__':
    pool = pool_map(download, URLS)

    with closing(get_event_loop()) as loop:
        loop.run_until_complete(run_all(pool))
But a problem arises if an exception is thrown while awaiting a future. I can't see how to cancel all scheduled or still-running tasks, nor the ones still waiting for the semaphore to be acquired.
Is there a library or an elegant building block for this that I don't know of, or do I have to build all the parts myself? (i.e. a Semaphore with access to its waiters, an as_finished that provides access to its running task queue, …)
Use ensure_future to get a Task instead of a coroutine:
import asyncio
from contextlib import closing

def pool_map(func, args, size=10):
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = asyncio.Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    tasks = [asyncio.ensure_future(sub(x)) for x in args]
    return tasks

async def f(n):
    print(">>> start", n)
    if n == 7:
        raise Exception("boom!")
    await asyncio.sleep(n / 10)
    print("<<< end", n)
    return n

async def run_all(tasks):
    exc = None
    for a in asyncio.as_completed(tasks):
        try:
            result = await a
            print('=== result', result)
        except asyncio.CancelledError as e:
            print("!!! cancel", e)
        except Exception as e:
            print("Exception in task, cancelling!")
            for t in tasks:
                t.cancel()
            exc = e
    if exc:
        raise exc

pool = pool_map(f, range(1, 20), 3)

with closing(asyncio.get_event_loop()) as loop:
    loop.run_until_complete(run_all(pool))
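An alternative sketch (my own variant, not from the answer): asyncio.wait with return_when=FIRST_EXCEPTION puts the cancellation logic in one place:
import asyncio

async def run_all(tasks):
    # returns as soon as any task raises, or when all are done
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
    for t in pending:
        t.cancel()
    # give the cancelled tasks a chance to process their cancellation
    await asyncio.gather(*pending, return_exceptions=True)
    for t in done:
        if t.exception() is None:
            print('=== result', t.result())
    for t in done:
        if t.exception() is not None:
            raise t.exception()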
Here's a naive solution, based on the fact that cancel() is a no-op if the task is already finished:
async def run_all(awaitables):
    futures = [asyncio.ensure_future(a) for a in awaitables]
    try:
        for fut in as_completed(futures):
            result = await fut
            print('got result', result)
    except:
        for future in futures:
            future.cancel()
        await asyncio.wait(futures)  # let the cancellations be processed
