Related
I'm trying to understand coroutines in Python, but I'm having trouble grasping how to end one.
I try to understand the following code :
async def send_recieve():
async with websockets.connect(*parameters*) as _ws:
async def send():
while True:
#function send...
async def recieve():
while True:
#function recieve...
if #condition met:
break
send_result, receive_result = await asyncio.gather(send(), receive())
asyncio.run(send_receive())
When the condition is met, the receive function ends, but the send function keeps running and I can't end the whole send_receive coroutine.
I tried to sum up the code to be more clear, I can share the whole version if it's easier to understand.
I understand that the send function is missing a condition that becomes true once the receive function has ended, but I can't work out how to write it.
If I try to add loop.stop() if the condition is met, it raises the error "RuntimeError: Event loop stopped before Future completed."
The whole code is the following :
async def send_receive():
print(f'Connecting websocket to url ${URL}')
async with websockets.connect(
URL,
extra_headers=(("Authorization", auth_key),),
ping_interval=5,
ping_timeout=20
) as _ws:
await asyncio.sleep(0.3)
print("Receiving SessionBegins ...")
session_begins = await _ws.recv()
print(session_begins)
print("Sending messages ...")
async def send():
while True:
try:
data = stream.read(FRAMES_PER_BUFFER)
data = base64.b64encode(data).decode("utf-8")
json_data = json.dumps({"audio_data":str(data)})
await _ws.send(json_data)
except websockets.exceptions.ConnectionClosedError as e:
print(e)
assert e.code == 4008
break
except Exception as e:
assert False, "Not a websocket 4008 error"
await asyncio.sleep(0.01)
return True
async def receive():
while True:
try:
result_str = await _ws.recv()
majtext = json.loads(result_str)['text']
print(majtext)
except websockets.exceptions.ConnectionClosedError as e:
print(e)
assert e.code == 4008
return
except Exception as e:
assert False, "Not a websocket 4008 error"
if json.loads(result_str)['message_type'] == 'FinalTranscript':
break
send_result, receive_result = await asyncio.gather(send(), receive())
loop = asyncio.get_event_loop()
loop.run_until_complete(send_receive())
How can I tell the send function to end when the receive function has ended?
asyncio.gather() waits for both functions to finish. You can instead wait for either function to finish by replacing:
send_result, receive_result = await asyncio.gather(send(), receive())
with:
await asyncio.wait(
[asyncio.create_task(send()), asyncio.create_task(receive())],
return_when=asyncio.FIRST_COMPLETED
)
(Note that "results" of send and receive you retrieved from gather() don't make sense since neither function returns a useful value.)
I have written the code for an async pool below. In __aexit__ I cancel the _worker tasks once the queued tasks have finished. But when I run the code, the worker tasks are not cancelled and the code runs forever. This is what a worker task looks like: <Task pending coro=<AsyncPool._worker() running at \async_pool.py:17> wait_for=<Future cancelled>>. The asyncio.wait_for is cancelled, but the worker tasks are not.
class AsyncPool:
def __init__(self,coroutine,no_of_workers,timeout):
self._loop = asyncio.get_event_loop()
self._queue = asyncio.Queue()
self._no_of_workers = no_of_workers
self._coroutine = coroutine
self._timeout = timeout
self._workers = None
async def _worker(self):
while True:
try:
ret = False
queue_item = await self._queue.get()
ret = True
result = await asyncio.wait_for(self._coroutine(queue_item), timeout = self._timeout,loop= self._loop)
except Exception as e:
print(e)
finally:
if ret:
self._queue.task_done()
async def push_to_queue(self,item):
self._queue.put_nowait(item)
async def __aenter__(self):
assert self._workers == None
self._workers = [asyncio.create_task(self._worker()) for _ in range(self._no_of_workers)]
return self
async def __aexit__(self,type,value,traceback):
await self._queue.join()
for worker in self._workers:
worker.cancel()
await asyncio.gather(*self._workers, loop=self._loop, return_exceptions =True)
To use the Asyncpool:
async def something(item):
print("got", item)
await asyncio.sleep(item)
async def main():
async with AsyncPool(something, 5, 2) as pool:
for i in range(10):
await pool.push_to_queue(i)
asyncio.run(main())
The Output in my terminal:
The problem is that your except Exception clause also catches cancellation, and ignores it. To add to the confusion, print(e) just prints an empty line in case of a CancelledError, which is where the empty lines in the output come from. (Changing it to print(type(e)) shows what's going on.)
To correct the issue, change except Exception to something more specific, like except asyncio.TimeoutError. This change is not needed in Python 3.8 and later, where asyncio.CancelledError no longer derives from Exception but from BaseException, so except Exception doesn't catch it.
When an asyncio task has been created and then cancelled, the task is still alive and needs to be "reclaimed", so you want to await the worker. However, once you await such a cancelled task, it will never give you back the expected return value; asyncio.CancelledError is raised instead, and you need to catch it somewhere.
Because of this behavior, I don't think you should gather them but to await for each of the cancelled tasks, as they are supposed to return right away:
async def __aexit__(self,type,value,traceback):
await self._queue.join()
for worker in self._workers:
worker.cancel()
for worker in self._workers:
try:
await worker
except asyncio.CancelledError:
print("worker cancelled:", worker)
This appears to work. The event is a counting timer and when it expires it cancels the tasks.
import asyncio
from datetime import datetime as dt
from datetime import timedelta as td
import random
import time
class Program:
def __init__(self):
self.duration_in_seconds = 20
self.program_start = dt.now()
self.event_has_expired = False
self.canceled_success = False
async def on_start(self):
print("On Start Event Start! Applying Overrides!!!")
await asyncio.sleep(random.randint(3, 9))
async def on_end(self):
print("On End Releasing All Overrides!")
await asyncio.sleep(random.randint(3, 9))
async def get_sensor_readings(self):
print("getting sensor readings!!!")
await asyncio.sleep(random.randint(3, 9))
async def evauluate_data(self):
print("checking data!!!")
await asyncio.sleep(random.randint(3, 9))
async def check_time(self):
if (dt.now() - self.program_start > td(seconds = self.duration_in_seconds)):
self.event_has_expired = True
print("Event is DONE!!!")
else:
print("Event is not done! ",dt.now() - self.program_start)
async def main(self):
# script starts, do only once self.on_start()
await self.on_start()
print("On Start Done!")
while not self.canceled_success:
readings = asyncio.ensure_future(self.get_sensor_readings())
analysis = asyncio.ensure_future(self.evauluate_data())
checker = asyncio.ensure_future(self.check_time())
if not self.event_has_expired:
await readings
await analysis
await checker
else:
# close other tasks before final shutdown
readings.cancel()
analysis.cancel()
checker.cancel()
self.canceled_success = True
print("cancelled hit!")
# script ends, do only once self.on_end() when even is done
await self.on_end()
print('Done Deal!')
async def main():
program = Program()
await program.main()
I'm trying to check multiple IMAP logins asynchronously with aioimaplib.
This code works as long as the imap servers are reachable and / or the clients don't time out.
What is the correct way to catch the exceptions?
Example exception:
ERROR:asyncio:Task exception was never retrieved future: <Task finished coro=<BaseEventLoop.create_connection() done, defined at G:\WinPython-3.5.4\python-3.5.4.amd64\lib\asyncio\base_events.py:679> exception=TimeoutError(10060, "Connect call failed ('74.117.114.100', 993)")>
Code:
account_infos = [
# User Password Server
('user1#web.com', 'password1', 'imap.google.com'),
('user2#web.com', 'password2', 'imap.yandex.com'),
('user3#web.com', 'password3', 'imap.server3.com'),
]
class MailLogin:
    """Check the logins listed in ``account_infos`` with a small worker pool.

    ``produce_work`` feeds ``(username, password, server)`` tuples into a
    queue, followed by one ``(None, None, None)`` stop marker per worker.
    Each ``worker`` pulls tuples until it receives a stop marker.
    """

    def __init__(self):
        self.loop = asyncio.get_event_loop()
        # The ``loop=`` argument was removed from asyncio.Queue in Python 3.10.
        self.queue = asyncio.Queue()
        self.max_workers = 2

    async def produce_work(self):
        """Queue every account, then one stop marker per worker."""
        for i in account_infos:
            await self.queue.put(i)
        # Must be self.max_workers: a bare ``max_workers`` raises NameError,
        # which gather(return_exceptions=True) hides, so no stop marker is
        # ever queued and the workers wait forever.
        for _ in range(self.max_workers):
            await self.queue.put((None, None, None))

    async def worker(self):
        """Try each queued login until a stop marker arrives."""
        while True:
            (username, password, server) = await self.queue.get()
            if username is None:
                break
            # NOTE(review): this retries the same account until an attempt
            # raises no exception, so a permanently failing server loops
            # forever.
            while True:
                try:
                    s = IMAP4_SSL(server)
                    await s.wait_hello_from_server()
                    r = await s.login(username, password)
                    await s.logout()
                    if r.result != 'NO':
                        print('Information works')
                except Exception as e:
                    # str(e) is empty for exceptions without a message, so
                    # this can print a blank line.
                    print(str(e))
                else:
                    break

    async def _run_all(self):
        """Run the producer and all workers; exceptions come back as results."""
        return await asyncio.gather(
            self.produce_work(),
            *[self.worker() for _ in range(self.max_workers)],
            return_exceptions=True)

    def start(self):
        """Run producer and workers to completion, then print 'Done'."""
        try:
            # gather() is called from inside a coroutine so that it needs no
            # ``loop=`` argument (removed in Python 3.10).
            self.loop.run_until_complete(self._run_all())
        finally:
            print('Done')
if __name__ == '__main__':
MailLogin().start()
There are several ways to do this but the TimeoutError is probably caught in your except. You don't see it because str(e) is an empty string.
You can see the stacks enabling debug mode of asyncio.
First, you can catch the exception as you did:
async def fail_fun():
try:
imap_client = aioimaplib.IMAP4_SSL(host='foo', timeout=1)
await imap_client.wait_hello_from_server()
except Exception as e:
print('Exception : ' + str(e))
if __name__ == '__main__':
get_event_loop().run_until_complete(fail_fun())
Second, you can catch the exception at run_until_complete
async def fail_fun():
imap_client = aioimaplib.IMAP4_SSL(host='foo', timeout=1)
await imap_client.wait_hello_from_server()
if __name__ == '__main__':
try:
get_event_loop().run_until_complete(fail_fun())
except Exception as e:
print('Exception : ' + str(e))
The connection is established by wrapping the loop.create_connection coroutine with create_task: we wanted to establish the connection in the IMAP4 constructor, and __init__ must return None.
So if your host has a wrong value, you could test it before, or wait for the timeout :
socket.gaierror: [Errno -5] No address associated with hostname
if a host is not responding before the timeout, you can raise the timeout. And if the connection is lost during the connection, you can add a connection lost callback in the IMAP4 constructor.
asyncio.gather and asyncio.wait seem to have similar uses: I have a bunch of async things that I want to execute/wait for (not necessarily waiting for one to finish before the next one starts). They use a different syntax, and differ in some details, but it seems very un-pythonic to me to have 2 functions that have such a huge overlap in functionality. What am I missing?
Although similar in general cases ("run and get results for many tasks"), each function has some specific functionality for other cases:
asyncio.gather()
Returns a Future instance, allowing high level grouping of tasks:
import asyncio
from pprint import pprint
import random
async def coro(tag):
print(">", tag)
await asyncio.sleep(random.uniform(1, 3))
print("<", tag)
return tag
loop = asyncio.get_event_loop()
group1 = asyncio.gather(*[coro("group 1.{}".format(i)) for i in range(1, 6)])
group2 = asyncio.gather(*[coro("group 2.{}".format(i)) for i in range(1, 4)])
group3 = asyncio.gather(*[coro("group 3.{}".format(i)) for i in range(1, 10)])
all_groups = asyncio.gather(group1, group2, group3)
results = loop.run_until_complete(all_groups)
loop.close()
pprint(results)
All tasks in a group can be cancelled by calling group2.cancel() or even all_groups.cancel(). See also .gather(..., return_exceptions=True).
asyncio.wait()
Supports waiting to be stopped after the first task is done, or after a specified timeout, allowing lower level precision of operations:
import asyncio
import random
async def coro(tag):
    """Print start/end markers around a random 0.5-5 s sleep and return *tag*."""
    print(">", tag)
    await asyncio.sleep(random.uniform(0.5, 5))
    print("<", tag)
    return tag


# Create the loop explicitly so the tasks below can be bound to it.
loop = asyncio.new_event_loop()
# asyncio.wait() expects Task/Future objects: passing bare coroutines was
# deprecated in Python 3.8 and is rejected since 3.11.
tasks = [loop.create_task(coro(i)) for i in range(1, 11)]

print("Get first result:")
finished, unfinished = loop.run_until_complete(
    asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED))
for task in finished:
    print(task.result())
print("unfinished:", len(unfinished))

print("Get more results in 2 seconds:")
finished2, unfinished2 = loop.run_until_complete(
    asyncio.wait(unfinished, timeout=2))
for task in finished2:
    print(task.result())
print("unfinished2:", len(unfinished2))

print("Get all other results:")
# asyncio.wait() raises ValueError for an empty set, which happens when
# every task already finished within the 2-second window above.
if unfinished2:
    finished3, unfinished3 = loop.run_until_complete(asyncio.wait(unfinished2))
    for task in finished3:
        print(task.result())
loop.close()
TaskGroup (Python 3.11+)
Update: Python 3.11 introduces TaskGroups, which can "automatically" await more than one task without gather() or wait():
# Python 3.11+ ONLY!
async def main():
async with asyncio.TaskGroup() as tg:
task1 = tg.create_task(some_coro(...))
task2 = tg.create_task(another_coro(...))
print("Both tasks have completed now.")
asyncio.wait is more low level than asyncio.gather.
As the name suggests, asyncio.gather mainly focuses on gathering the results. It waits on a bunch of futures and returns their results in a given order.
asyncio.wait just waits on the futures. And instead of giving you the results directly, it gives done and pending tasks. You have to manually collect the values.
Moreover, you could specify to wait for all futures to finish or just the first one with wait.
A very important distinction, which is easy to miss, is the default behavior of these two functions, when it comes to exceptions.
I'll use this example to simulate a coroutine that will raise exceptions, sometimes -
import asyncio
import random
async def a_flaky_tsk(i):
await asyncio.sleep(i) # bit of fuzz to simulate a real-world example
if i % 2 == 0:
print(i, "ok")
else:
print(i, "crashed!")
raise ValueError
coros = [a_flaky_tsk(i) for i in range(10)]
await asyncio.gather(*coros) outputs -
0 ok
1 crashed!
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 20, in <module>
asyncio.run(main())
File "/Users/dev/.pyenv/versions/3.8.2/lib/python3.8/asyncio/runners.py", line 43, in run
return loop.run_until_complete(main)
File "/Users/dev/.pyenv/versions/3.8.2/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "/Users/dev/PycharmProjects/trading/xxx.py", line 17, in main
await asyncio.gather(*coros)
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
As you can see, the coros after index 1 never got to execute. The Future returned by gather() is done at that point (unlike with wait()) and the program terminates, but if you could keep the program alive, the other coroutines would still have a chance to run:
async def main():
coros = [a_flaky_tsk(i) for i in range(10)]
await asyncio.gather(*coros)
if __name__ == '__main__':
loop = asyncio.new_event_loop()
loop.create_task(main())
loop.run_forever()
# 0 ok
# 1 crashed!
# Task exception was never retrieved
# ....
# 2 ok
# 3 crashed!
# 4 ok
# 5 crashed!
# 6 ok
# 7 crashed!
# 8 ok
# 9 crashed!
But await asyncio.wait(coros) continues to execute tasks, even if some of them fail (Future returned by wait() is not done, unlike gather()) -
0 ok
1 crashed!
2 ok
3 crashed!
4 ok
5 crashed!
6 ok
7 crashed!
8 ok
9 crashed!
Task exception was never retrieved
future: <Task finished name='Task-10' coro=<a_flaky_tsk() done, defined at /Users/dev/PycharmProjects/trading/xxx.py:6> exception=ValueError()>
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
Task exception was never retrieved
future: <Task finished name='Task-8' coro=<a_flaky_tsk() done, defined at /Users/dev/PycharmProjects/trading/xxx.py:6> exception=ValueError()>
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
Task exception was never retrieved
future: <Task finished name='Task-2' coro=<a_flaky_tsk() done, defined at /Users/dev/PycharmProjects/trading/xxx.py:6> exception=ValueError()>
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
Task exception was never retrieved
future: <Task finished name='Task-9' coro=<a_flaky_tsk() done, defined at /Users/dev/PycharmProjects/trading/xxx.py:6> exception=ValueError()>
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
Task exception was never retrieved
future: <Task finished name='Task-3' coro=<a_flaky_tsk() done, defined at /Users/dev/PycharmProjects/trading/xxx.py:6> exception=ValueError()>
Traceback (most recent call last):
File "/Users/dev/PycharmProjects/trading/xxx.py", line 12, in a_flaky_tsk
raise ValueError
ValueError
Of course, this behavior can be changed for both by using -
asyncio.gather(..., return_exceptions=True)
or,
asyncio.wait([...], return_when=asyncio.FIRST_EXCEPTION)
But it doesn't end here!
Notice:
Task exception was never retrieved
in the logs above.
asyncio.wait() won't re-raise exceptions from the child tasks until you await them individually. (The stack traces in the logs are just messages; they cannot be caught!)
done, pending = await asyncio.wait(coros)
for tsk in done:
try:
await tsk
except Exception as e:
print("I caught:", repr(e))
Output -
0 ok
1 crashed!
2 ok
3 crashed!
4 ok
5 crashed!
6 ok
7 crashed!
8 ok
9 crashed!
I caught: ValueError()
I caught: ValueError()
I caught: ValueError()
I caught: ValueError()
I caught: ValueError()
On the other hand, to catch exceptions with asyncio.gather(), you must -
results = await asyncio.gather(*coros, return_exceptions=True)
for result_or_exc in results:
if isinstance(result_or_exc, Exception):
print("I caught:", repr(result_or_exc))
(Same output as before)
I also noticed that you can provide a group of coroutines in wait() by simply specifying the list:
result=loop.run_until_complete(asyncio.wait([
say('first hello', 2),
say('second hello', 1),
say('third hello', 4)
]))
Whereas grouping in gather() is done by just specifying multiple coroutines:
result=loop.run_until_complete(asyncio.gather(
say('first hello', 2),
say('second hello', 1),
say('third hello', 4)
))
In addition to all the previous answers, I would like to tell about the different behavior of gather() and wait() in case they are cancelled.
Gather() cancellation
If gather() is cancelled, all submitted awaitables (that have not completed yet) are also cancelled.
Wait() cancellation
If the wait()ing task is cancelled, it simply throws a CancelledError and the waited tasks remain intact.
Simple example:
import asyncio
async def task(arg):
await asyncio.sleep(5)
return arg
async def cancel_waiting_task(work_task, waiting_task):
await asyncio.sleep(2)
waiting_task.cancel()
try:
await waiting_task
print("Waiting done")
except asyncio.CancelledError:
print("Waiting task cancelled")
try:
res = await work_task
print(f"Work result: {res}")
except asyncio.CancelledError:
print("Work task cancelled")
async def main():
work_task = asyncio.create_task(task("done"))
waiting = asyncio.create_task(asyncio.wait({work_task}))
await cancel_waiting_task(work_task, waiting)
work_task = asyncio.create_task(task("done"))
waiting = asyncio.gather(work_task)
await cancel_waiting_task(work_task, waiting)
asyncio.run(main())
Output:
asyncio.wait()
Waiting task cancelled
Work result: done
----------------
asyncio.gather()
Waiting task cancelled
Work task cancelled
Application example
Sometimes it becomes necessary to combine wait() and gather() functionality. For example, we want to wait for the completion of at least one task and cancel the remaining pending tasks after that, and if the waiting itself was cancelled, then also cancel all pending tasks.
As real examples, let's say we have a disconnect event and a work task. And we want to wait for the results of the work task, but if the connection was lost, then cancel it. Or we will make several parallel requests, but upon completion of at least one response, cancel all others.
It could be done this way:
import asyncio
from typing import Optional, Tuple, Set
async def wait_any(
    tasks: Set[asyncio.Future], *, timeout: Optional[float] = None,
) -> Tuple[Set[asyncio.Future], Set[asyncio.Future]]:
    """Wait until any of *tasks* completes, then cancel the others.

    Args:
        tasks: Futures/tasks to wait on. An empty collection returns
            immediately with two empty sets.
        timeout: Maximum number of seconds to wait, or ``None`` to wait
            without a limit.

    Returns:
        ``(done, pending)`` as returned by ``asyncio.wait``. Cancellation has
        already been requested for every future in ``pending``.

    Raises:
        asyncio.CancelledError: If the wait itself is cancelled; in that
            case all of *tasks* are cancelled before the error propagates.
    """
    # Snapshot the input so later changes to the caller's set cannot affect
    # which futures get cancelled.
    tasks = set(tasks)
    if not tasks:
        # asyncio.wait() raises ValueError for an empty collection.
        return set(), set()

    tasks_to_cancel: Set[asyncio.Future] = set()
    try:
        done, tasks_to_cancel = await asyncio.wait(
            tasks, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
        )
        return done, tasks_to_cancel
    except asyncio.CancelledError:
        # The wait was cancelled: nothing should outlive it.
        tasks_to_cancel = tasks
        raise
    finally:
        # Runs on both the normal and the cancelled path.
        for task in tasks_to_cancel:
            task.cancel()
async def task():
await asyncio.sleep(5)
async def cancel_waiting_task(work_task, waiting_task):
await asyncio.sleep(2)
waiting_task.cancel()
try:
await waiting_task
print("Waiting done")
except asyncio.CancelledError:
print("Waiting task cancelled")
try:
res = await work_task
print(f"Work result: {res}")
except asyncio.CancelledError:
print("Work task cancelled")
async def check_tasks(waiting_task, working_task, waiting_conn_lost_task):
try:
await waiting_task
print("waiting is done")
except asyncio.CancelledError:
print("waiting is cancelled")
try:
await waiting_conn_lost_task
print("connection is lost")
except asyncio.CancelledError:
print("waiting connection lost is cancelled")
try:
await working_task
print("work is done")
except asyncio.CancelledError:
print("work is cancelled")
async def work_done_case():
working_task = asyncio.create_task(task())
connection_lost_event = asyncio.Event()
waiting_conn_lost_task = asyncio.create_task(connection_lost_event.wait())
waiting_task = asyncio.create_task(wait_any({working_task, waiting_conn_lost_task}))
await check_tasks(waiting_task, working_task, waiting_conn_lost_task)
async def conn_lost_case():
working_task = asyncio.create_task(task())
connection_lost_event = asyncio.Event()
waiting_conn_lost_task = asyncio.create_task(connection_lost_event.wait())
waiting_task = asyncio.create_task(wait_any({working_task, waiting_conn_lost_task}))
await asyncio.sleep(2)
connection_lost_event.set() # <---
await check_tasks(waiting_task, working_task, waiting_conn_lost_task)
async def cancel_waiting_case():
working_task = asyncio.create_task(task())
connection_lost_event = asyncio.Event()
waiting_conn_lost_task = asyncio.create_task(connection_lost_event.wait())
waiting_task = asyncio.create_task(wait_any({working_task, waiting_conn_lost_task}))
await asyncio.sleep(2)
waiting_task.cancel() # <---
await check_tasks(waiting_task, working_task, waiting_conn_lost_task)
async def main():
print("Work done")
print("-------------------")
await work_done_case()
print("\nConnection lost")
print("-------------------")
await conn_lost_case()
print("\nCancel waiting")
print("-------------------")
await cancel_waiting_case()
asyncio.run(main())
Output:
Work done
-------------------
waiting is done
waiting connection lost is cancelled
work is done
Connection lost
-------------------
waiting is done
connection is lost
work is cancelled
Cancel waiting
-------------------
waiting is cancelled
waiting connection lost is cancelled
work is cancelled
I have a process running with asyncio which should run forever.
I can interact with that process with a ProcessIterator, which can (left out here) send data to stdin and fetch from stdout.
I can access the data with async for fd, data in ProcessIterator(...):.
The problem is now that the execution of this async iterator must be timelimited. If the time runs out, the timeout() function is called,
but the exception does not originate out of the __anext__ function to notify of the timeout.
How can I raise this exception in the async iterator?
I found no way of calling awaitable.throw(something) or similar for it.
class ProcessIterator:
def __init__(self, process, loop, run_timeout):
self.process = process
self.loop = loop
self.run_timeout = run_timeout
# set the global timer
self.overall_timer = self.loop.call_later(
self.run_timeout, self.timeout)
def timeout(self):
# XXX: how do i pass this exception into the iterator?
raise ProcTimeoutError(
self.process.args,
self.run_timeout,
was_global,
)
async def __aiter__(self):
return self
async def __anext__(self):
if self.process.exited:
raise StopAsyncIteration()
else:
# fetch output from the process asyncio.Queue()
entry = await self.process.output_queue.get()
if entry == StopIteration:
raise StopAsyncIteration()
return entry
The usage of the async iterator is now roughly:
async def test_coro(loop):
code = 'print("rofl"); time.sleep(5); print("lol")'
proc = Process([sys.executable, '-u', '-c', code])
await proc.create()
try:
async for fd, line in ProcessIterator(proc, loop, run_timeout=1):
print("%d: %s" % (fd, line))
except ProcessTimeoutError as exc:
# XXX This is the exception I'd like to get here! How can i throw it?
print("timeout: %s" % exc)
await proc.wait()
tl;dr: How can I throw a timed exception so it originates from a async iterator?
EDIT: Added solution 2
Solution 1:
Can the timeout() callback store the ProcTimeoutError exception in an instance variable? Then __anext__() can check the instance variable and raise the exception if it is set.
class ProcessIterator:
    """Asynchronous iterator over a process' output with an overall time limit.

    A timer started in ``__init__`` calls ``timeout()``, which stores a
    ``ProcTimeoutError`` in ``self.error``. ``__anext__`` raises that stored
    error the next time it is entered.

    NOTE: the error is only checked at the start of ``__anext__``. A call
    that is already waiting on the queue is not interrupted by the timer.
    """

    def __init__(self, process, loop, run_timeout):
        self.process = process
        self.loop = loop
        # Set by timeout(); checked at the start of every __anext__() call.
        self.error = None
        self.run_timeout = run_timeout
        # set the global timer
        self.overall_timer = self.loop.call_later(
            self.run_timeout, self.timeout)

    def timeout(self, was_global=True):
        """Timer callback: remember the timeout error for ``__anext__``.

        ``was_global`` defaults to True because the only timer set up here
        is the global one; the original snippet used the name without ever
        defining it.
        """
        self.error = ProcTimeoutError(
            self.process.args,
            self.run_timeout,
            was_global,
        )

    def __aiter__(self):
        # Must be a plain method: ``async for`` needs the iterator object
        # itself, not a coroutine that resolves to it.
        return self

    async def __anext__(self):
        """Return the next queue entry, or raise the stored timeout error.

        Raises:
            StopAsyncIteration: If the process has exited or the queue
                yields the ``StopIteration`` sentinel.
        """
        if self.error:
            raise self.error
        if self.process.exited:
            raise StopAsyncIteration()
        # fetch output from the process asyncio.Queue()
        entry = await self.process.output_queue.get()
        if entry == StopIteration:
            raise StopAsyncIteration()
        return entry
Solution 2:
Put the exception on the process.output_queue.
....
def timeout(self, was_global=True):
    """Timer callback: hand the timeout error to the consumer via the queue.

    ``was_global`` defaults to True so the name is defined; the original
    snippet used it without defining it.
    """
    # timeout() is a plain callback and cannot await Queue.put(), so the
    # exception is enqueued synchronously with put_nowait().
    # NOTE(review): put_nowait() raises QueueFull on a full bounded queue.
    self.process.output_queue.put_nowait(ProcTimeoutError(
        self.process.args,
        self.run_timeout,
        was_global,
    ))
....
# fetch output from the process asyncio.Queue()
entry = await self.process.output_queue.get()
if entry == StopIteration:
raise StopAsyncIteration()
elif entry = ProcTimeoutError:
raise entry
....
If there may be entries on the queue, use a priority queue. Assign ProcTimeoutError a higher priority than the other entries, e.g., (0, ProcTimeoutError) vs (1, other_entry).
Please check out timeout context manager from asyncio:
with asyncio.timeout(10):
async for i in get_iter():
process(i)
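At the time of writing it was not released yet (it has since shipped as asyncio.timeout() in Python 3.11); on older versions you can copy-paste the implementation from the asyncio master branch.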
You could use get_nowait, which returns an entry or raises QueueEmpty immediately. Wrapping it in a while loop on self.error with a short async sleep should do the trick. Something like:
async def __anext__(self):
if self.process.exited:
raise StopAsyncIteration()
else:
while self.error is None:
try:
entry = self.process.output_queue.get_nowait()
if entry == StopIteration:
raise StopAsyncIteration()
return entry
except asyncio.QueueEmpty:
# some sleep to give back control to ioloop
# since we using nowait
await asyncio.sleep(0.1)
else:
raise self.error
As a hint, this is the approach used in Tornado's Queue.get implementation with a timeout:
def get(self, timeout=None):
"""Remove and return an item from the queue.
Returns a Future which resolves once an item is available, or raises
`tornado.gen.TimeoutError` after a timeout.
"""
future = Future()
try:
future.set_result(self.get_nowait())
except QueueEmpty:
self._getters.append(future)
_set_timeout(future, timeout)
return future
This is the solution I have come up with so far.
See https://github.com/SFTtech/kevin kevin/process.py for the upstream version.
It also features line counting and output timeouts, which I stripped from this example.
class Process:
    """Handle for a subprocess driven through an asyncio event loop.

    ``__init__`` only prepares the ``subprocess_exec`` coroutine; the
    process is actually started by awaiting ``create()``.
    """

    def __init__(self, command, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        # ProcessIterator reads ``process.args`` when it builds its timeout
        # error, so keep the command around.
        self.args = command
        # NOTE(review): nothing in this snippet ever sets this to True.
        self.created = False
        # Create the future on the chosen loop rather than on whatever loop
        # happens to be current.
        self.killed = self.loop.create_future()
        self.proc = self.loop.subprocess_exec(
            lambda: WorkerInteraction(self),  # see upstream repo
            *command)
        self.transport = None
        self.protocol = None

    async def create(self):
        """Start the subprocess and store its transport and protocol."""
        self.transport, self.protocol = await self.proc

    def communicate(self, timeout):
        """Return an async iterator over the output, limited to *timeout*.

        Raises:
            Exception: If the ``killed`` future is already done.
        """
        if self.killed.done():
            raise Exception("process was already killed "
                            "and no output is waiting")
        return ProcessIterator(self, self.loop, timeout)
class ProcessIterator:
    """
    Asynchronous iterator for the process output.

    Use like `async for (fd, data) in ProcessIterator(...):`

    Each step waits for whichever happens first: the next item from
    `process.protocol.queue`, or completion of the `process.killed` future,
    which `timeout()` fails with ProcTimeoutError when the timer fires.
    """

    def __init__(self, process, loop, run_timeout):
        self.process = process
        self.loop = loop
        self.run_timeout = run_timeout
        self.overall_timer = None

        if self.run_timeout < INF:
            # set the global timer
            self.overall_timer = self.loop.call_later(
                self.run_timeout,
                functools.partial(self.timeout, was_global=True))

    def timeout(self, was_global=False):
        """Timer callback: fail the `killed` future with a timeout error.

        `was_global` is accepted because the timer binds it as a keyword
        through functools.partial; without the parameter the callback fails
        with TypeError. It is not forwarded in this stripped-down example.
        """
        if not self.process.killed.done():
            self.process.killed.set_exception(ProcTimeoutError(
                self.process.args,
                self.run_timeout,
            ))

    def __aiter__(self):
        # Must be a plain method: `async for` needs the iterator object
        # itself, not a coroutine that resolves to it.
        return self

    async def __anext__(self):
        """Return the next queue item, or raise on kill/timeout/end of data.

        Raises:
            StopAsyncIteration: When the StopIteration sentinel is received.
            Exception: Whatever exception a completed future carries, e.g.
                the timeout error set by `timeout()`.
        """
        killed = self.process.killed
        # asyncio.wait() needs Task/Future objects (bare coroutines are
        # rejected since Python 3.11), so wrap the queue read in a task.
        getter = asyncio.ensure_future(self.process.protocol.queue.get())

        # either the process exits,
        # there's an exception (process killed, timeout, ...)
        # or the queue gives us the next data item.
        # wait for the first of those events.
        try:
            done, _ = await asyncio.wait(
                [getter, killed],
                return_when=asyncio.FIRST_COMPLETED)
        finally:
            # Never leave a stray consumer on the queue, including when this
            # coroutine is cancelled. `killed` is shared process state and
            # is deliberately not cancelled here.
            if not getter.done():
                getter.cancel()

        # Look at the queue item first so that data which arrived together
        # with a kill/timeout is delivered before the error is raised.
        for future in (getter, killed):
            if future not in done:
                continue

            error = future.exception()
            if error:
                # kill the process before throwing the error!
                # (pwn() is not part of this snippet; see the upstream repo.)
                await self.process.pwn()
                raise error

            # fetch output from the process
            entry = future.result()

            # it can be stopiteration to indicate the last data chunk
            # as the process exited on its own.
            if entry == StopIteration:
                if not killed.done():
                    killed.set_result(entry)

                # raise the stop iteration
                await self.stop_iter()

            return entry

        raise Exception("internal fail: no future was done!")

    async def stop_iter(self):
        """Cancel the overall timer and end the iteration.

        Raises:
            StopAsyncIteration: Always.
        """
        # stop the timer
        if self.overall_timer:
            self.overall_timer.cancel()

        # The return code is fetched but not used in this stripped-down
        # example (returncode() is not part of this snippet).
        self.process.returncode()
        raise StopAsyncIteration()
The magic function is this:
done, pending = await asyncio.wait(
[self.process.protocol.queue.get(), self.process.killed],
return_when=asyncio.FIRST_COMPLETED)
When the timeout occurs, the queue fetching is aborted reliably.