In Python, what's the idiomatic way to establish one-way communication between two threading.Thread instances, call them thread a and thread b?
a is the producer: it continuously generates values for b to consume.
b is the consumer: it reads one value generated by a, processes the value with a coroutine, then reads the next value, and so on.
Illustration:
q = very_magic_queue.Queue()

def worker_of_a(q):
    while True:
        q.put(1)
        time.sleep(1)

a = threading.Thread(target=worker_of_a, args=(q,))
a.start()

async def loop(q):
    while True:
        # v must be processed in the same order as they are produced
        v = await q.get()
        print(v)

async def foo():
    pass

async def b_main(q):
    loop_fut = asyncio.ensure_future(loop(q))
    foo_fut = asyncio.ensure_future(foo())
    _ = await asyncio.wait([loop_fut, foo_fut], ...)
    # blah blah blah

def worker_of_b(q):
    asyncio.set_event_loop(asyncio.new_event_loop())
    asyncio.get_event_loop().run_until_complete(b_main(q))

b = threading.Thread(target=worker_of_b, args=(q,))
b.start()
Of course the above code doesn't work, because queue.Queue.get cannot be awaited, and asyncio.Queue cannot be used in another thread.
I also need a communication channel from b to a.
It would be great if the solution could also work with gevent.
Thanks :)
I had a similar problem - communicating data between a thread and asyncio. The solution I used was to create a sync Queue and add methods for an async get and an async put, using asyncio.sleep to make them non-blocking.
Here is my queue class:
import asyncio
import queue

# class to provide queue (sync or async morph)
class queMorph(queue.Queue):
    def __init__(self, qSize, qNM):
        super().__init__(qSize)
        self.timeout = 0.018
        self.me = f'queMorph-{qNM}'

    # Introduce methods for async awaitable morph of Q
    async def aget(self):
        while True:
            try:
                return self.get_nowait()
            except queue.Empty:
                await asyncio.sleep(self.timeout)
            except Exception:
                raise

    async def aput(self, data):
        while True:
            try:
                return self.put_nowait(data)
            except queue.Full:
                print(f'{self.me} Queue full on put..')
                await asyncio.sleep(self.timeout)
            except Exception:
                raise
To put/get items on the queue from the thread (the synchronous side), use the normal blocking q.put() and q.get().
In the async loop, use q.aget() and q.aput(), which do not block.
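A minimal usage sketch, assuming the queMorph class above; the producer thread and consumer coroutine below are placeholders, not part of the original answer:

import asyncio
import threading
import time

q = queMorph(qSize=100, qNM='demo')

def producer_thread(q):               # plain thread: ordinary blocking put
    while True:
        q.put(1)
        time.sleep(1)

async def consumer(q):                # asyncio side: non-blocking aget
    while True:
        v = await q.aget()
        print(v)

threading.Thread(target=producer_thread, args=(q,), daemon=True).start()
asyncio.run(consumer(q))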
You can use a synchronized queue from the queue module and defer the wait to a ThreadPoolExecutor:
async def loop(q):
    from concurrent.futures import ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=1) as executor:
        loop = asyncio.get_event_loop()
        while True:
            # v must be processed in the same order as they are produced
            v = await loop.run_in_executor(executor, q.get)
            print(v)
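For the b-to-a channel the question also asks for, the same idea can be mirrored; a sketch under the assumption of a second, unbounded queue.Queue (reply_q and the doubled value are illustrative): thread a blocks on an ordinary get(), while the coroutine can call put_nowait(), which never blocks on an unbounded queue.

import asyncio
import queue
from concurrent.futures import ThreadPoolExecutor

reply_q = queue.Queue()                       # channel from b back to a

def worker_of_a(q, reply_q):                  # runs in thread a
    while True:
        q.put(1)
        print('a got back', reply_q.get())    # ordinary blocking get

async def loop(q, reply_q):                   # runs in b's event loop
    executor = ThreadPoolExecutor(max_workers=1)
    aio_loop = asyncio.get_running_loop()
    while True:
        v = await aio_loop.run_in_executor(executor, q.get)
        reply_q.put_nowait(v * 2)             # unbounded queue: never blocks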
I've used Janus to solve this problem - it's a Python library that gives you a thread-safe queue that can be used to communicate between asyncio and a thread.
import asyncio
import janus

def threaded(sync_q):
    for i in range(100):
        sync_q.put(i)
    sync_q.join()

async def async_code(async_q):
    for i in range(100):
        val = await async_q.get()
        assert val == i
        async_q.task_done()

async def main():
    queue = janus.Queue()
    loop = asyncio.get_running_loop()
    fut = loop.run_in_executor(None, threaded, queue.sync_q)
    await async_code(queue.async_q)
    await fut

asyncio.run(main())
Related
I want to update a Queue with several asyncio tasks.
I receive data from each of A, B, C (using a websocket and "while True"), and then I want to put it in the queue so that all the providers write to the same Queue.
(I know that maybe I need to use multithreading or something else, but I can't find the right way.)
if __name__ == '__main__':
    global_queue = queue.Queue()
    asyncio.run(A_Orderbook.data_stream(global_queue))
    asyncio.run(B_Orderbook.data_stream(global_queue))
    asyncio.run(C_Orderbook.data_stream(global_queue))
    print(global_queue.qsize())
Thanks.
You can do it the following way:
import asyncio

async def worker(worker_name: str, q: asyncio.Queue):
    """Produces tasks for consumer."""
    for i in range(1, 6):
        await asyncio.sleep(1)
        await q.put(f"{worker_name}-{i}")

async def consumer(q: asyncio.Queue):
    """Consumes tasks from workers."""
    while True:
        item = await q.get()
        await asyncio.sleep(1)
        print(item)
        # we need it to ensure that all tasks were done
        q.task_done()

async def main_wrapper():
    """Main function - entry point of our async app."""
    q = asyncio.Queue()
    # we do not need to await the asyncio task; it runs in "parallel"
    asyncio.create_task(consumer(q))
    await asyncio.gather(*[worker(f"w{i}", q) for i in range(1, 5)])  # create worker-tasks
    await q.join()  # wait until the consumer task has consumed all queued items
    print("All DONE !")

if __name__ == '__main__':
    asyncio.run(main_wrapper())
I have a complex function Vehicle.set_data, which has many nested functions, API calls, DB calls, etc. For the sake of this example, I will simplify it.
I am trying to use Async IO to run Vehicle.set_data on multiple vehicles at once. Here is my Vehicle model:
class Vehicle:
    def __init__(self, token):
        self.token = token

    # Works async
    async def set_data(self):
        await asyncio.sleep(random.random() * 10)

    # Does not work async
    # def set_data(self):
    #     time.sleep(random.random() * 10)
And here is my asyncio routine:
async def set_vehicle_data(vehicle):
    # sleep for T seconds on average
    await vehicle.set_data()

def get_random_string():
    return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(5))

async def producer(queue):
    count = 0
    while True:
        count += 1
        # produce a token and send it to a consumer
        token = get_random_string()
        vehicle = Vehicle(token)
        print(f'produced {vehicle.token}')
        await queue.put(vehicle)
        if count > 3:
            break

async def consumer(queue):
    while True:
        vehicle = await queue.get()
        # process the token received from a producer
        print(f'Starting consumption for vehicle {vehicle.token}')
        await set_vehicle_data(vehicle)
        queue.task_done()
        print(f'Ending consumption for vehicle {vehicle.token}')

async def main():
    queue = asyncio.Queue()
    # #todo now, do I need multiple producers
    producers = [asyncio.create_task(producer(queue))
                 for _ in range(3)]
    consumers = [asyncio.create_task(consumer(queue))
                 for _ in range(3)]

    # with both producers and consumers running, wait for
    # the producers to finish
    await asyncio.gather(*producers)
    print('---- done producing')

    # wait for the remaining tasks to be processed
    await queue.join()

    # cancel the consumers, which are now idle
    for c in consumers:
        c.cancel()

asyncio.run(main())
In the example above, this commented section of code does not allow multiple vehicles to be processed at once:
# Does not work async
# def set_data(self):
#     time.sleep(random.random() * 10)
Because this is such a complex query in our actual codebase, it would be a tremendous refactor to go flag every single nested function with async and await. Is there any way I can make this function work async without marking up my whole codebase with async?
You can run the function in a separate thread with asyncio.to_thread:
await asyncio.to_thread(self.set_data)
If you're using Python < 3.9, use loop.run_in_executor:
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, self.set_data)
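Applied to the Vehicle class from the question, a minimal sketch (the _set_data_blocking helper name is mine, not from the original code): keep the synchronous body as-is and expose a thin async wrapper.

import asyncio
import random
import time

class Vehicle:
    def __init__(self, token):
        self.token = token

    def _set_data_blocking(self):      # existing sync code, unchanged
        time.sleep(random.random() * 10)

    async def set_data(self):          # thin async wrapper, Python 3.9+
        await asyncio.to_thread(self._set_data_blocking)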
Below is (working) code for a generic websocket streamer.
It creates a daemon thread, which then performs asyncio.run(...).
The asyncio code spawns 2 tasks, which never complete.
How to correctly destroy this object?
One of the tasks is executing a keepalive 'ping', so I can easily exit that loop using a flag. But the other is blocking on a message from the websocket.
import json
import gzip
import asyncio
import aiohttp
from threading import Thread

class WebSocket:
    KEEPALIVE_INTERVAL_S = 10

    def __init__(self, url, on_connect, on_msg):
        self.url = url
        self.on_connect = on_connect
        self.on_msg = on_msg
        self.streams = {}
        self.worker_thread = Thread(name='WebSocket', target=self.thread_func, daemon=True).start()

    def thread_func(self):
        asyncio.run(self.aio_run())

    async def aio_run(self):
        async with aiohttp.ClientSession() as session:
            self.ws = await session.ws_connect(self.url)
            await self.on_connect(self)

            async def ping():
                while True:
                    print('KEEPALIVE')
                    await self.ws.ping()
                    await asyncio.sleep(WebSocket.KEEPALIVE_INTERVAL_S)

            async def main_loop():
                async for msg in self.ws:
                    def extract_data(msg):
                        if msg.type == aiohttp.WSMsgType.BINARY:
                            as_bytes = gzip.decompress(msg.data)
                            as_string = as_bytes.decode('utf8')
                            as_json = json.loads(as_string)
                            return as_json
                        elif msg.type == aiohttp.WSMsgType.TEXT:
                            return json.loads(msg.data)
                        elif msg.type == aiohttp.WSMsgType.ERROR:
                            print('⛔️ aiohttp.WSMsgType.ERROR')
                            return msg.data

                    data = extract_data(msg)
                    self.on_msg(data)

            # May want this approach if we want to handle graceful shutdown
            # W.task_ping = asyncio.create_task(ping())
            # W.task_main_loop = asyncio.create_task(main_loop())

            await asyncio.gather(
                ping(),
                main_loop()
            )

    async def send_json(self, J):
        await self.ws.send_json(J)
I'd suggest the use of asyncio.run_coroutine_threadsafe instead of asyncio.run. It returns a concurrent.futures.Future object which you can cancel:
def thread_func(self):
    self.future = asyncio.run_coroutine_threadsafe(
        self.aio_run(),
        asyncio.get_event_loop()
    )
# somewhere else
self.future.cancel()
Another approach would be to make ping and main_loop tasks, and cancel them when necessary:
# inside `aio_run`
self.task_ping = asyncio.create_task(ping())
self.main_loop_task = asyncio.create_task(main_loop())

await asyncio.gather(
    self.task_ping,
    self.main_loop_task,
    return_exceptions=True
)
# somewhere else
self.task_ping.cancel()
self.main_loop_task.cancel()
This doesn't change the fact that aio_run should also be called with asyncio.run_coroutine_threadsafe. asyncio.run should be used as a main entry point for asyncio programs and should be only called once.
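A minimal sketch of that pattern, assuming a dedicated event loop is started in the worker thread first; the names below are illustrative, not taken from the original class:

import asyncio
import threading

loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def work():
    await asyncio.sleep(1)
    return 42

# Submitted from any other thread; returns a concurrent.futures.Future.
future = asyncio.run_coroutine_threadsafe(work(), loop)
print(future.result())     # blocks the calling thread until the coroutine finishes
# future.cancel()          # ...or cancel it instead
loop.call_soon_threadsafe(loop.stop)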
I would like to suggest one more variation of the solution. When finishing coroutines (tasks), I prefer to minimize the use of cancel() (without excluding it entirely), since it can sometimes make business logic harder to debug (keep in mind that asyncio.CancelledError does not inherit from Exception).
In your case, the code might look like this (only the changes are shown):
class WebSocket:
    KEEPALIVE_INTERVAL_S = 10

    def __init__(self, url, on_connect, on_msg):
        # ...
        self.worker_thread = Thread(name='WebSocket', target=self.thread_func)
        self.worker_thread.start()

    async def aio_run(self):
        self._loop = asyncio.get_event_loop()
        # ...
        self._ping_task = asyncio.create_task(ping())
        self._main_task = asyncio.create_task(main_loop())
        await asyncio.gather(
            self._ping_task,
            self._main_task,
            return_exceptions=True
        )
        # ...

    async def stop_ping(self):
        self._ping_task.cancel()
        try:
            await self._ping_task
        except asyncio.CancelledError:
            pass

    async def _stop(self):
        # wait for the ping to end before closing the socket
        await self.stop_ping()
        # leads to a correct exit from `async for msg in self.ws`
        await self.ws.close()

    def stop(self):
        # wait for the ping to stop and the socket to close
        asyncio.run_coroutine_threadsafe(
            self._stop(), self._loop
        ).result()
        self.worker_thread.join()  # wait for the thread to finish
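Hypothetical usage of the modified class (the URL and the on_connect/on_msg callbacks are placeholders):

import time

ws = WebSocket('wss://example.com/stream', on_connect, on_msg)
time.sleep(30)   # let it stream for a while
ws.stop()        # cancel the ping, close the socket, join the worker thread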
I want to gather data from asyncio loops running in sibling processes with Python 3.7
Ideally I would use a multiprocess.JoinableQueue, relying on its join() call for synchronization.
However, its synchronization primitives block the event loop in full (see my partial answer below for an example).
Illustrative prototype:
class MP_GatherDict(dict):
    '''A per-process dictionary which can be gathered from a single one'''

    def __init__(self):
        self.q = multiprocess.JoinableQueue()
        super().__init__()

    async def worker_process_server(self):
        while True:
            (await?) self.q.put(dict(self))  # Put a shallow copy
            (await?) self.q.join()           # Wait for it to be gathered

    async def gather(self):
        all_dicts = []
        while not self.q.empty():
            all_dicts.append(await self.q.get())
            self.q.task_done()
        return all_dicts
Note that the put->get->join->put flow might not work as expected, but this question really is about using multiprocess primitives in an asyncio event loop...
The question would then be: how best to await multiprocess primitives from an asyncio event loop?
This test shows that multiprocess.Queue.get() blocks the whole event loop:
import asyncio
import multiprocessing as mp
import os

mp_q = mp.JoinableQueue()

async def mp_queue_wait():
    try:
        print('Queue:', mp_q.get(timeout=2))
    except Exception as ex:
        print('Queue:', repr(ex))

async def main_loop_task():
    task = asyncio.get_running_loop().create_task(mp_queue_wait())
    for i in range(3):
        print(i, os.times())
        await asyncio.sleep(1)
    await task
    print(repr(task))

asyncio.run(main_loop_task())
Whose output is:
0 posix.times_result(user=0.41, system=0.04, children_user=0.0, children_system=0.0, elapsed=17208620.18)
Queue: Empty()
1 posix.times_result(user=0.41, system=0.04, children_user=0.0, children_system=0.0, elapsed=17208622.18)
2 posix.times_result(user=0.41, system=0.04, children_user=0.0, children_system=0.0, elapsed=17208623.18)
<Task finished coro=<mp_queue_wait() done,...> result=None>
So I am looking at asyncio.loop.run_in_executor() as the next possible answer; however, spawning an executor/thread just for this seems overkill...
Here is same test using the default executor:
async def mp_queue_wait():
    try:
        result = await asyncio.get_running_loop().run_in_executor(None, mp_q.get, True, 2)
    except Exception as ex:
        result = ex
    print('Queue:', repr(result))
    return result
And the (desired) result:
0 posix.times_result(user=0.36, system=0.02, children_user=0.0, children_system=0.0, elapsed=17210674.65)
1 posix.times_result(user=0.37, system=0.02, children_user=0.0, children_system=0.0, elapsed=17210675.65)
Queue: Empty()
2 posix.times_result(user=0.37, system=0.02, children_user=0.0, children_system=0.0, elapsed=17210676.66)
<Task finished coro=<mp_queue_wait() done, defined at /home/apozuelo/Documents/5G_SBA/Tera5G/services/db.py:211> result=Empty()>
This comes a bit late, but.
You need to create an async wrapper around the mp.JoinableQueue(), since both get() and put() block the whole process (GIL).
There are two approaches for this:
1. Use threads.
2. Use asyncio.sleep() and the get_nowait() / put_nowait() methods.
I chose option 2 since it is easy.
from queue import Queue, Full, Empty
from typing import Any, Generic, TypeVar
from asyncio import sleep

T = TypeVar('T')

class AsyncQueue(Generic[T]):
    """Async wrapper for queue.Queue"""
    SLEEP: float = 0.01

    def __init__(self, queue: Queue[T]):
        self._Q: Queue[T] = queue

    async def get(self) -> T:
        while True:
            try:
                return self._Q.get_nowait()
            except Empty:
                await sleep(self.SLEEP)

    async def put(self, item: T) -> None:
        while True:
            try:
                self._Q.put_nowait(item)
                return None
            except Full:
                await sleep(self.SLEEP)

    def task_done(self) -> None:
        self._Q.task_done()
        return None
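A hedged usage sketch: the wrapper above only relies on get_nowait(), put_nowait() and task_done(), and multiprocessing queues raise the same queue.Empty/queue.Full exceptions, so it can also wrap an mp.JoinableQueue; the drain coroutine and names below are illustrative:

import asyncio
import multiprocessing as mp

async def drain(aq: AsyncQueue) -> None:
    # Consume items from the shared queue without blocking the event loop.
    while True:
        item = await aq.get()
        print('got', item)
        aq.task_done()

if __name__ == '__main__':
    shared_q = mp.JoinableQueue()    # filled by worker processes elsewhere
    asyncio.run(drain(AsyncQueue(shared_q)))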
I have a pool_map function that can be used to limit the number of simultaneously executing functions.
The idea is to have a coroutine function accepting a single parameter that is mapped over a list of possible parameters, but also to wrap all function calls in a semaphore acquisition, so that only a limited number run at once:
from typing import Awaitable, Callable, Generator, Iterable, TypeVar
from asyncio import Semaphore

A = TypeVar('A')
V = TypeVar('V')

async def pool_map(
    func: Callable[[A], Awaitable[V]],
    arg_it: Iterable[A],
    size: int = 10
) -> Generator[Awaitable[V], None, None]:
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    return map(sub, arg_it)
I modified the above code (and didn't test it) for the sake of an example, but my variant works well. E.g. you can use it like this:
from asyncio import get_event_loop, as_completed
from contextlib import closing

URLS = [...]

async def run_all(awaitables):
    for a in as_completed(awaitables):
        result = await a
        print('got result', result)

async def download(url): ...

if __name__ == '__main__':
    pool = pool_map(download, URLS)

    with closing(get_event_loop()) as loop:
        loop.run_until_complete(run_all(pool))
But a problem arises if an exception is thrown while awaiting a future. I can't see how to cancel all scheduled or still-running tasks, nor the ones still waiting for the semaphore to be acquired.
Is there a library or an elegant building block for this that I don't know of, or do I have to build all the parts myself? (i.e. a Semaphore with access to its waiters, an as_finished that provides access to its running task queue, …)
Use ensure_future to get a Task instead of a coroutine:
import asyncio
from contextlib import closing

def pool_map(func, args, size=10):
    """
    Maps an async function to iterables
    ensuring that only some are executed at once.
    """
    semaphore = asyncio.Semaphore(size)

    async def sub(arg):
        async with semaphore:
            return await func(arg)

    tasks = [asyncio.ensure_future(sub(x)) for x in args]
    return tasks

async def f(n):
    print(">>> start", n)
    if n == 7:
        raise Exception("boom!")
    await asyncio.sleep(n / 10)
    print("<<< end", n)
    return n

async def run_all(tasks):
    exc = None
    for a in asyncio.as_completed(tasks):
        try:
            result = await a
            print('=== result', result)
        except asyncio.CancelledError as e:
            print("!!! cancel", e)
        except Exception as e:
            print("Exception in task, cancelling!")
            for t in tasks:
                t.cancel()
            exc = e
    if exc:
        raise exc

pool = pool_map(f, range(1, 20), 3)

with closing(asyncio.get_event_loop()) as loop:
    loop.run_until_complete(run_all(pool))
Here's a naive solution, based on the fact that cancel is a no-op if the task is already finished:
async def run_all(awaitables):
    futures = [asyncio.ensure_future(a) for a in awaitables]
    try:
        for fut in asyncio.as_completed(futures):
            result = await fut
            print('got result', result)
    except:
        for future in futures:
            future.cancel()
        await asyncio.wait(futures)
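Not from the original answers, but on Python 3.11+ a similar effect can be had with asyncio.TaskGroup, which automatically cancels the remaining tasks when one of them raises; a minimal sketch combining it with the semaphore idea:

import asyncio

async def run_all_tg(func, args, size=10):
    semaphore = asyncio.Semaphore(size)

    async def limited(arg):
        async with semaphore:
            return await func(arg)

    async with asyncio.TaskGroup() as tg:    # Python 3.11+
        for arg in args:
            tg.create_task(limited(arg))
    # Leaving the block re-raises failures as an ExceptionGroup,
    # after the still-pending sibling tasks have been cancelled.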