How to implement multi-threaded asyncio outside a function? - python

I am trying to run a multi-threaded program with asyncio, but I am failing at the part of adding threads. The program runs ok with asyncio as it is:
async def main(var1, var2):
tasks = list()
for z in var1:
for x in range(5):
tasks.append(get_ip(z, var2))
return await asyncio.gather(*tasks)
loop = asyncio.get_event_loop()
start_time = time.time()
for x in list:
result = loop.run_until_complete(main(x, list2))
loop.run_until_complete(release_main(result))
loop.close()
I want to have the for x in list: in threads, I have 8 CPUs so I would want to run it with 8 threads like for example using: with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:.
I have been reading posts and everything but I either mess the result from result, or break something and doesn't work. Help/tips needed.
await loop.run_in_executor() doesn't work if I don't have that in a function, is it really needed? when I add the above code in a function and call it it breaks everything

Asyncio loops are not thread safe and are supposed to be used in a single thread, except for few thread safe function e.g. run_coroutine_threadsafe. And so one thing you can do is to spawn multiple threads and then create a separate loop in each thread:
import asyncio
import multiprocessing
import threading
import queue
# Spawn threads and run new loop in each thread
loops = queue.Queue()
def thread_job():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loops.put(loop)
loop.run_forever()
for _ in range(multiprocessing.cpu_count()):
t = threading.Thread(target=thread_job)
t.start()
# Our main
async def main(x, ev):
try:
await asyncio.sleep(1)
print(x, threading.current_thread().ident)
finally:
ev.set() # events are used to signalize that we are done
# Our job, that only runs main
def job(x, ev):
loop = asyncio.get_event_loop()
loop.create_task(main(x, ev))
# Main thread that synchronizes everything
lst = [i for i in range(3)]
events = []
for x in lst:
ev = threading.Event()
events.append(ev)
# by putting the loop back to queue
# we effectively shift the queue and
# distribute the load equally
loop = loops.get()
loops.put(loop)
loop.call_soon_threadsafe(job, x, ev)
for ev in events:
ev.wait()
# Once all events are set, i.e. we are done, cleanup
while not loops.empty():
loop = loops.get()
loop.call_soon_threadsafe(loop.stop)
I use a FIFO queue for loops here, in order to distribute load equally between threads.

Related

Can I start a parallel process from while loop in python?

I need to start another process to run parallel with the while loop:
while True:
#bunch of stuff happening
if #something happens:
#do something (here I have something that takes time and while loop will 'pause' untill this
finishes. I need the while loop to somehow continue looping parallel with
this process.)
I tried something like this:
while True:
#bunch of stuff happening
if #something happens:
exec(open("filename.py").read()) #here I tried to call for another script but the while loop
won't continue. It just runs this script and finishes, but
I need this secont script to run parallel with the while loop looping.
You could use multiprocessing for this. Check the doc here
Here's a minimalistic example, hope this helps you.
import multiprocessing
number_of_processes = 5
def exec_process(filename):
#your exec code goes here
p = multiprocessing.Pool(processes = number_of_processes)
while True:
if: #something happens
p.apply_async(exec_process, (filename,))
p.close()
p.join()
Additionally, it is also good to use callback which becomes like master to your processes where you could define your terminating conditions.
Your definition could be like:
def exec_process(filename):
try:
#do what it does
return True
except:
return False
def callback(result):
if not result:
#do what you want to do in case of failure
#something like p.terminate()
#indicate failure to global variables
#Now apply call becomes:
p.apply_async(exec_process, (filename,), callback=callback)
You can use asyncio to do that. Here's a fully working example of a basic producer/consumer:
import asyncio
import random
from datetime import datetime
from pydantic import BaseModel
class Measurement(BaseModel):
data: float
time: datetime
async def measure(queue: asyncio.Queue):
while True:
# Replicate blocking call to recieve data
await asyncio.sleep(1)
print("Measurement complete!")
for i in range(3):
data = Measurement(
data=random.random(),
time=datetime.utcnow()
)
await queue.put(data)
await queue.put(None)
async def process(queue: asyncio.Queue):
while True:
data = await queue.get()
print(f"Got measurement! {data}")
# Replicate pause for http request
await asyncio.sleep(0.3)
print("Sent data to server")
loop = asyncio.get_event_loop()
queue = asyncio.Queue(loop=loop)
meansurement = measure(queue)
processor = process(queue)
loop.run_until_complete(asyncio.gather(processor, meansurement))
loop.close()

Callback seems not to be executed when attached to another thread

What I want to do is, in the Python console:
Create an asyncio future
Attach a callback to this future
Set the future value (and so get at the same time the callback result)
To do this, I tried that, in the Python console:
from threading import Thread
import asyncio
def start_loop(loop):
asyncio.set_event_loop(loop)
loop.run_forever()
new_loop = asyncio.new_event_loop()
t = Thread(target=start_loop, args=(new_loop,))
t.start()
fut = new_loop.create_future()
fut.add_done_callback(lambda _: print("Hey"))
fut.set_result(True)
I expect "Hey" to be printed, but actually I get nothing.
Additional information:
When I do:
import asyncio
loop = asyncio.get_event_loop()
fut = loop.create_future()
fut.add_done_callback(lambda x: print("Hey"))
fut.set_result(True)
loop.run_forever()
I get the expected result, but I had to call run_forever while the future is already set.
My questions are:
Why my first block of code does not do the job as expected ?
Is there a hypothetical syntax like the following existing ?
import asyncio
loop = asyncio.get_event_loop()
fut = loop.create_future()
# Hypothetical part
loop.run_forever_in_background()
# End of hypothetical part
fut.add_done_callback(lambda x: print("Hey"))
fut.set_result(True)
My high level purpose is to play with futures and asyncio directly in the console, to get a better understanding of it.
Pay attention when playing with asyncio loops, futures etc. in multithreading context.
Almost all asyncio objects are not thread safe, which is
typically not a problem unless there is code that works with them from
outside of a Task or a callback.
...
To schedule a coroutine object from a different OS thread, the
run_coroutine_threadsafe() function should be used. It returns a
concurrent.futures.Future to access the result:
from threading import Thread
import asyncio
def start_loop(loop):
asyncio.set_event_loop(loop)
loop.run_forever()
new_loop = asyncio.new_event_loop()
t = Thread(target=start_loop, args=(new_loop,))
t.start()
async def add_future():
fut = new_loop.create_future()
fut.add_done_callback(lambda _: print("Hey"))
fut.set_result(True)
asyncio.run_coroutine_threadsafe(add_future(), loop=new_loop)
Test:
$ python3 test.py
Hey
Another option is using loop.call_soon_threadsafe(callback, *args, context=None).
https://docs.python.org/3/library/asyncio-dev.html#asyncio-multithreading

Python threading.Thread.join() is blocking

I'm working with asynchronous programming and wrote a small wrapper class for thread-safe execution of co-routines based on some ideas from this thread here: python asyncio, how to create and cancel tasks from another thread. After some debugging, I found that it hangs when calling the Thread class's join() function (I overrode it only for testing). Thinking I made a mistake, I basically copied the code that the OP said he used and tested it to find the same issue.
His mildly altered code:
import threading
import asyncio
from concurrent.futures import Future
import functools
class EventLoopOwner(threading.Thread):
class __Properties:
def __init__(self, loop, thread, evt_start):
self.loop = loop
self.thread = thread
self.evt_start = evt_start
def __init__(self):
threading.Thread.__init__(self)
self.__elo = self.__Properties(None, None, threading.Event())
def run(self):
self.__elo.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.__elo.loop)
self.__elo.thread = threading.current_thread()
self.__elo.loop.call_soon_threadsafe(self.__elo.evt_start.set)
self.__elo.loop.run_forever()
def stop(self):
self.__elo.loop.call_soon_threadsafe(self.__elo.loop.stop)
def _add_task(self, future, coro):
task = self.__elo.loop.create_task(coro)
future.set_result(task)
def add_task(self, coro):
self.__elo.evt_start.wait()
future = Future()
p = functools.partial(self._add_task, future, coro)
self.__elo.loop.call_soon_threadsafe(p)
return future.result() # block until result is available
def cancel(self, task):
self.__elo.loop.call_soon_threadsafe(task.cancel)
async def foo(i):
return 2 * i
async def main():
elo = EventLoopOwner()
elo.start()
task = elo.add_task(foo(10))
x = await task
print(x)
elo.stop(); print("Stopped")
elo.join(); print("Joined") # note: giving it a timeout does not fix it
if __name__ == "__main__":
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
assert isinstance(loop, asyncio.AbstractEventLoop)
try:
loop.run_until_complete(main())
finally:
loop.close()
About 50% of the time when I run it, It simply stalls and says "Stopped" but not "Joined". I've done some debugging and found that it is correlated to when the Task itself sent an exception. This doesn't happen every time, but since it occurs when I'm calling threading.Thread.join(), I have to assume it is related to the destruction of the loop. What could possibly be causing this?
The exception is simply: "cannot join current thread" which tells me that the .join() is sometimes being run on the thread from which I called it and sometimes from the ELO thread.
What is happening and how can I fix it?
I'm using Python 3.5.1 for this.
Note: This is not replicated on IDE One: http://ideone.com/0LO2D9

Is there a way to use asyncio.Queue in multiple threads?

Let's assume I have the following code:
import asyncio
import threading
queue = asyncio.Queue()
def threaded():
import time
while True:
time.sleep(2)
queue.put_nowait(time.time())
print(queue.qsize())
#asyncio.coroutine
def async():
while True:
time = yield from queue.get()
print(time)
loop = asyncio.get_event_loop()
asyncio.Task(async())
threading.Thread(target=threaded).start()
loop.run_forever()
The problem with this code is that the loop inside async coroutine is never finishing the first iteration, while queue size is increasing.
Why is this happening this way and what can I do to fix it?
I can't get rid of separate thread, because in my real code I use a separate thread to communicate with a serial device, and I haven't find a way to do that using asyncio.
asyncio.Queue is not thread-safe, so you can't use it directly from more than one thread. Instead, you can use janus, which is a third-party library that provides a thread-aware asyncio queue.
import asyncio
import threading
import janus
def threaded(squeue):
import time
while True:
time.sleep(2)
squeue.put_nowait(time.time())
print(squeue.qsize())
#asyncio.coroutine
def async_func(aqueue):
while True:
time = yield from aqueue.get()
print(time)
loop = asyncio.get_event_loop()
queue = janus.Queue(loop=loop)
asyncio.create_task(async_func(queue.async_q))
threading.Thread(target=threaded, args=(queue.sync_q,)).start()
loop.run_forever()
There is also aioprocessing (full-disclosure: I wrote it), which provides process-safe (and as a side-effect, thread-safe) queues as well, but that's overkill if you're not trying to use multiprocessing.
Edit
As pointed it out in other answers, for simple use-cases you can use loop.call_soon_threadsafe to add to the queue, as well.
If you do not want to use another library you can schedule a coroutine from the thread. Replacing the queue.put_nowait with the following works fine.
asyncio.run_coroutine_threadsafe(queue.put(time.time()), loop)
The variable loop represents the event loop in the main thread.
EDIT:
The reason why your async coroutine is not doing anything is that
the event loop never gives it a chance to do so. The queue object is
not threadsafe and if you dig through the cpython code you find that
this means that put_nowait wakes up consumers of the queue through
the use of a future with the call_soon method of the event loop. If
we could make it use call_soon_threadsafe it should work. The major
difference between call_soon and call_soon_threadsafe, however, is
that call_soon_threadsafe wakes up the event loop by calling loop._write_to_self() . So let's call it ourselves:
import asyncio
import threading
queue = asyncio.Queue()
def threaded():
import time
while True:
time.sleep(2)
queue.put_nowait(time.time())
queue._loop._write_to_self()
print(queue.qsize())
#asyncio.coroutine
def async():
while True:
time = yield from queue.get()
print(time)
loop = asyncio.get_event_loop()
asyncio.Task(async())
threading.Thread(target=threaded).start()
loop.run_forever()
Then, everything works as expected.
As for the threadsafe aspect of
accessing shared objects,asyncio.queue uses under the hood
collections.deque which has threadsafe append and popleft.
Maybe checking for queue not empty and popleft is not atomic, but if
you consume the queue only in one thread (the one of the event loop)
it could be fine.
The other proposed solutions, loop.call_soon_threadsafe from Huazuo
Gao's answer and my asyncio.run_coroutine_threadsafe are just doing
this, waking up the event loop.
BaseEventLoop.call_soon_threadsafe is at hand. See asyncio doc for detail.
Simply change your threaded() like this:
def threaded():
import time
while True:
time.sleep(1)
loop.call_soon_threadsafe(queue.put_nowait, time.time())
loop.call_soon_threadsafe(lambda: print(queue.qsize()))
Here's a sample output:
0
1443857763.3355968
0
1443857764.3368602
0
1443857765.338082
0
1443857766.3392274
0
1443857767.3403943
What about just using threading.Lock with asyncio.Queue?
class ThreadSafeAsyncFuture(asyncio.Future):
""" asyncio.Future is not thread-safe
https://stackoverflow.com/questions/33000200/asyncio-wait-for-event-from-other-thread
"""
def set_result(self, result):
func = super().set_result
call = lambda: func(result)
self._loop.call_soon_threadsafe(call) # Warning: self._loop is undocumented
class ThreadSafeAsyncQueue(queue.Queue):
""" asyncio.Queue is not thread-safe, threading.Queue is not awaitable
works only with one putter to unlimited-size queue and with several getters
TODO: add maxsize limits
TODO: make put corouitine
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.lock = threading.Lock()
self.loop = asyncio.get_event_loop()
self.waiters = []
def put(self, item):
with self.lock:
if self.waiters:
self.waiters.pop(0).set_result(item)
else:
super().put(item)
async def get(self):
with self.lock:
if not self.empty():
return super().get()
else:
fut = ThreadSafeAsyncFuture()
self.waiters.append(fut)
result = await fut
return result
See also - asyncio: Wait for event from other thread

Checking on a thread / remove from list

I have a thread which extends Thread. The code looks a little like this;
class MyThread(Thread):
def run(self):
# Do stuff
my_threads = []
while has_jobs() and len(my_threads) < 5:
new_thread = MyThread(next_job_details())
new_thread.run()
my_threads.append(new_thread)
for my_thread in my_threads
my_thread.join()
# Do stuff
So here in my pseudo code I check to see if there is any jobs (like a db etc) and if there is some jobs, and if there is less than 5 threads running, create new threads.
So from here, I then check over my threads and this is where I get stuck, I can use .join() but my understanding is that - this then waits until it's finished so if the first thread it checks is still in progress, it then waits till it's done - even if the other threads are finished....
so is there a way to check if a thread is done, then remove it if so?
eg
for my_thread in my_threads:
if my_thread.done():
# process results
del (my_threads[my_thread]) ?? will that work...
As TokenMacGuy says, you should use thread.is_alive() to check if a thread is still running. To remove no longer running threads from your list you can use a list comprehension:
for t in my_threads:
if not t.is_alive():
# get results from thread
t.handled = True
my_threads = [t for t in my_threads if not t.handled]
This avoids the problem of removing items from a list while iterating over it.
mythreads = threading.enumerate()
Enumerate returns a list of all Thread objects still alive.
https://docs.python.org/3.6/library/threading.html
you need to call thread.isAlive()to find out if the thread is still running
The answer has been covered, but for simplicity...
# To filter out finished threads
threads = [t for t in threads if t.is_alive()]
# Same thing but for QThreads (if you are using PyQt)
threads = [t for t in threads if t.isRunning()]
Better way is to use Queue class:
http://docs.python.org/library/queue.html
Look at the good example code in the bottom of documentation page:
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = Queue()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
A easy solution to check thread finished or not. It is thread safe
Install pyrvsignal
pip install pyrvsignal
Example:
import time
from threading import Thread
from pyrvsignal import Signal
class MyThread(Thread):
started = Signal()
finished = Signal()
def __init__(self, target, args):
self.target = target
self.args = args
Thread.__init__(self)
def run(self) -> None:
self.started.emit()
self.target(*self.args)
self.finished.emit()
def do_my_work(details):
print(f"Doing work: {details}")
time.sleep(10)
def started_work():
print("Started work")
def finished_work():
print("Work finished")
thread = MyThread(target=do_my_work, args=("testing",))
thread.started.connect(started_work)
thread.finished.connect(finished_work)
thread.start()

Categories