How to use concurrent.futures.wait? - python

I'm studying Python and I've run into a problem with concurrent.futures.wait(). Here are the details.
I want to make the main process wait until all the child processes have completed, so I used wait() to block the main process. But I always get an error; please kindly help.
import os
import time
import random
import multiprocessing
import concurrent.futures

def child_process(args):
    pid = os.getpid()
    while ( len(args) > 0 ):
        task = args.pop(0)
        time.sleep(1 + random.random()*5)  # simulate the worker time
        print("Process "+str(pid)+" : "+str(task[0])+" "+str(task[1]))
    return

if (__name__ == "__main__") :
    mgr = multiprocessing.Manager()
    tasks = mgr.list()
    tasks = [[1,10],[2,20],[3,30],[4,40],[5,50],[6,60]]
    #executor=ProcessPoolExecutor(max_workers=3)
    f = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor:
        f.append(executor.submit(child_process, tasks))
        f.append(executor.submit(child_process, tasks))
        f.append(executor.submit(child_process, tasks))
        # wait(future,timeout=0,return_when=concurrent.futures.ALL_COMPLETED)
        concurrent.futures.wait(f[0])
        concurrent.futures.wait(f[1])
        concurrent.futures.wait(f[2])
    executor.shutdown()
The error is --
C:\Work\python\src\test>python test.py
Traceback (most recent call last):
  File "C:\Work\python\src\test\test.py", line 70, in <module>
    concurrent.futures.wait(f[0])
  File "C:\tools\Python310\lib\concurrent\futures\_base.py", line 290, in wait
    fs = set(fs)
TypeError: 'Future' object is not iterable
This puzzles me most -- isn't f[0] a Future object returned by submit()?
Then I tried with --
wait(f,timeout=0,return_when=concurrent.futures.ALL_COMPLETED)
the new error is --
C:\Work\python\src\test>python test.py
C:\Work\python\src\test\test.py:68: RuntimeWarning: coroutine 'wait' was never awaited
wait(f,timeout=0,return_when=concurrent.futures.ALL_COMPLETED)
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
I really don't know how to fix it. Please kindly advise. Thanks
Regards
Eisen

A few things to point out:
Wrapping the expression in parentheses in a while statement is redundant.
>>> a = 0
>>> while a < 10:
...     a += 1
The error message says "'Future' object is not iterable" - which means the f[0] you passed is indeed a Future object, but it is not what the wait method was expecting.
>>> from concurrent import futures
>>> help(futures.wait)
Help on function wait in module concurrent.futures._base:

wait(fs, timeout=None, return_when='ALL_COMPLETED')
    Wait for the futures in the given sequence to complete.

    Args:
        fs: The sequence of Futures (possibly created by different Executors) to
            wait upon.
    # ...
Here we can see that the fs argument actually expects a sequence of Futures.
So instead of this:
concurrent.futures.wait(f[0])
concurrent.futures.wait(f[1])
concurrent.futures.wait(f[2])
You probably want this:
concurrent.futures.wait(f)
Which is still not required, since the with block already waits until all the processes stop.
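For reference, a minimal sketch of how the submit/wait part from the question could look (keeping the question's names; this is just an illustration, not a full rewrite):

with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor:
    f = [executor.submit(child_process, tasks) for _ in range(3)]
    # optional: block until every submitted future has completed
    concurrent.futures.wait(f)
# leaving the `with` block has already waited for all workers,
# so a separate executor.shutdown() call is not needed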
Here's a fuller demonstration:
"""
Demo codes for https://stackoverflow.com/q/71458088/10909029
Waiting for child process to complete
"""
import os
import math
import queue
import multiprocessing as mp
from concurrent import futures
def child_process(task_queue: mp.Queue):
# If this doesn't work, save this function in other file. REPL or jupyter especially.
pid = os.getpid()
print(f"[{pid}] Started!")
processed_count = 0
while True:
try:
item = task_queue.get_nowait()
except queue.Empty:
# task done
break
# else continue on
# some workload
try:
print(f"[{pid}] {item}! = {math.factorial(item)}")
finally:
# tell queue we processed the item.
task_queue.task_done()
processed_count += 1
print(f"[{pid}] Task done!")
def main():
# just merely rapping codes in function namespace makes codes tiny bit faster
mp_manager = mp.Manager()
task_queue = mp_manager.Queue()
# populate queue
for n in range(100):
task_queue.put_nowait(n)
# start pool
with futures.ProcessPoolExecutor() as executor:
future_list = [executor.submit(child_process, task_queue) for _ in range(5)]
# can use executor.shutdown(wait=True) instead
# not required since all executor wait for all process to stop when exiting `with` block.
# hence, also no need to manually call executor.shutdown().
futures.wait(future_list)
if __name__ == '__main__':
main()
Output:
[18412] Started!
[18412] 0! = 1
[4680] Started!
[18412] 1! = 1
[2664] Started!
[18412] 2! = 2
[18412] 3! = 6
[17900] Started!
[18412] 4! = 24
[18412] 5! = 120
[4680] 6! = 720
[4680] 7! = 5040
[18412] 8! = 40320
[17900] 9! = 362880
[4680] 10! = 3628800
[18412] 11! = 39916800
...
[17900] 21! = 51090942171709440000
[4680] 22! = 1124000727777607680000
[2664] 23! = 25852016738884976640000
[16792] Started!
[18412] 24! = 620448401733239439360000
[17900] 25! = 15511210043330985984000000
...
[17900] 99! = 933262154439441526816992388562667004907159682643816214685929638952175999932299156089414639761565182862536979208272237582511852109168640000000000000000000000
[18412] Task done!
[17900] Task done!
[16792] Task done!
[2664] Task done!
[4680] Task done!

Related

Python queue stop when max reached

In the code below, I'm putting numbers on a queue from a thread and retrieving and printing them from the main thread. It's supposed to print numbers from 0 to 99, but it stops at 9. The max size of the queue is 10.
from threading import Thread
from queue import Queue

def fetch(queue):
    for i in range(100):
        queue.put(i)

def main():
    queue = Queue(maxsize=10)
    Thread(target=fetch, args=(queue,)).start()
    while not queue.empty():
        item = queue.get()
        print(item)

main()
When I run this code I get:
0
1
2
3
4
5
6
7
8
9
The program doesn't stop; terminating it with Ctrl+C results in:
^CException ignored in: <module 'threading' from '/usr/lib/python3.10/threading.py'>
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1560, in _shutdown
    lock.acquire()
KeyboardInterrupt:
The queue.empty() method is notoriously unreliable due to the nature of threading. You should use a sentinel value to mark the end of the queue:
from threading import Thread
from queue import Queue
from time import sleep

def fetch(queue):
    sleep(1)
    for i in range(100):
        queue.put(i)
    queue.put(None)  # None is a sentinel value

def sink1(queue):
    while True:
        item = queue.get()
        if item is None:
            break
        print(item)

def main():
    queue = Queue(maxsize=10)
    t = Thread(target=fetch, args=(queue,))
    t.start()
    sink1(queue)

main()
print('Done')
I tried your code and it seemed to work for me. I then added sleep(1) to the fetch() function, and the program quit right away, since the main thread immediately sees an empty queue.

Pass a function if it takes more than 5 seconds

I'm calling a function in a for loop. However, I want to check if that function takes longer than 5 seconds to execute; if it does, I want to skip that iteration and move on to the next one.
I have thought about using the time library and starting a clock, but the end timer would only run after the function finishes, so I wouldn't be able to skip that specific iteration within 5 seconds.
I am attaching an example below. Hope this might help you:
from threading import Timer

class LoopStopper:

    def __init__(self, seconds):
        self._loop_stop = False
        self._seconds = seconds

    def _stop_loop(self):
        self._loop_stop = True

    def run(self, generator_expression, task):
        """ Execute a task a number of times based on the generator_expression"""
        t = Timer(self._seconds, self._stop_loop)
        t.start()
        for i in generator_expression:
            task(i)
            if self._loop_stop:
                break
        t.cancel()  # Cancel the timer if the loop ends ok.

ls = LoopStopper(5)  # 5 second timeout
ls.run(range(1000000), print)  # print numbers from 0 to 999999
Here's some code I've been experimenting with. It has a task() which iterates over its params argument and takes a random amount of time to complete each item.
I start a thread for each task and wait for the thread to complete by monitoring a queue of return values. If the thread fails to complete in time, the main loop abandons it and starts the next thread.
The program shows which tasks fail or finish (different every time).
The tasks which finish have their results printed out (the param and the sleep time).
import threading, queue
import random
import time

def task(params, q):
    for p in params:
        s = random.randint(1, 4)
        s = s * s
        s = s / 8
        time.sleep(s)
        q.put((p, s), False)
    q.put(None, False)  # None is a sentinel value

def sampleQueue(q, ret, results):
    while not q.empty():
        item = q.get()
        if item:
            ret.append(item)
        else:
            # Found None sentinel
            results.append(ret)
            return True
    return False

old = []
results = []

for p in [1, 2, 3, 4]:
    q = queue.SimpleQueue()
    t = threading.Thread(target=task, args=([p, p, p, p, p], q))
    t.start()
    end = time.time() + 5
    ret = []
    failed = True
    while time.time() < end:
        time.sleep(0.1)
        if sampleQueue(q, ret, results):
            failed = False
            break
    if failed:
        print(f'Task {p} failed!')
        old.append(t)
    else:
        print(f'Task {p} finished!')
        t.join()

print(results)
print(f'{len(old)} threads failed')
for t in old:
    t.join()
print('Done')
Example output:
Task 1 finished!
Task 2 finished!
Task 3 failed!
Task 4 failed!
[[(1, 1.125), (1, 1.125), (1, 2.0), (1, 0.125), (1, 0.5)], [(2, 0.125), (2, 1.125), (2, 0.5), (2, 2.0), (2, 0.125)]]
2 threads failed
Done
I will post an alternative solution using the subprocess module. You need to create a Python file with your function, call it as a subprocess, and call the wait method. If the process doesn't finish in the desired time, it throws an error, so you kill that process and keep going with the iteration.
As an example, this is the function you want to call:
from time import time
import sys

x = eval(sys.argv[1])
t = time()
a = [i for i in range(int(x**5))]

# pipe the computation time back to the main process
sys.stdout.write('%s' % (time() - t))
And the main function, where I call the previous function on the func.py file:
import subprocess as sp
from subprocess import Popen, PIPE

for i in range(1, 50, 1):
    # call the process
    process = Popen(['python', '~func.py', '%i' % i],
                    stdout=PIPE, stdin=PIPE)
    try:
        # if it finishes within 1 sec:
        process.wait(1)
        print('Finished in: %s s' % (process.stdout.read().decode()))
    except:
        # else kill the process. It is important to kill it,
        # otherwise it will keep running.
        print('Timeout')
        process.kill()
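As a side note (my own variant, not part of the answer above), subprocess.run() can express the same idea a bit more compactly, since its timeout parameter kills the child for you and raises subprocess.TimeoutExpired:

import subprocess as sp

for i in range(1, 50):
    try:
        # run the helper script, allowing it at most 1 second
        result = sp.run(['python', 'func.py', str(i)],
                        capture_output=True, text=True, timeout=1)
        print('Finished in: %s s' % result.stdout)
    except sp.TimeoutExpired:
        # run() has already killed and reaped the child at this point
        print('Timeout')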

How to kill a process using the multiprocessing module?

I have a process that is essentially just an infinite loop and I have a second process that is a timer. How can I kill the loop process once the timer is done?
import time
import multiprocessing

def action():
    x = 0
    while True:
        if x < 1000000:
            x = x + 1
        else:
            x = 0

def timer(time):
    time.sleep(time)
    exit()

loop_process = multiprocessing.Process(target=action)
loop_process.start()

timer_process = multiprocessing.Process(target=timer, args=(time,))
timer_process.start()
I want the python script to end once the timer is done.
You could do it by using shared state between the processes and creating a flag value that all the concurrent processes can access (although this may be somewhat inefficient).
Here's what I'm suggesting:
import multiprocessing as mp
import time

def action(run_flag):
    x = 0
    while run_flag.value:
        if x < 1000000:
            x = x + 1
        else:
            x = 0
    print('action() terminating')

def timer(run_flag, secs):
    time.sleep(secs)
    run_flag.value = False

if __name__ == '__main__':
    run_flag = mp.Value('I', True)

    loop_process = mp.Process(target=action, args=(run_flag,))
    loop_process.start()

    timer_process = mp.Process(target=timer, args=(run_flag, 2.0))
    timer_process.start()

    loop_process.join()
    timer_process.join()

    print('done')
A simple return statement after the else in action() would work perfectly. Moreover, you had an error in your timer function: its argument had the same name as the built-in library time.
import time
import multiprocessing

def action():
    x = 0
    while True:
        if x < 1000000:
            x = x + 1
        else:
            x = 0
            return  # exit here, otherwise the loop runs forever

def timer(times):
    time.sleep(times)
    exit()

loop_process = multiprocessing.Process(target=action)
loop_process.start()

timer_process = multiprocessing.Process(target=timer, args=(10,))
timer_process.start()
Hope this answers your question!!!
I think you don't need to make a second process just for a timer.
Graceful Timeout
In case you need to clean up before exit in your action process, you can use a Timer thread and let the while loop check if it is still alive. This allows your worker process to exit gracefully, but you'll have to pay with reduced performance, because the repeated method call takes some time. That doesn't have to be an issue if it's not a tight loop, though.
from multiprocessing import Process
from datetime import datetime
from threading import Timer

def action(runtime, x=0):
    timer = Timer(runtime, lambda: None)  # just returns None on timeout
    timer.start()
    while timer.is_alive():
        if x < 1_000_000_000:
            x += 1
        else:
            x = 0

if __name__ == '__main__':

    RUNTIME = 1
    p = Process(target=action, args=(RUNTIME,))
    p.start()
    print(f'{datetime.now()} {p.name} started')
    p.join()
    print(f'{datetime.now()} {p.name} ended')
Example Output:
2019-02-28 19:18:54.731207 Process-1 started
2019-02-28 19:18:55.738308 Process-1 ended
Termination on Timeout
If you don't have the need for a clean shut down (you are not using shared queues, working with DBs etc.), you can let the parent process terminate() the worker-process after your specified time.
terminate()
Terminate the process. On Unix this is done using the SIGTERM signal; on Windows TerminateProcess() is used. Note that exit handlers and finally clauses, etc., will not be executed.
Note that descendant processes of the process will not be terminated – they will simply become orphaned.
Warning If this method is used when the associated process is using a pipe or queue then the pipe or queue is liable to become corrupted and may become unusable by other process. Similarly, if the process has acquired a lock or semaphore etc. then terminating it is liable to cause other processes to deadlock. docs
If you don't have anything to do in the parent you can simply .join(timeout) the worker-process and .terminate() afterwards.
from multiprocessing import Process
from datetime import datetime

def action(x=0):
    while True:
        if x < 1_000_000_000:
            x += 1
        else:
            x = 0

if __name__ == '__main__':

    RUNTIME = 1
    p = Process(target=action)
    p.start()
    print(f'{datetime.now()} {p.name} started')
    p.join(RUNTIME)
    p.terminate()
    print(f'{datetime.now()} {p.name} terminated')
Example Output:
2019-02-28 19:22:43.705596 Process-1 started
2019-02-28 19:22:44.709255 Process-1 terminated
In case you want to use terminate(), but need your parent unblocked you could also use a Timer-thread within the parent for that.
from multiprocessing import Process
from datetime import datetime
from threading import Timer

def action(x=0):
    while True:
        if x < 1_000_000_000:
            x += 1
        else:
            x = 0

def timeout(process, timeout):
    timer = Timer(timeout, process.terminate)
    timer.start()

if __name__ == '__main__':

    RUNTIME = 1
    p = Process(target=action)
    p.start()
    print(f'{datetime.now()} {p.name} started')
    timeout(p, RUNTIME)
    p.join()
    print(f'{datetime.now()} {p.name} terminated')
Example Output:
2019-02-28 19:23:45.776951 Process-1 started
2019-02-28 19:23:46.778840 Process-1 terminated

100 percent load with multiprocessing queues

This only replicates my problem: the main Python script hits 100% load if it runs a control loop over a shared queue.
import multiprocessing
import random

def func1(num, q):
    while True:
        num = random.randint(1, 101)
        if q.empty():
            q.put(num)

def func2(num, q):
    while True:
        num = q.get()
        num = num ** 2
        if q.empty():
            q.put(num)

num = 2
q = multiprocessing.Queue()

p1 = multiprocessing.Process(target=func1, args=(num, q))
p2 = multiprocessing.Process(target=func2, args=(num, q))

p1.daemon = True
p2.daemon = True

p1.start()
p2.start()

running = True
while running:
    if not q.empty():
        num = q.get(True, 0.1)
        print(num)
Is there a better method to control multiple worker processes from a script, better in the sense of not causing this load!?
I'm not sure I understand your program:
What's with the num parameter of func1() and func2()? It never gets used.
func2 will discard its result if func1 happens to have posted another number after func2 got the last number out of the queue.
Why do you daemonize the workers? Are you quite sure this is what you want?
The if not q.empty(): q.get() construct in the main code will sooner or later raise a queue.Empty exception because it's a race between it and the q.get() in func2.
The uncaught queue.Empty exception will terminate the main process, leaving the two workers orphaned - and running.
General advice:
Use different queues for issuing jobs (request queue) and collecting results (response queue). Include the request in the response if necessary.
Think about how to terminate the workers. Consider a "poison pill": a value in the request queue that causes workers to exit (a sketch of this appears after the sample code below).
Be really really sure you understand the race conditions in your code, like the one I mentioned above (empty vs. get).
Here's some sample code I hacked up:
import multiprocessing
import time
import random
import os

def request_generator(requests):
    while True:
        requests.put(random.randint(1, 101))
        time.sleep(0.01)

def worker(requests, responses):
    worker_id = os.getpid()
    while True:
        request = requests.get()
        response = request ** 2
        responses.put((request, response, worker_id))

def main():
    requests = multiprocessing.Queue()
    responses = multiprocessing.Queue()

    gen = multiprocessing.Process(target=request_generator, args=(requests,))
    w1 = multiprocessing.Process(target=worker, args=(requests, responses))
    w2 = multiprocessing.Process(target=worker, args=(requests, responses))

    gen.start()
    w1.start()
    w2.start()

    while True:
        req, resp, worker_id = responses.get()
        print("worker {}: {} => {}".format(worker_id, req, resp))

if __name__ == "__main__":
    main()
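The sample above runs forever (stop it with Ctrl+C). As promised, here is a rough sketch of the "poison pill" shutdown under the same request/response layout; this is my own illustration, not part of the original answer:

import multiprocessing

POISON = None  # sentinel request that tells a worker to exit

def worker(requests, responses):
    while True:
        request = requests.get()
        if request is POISON:
            break                          # shut down cleanly
        responses.put((request, request ** 2))

if __name__ == "__main__":
    requests = multiprocessing.Queue()
    responses = multiprocessing.Queue()

    workers = [multiprocessing.Process(target=worker, args=(requests, responses))
               for _ in range(2)]
    for w in workers:
        w.start()

    for n in range(10):                    # issue ten jobs...
        requests.put(n)
    for _ in workers:                      # ...then one pill per worker
        requests.put(POISON)

    for _ in range(10):                    # collect exactly ten results
        print(responses.get())

    for w in workers:                      # workers exit once they eat their pill
        w.join()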

Terminating a thread : Python

In the example below, if you execute the program multiple times, it spawns a new thread each time with a new ID.
1. How do I terminate all the threads on task completion?
2. How can I assign a name/ID to the threads?
import threading, Queue

THREAD_LIMIT = 3
jobs = Queue.Queue(5)  # This sets up the queue object to use 5 slots
singlelock = threading.Lock()  # This is a lock so threads don't print through each other

# list
inputlist_Values = [ (5,5),(10,4),(78,5),(87,2),(65,4),(10,10),(65,2),(88,95),(44,55),(33,3) ]

def DoWork(inputlist):
    print "Inputlist received..."
    print inputlist

    # Spawn the threads
    print "Spawning the {0} threads.".format(THREAD_LIMIT)
    for x in xrange(THREAD_LIMIT):
        print "Thread {0} started.".format(x)
        # This is the thread class that we instantiate.
        worker().start()

    # Put stuff in queue
    print "Putting stuff in queue"
    for i in inputlist:
        # Block if queue is full, and wait 5 seconds. After 5s raise Queue Full error.
        try:
            jobs.put(i, block=True, timeout=5)
        except:
            singlelock.acquire()
            print "The queue is full !"
            singlelock.release()

    # Wait for the threads to finish
    singlelock.acquire()  # Acquire the lock so we can print
    print "Waiting for threads to finish."
    singlelock.release()  # Release the lock
    jobs.join()  # This command waits for all threads to finish.

class worker(threading.Thread):
    def run(self):
        # run forever
        while 1:
            # Try and get a job out of the queue
            try:
                job = jobs.get(True, 1)
                singlelock.acquire()  # Acquire the lock
                print self
                print "Multiplication of {0} with {1} gives {2}".format(job[0], job[1], (job[0]*job[1]))
                singlelock.release()  # Release the lock
                # Let the queue know the job is finished.
                jobs.task_done()
            except:
                break  # No more jobs in the queue

def main():
    DoWork(inputlist_Values)
How do I terminate all the threads on task completion?
You could put THREAD_LIMIT sentinel values (e.g., None) at the end of the queue and exit the thread's run() method when a thread sees one.
When your main thread exits, all non-daemon threads are joined, so the program keeps running as long as any of those threads is alive. Daemon threads are terminated on program exit.
How can I assign name/ID to the threads ?
You can assign a name by passing it to the constructor or by changing .name directly.
The thread identifier .ident is a read-only property that is unique among alive threads. It may be reused if one thread exits and another starts.
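A rough sketch of that sentinel approach applied to a worker class like the one in the question (my own illustration, written against Python 3's queue module; the names are only for demonstration):

import threading, queue

jobs = queue.Queue()
SENTINEL = None          # marks the end of the queue

class worker(threading.Thread):
    def run(self):
        while True:
            job = jobs.get()
            if job is SENTINEL:
                break    # sentinel seen: leave run(), the thread ends
            print("%s: %s * %s = %s" % (self.name, job[0], job[1], job[0] * job[1]))

THREAD_LIMIT = 3
threads = [worker(name="worker-%d" % i) for i in range(THREAD_LIMIT)]
for t in threads:
    t.start()
for pair in [(5, 5), (10, 4), (78, 5), (87, 2)]:
    jobs.put(pair)
for _ in threads:        # one sentinel per thread
    jobs.put(SENTINEL)
for t in threads:
    t.join()             # returns once every thread has exited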
You could rewrite your code using multiprocessing.dummy.Pool, which provides the same interface as multiprocessing.Pool but uses threads instead of processes:
#!/usr/bin/env python
import logging
from multiprocessing.dummy import Pool

debug = logging.getLogger(__name__).debug

def work(x_y):
    try:
        x, y = x_y  # do some work here
        debug('got %r', x_y)
        return x / y, None
    except Exception as e:
        logging.getLogger(__name__).exception('work%r failed', x_y)
        return None, e

def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)s:%(threadName)s:%(asctime)s %(message)s")
    inputlist = [ (5,5),(10,4),(78,5),(87,2),(65,4),(10,10), (1,0), (0,1) ]
    pool = Pool(3)
    s = 0.
    for result, error in pool.imap_unordered(work, inputlist):
        if error is None:
            s += result
    print("sum=%s" % (s,))
    pool.close()
    pool.join()

if __name__ == "__main__":
    main()
Output
DEBUG:Thread-1:2013-01-14 15:37:37,253 got (5, 5)
DEBUG:Thread-1:2013-01-14 15:37:37,253 got (87, 2)
DEBUG:Thread-1:2013-01-14 15:37:37,253 got (65, 4)
DEBUG:Thread-1:2013-01-14 15:37:37,254 got (10, 10)
DEBUG:Thread-1:2013-01-14 15:37:37,254 got (1, 0)
ERROR:Thread-1:2013-01-14 15:37:37,254 work(1, 0) failed
Traceback (most recent call last):
File "prog.py", line 11, in work
return x / y, None
ZeroDivisionError: integer division or modulo by zero
DEBUG:Thread-1:2013-01-14 15:37:37,254 got (0, 1)
DEBUG:Thread-3:2013-01-14 15:37:37,253 got (10, 4)
DEBUG:Thread-2:2013-01-14 15:37:37,253 got (78, 5)
sum=78.0
Threads don't stop unless you tell them to stop.
My recommendation is to add a stop flag to your Thread subclass and check it in your run loop (instead of while 1:).
An example:
class worker(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        # note: avoid naming this _stop; Thread defines an internal _stop() method
        self._stop_requested = False

    def stop(self):
        self._stop_requested = True

    def run(self):
        # run until stopped
        while not self._stop_requested:
            pass  # do work
Then, when your program is quitting (for whatever reason), you have to make sure to call the stop method on all your worker threads.
About your second question: doesn't adding a name variable to your Thread subclass work for you?
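For example, a minimal shutdown sketch (assuming the worker instances from the class above are kept in a list):

workers = [worker() for _ in range(3)]
for w in workers:
    w.start()

# ... main program does its work ...

for w in workers:    # ask every worker to stop...
    w.stop()
for w in workers:    # ...and wait until they have all finished
    w.join()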
