I am trying to emulate a scenario where a child spawned by a python multiprocessing Pool gets killed. The subprocess never returns, but I would like the parent to be notified when that happens. The test code I am using is:
import multiprocessing as mp
import time
import os

result_map = {}

def foo_pool(x):
    print x, ' : ', os.getpid()
    pid = os.getpid()
    if x == 1:
        os.kill(pid, 9)
    return x

result_list = []

def log_result(result):
    print 'callback', result

def apply_async_with_callback():
    print os.getpid()
    pool = mp.Pool()
    for i in range(2):
        result_map[i] = pool.apply_async(foo_pool, args=(i,), callback=log_result)
    pool.close()
    pool.join()
    for k, v in result_map.iteritems():
        print k, ' : ', v.successful()
    print(result_list)

if __name__ == '__main__':
    apply_async_with_callback()
The Pool of processes does not expose any mechanism to notify the parent process about a child process termination.
Projects like billiard or pebble might do what you're looking for.
Keep in mind that there's no way to intercept a SIGKILL signal so using signal handlers is pointless.
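For comparison (this is my own addition, not part of the original answer): the standard library's concurrent.futures.ProcessPoolExecutor does propagate an abruptly killed worker back to the parent, by marking the pool as broken and failing the pending futures with BrokenProcessPool. A minimal sketch:

# Minimal sketch (my addition): ProcessPoolExecutor notifies the parent when a
# worker dies abruptly, unlike multiprocessing.Pool.
import os
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures.process import BrokenProcessPool

def foo_pool(x):
    if x == 1:
        os.kill(os.getpid(), 9)  # simulate the worker being killed
    return x

if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(foo_pool, i) for i in range(2)]
        for f in futures:
            try:
                print('result:', f.result())
            except BrokenProcessPool:
                print('a worker process died abruptly')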
A related question, for example:
I have a controller script.
I have a worker script.
I have 50 python objects that have to be passed to the worker script.
I want them to run in parallel.
The worker script has its own parallelisation of some database fetches.
This I achieve with:
p = Pool(processes=NUM_PROCS)
results = p.starmap(db_fetch, db_fetch_arguments)
p.close()
p.join()
What's the most Pythonic way to pass my 50 arguments (Python objects, not string arguments) into my worker, run them in parallel, and avoid any issues when the worker tries to spawn more child processes?
Thank you in advance.
Edit 1:
from multiprocessing import Pool
import os

def worker(num: int):
    num_list = list(range(num))
    # print('worker start')
    with Pool() as p:
        p.map(printer, num_list)

def printer(num: int):
    # print('printer')
    print(f"Printing num {num} - child: {os.getpid()} - parent: {os.getppid()}")

if __name__ == '__main__':
    with Pool(4) as controller_pool:
        controller_pool.map(worker, [1, 2, 3])
        print('here')
Here I am getting the error: AssertionError: daemonic processes are not allowed to have children
I used ProcessPoolExecutor from concurrent.futures as my outer controller pool. Inside it I used a normal multiprocessing.Pool.
Thanks.
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor
import os

def worker(num: int):
    num_list = list(range(num))
    # print('worker start')
    with Pool() as p:
        p.map(printer, num_list)

def printer(num: int):
    # print('printer')
    print(f"Printing num {num} - child: {os.getpid()} - parent: {os.getppid()}")

if __name__ == '__main__':
    with ProcessPoolExecutor(4) as controller_pool:
        controller_pool.map(worker, [1, 2, 3])
        print('here')
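A quick way to see why this helps (my own check, not from the original answer): multiprocessing.Pool marks its worker processes as daemonic, and daemonic processes are not allowed to have children, whereas I would expect the executor's workers to report daemon == False, leaving them free to create their own Pool:

# Hypothetical check (my addition): compare the daemon flag seen inside each
# kind of worker process.
from multiprocessing import Pool, current_process
from concurrent.futures import ProcessPoolExecutor

def report(_):
    return current_process().daemon

if __name__ == '__main__':
    with Pool(1) as p:
        print('Pool worker daemon flag:', p.map(report, [0])[0])              # True
    with ProcessPoolExecutor(1) as ex:
        print('Executor worker daemon flag:', list(ex.map(report, [0]))[0])   # False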
I am following a tutorial on YouTube to learn multiprocessing.
from multiprocessing import Pool
import subprocess
import time

def f(n):
    sum = 0
    for x in range(1000):
        sum += x * x
    return sum

if __name__ == "__main__":
    t1 = time.time()
    p = Pool()
    result = p.map(f, range(10000))
    p.close()
    p.join()
    print("Pool took: ", time.time() - t1)
I am puzzled about p.close() and p.join().
If the processes no longer exist once they are closed, how can .join() still be applied to them?
join() waits for the child processes to exit; when a child terminates, the operating system notifies its parent that it is dead. close() doesn't kill any process. It just tells the pool that no more tasks will be submitted, closing the pipe that feeds the workers, which informs the readers of that pipe that there will be no more data coming through it; the workers then exit once the outstanding work is finished. Note that you must call close() (or terminate()) before you can join() a pool.
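A small sketch of the same idea (my own illustration, not from the original answer): close() only forbids new submissions, the workers keep running until their queued tasks finish, and join() is what blocks until every worker has exited.

from multiprocessing import Pool
import time

def slow(x):
    time.sleep(0.5)
    return x * x

if __name__ == '__main__':
    p = Pool(2)
    async_result = p.map_async(slow, range(4))
    p.close()                   # no new tasks may be submitted; workers keep working
    p.join()                    # block until every worker process has exited
    print(async_result.get())   # [0, 1, 4, 9] - results are still available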
I know the basic usage of multiprocessing pools, and I use the apply_async() function to avoid blocking. My problem code looks like this:
from multiprocessing import Pool, Queue
import time

q = Queue(maxsize=20)
script = "my_path/my_exec_file"

def initQueue():
    ...

def test_func(queue):
    print 'Coming'
    while True:
        do_sth
        ...

if __name__ == '__main__':
    initQueue()
    pool = Pool(processes=3)
    for i in xrange(11, 20):
        result = pool.apply_async(test_func, (q,))
    pool.close()
    while True:
        if q.empty():
            print 'Queue is empty, quit'
            break
        print 'Main Process Listening'
        time.sleep(2)
The output is always 'Main Process Listening'; I can't find the word 'Coming' anywhere.
The code above has no syntax errors and raises no exceptions.
Can anyone help? Thanks!
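For what it's worth, a sketch of one common fix (my own addition, not an answer from the thread): a multiprocessing.Queue generally cannot be handed to a pool worker as an apply_async argument, which would explain why 'Coming' never appears, whereas a Manager().Queue() proxy can be passed freely. It also helps to keep the AsyncResult objects and call get() on them, so errors raised on the way to the worker are not silently swallowed.

# Sketch in Python 3 (my addition): pass a manager-backed queue to pool workers.
from multiprocessing import Pool, Manager

def test_func(queue):
    print('Coming')
    queue.put('done')

if __name__ == '__main__':
    manager = Manager()
    q = manager.Queue(maxsize=20)
    pool = Pool(processes=3)
    results = [pool.apply_async(test_func, (q,)) for _ in range(9)]
    pool.close()
    for r in results:
        r.get()          # re-raises any exception from submitting or running the task
    pool.join()
    while not q.empty():
        print(q.get())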
I have faced some very strange behavior in Python. When I start a parallel program which uses multiprocessing and spawns 2 more processes (a producer and a consumer) from the main process, I see 4 processes running. I think there should be only 3: the main process, the producer, and the consumer. But after some time a 4th process appears.
I have made a minimal example that reproduces the problem. It creates two processes which calculate Fibonacci numbers using recursion:
from multiprocessing import Process, Queue
import os, sys
import time
import signal

def fib(n):
    if n == 1 or n == 2:
        return 1
    result = fib(n - 1) + fib(n - 2)
    return result

def worker(queue, amount):
    pid = os.getpid()
    def workerProcess(a, b):
        print a, b
        print 'This is Writer(', pid, ')'
    signal.signal(signal.SIGUSR1, workerProcess)
    print 'Worker', os.getpid()
    for i in range(0, amount):
        queue.put(fib(35 - i % 4))
    queue.put('end')
    print 'Worker finished'

def writer(queue):
    pid = os.getpid()
    def writerProcess(a, b):
        print a, b
        print 'This is Writer(', pid, ')'
    signal.signal(signal.SIGUSR1, writerProcess)
    print 'Writer', os.getpid()
    working = True
    while working:
        if not queue.empty():
            value = queue.get()
            if value != 'end':
                fib(32 + value % 4)
            else:
                working = False
        else:
            time.sleep(1)
    print 'Writer finished'

def daemon():
    print 'Daemon', os.getpid()
    while True:
        time.sleep(1)

def useProcesses(amount):
    q = Queue()
    writer_process = Process(target=writer, args=(q,))
    worker_process = Process(target=worker, args=(q, amount))
    writer_process.daemon = True
    worker_process.daemon = True
    worker_process.start()
    writer_process.start()

def run(amount):
    print 'Main', os.getpid()
    pid = os.getpid()
    def killThisProcess(a, b):
        print a, b
        print 'Main killed by signal(', pid, ')'
        sys.exit(0)
    signal.signal(signal.SIGTERM, killThisProcess)
    useProcesses(amount)
    print 'Ready to exit main'
    while True:
        time.sleep(1)

def main():
    run(1000)

if __name__ == '__main__':
    main()
What I see in the output is:
$ python python_daemon.py
Main 13257
Ready to exit main
Worker 13258
Writer 13259
but in htop I see the following:
And it looks like the process with PID 13322 is actually a thread. The question is: what is it, who spawned it, and why?
If I send SIGUSR1 to this PID, the following appears in the output:
10 <frame object at 0x7f05c14ed5d8>
This is Writer( 13258 )
This question is slightly related to: Python multiprocessing: more processes than requested
The extra thread belongs to the Queue object.
Internally it uses a thread to dispatch the data over a Pipe.
From the docs:
class multiprocessing.Queue([maxsize])
Returns a process shared queue implemented using a pipe and a few locks/semaphores. When a process first puts an item on the queue a feeder thread is started which transfers objects from a buffer into the pipe.
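A small way to see this feeder thread directly (my own check, not from the original answer): it is started lazily in whichever process first puts something on the queue.

# Check (my addition): the feeder thread only appears after the first put().
import threading
from multiprocessing import Queue

if __name__ == '__main__':
    q = Queue()
    print([t.name for t in threading.enumerate()])  # only the MainThread
    q.put('hello')                                  # first put starts the feeder thread
    print([t.name for t in threading.enumerate()])  # MainThread plus the queue's feeder thread
    print(q.get())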
I have a Python script called MyScript, running on a Unix-like system (RHEL-based), that has two functions, called A and B. I'd like them to run in different, independent processes (detach B from A):
Start script MyScript
Execute function A
Spawn a new process, passing data from function A to B
While function B runs, continue with function A
When function A completes, exit MyScript even if B is still running
I thought I should use multiprocessing to create a daemon process, but the documentation suggests that's not the right use case. So, I decided to spawn a child process and a child^2 process (the child's child), and then force the child to terminate. While this workaround appears to work, it seems really ugly.
Can you help me make it more pythonic? Does the subprocess module have a method that will operate on a function? Sample code below.
import multiprocessing
import time
import sys
import os

def parent_child():
    p = multiprocessing.current_process()
    print 'Starting parent child:', p.name, p.pid
    sys.stdout.flush()
    cc = multiprocessing.Process(name='childchild', target=child_child)
    cc.daemon = False
    cc.start()
    print 'Exiting parent child:', p.name, p.pid
    sys.stdout.flush()

def child_child():
    p = multiprocessing.current_process()
    print 'Starting child child:', p.name, p.pid
    sys.stdout.flush()
    time.sleep(30)
    print 'Exiting child child:', p.name, p.pid
    sys.stdout.flush()

def main():
    print 'starting main', os.getpid()
    d = multiprocessing.Process(name='parentchild', target=parent_child)
    d.daemon = False
    d.start()
    time.sleep(5)
    d.terminate()
    print 'exiting main', os.getpid()

main()
Here is just a reworked version of your original code that moves the functionality into a single call, spawn_detached(callable). It keeps the detached process running even after the program exits:
import time
import os
from multiprocessing import Process, current_process

def spawn_detached(callable):
    p = _spawn_detached(0, callable)
    # give the process a moment to set up
    # and then kill the first child to detach
    # the second.
    time.sleep(.001)
    p.terminate()

def _spawn_detached(count, callable):
    count += 1
    p = current_process()
    print 'Process #%d: %s (%d)' % (count, p.name, p.pid)
    if count < 2:
        name = 'child'
    elif count == 2:
        name = callable.func_name
    else:
        # we should now be inside of our detached process
        # so just call the function
        return callable()
    # otherwise, spawn another process, passing the counter as well
    p = Process(name=name, target=_spawn_detached, args=(count, callable))
    p.daemon = False
    p.start()
    return p

def operation():
    """ Just some arbitrary function """
    print "Entered detached process"
    time.sleep(15)
    print "Exiting detached process"

if __name__ == "__main__":
    print 'starting main', os.getpid()
    p = spawn_detached(operation)
    print 'exiting main', os.getpid()
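For context (my note, not part of the original answer): terminating the intermediate child is what detaches the grandchild. The orphaned grandchild is re-parented by the operating system and keeps running after the main script exits, much like the classic Unix double-fork daemonization trick, only expressed with multiprocessing.Process objects instead of os.fork().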