Let's assume that I am starting a process in Python with the following code:
from multiprocessing import Process
import time

def f(name):
    print('hello', name)

if __name__ == '__main__':
    p = Process(target=f, name="Process-1", args=('bob',))
    p.start()
Now I want to terminate the process. I can simply do:
p.terminate()
However, I would like to terminate the process by its name. Is that possible?
To do that, you need to store a mapping between process names and your process objects. A helper function makes the code easier to read (IMO):
def terminate(procname):
    return pmap[procname].terminate()

if __name__ == '__main__':
    pmap = {}
    pname = "process-1"
    p = Process(target=f, name=pname, args=('bob',))
    pmap[pname] = p
    p.start()
Then to terminate:
terminate(pname)
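If you prefer not to keep your own map, a rough alternative (a sketch, assuming the process you want to kill was started as a child of the current process) is to look it up by name via multiprocessing.active_children(); the helper terminate_by_name below is only illustrative:

import multiprocessing

def terminate_by_name(procname):
    # active_children() lists the live child processes of the caller;
    # terminate every child whose name matches.
    for proc in multiprocessing.active_children():
        if proc.name == procname:
            proc.terminate()

You would then call it as terminate_by_name("Process-1").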
Current Code
import multiprocessing as mu
import time

global_array = []

def add_array1(array):
    while True:
        time.sleep(2.5)
        global_array.append(1)
        print(global_array)

def add_array2(array):
    while True:
        time.sleep(3)
        global_array.append(2)
        print(global_array)

def runInParallel(*fns):
    if __name__ == '__main__':
        proc = []
        for fn in fns:
            p = mu.Process(target=fn)
            p.start()
            proc.append(p)
        for p in proc:
            p.join()

runInParallel(
    add_array1(global_array),
    add_array2(global_array)
)
When running the code above, only the first function, add_array1(), appends values to the array and prints them, so instead of both functions running I get the wrong output:
[1]
[1,1]
[1,1,1]
whereas the actual desired output is:
[1]
[1,2]
[1,2,1]
[1,2,1,2]
Your problem is that the function call
runInParallel( add_array1(global_array), add_array2(global_array))
executes the functions and passes their return values as parameters to runInParallel. Since add_array1 is an endless loop, it never returns from that first call. You need to pass the functions themselves - not the return values of calling them - as parameters to runInParallel(...).
Start with
runInParallel(add_array1, add_array2)  # names of the functions, don't call them
and change
def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = mu.Process(target=fn, args=(global_array,))  # provide the parameter here
        p.start()
        proc.append(p)
    for p in proc:
        p.join()
and then fix the "not joining" problem caused by your worker functions never returning.
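Note also that each child process gets its own copy of global_array, so appends from the two processes will not interleave into a single list. A minimal sketch (not the original code; the helper add_value and its parameters are just for illustration) that shares one list between processes would use a Manager:

import multiprocessing as mu
import time

def add_value(shared, value, delay):
    # Append to the manager-backed list, which is visible to every process.
    while True:
        time.sleep(delay)
        shared.append(value)
        print(list(shared))

if __name__ == '__main__':
    manager = mu.Manager()
    shared = manager.list()  # proxy list shared between processes
    procs = [
        mu.Process(target=add_value, args=(shared, 1, 2.5)),
        mu.Process(target=add_value, args=(shared, 2, 3)),
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()  # never returns here, because the workers loop forever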
Example from the official documentation of multiprocessing.Process:
from multiprocessing import Process
import os

def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())

# Function name is f
def f(name):
    info('function f')
    print('hello', name)

if __name__ == '__main__':
    info('main line')
    # f itself is passed as target, and its arguments go in args - not f('bob')
    p = Process(target=f, args=('bob',))
    p.start()
    p.join()
You can use the code below to get your desired output:
import threading
import time

global_array = []

def add_array1():
    while True:
        time.sleep(2.5)
        global_array.append(1)
        print(global_array)

def add_array2():
    while True:
        time.sleep(3)
        global_array.append(2)
        print(global_array)

def runInParallel(*fns):
    if __name__ == '__main__':
        proc = []
        for fn in fns:
            p = threading.Thread(target=fn)
            p.start()
            proc.append(p)
        for p in proc:
            p.join()
if __name__ == '__main__':
    runInParallel(
        add_array1,
        add_array2
    )
You can use this simple code to get your desired output (note that it alternates the appended values in a single process rather than running two functions in parallel):
import time

global_array = []

def add_array1(array):
    while True:
        time.sleep(2.5)
        if len(global_array) % 2 == 0:
            global_array.append(1)
            print(global_array)
        else:
            global_array.append(2)
            print(global_array)

add_array1(global_array)
Thanks to "How to run functions in parallel?", the following code works.
import time
from multiprocessing import Process

def worker():
    time.sleep(2)
    print("Working")

def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = Process(target=fn)
        p.start()
        proc.append(p)
    for p in proc:
        p.join()

if __name__ == '__main__':
    start = time.time()
    runInParallel(worker, worker, worker, worker)
    print("Total time taken: ", time.time() - start)
However, if I add an argument to worker(), it does not run in parallel anymore.
import time
from multiprocessing import Process

def worker(ii):
    time.sleep(ii)
    print("Working")

def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = Process(target=fn)
        p.start()
        proc.append(p)
    for p in proc:
        p.join()

if __name__ == '__main__':
    start = time.time()
    runInParallel(worker(2), worker(2), worker(2), worker(2))
    print("Total time taken: ", time.time() - start)
What might be the reason for that?
You should modify runInParallel to do iterable unpacking.
import time
from multiprocessing import Process

def worker(ii):
    time.sleep(ii)
    print("Working")

def runInParallel(*fns):
    proc = []
    for fn in fns:
        func, *args = fn  # unpack (function, arg1, arg2, ...) tuples
        p = Process(target=func, args=args)
        p.start()
        proc.append(p)
    for p in proc:
        p.join()

if __name__ == '__main__':
    start = time.time()
    runInParallel((worker, 2), (worker, 3), (worker, 5), (worker, 2))
    print("Total time taken: ", time.time() - start)
It's because of the difference between worker and worker(). The first is the function itself, while the latter is a call to it. What happens on the line runInParallel(worker(2), worker(2), worker(2), worker(2)) is that all four calls are run before the execution of runInParallel even begins. If you add a print(fns) at the beginning of runInParallel, you will see the difference.
Quick fix:
def worker_caller():
    worker(2)
and:
runInParallel(worker_caller, worker_caller, worker_caller, worker_caller)
That's not very convenient but it's mostly intended to show what the problem is. The problem is not in the function worker. The problem is that you're mixing up passing a function and passing a function call. If you changed your first version to:
runInParallel(worker(), worker(), worker(), worker())
then you would run into exactly the same issue.
But you can do this:
runInParallel(lambda:worker(2), lambda: worker(2), lambda: worker(2), lambda: worker(2))
Lambdas are very useful. Here is another version:
a = lambda:worker(2)
b = lambda:worker(4)
c = lambda:worker(3)
d = lambda:worker(1)
runInParallel(a, b, c, d)
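One caveat: a lambda cannot be pickled, so this trick works with the default fork start method on Unix but not with spawn (for example, on Windows). A more portable sketch, reusing the worker and runInParallel definitions above, could use functools.partial, which is picklable as long as the wrapped function is defined at module level:

from functools import partial

if __name__ == '__main__':
    # partial(worker, 2) behaves like lambda: worker(2), but it can be pickled,
    # so it also works when processes are started with the spawn method.
    runInParallel(partial(worker, 2), partial(worker, 4), partial(worker, 3), partial(worker, 1))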
To pass arguments, you need to pass them to the Process constructor:
p = Process(target=fn, args=(arg1,))
The Process constructor accepts args and kwargs parameters, which are then passed to the process when it is executed.
The documentation is quite clear about this.
So your code should be modified to something like this:
def worker(ii):
    time.sleep(ii)
    print("Working")

def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = Process(target=fn, args=(2,))
        p.start()
        proc.append(p)
    for p in proc:
        p.join()

if __name__ == '__main__':
    start = time.time()
    runInParallel(worker, worker, worker, worker)
    print("Total time taken: ", time.time() - start)
Of course, the parameters can be different for each process; you need to arrange for the right one to be passed to each in args (or kwargs for keyword parameters).
This can be achieved by passing tuples such as runInParallel((worker, 2), (worker, 3), (worker, 5), (worker, 1)), for example, and then processing the tuples inside runInParallel.
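Here is one hedged sketch of what processing such tuples inside runInParallel could look like; the (function, args tuple, optional kwargs dict) layout is just one possible convention, not the only one:

import time
from multiprocessing import Process

def worker(ii, label="Working"):
    time.sleep(ii)
    print(label)

def runInParallel(*jobs):
    procs = []
    for job in jobs:
        # Each job is (function,), (function, args) or (function, args, kwargs).
        func = job[0]
        args = job[1] if len(job) > 1 else ()
        kwargs = job[2] if len(job) > 2 else {}
        p = Process(target=func, args=args, kwargs=kwargs)
        p.start()
        procs.append(p)
    for p in procs:
        p.join()

if __name__ == '__main__':
    start = time.time()
    runInParallel((worker, (2,)), (worker, (3,), {"label": "Still working"}))
    print("Total time taken: ", time.time() - start)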
I would like to separate the following code into two Python scripts: one in charge of updating myVar, and another one that receives the updated myVar.
import multiprocessing

def func(myVar):
    myVar = 2

if __name__ == "__main__":
    myVar = multiprocessing.Value('d', 2)  # update myVar from the child process
    p = multiprocessing.Process(target=func, args=(myVar,))
    p.start()
    p.join()
    print(myVar)  # get the updated myVar
Desired structure would be:
update_myVar.py
def func(myVar):
    myVar = 2

#if __name__ == "__main__":  # spawn child process for main.py's main()
myVar = multiprocessing.Value('d', 2)  # update myVar from the child process
p = multiprocessing.Process(target=func, args=(myVar,))
p.start()
p.join()
and main.py
if __name__ == "__main__":
print(myVar)
Thanks!
The end goal is to execute a method in the background, but not in parallel: when multiple objects call this method, each should wait its turn to proceed. To achieve running in the background, I have to run the method in a subprocess (not a thread), and I need to start it using spawn (not fork). To prevent parallel execution, the obvious solution is a global lock shared between processes.
When processes are forked, which is the default on Unix, this is easy to achieve, as shown in both of the following snippets.
We can share the lock as a class variable:
import multiprocessing as mp
from time import sleep

class OneAtATime:
    l = mp.Lock()

    def f(self):
        with self.l:
            sleep(1)
            print("Hello")

if __name__ == "__main__":
    a = OneAtATime()
    b = OneAtATime()
    p1 = mp.Process(target = a.f)
    p2 = mp.Process(target = b.f)
    p1.start()
    p2.start()
Or we can pass it to the method:
import multiprocessing as mp
from time import sleep

class OneAtATime:
    def f(self, l):
        with l:
            sleep(1)
            print("Hello")

if __name__ == "__main__":
    a = OneAtATime()
    b = OneAtATime()
    m = mp.Manager()
    l = mp.Lock()
    p1 = mp.Process(target = a.f, args = (l,))
    p2 = mp.Process(target = b.f, args = (l,))
    p1.start()
    p2.start()
Both of these snippets behave correctly, printing "Hello" at one-second intervals.
However, when the start method is changed to 'spawn', they break.
The first one (1) prints both "hello"s at the same time. This is because the internal state of a class is not pickled, so they do not have the same lock.
The second one (2) fails with FileNotFoundError at runtime. I think it has to do with the fact that locks cannot be pickled : see Python sharing a lock between processes.
In this answer, two fixes are suggested (side note: I cannot use a pool because I want to randomly create an arbitrary number of processes).
I haven't found a way to adapt the second fix, but I tried to implement the first one:
import multiprocessing as mp
from time import sleep

if __name__ == "__main__":
    mp.set_start_method('spawn')

class OneAtATime:
    def f(self, l):
        with l:
            sleep(1)
            print("Hello")

if __name__ == "__main__":
    a = OneAtATime()
    b = OneAtATime()
    m = mp.Manager()
    l = m.Lock()
    p1 = mp.Process(target = a.f, args = (l,))
    p2 = mp.Process(target = b.f, args = (l,))
    p1.start()
    p2.start()
This fails with AttributeError and FileNotFoundError (3). In fact it also fails (with BrokenPipeError) when the fork method is used (4).
What is the proper way of sharing a lock between spawned processes ?
A quick explanation of the four fails I numbered would be nice, too.
I'm running Python 3.6 under Archlinux.
Congratulations, you got yourself 90% of the way there. The last step is actually not very hard to do.
Yes, your final code block fails with an AttributeError, but what specifically is the error? "Can't get attribute 'OneAtATime' on <module '__main__' ...>". This is very similar to a problem you've already encountered - it's not pickling the class OneAtATime.
I made the following change and it worked as you'd like:
file oaat.py:
from time import sleep

class OneAtATime:
    def f(self, l):
        with l:
            sleep(1)
            print("Hello")
interactive shell:
import multiprocessing as mp
from oaat import OneAtATime

if __name__ == "__main__":
    mp.set_start_method('spawn')
    a = OneAtATime()
    b = OneAtATime()
    m = mp.Manager()
    l = m.Lock()
    p1 = mp.Process(target = a.f, args = (l,))
    p2 = mp.Process(target = b.f, args = (l,))
    p1.start()
    p2.start()
You may notice I didn't really do anything - just split your code into two separate files. Try it out, you'll see it works fine. (At least, it did for me, using Python 3.5 on Ubuntu.)
The last code snippet works, provided the script does not exit prematurely. Joining the processes is enough:
import multiprocessing as mp
from time import sleep

class OneAtATime:
    def f(self, l):
        with l:
            sleep(1)
            print("Hello")

if __name__ == "__main__":
    mp.set_start_method('spawn')
    a = OneAtATime()
    b = OneAtATime()
    m = mp.Manager()
    l = m.Lock()
    p1 = mp.Process(target = a.f, args = (l,))
    p2 = mp.Process(target = b.f, args = (l,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
More info on the error it was causing can be found here: https://stackoverflow.com/a/25456494/8194503.
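Since the question mentions creating an arbitrary number of processes, here is a hedged sketch of the same fix with N workers sharing one manager lock (the loop and the value of N are just for illustration):

import multiprocessing as mp
from time import sleep

class OneAtATime:
    def f(self, l):
        with l:
            sleep(1)
            print("Hello")

if __name__ == "__main__":
    mp.set_start_method('spawn')
    m = mp.Manager()
    l = m.Lock()  # manager lock: a picklable proxy that spawned children can use
    procs = [mp.Process(target=OneAtATime().f, args=(l,)) for _ in range(5)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()  # keep the parent (and its manager) alive until all children finish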
I know the basic usage of multiprocessing pools, and I use apply_async() to avoid blocking. My problem code looks like this:
from multiprocessing import Pool, Queue
import time

q = Queue(maxsize=20)
script = "my_path/my_exec_file"

def initQueue():
    ...

def test_func(queue):
    print 'Coming'
    while True:
        do_sth
        ...

if __name__ == '__main__':
    initQueue()
    pool = Pool(processes=3)
    for i in xrange(11, 20):
        result = pool.apply_async(test_func, (q,))
    pool.close()
    while True:
        if q.empty():
            print 'Queue is empty, quit'
            break
        print 'Main Process Listening'
        time.sleep(2)
The output is always 'Main Process Listening'; I never see the word 'Coming'.
The code above has no syntax errors and raises no exceptions.
Can anyone help? Thanks!