Not able to use pool.apply_async() - python

I'm just experimenting with Python's multiprocessing module, but the following code runs continuously without producing any output. What am I doing wrong? I have also tried pool.close() and pool.join(), but with no effect.
Here's what I have tried so far:
import multiprocessing as mp

cpu_count = mp.cpu_count()
pool = mp.Pool(processes = cpu_count)

def func(val):
    print "\nInside Function.....\n"
    return val*val

results = []
num = 1
while cpu_count >= 1:
    results.append(pool.apply_async(func, (num,)))
    cpu_count = cpu_count - 1
    num = num + 1

output = [p.get() for p in results]
print output

Wrap your code with
if __name__ == '__main__':
    ...
It's necessary if you are working under Windows, due to the way multiprocessing is implemented there.
The following code prints [1, 4, 9, 16] as you expect:
import multiprocessing as mp

def func(val):
    print "\nInside Function.....\n"
    return val*val

if __name__ == '__main__':
    cpu_count = mp.cpu_count()
    pool = mp.Pool(processes = cpu_count)
    results = []
    num = 1
    while cpu_count >= 1:
        results.append(pool.apply_async(func, (num,)))
        cpu_count = cpu_count - 1
        num = num + 1
    output = [p.get() for p in results]
    print output
    pool.close()
    pool.join()
Notice that the pool is closed before the program exits - simply letting the program terminate does not guarantee the pool is closed, and its worker processes can remain running.
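In Python 3 you can also let a with-block take care of this; a minimal sketch of the same loop written with the pool as a context manager (same func as above):
import multiprocessing as mp

def func(val):
    print("\nInside Function.....\n")
    return val * val

if __name__ == '__main__':
    cpu_count = mp.cpu_count()
    # leaving the with-block terminates the pool; results are already collected by then
    with mp.Pool(processes=cpu_count) as pool:
        results = [pool.apply_async(func, (num,)) for num in range(1, cpu_count + 1)]
        output = [p.get() for p in results]
    print(output)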

How to do a race between multiprocessors in python

I have a function that factors a number. It depends on some random condition.
So what I am trying to do is run this function in multiple processes, and the process that finds a factor first returns the value, at which point all the processes terminate.
What I have so far is very wrong. The processes are not terminating, and I also don't know how to get the value that was returned by the function.
flag = False

def rho(n, processor):
    while True:
        x = random.randrange(1, n-1)
        x2 = x
        gcd = 1
        c = random.randrange(1, n-1)
        while gcd == 1:
            x = (x**2 + c) % n
            x2 = (x2**2 + c) % n
            x2 = (x2**2 + c) % n
            gcd = math.gcd(abs(x - x2), n)
        if gcd != n:
            flag = True
            print("Factor was found from " + processor + " and is ", gcd)
            return gcd

if __name__ == "__main__":
    p1 = multiprocessing.Process(target=rho, args=(91, "process 1"))
    p2 = multiprocessing.Process(target=rho, args=(91, "process 2"))
    p1.start()
    p2.start()
    if flag:
        p1.terminate()
        p2.terminate()
The output is:
Factor was found from process 2 and is 13
Factor was found from process 1 and is 7
You can use multiprocessing.Pool and its methods map(), imap_unordered(), etc. These will also return the values from the worker functions.
Example (I used time.sleep() to simulate some time-intensive computation):
from time import sleep
from multiprocessing import Pool

def rho(params):
    n, processor = params
    # your computation here
    # ...
    sleep(n)
    print("Factor was found from " + processor + " and is 42")
    return 42

if __name__ == "__main__":
    with Pool() as pool:
        for result in pool.imap_unordered(
            rho, ((10, "process 1"), (1, "process 2"))
        ):
            print("Result I got:", result)
            break  # <-- I don't want other results, so break
Prints:
Factor was found from process 2 and is 42
Result I got: 42
EDIT: Two different functions:
from time import sleep
from multiprocessing import Pool

def fn1(n, p):
    sleep(n)
    print("Factor was found from " + p + " and is 42")
    return 42

def fn2(n, p):
    sleep(n)
    print("Factor was found from " + p + " and is 99")
    return 99

def rho(params):
    what_to_call, n, processor = params
    return what_to_call(n, processor)

if __name__ == "__main__":
    with Pool() as pool:
        for result in pool.imap_unordered(
            rho, ((fn1, 10, "process 1"), (fn2, 1, "process 2"))
        ):
            print("Result I got:", result)
            break  # <-- I don't want other results, so break
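As a side note on the original flag approach: a module-level variable set inside one process is not visible to the other process or to the parent, so it cannot stop the race. If you want to stay with plain Process objects, one possible sketch uses a multiprocessing.Event as the shared flag and a Queue for the result (the factoring loop is omitted and the names here are illustrative):
import multiprocessing

def rho(n, processor, found, results):
    # ... factoring loop goes here; poll found.is_set() to stop early ...
    gcd = 13  # placeholder for the factor that was found
    results.put((processor, gcd))
    found.set()  # tell everyone else the race is over

if __name__ == "__main__":
    found = multiprocessing.Event()
    results = multiprocessing.Queue()
    p1 = multiprocessing.Process(target=rho, args=(91, "process 1", found, results))
    p2 = multiprocessing.Process(target=rho, args=(91, "process 2", found, results))
    p1.start()
    p2.start()
    found.wait()                   # block until some process sets the event
    processor, gcd = results.get()
    print("Factor was found from " + processor + " and is", gcd)
    p1.terminate()
    p2.terminate()
    p1.join()
    p2.join()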

Implementing Numpy into nth factorial of n

I've tried a few methods of implementing numpy in my program below, but every way I've attempted it drastically decreases performance. Is this expected with my use case, or is there a way to implement it with a performance increase instead of a decrease?
import multiprocessing
import argparse
from datetime import datetime
from math import log10

parser = argparse.ArgumentParser(
    formatter_class=argparse.HelpFormatter,
    description="Calcs n factorial",
    usage=""
)
parser.add_argument("-n", "--number", type=int, default=2)
args = parser.parse_args()

def getlog(send_end, i, threads, num, n, inc):
    begin = datetime.now()
    start = num - inc*i
    end = num - inc*(i+1) if i < threads-1 else 0
    output = sum(map(log10, range(start, end, -n)))
    send_end.send(output)
    final = datetime.now()
    duration = final - begin
    print("{},{},{},{}".format(i, duration, start, end))

def main():
    n = args.number
    num = int('1'*n)
    threads = multiprocessing.cpu_count() if num/multiprocessing.cpu_count() > multiprocessing.cpu_count() else 1
    inc = int(num/threads)
    inc -= inc % n
    jobs = []
    pipe_list = []
    for i in range(threads):
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=getlog, args=(send_end, i, threads, num, n, inc))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    for proc in jobs:
        proc.join()
    e = sum([output.recv() for output in pipe_list])
    print('%.2fe%d' % (10**(e % 1), e // 1))

if __name__ == '__main__':
    start = datetime.now()
    main()
    end = datetime.now()
    print(end - start)
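If it helps, the part of getlog that numpy could in principle vectorize is the inner sum; a minimal, purely illustrative sketch of that one expression is below (whether it actually speeds things up depends on the range size, since the whole arange has to be materialized in memory):
import numpy as np

def getlog_sum(start, end, n):
    # same quantity as sum(map(log10, range(start, end, -n))),
    # but computed on a vector instead of a Python loop
    return np.log10(np.arange(start, end, -n, dtype=np.float64)).sum()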

python multiprocessing is not using multiple cores

Reading the documentation: https://docs.python.org/2/library/multiprocessing.html
I decided to write some CPU-intensive code and compare multiprocessing with serial computation.
First of all, if this library is using multiprocessing, then why do I see only one python.exe process?
Secondly, why does the serial computation take 12 seconds while the multiprocessed one takes 22 seconds?
serial code:
from datetime import datetime

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    for i in range(10):
        tmp = long_calc_fib(10000)
    t2 = datetime.now()
    print str(t2 - t1)
multiprocessing pool code:
from datetime import datetime
from multiprocessing.pool import ThreadPool

def calc_fib(ind):
    fb = 1
    if ind >= 3:
        prev = 1
        i = 2
        while i < ind:
            prev_tmp = fb
            fb += prev
            prev = prev_tmp
            i += 1
    return fb

def long_calc_fib(ind):
    val = 0
    for j in range(500):
        val = calc_fib(ind)
    return val

if __name__ == "__main__":
    t1 = datetime.now()
    pool = ThreadPool(processes=10)
    async_results = []
    for i in range(10):
        async_results.append(pool.apply_async(long_calc_fib, (10000,)))
    for res in async_results:
        tmp = res.get()
    t2 = datetime.now()
    print str(t2 - t1)
My mistake.
I should have used Pool instead of ThreadPool.
By changing ThreadPool to Pool, I reduced the time to 3 seconds.
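For completeness, this is roughly what the corrected script looks like; only the import and the pool class change (long_calc_fib is shortened to a stand-in here):
from multiprocessing import Pool  # instead of: from multiprocessing.pool import ThreadPool
from datetime import datetime

def long_calc_fib(ind):
    # stand-in for the Fibonacci loop above
    return sum(range(ind))

if __name__ == "__main__":
    t1 = datetime.now()
    pool = Pool(processes=10)  # separate processes, so the GIL no longer serializes the CPU-bound work
    async_results = [pool.apply_async(long_calc_fib, (10000,)) for i in range(10)]
    for res in async_results:
        tmp = res.get()
    pool.close()
    pool.join()
    print(datetime.now() - t1)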

How to access the result of a function called in a multiprocessing process?

I am running this code:
import random
import multiprocessing
import time

def list_append(count, id):
    out_list = []
    for i in range(count):
        out_list.append(random.random())
    return out_list

if __name__ == "__main__":
    t0 = time.clock()
    size = 10000000   # Number of random numbers to add
    procs = 2         # Number of processes to create
    jobs = []
    for i in range(0, procs):
        process = multiprocessing.Process(target=list_append, args=(size, i))
        jobs.append(process)
    # Start the processes (i.e. calculate the random number lists)
    res = []
    for j in jobs:
        r = j.start()
        res.append(r)
    # Ensure all of the processes have finished
    for j in jobs:
        j.join()
    print "List processing complete."
    print time.clock()-t0, "seconds"
Unfortunately, at the end of it res = [None, None], although I want it to be filled with the lists built inside list_append.
You need to use data structures that can be shared between processes:
def list_append(count, id, res):
    #                      ^^^
    out_list = []
    for i in range(count):
        out_list.append(random.random())
    res[id] = out_list  # <------

if __name__ == "__main__":
    size = 10000000
    procs = 2
    manager = multiprocessing.Manager()  # <---
    res = manager.dict()                 # <---
    jobs = []
    for i in range(0, procs):
        process = multiprocessing.Process(target=list_append, args=(size, i, res))
        #                                                                    ^^^^
        jobs.append(process)
    for j in jobs:
        j.start()
    for j in jobs:
        j.join()
    print "List processing complete."
    # now `res` will contain results
As avenet commented, using multiprocessing.Pool will be simpler:
def list_append(args):
    count, id = args
    out_list = []
    for i in range(count):
        out_list.append(random.random())
    return out_list

if __name__ == "__main__":
    size = 10000000
    procs = 2
    pool = multiprocessing.Pool(procs)
    res = pool.map(list_append, [(size, i) for i in range(procs)])
    pool.close()
    pool.join()
    print "List processing complete."
    # print res
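On Python 3 you can also keep the original two-argument signature and let pool.starmap unpack each tuple into positional arguments; a minimal sketch:
import random
import multiprocessing

def list_append(count, id):
    out_list = []
    for i in range(count):
        out_list.append(random.random())
    return out_list

if __name__ == "__main__":
    size = 10000000
    procs = 2
    with multiprocessing.Pool(procs) as pool:
        # each (size, i) tuple is unpacked into list_append(size, i)
        res = pool.starmap(list_append, [(size, i) for i in range(procs)])
    print("List processing complete.")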

python : multiprocessing management

With the multiprocessing Python library I can launch multiple processes, like
import multiprocessing as mu

def worker(n):
    print "worker:", n
    n = int(1e4)
    for i in range(n):
        for j in range(n):
            i*j
    return

if __name__ == '__main__':
    jobs = []
    for i in range(5):
        p = mu.Process(target=worker, args=(i,))
        jobs.append(p)
        p.start()
and I can get the number of processors (CPU cores) with
np = mu.cpu_count()
but if I have a list of processes, how can I launch them without overloading the processor?
If I have a quad core, how can I launch the first 4 processes, and then launch another each time one finishes?
References
http://pymotw.com/2/multiprocessing/basics.html
I would suggest sidestepping the problem and using multiprocessing.Pool (example, api).
(modified from the example in the docs)
import multiprocessing
from multiprocessing import Pool

def f(x):
    return x*x

if __name__ == '__main__':
    num_proc = multiprocessing.cpu_count()
    pool = Pool(processes=num_proc)
    res = pool.map(f, range(10))
Alternatively, you can set up a producer/consumer scheme and have a fixed number of long-running sub-processes (a minimal sketch follows).
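For example, something along these lines, where the workers pull tasks from a shared queue until they see a None sentinel (the squaring is just placeholder work):
import multiprocessing as mu

def consumer(task_queue, result_queue):
    # pull tasks until a None sentinel arrives
    for n in iter(task_queue.get, None):
        result_queue.put(n * n)  # placeholder work

if __name__ == '__main__':
    tasks = mu.Queue()
    results = mu.Queue()
    np = mu.cpu_count()
    workers = [mu.Process(target=consumer, args=(tasks, results)) for _ in range(np)]
    for w in workers:
        w.start()
    for n in range(15):
        tasks.put(n)
    for _ in range(np):
        tasks.put(None)  # one sentinel per worker, so every worker eventually exits
    print([results.get() for _ in range(15)])
    for w in workers:
        w.join()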
A third, really quick and dirty, way is to use a mu.Queue. Note that get blocks until it gets a result back.
import multiprocessing as mu
import time

res = mu.Queue()

def worker(n):
    print "worker:", n
    time.sleep(1)
    res.put(n)
    return

if __name__ == '__main__':
    jobs = []
    np = mu.cpu_count()
    print np
    # start first round
    for j in range(np):
        p = mu.Process(target=worker, args=(j,))
        jobs.append(p)
        p.start()
    # every time one finishes, start the next one
    for i in range(np, 15):
        r = res.get()
        print 'res', r
        p = mu.Process(target=worker, args=(i,))
        jobs.append(p)
        p.start()
    # get the remaining processes
    for j in range(np):
        r = res.get()
        print 'res', r
I came up with this solution:
import multiprocessing as mu

def worker(n):
    print "worker:", n
    n = int(1e4/2)
    for i in range(n):
        for j in range(n):
            i*j
    return

if __name__ == '__main__':
    jobs = []
    for i in range(5):
        p = mu.Process(target=worker, args=(i,))
        jobs.append(p)
    running = []
    np = mu.cpu_count()
    for i in range(np):
        p = jobs.pop()
        running.append(p)
        p.start()
    while jobs != []:
        for r in running:
            if r.exitcode == 0:
                running.remove(r)
                p = jobs.pop()
                p.start()
                running.append(p)
