Suppose we have some processes spawned using pool.apply_async(). How can one stop all the other processes when any one of them returns a value?
Also, is this the right way to measure the running time of an algorithm?
Here's the sample code:
import timeit
import multiprocessing as mp
data = range(1,200000)
def func(search):
    for val in data:
        if val >= search:
            # Doing something such that other processes stop ????
            return val * val

if __name__ == "__main__":
    cpu_count = mp.cpu_count()
    pool = mp.Pool(processes=cpu_count)
    output = []
    start = timeit.default_timer()
    results = []
    while cpu_count >= 1:
        results.append(pool.apply_async(func, (150000,)))
        cpu_count = cpu_count - 1
    output = [p.get() for p in results]
    stop = timeit.default_timer()
    print output
    pool.close()
    pool.join()
    print "Running Time : " + str(stop - start) + " seconds"
I've never done this, but the Python docs seem to give an idea of how it should be done.
Refer: https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Process.terminate
In your snippet, I would do this:
while cpu_count >= 1:
    if len(results) > 0:
        pool.terminate()
        pool.close()
        break
    results.append(pool.apply_async(func, (150000,)))
    cpu_count = cpu_count - 1
Your timing method also seems okay. I would use time.time() at the start and the stop and then print the difference, but only because that's what I'm used to.
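Building on that idea, here is a minimal, untested sketch of one way to stop the remaining workers as soon as any task returns: let an apply_async callback (which runs in the parent process) record the first result and set a threading.Event, and have the parent terminate the pool once the event fires. The names on_result, done and first_result are my own, not from the original code:
import multiprocessing as mp
import threading

data = range(1, 200000)

def func(search):
    for val in data:
        if val >= search:
            return val * val

if __name__ == "__main__":
    pool = mp.Pool(processes=mp.cpu_count())
    first_result = []
    done = threading.Event()

    def on_result(result):
        # apply_async callbacks run in the parent process; remember the
        # first result and signal the main thread to stop the pool.
        if not first_result:
            first_result.append(result)
            done.set()

    for _ in range(mp.cpu_count()):
        pool.apply_async(func, (150000,), callback=on_result)

    done.wait()       # block until any one task has returned
    pool.terminate()  # kill the remaining workers
    pool.join()
    print first_result[0]
Note that if a worker raises an exception the callback never fires, so a real version would want a timeout on done.wait().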
Related
I tried to parallelize a linear search in Python using the multiprocessing module, creating two processes. However, it takes even more time than the single-process version. The code is as follows:
import multiprocessing
import math
import numpy as np
import os
import time
import random
from time import sleep, time
#from threading import Thread
def linearsearch(arr, search_var):
    count = 0
    index_list = []
    for i in range(len(arr)):
        if arr[i] == search_var:
            count += 1
            index_list.append(i)

def linearsearch_p1(arr, search_var):
    count = 0
    index_list = []
    for i in range(0, round(len(arr)/2)):
        if arr[i] == search_var:
            count += 1
            index_list.append(i)

def linearsearch_p2(arr, search_var):
    count = 0
    index_list = []
    for i in range(round(len(arr)/2), len(arr)):
        if arr[i] == search_var:
            count += 1
            index_list.append(i)
number_to_search=5
input_array=[random.randint(1,10) for i in range(10000)]
array_len=len(input_array)
start=time()
#print(input_array)
linearsearch(input_array,number_to_search)
finish=time()
print(f'Time taken by Single Process Linear Search {finish-start: .2f} second(s) to finish')
start_time = time()
t1 = multiprocessing.Process(target=linearsearch_p1, args=(input_array,number_to_search))
t2 = multiprocessing.Process(target=linearsearch_p2, args=(input_array,number_to_search))
t1.start()
t2.start()
t1.join()
t2.join()
end_time = time()
print(f'It took {end_time- start_time: 0.2f} second(s) to complete.')
The output of the above code on my PC is:
Time taken by Single Process Linear Search 0.00 second(s) to finish
It took 0.17 second(s) to complete.
So 0.17 seconds is the time using the multiprocessing module, which is more than the 0.00 seconds of the single-process run. But it should not be. Can anyone explain this behavior?
Below is how I would code your benchmark. I have purposely not included the time to create the child processes in the total multiprocessing time, and I am only passing half the list to each process, to demonstrate that even by doing that and searching larger lists, the multiprocessing version will be less performant than a simple loop. The reason is that the large overhead of passing the list data to the child processes cannot be overcome by performing the searches in parallel with such a trivial worker function, linearsearch_p2:
import multiprocessing
import random
from time import time

NUMBER_TO_SEARCH = 5
ARRAY_LEN = 1_000_000
HALF_ARRAY_LEN = ARRAY_LEN // 2

def linearsearch(arr):
    count = 0
    for i in range(ARRAY_LEN):
        if arr[i] == NUMBER_TO_SEARCH:
            count += 1
    return count

def linearsearch_p2(arr, result_queue):
    count = 0
    for i in range(HALF_ARRAY_LEN):
        if arr[i] == NUMBER_TO_SEARCH:
            count += 1
    result_queue.put(count)

def main():
    input_array = [random.randint(1, 10) for i in range(ARRAY_LEN)]

    start = time()
    count = linearsearch(input_array)
    finish = time()
    print(f'Count = {count}, Time taken by Single Process Linear Search {finish-start: .2f} second(s) to finish')

    # The queue must exist before the Process objects that use it are created:
    result_queue = multiprocessing.Queue()
    p1 = multiprocessing.Process(target=linearsearch_p2, args=(input_array[0:HALF_ARRAY_LEN], result_queue))
    p2 = multiprocessing.Process(target=linearsearch_p2, args=(input_array[HALF_ARRAY_LEN:ARRAY_LEN], result_queue))

    start_time = time()
    p1.start()
    p2.start()
    count = result_queue.get() + result_queue.get()
    p1.join()
    p2.join()
    end_time = time()
    print(f'Count = {count}, It took {end_time - start_time: 0.2f} second(s) to complete.')

if __name__ == '__main__':
    main()
Prints:
Count = 99757, Time taken by Single Process Linear Search 0.05 second(s) to finish
Count = 99757, It took 0.24 second(s) to complete.
But if the cost of comparing elements were much higher, so that the search functions required significantly more CPU, multiprocessing would become more performant. Here I have made the searching more expensive by repeating it REPEAT times:
import multiprocessing
import random
from time import time

NUMBER_TO_SEARCH = 5
ARRAY_LEN = 1_000_000
HALF_ARRAY_LEN = ARRAY_LEN // 2
REPEAT = 50

def linearsearch(arr):
    for _ in range(REPEAT):
        count = 0
        for i in range(ARRAY_LEN):
            if arr[i] == NUMBER_TO_SEARCH:
                count += 1
    return count

def linearsearch_p2(arr, result_queue):
    for _ in range(REPEAT):
        count = 0
        for i in range(HALF_ARRAY_LEN):
            if arr[i] == NUMBER_TO_SEARCH:
                count += 1
    result_queue.put(count)

def main():
    input_array = [random.randint(1, 10) for i in range(ARRAY_LEN)]

    start = time()
    count = linearsearch(input_array)
    finish = time()
    print(f'Count = {count}, Time taken by Single Process Linear Search {finish-start: .2f} second(s) to finish')

    start_time = time()
    result_queue = multiprocessing.Queue()
    p1 = multiprocessing.Process(target=linearsearch_p2, args=(input_array[0:HALF_ARRAY_LEN], result_queue))
    p2 = multiprocessing.Process(target=linearsearch_p2, args=(input_array[HALF_ARRAY_LEN:ARRAY_LEN], result_queue))
    p1.start()
    p2.start()
    count = result_queue.get() + result_queue.get()
    p1.join()
    p2.join()
    end_time = time()
    print(f'Count = {count}, It took {end_time - start_time: 0.2f} second(s) to complete.')

if __name__ == '__main__':
    main()
Prints:
Count = 99726, Time taken by Single Process Linear Search 2.96 second(s) to finish
Count = 99726, It took 2.08 second(s) to complete.
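For what it's worth, the same two-way split can also be written with multiprocessing.Pool, which hides the Queue plumbing. This is a rough sketch of my own (the helper count_occurrences is not from the original code), and the same caveat applies: the cost of pickling the list halves to the worker processes still dominates such a trivial comparison loop:
import multiprocessing
import random
from functools import partial

NUMBER_TO_SEARCH = 5
ARRAY_LEN = 1_000_000

def count_occurrences(chunk, target):
    # Trivial worker: count how many elements of the chunk equal the target.
    return sum(1 for x in chunk if x == target)

def main():
    input_array = [random.randint(1, 10) for _ in range(ARRAY_LEN)]
    half = ARRAY_LEN // 2
    chunks = [input_array[:half], input_array[half:]]
    with multiprocessing.Pool(2) as pool:
        # Each chunk is pickled and sent to a worker; partial counts come back.
        counts = pool.map(partial(count_occurrences, target=NUMBER_TO_SEARCH), chunks)
    print('Count =', sum(counts))

if __name__ == '__main__':
    main()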
In the main function, I am spawning a process to run the imp_workload() method in parallel for each DP_WORKLOAD:
#!/usr/bin/env python
import multiprocessing
import subprocess
if __name__ == "__main__":
for DP_WORKLOAD in DP_WORKLOAD_NAME:
p1 = multiprocessing.Process(target=imp_workload, args=(DP_WORKLOAD, DP_DURATION_SECONDS, DP_CONCURRENCY, ))
p1.start()
However, inside this imp_workload() method, I need the import_command_run() method to run a number of processes (the number is given by the variable DP_CONCURRENCY), but with a sleep of 60 seconds before each new execution.
This is the sample code I have written.
def imp_workload(DP_WORKLOAD, DP_DURATION_SECONDS, DP_CONCURRENCY):
    while DP_DURATION_SECONDS > 0:
        pool = multiprocessing.Pool(processes = DP_CONCURRENCY)
        for j in range(DP_CONCURRENCY):
            pool.apply_async(import_command_run, args=(DP_WORKLOAD, dp_workload_cmd, j,)
            # Sleep for 1 minute
            time.sleep(60)
        pool.close()
        # Clean the schemas after import is completed
        clean_schema(DP_WORKLOAD)
        # Sleep for 1 minute
        time.sleep(60)

def import_command_run(DP_WORKLOAD):
    abccmd = 'impdp admin/DP_PDB_ADMIN_PASSWORD#DP_PDB_FULL_NAME SCHEMAS=ABC'
    defcmd = 'impdp admin/DP_PDB_ADMIN_PASSWORD#DP_PDB_FULL_NAME SCHEMAS=DEF'
    # any of the above commands
    run_imp_cmd(eval(dp_workload_cmd))

def run_imp_cmd(cmd):
    output = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    stdout, stderr = output.communicate()
    return stdout
When I tried running it in this format, I got the following error:
time.sleep(60)
^
SyntaxError: invalid syntax
So, how can I kick off the 'abccmd' job DP_CONCURRENCY times in parallel, with a sleep of 1 minute between each job, while each of these pools also runs in its own process?
I'm working on Python 2.7.5 (due to restrictions I can't use Python 3.x, so I'd appreciate answers specific to Python 2.x).
P.S. This is a very large and complex script, so I have posted only the relevant excerpts. Please ask for more details if necessary (or if anything is unclear from this much).
Let me offer two possibilities:
Possibility 1
Here is an example of how you could kick off a worker function in parallel with DP_CURRENCY == 4 possible arguments (0, 1, 2 and 3), cycling over and over for up to DP_DURATION_SECONDS seconds with a pool size of DP_CURRENCY. As soon as a job completes it is restarted, but with the guarantee that at least TIME_BETWEEN_SUBMITS == 60 seconds have elapsed between successive restarts of the same argument.
from __future__ import print_function
from multiprocessing import Pool
import time
try:
    from queue import SimpleQueue       # Python 3.7+
except ImportError:
    from Queue import Queue as SimpleQueue  # Python 2.x fallback

TIME_BETWEEN_SUBMITS = 60

def worker(i):
    print(i, 'started at', time.time())
    time.sleep(40)
    print(i, 'ended at', time.time())
    return i # the argument

def main():
    q = SimpleQueue()

    def callback(result):
        # every time a job finishes, put result (the argument) on the queue
        q.put(result)

    DP_CURRENCY = 4
    DP_DURATION_SECONDS = TIME_BETWEEN_SUBMITS * 10
    pool = Pool(DP_CURRENCY)
    t = time.time()
    expiration = t + DP_DURATION_SECONDS
    # kick off initial tasks:
    start_times = [None] * DP_CURRENCY
    for i in range(DP_CURRENCY):
        pool.apply_async(worker, args=(i,), callback=callback)
        start_times[i] = time.time()
    while True:
        i = q.get() # wait for a job to complete
        t = time.time()
        if t >= expiration:
            break
        time_to_wait = TIME_BETWEEN_SUBMITS - (t - start_times[i])
        if time_to_wait > 0:
            time.sleep(time_to_wait)
        pool.apply_async(worker, args=(i,), callback=callback)
        start_times[i] = time.time()
    # wait for all jobs to complete:
    pool.close()
    pool.join()

# required by Windows:
if __name__ == '__main__':
    main()
Possibility 2
This is closer to what you had, in that TIME_BETWEEN_SUBMITS == 60 seconds of sleeping is done between the successive submission of any two jobs. But to me this doesn't make as much sense. If, for example, the worker function only took 50 seconds to complete, you would not be doing any parallel processing at all. In fact, each job would need to take at least 180 seconds (i.e. (DP_CURRENCY - 1) * TIME_BETWEEN_SUBMITS) to complete in order to have all 4 processes in the pool busy running jobs at the same time.
from __future__ import print_function
from multiprocessing import Pool
import time
try:
    from queue import SimpleQueue       # Python 3.7+
except ImportError:
    from Queue import Queue as SimpleQueue  # Python 2.x fallback

TIME_BETWEEN_SUBMITS = 60

def worker(i):
    print(i, 'started at', time.time())
    # A task must take at least 180 seconds to run to have 4 tasks running in parallel if
    # you wait 60 seconds between starting each successive task:
    # take 182 seconds to run
    time.sleep(3 * TIME_BETWEEN_SUBMITS + 2)
    print(i, 'ended at', time.time())
    return i # the argument

def main():
    q = SimpleQueue()

    def callback(result):
        # every time a job finishes, put result (the argument) on the queue
        q.put(result)

    # at most 4 tasks at a time but only if worker takes at least 3 * TIME_BETWEEN_SUBMITS
    DP_CURRENCY = 4
    DP_DURATION_SECONDS = TIME_BETWEEN_SUBMITS * 10
    pool = Pool(DP_CURRENCY)
    t = time.time()
    expiration = t + DP_DURATION_SECONDS
    # kick off initial tasks:
    for i in range(DP_CURRENCY):
        if i != 0:
            time.sleep(TIME_BETWEEN_SUBMITS)
        pool.apply_async(worker, args=(i,), callback=callback)
    time_last_job_submitted = time.time()
    while True:
        i = q.get() # wait for a job to complete
        t = time.time()
        if t >= expiration:
            break
        time_to_wait = TIME_BETWEEN_SUBMITS - (t - time_last_job_submitted)
        if time_to_wait > 0:
            time.sleep(time_to_wait)
        pool.apply_async(worker, args=(i,), callback=callback)
        time_last_job_submitted = time.time()
    # wait for all jobs to complete:
    pool.close()
    pool.join()

# required by Windows:
if __name__ == '__main__':
    main()
import random
import time
import multiprocessing
import sys
start = time.time()
numbers1 = []
def NumGenerator(NumbersArray):
    while NumberCheck(NumbersArray):
        number = random.randint(0, 100)
        NumbersArray.append(number)
    end = time.time()
    print(end - start)
    print('average is: ' + str(sum(NumbersArray) / len(NumbersArray)))
    print(str(NumbersArray).replace("[", "").replace("]", ""))
    sys.exit()

def NumberCheck(NumbersArray):
    # Checks if the average of the array is 50
    if NumbersArray:
        if sum(NumbersArray) / len(NumbersArray) != 50:
            return True
        else:
            return False
    else:
        return True
process1 = multiprocessing.Process(target=NumGenerator, args=(numbers1,))
process2 = multiprocessing.Process(target=NumGenerator, args=(numbers1,))
process3 = multiprocessing.Process(target=NumGenerator, args=(numbers1,))
process4 = multiprocessing.Process(target=NumGenerator, args=(numbers1,))
process1.start()
process2.start()
process3.start()
process4.start()
process1.join()
process2.join()
process3.join()
process4.join()
This is supposed to run on 4 threads, generating random numbers between 0 and 100 and appending them to a list until the average of that list is 50. Currently it does the second part, but on just one CPU core.
Try multiprocessing.pool's ThreadPool.
It follows an API similar to multiprocessing.Pool. Import it with from multiprocessing.pool import ThreadPool.
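For illustration, here is a minimal sketch of the ThreadPool API (generic, not your generator logic):
from multiprocessing.pool import ThreadPool

def work(i):
    return i * i

if __name__ == '__main__':
    pool = ThreadPool(4)               # same interface shape as multiprocessing.Pool
    print(pool.map(work, range(10)))   # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
    pool.close()
    pool.join()
For your code you could then do something like pool.map(NumGenerator, [numbers1] * 4), after dropping the sys.exit() call and simply returning from the function instead: since threads share memory, all four workers append to the same list (unlike the Process version, where each child gets its own copy of numbers1), although the GIL means the pure-Python loops still won't run on several cores at once.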
I am having a problem when multithreading and using queues in Python 2.7. I want the code with threads to take about half as long as the one without, but I think I'm doing something wrong. I am using a simple iterative Fibonacci computation to best show the problem.
Here is the code without threads and queues. It printed 19.9190001488 seconds as its execution time.
import time
start_time = time.time()
def fibonacci(priority, num):
    if num == 1 or num == 2:
        return 1
    a = 1
    b = 1
    for i in range(num-2):
        c = a + b
        b = a
        a = c
    return c
print fibonacci(0, 200000)
print fibonacci(1, 100)
print fibonacci(2, 200000)
print fibonacci(3, 2)
print("%s seconds" % (time.time() - start_time))
Here is the code with threads and queues. It printed 21.7269999981 seconds as its execution time.
import time
start_time = time.time()
from Queue import *
from threading import *
numbers = [200000,100,200000,2]
q = PriorityQueue()
threads = []
def fibonacci(priority, num):
    if num == 1 or num == 2:
        q.put((priority, 1))
        return
    a = 1
    b = 1
    for i in range(num-2):
        c = a + b
        b = a
        a = c
    q.put((priority, c))
    return

for i in range(4):
    priority = i
    num = numbers[i]
    t = Thread(target=fibonacci, args=(priority, num))
    threads.append(t)

#print threads

for t in threads:
    t.start()

for t in threads:
    t.join()

while not q.empty():
    ans = q.get()
    q.task_done()
    print ans[1]
print("%s seconds" % (time.time() - start_time))
What I thought would happen is that the multithreaded code would take about half as long as the code without threads. Essentially, I thought all the threads would work at the same time, so the two threads calculating the Fibonacci number at 200,000 would finish at the same time, making execution roughly twice as fast as the code without threads. Apparently that's not what happened. Am I doing something wrong? I just want to execute all threads at the same time, print the results in the order the threads were started, and have the total execution time be roughly that of the longest-running thread.
EDIT:
I updated my code to use processes, but now the results aren't being printed. Only an execution time of 0.163000106812 seconds is showing. Here is the new code:
import time
start_time = time.time()
from Queue import *
from multiprocessing import *
numbers = [200000,100,200000,2]
q = PriorityQueue()
processes = []
def fibonacci(priority, num):
    if num == 1 or num == 2:
        q.put((priority, 1))
        return
    a = 1
    b = 1
    for i in range(num-2):
        c = a + b
        b = a
        a = c
    q.put((priority, c))
    return

for i in range(4):
    priority = i
    num = numbers[i]
    p = Process(target=fibonacci, args=(priority, num))
    processes.append(p)

#print processes

for p in processes:
    p.start()

for p in processes:
    p.join()

while not q.empty():
    ans = q.get()
    q.task_done()
    print ans[1]
print("%s seconds" % (time.time() - start_time))
You've run into one of the basic limiting factors of the CPython implementation: the Global Interpreter Lock, or GIL. Effectively this serializes your program; your threads take turns executing, with one thread owning the GIL while the others wait for it to come free.
One solution would be to use separate processes. Each process has its own GIL, so they can execute in parallel. Probably the easiest way to do this is to use Python's multiprocessing module as a replacement for the threading module.
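As an untested sketch of that suggestion applied to your benchmark: note that multiprocessing.Queue, unlike Queue.PriorityQueue, is shared between the parent and the child processes (a likely reason your edited version printed nothing is that each child put its results onto its own private copy of the PriorityQueue). multiprocessing has no priority queue, so this sketch simply sorts the collected results by priority at the end:
import time
from multiprocessing import Process, Queue

start_time = time.time()

def fibonacci(priority, num, q):
    # Iterative Fibonacci; put (priority, result) on the shared queue.
    a, b = 1, 1
    for _ in range(num - 2):
        a, b = a + b, a
    q.put((priority, a))

if __name__ == '__main__':
    numbers = [200000, 100, 200000, 2]
    q = Queue()    # shared between the parent and the child processes
    processes = [Process(target=fibonacci, args=(i, numbers[i], q))
                 for i in range(4)]
    for p in processes:
        p.start()
    # Drain the queue before joining so a child blocked on a large result can exit.
    results = [q.get() for _ in processes]
    for p in processes:
        p.join()
    for priority, value in sorted(results):
        print value
    print("%s seconds" % (time.time() - start_time))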
I have the following sample code that I am trying to use the multiprocessing module on. This setup had been working previously in other applications, but now one process (which receives a very small amount of data simply because of how the data is split up) finishes first and causes the program to finish. Could someone help me understand why this is not waiting for the others?
def mpProcessor(basePath, jsonData, num_procs = mp.cpu_count()):
    manager = mp.Manager()
    map = manager.dict()
    procs = mp.Pool(processes = num_procs, maxtasksperchild = 1)

    chunkSize = len(jsonData) / (num_procs)
    dataChunk = [(i, i + chunkSize) for i in range(0, len(jsonData), chunkSize)]

    count = 1
    for i in dataChunk:
        print 'test'
        s, e = i
        procs.apply_async(processJSON, args = (count, basePath, jsonData[s:e]))
        count += 1

    procs.close()
    procs.join()

    return map
def processJSON(proc, basePath, records):
    print 'Spawning new process: %d' % os.getpid()
    outDict = dict()
    print len(records)
    for i in range(len(records)):
        valid = False
        idx = 0
        while valid == False:
            jsonObject = json.loads(records[i][1])['results'][idx]
            if jsonObject['kind'] == 'song':
                valid = True
                break
            else:
                idx += 1
        tunesTrack = Track()
        tunesTrack.setTrackId(jsonObject['trackId'])
    print 'Finished processing %d records with process %d' % (len(records), os.getpid())
You seem to be reinventing the wheel.
What you are trying to do could be much more easily achieved by using an initializer with the pool and using map rather than apply_async. As it stands your code snippet is not runnable so I can't be sure what the actual problem is. However, the following should simplify your code and make it easier to debug.
import json
import multiprocessing as mp
import os

def pool_init(basePath_):
    global basePath, job_count
    basePath = basePath_
    job_count = 0
    print 'Spawning new process: %d' % os.getpid()

def mpProcessor(basePath, jsonData, num_procs=mp.cpu_count()):
    pool = mp.Pool(processes=num_procs, initializer=pool_init, initargs=(basePath,))
    # could specify a chunksize, but multiprocessing works out the optimal chunksize
    return pool.map(processJSON, jsonData)

# change processJSON to work with single records and
# remove proc and basePath args (as not needed)
def processJSON(record):
    global job_count
    print 'Starting job %d in process: %d' % (job_count, os.getpid())
    valid = False
    idx = 0
    while valid == False:
        jsonObject = json.loads(record[1])['results'][idx]
        if jsonObject['kind'] == 'song':
            valid = True
            break
        else:
            idx += 1
    tunesTrack = Track()
    tunesTrack.setTrackId(jsonObject['trackId'])
    print 'Finished processing job %d with process %d' % (job_count, os.getpid())
    job_count += 1
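A hypothetical call site might look like the following; load_records and the paths are placeholders of mine, not part of the original code. Since pool.map returns one entry per input record, the length of its result equals the number of records processed:
if __name__ == '__main__':
    # load_records is a placeholder for however you load your (id, json_string) records
    jsonData = load_records('/path/to/records')
    results = mpProcessor('/some/base/path', jsonData)
    print 'Processed %d records' % len(results)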