Print progress of pool.map_async - python

I have the following function
import itertools
from multiprocessing import Pool

def do_comparison(tupl):
    x, y = tupl  # unpack arguments
    return compare_clusters(x, y)

def distance_matrix(clusters, condensed=False):
    pool = Pool()
    values = pool.map_async(do_comparison, itertools.combinations(clusters, 2)).get()
    # ... do stuff with values
Is it possible to print the progress of pool.map_async(do_comparison, itertools.combinations(clusters, 2)).get()?
I tried it by adding a count to do_comparison like so
count = 0

def do_comparison(tupl):
    global count
    count += 1
    if count % 1000 == 0:
        print(count)
    x, y = tupl  # unpack arguments
    return compare_clusters(x, y)
But aside from it not looking like a good solution, the numbers don't print until the end of the script. Is there a good way to do this?

I track progress as follows:
import multiprocessing
import time

class PoolProgress:
    def __init__(self, pool, update_interval=3):
        self.pool = pool
        self.update_interval = update_interval

    def track(self, job):
        task = self.pool._cache[job._job]
        while task._number_left > 0:
            print("Tasks remaining = {0}".format(task._number_left * task._chunksize))
            time.sleep(self.update_interval)

def hi(x):  #This must be defined before `p` if we are to use in the interpreter
    time.sleep(x//2)
    return x

a = list(range(50))

p = multiprocessing.Pool()
pp = PoolProgress(p)

res = p.map_async(hi, a)

pp.track(res)

The solution from Richard works well with a low number of jobs, but for some reason it seems to freeze with a very high number of jobs. I found it best to use:
import multiprocessing
import time

def track_job(job, update_interval=3):
    while job._number_left > 0:
        print("Tasks remaining = {0}".format(
            job._number_left * job._chunksize))
        time.sleep(update_interval)

def hi(x):  #This must be defined before `p` if we are to use in the interpreter
    time.sleep(x//2)
    return x

a = [x for x in range(50)]

p = multiprocessing.Pool()

res = p.map_async(hi, a)

track_job(res)
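Both answers above poll private pool attributes (_cache, _number_left, _chunksize), which may change between Python versions. As a hedged alternative sketch that uses only the public API, you can iterate imap_unordered and count completed tasks yourself (this assumes the worker function, here the same hi as above, is defined at module level so it can be pickled):

import multiprocessing
import time

def hi(x):
    time.sleep(x // 2)
    return x

if __name__ == '__main__':
    a = list(range(50))
    with multiprocessing.Pool() as pool:
        results = []
        # imap_unordered yields results as they finish, so progress can be reported live.
        for n_done, value in enumerate(pool.imap_unordered(hi, a), 1):
            results.append(value)
            if n_done % 10 == 0 or n_done == len(a):
                print("{0}/{1} tasks done".format(n_done, len(a)))

Note that results arrive in completion order here; if you need them in input order, use plain imap instead.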

Related

Using multiprocessing to double the speed of working on a list

Let's say I have a list like this:
list_base = ['a','b','c','d']
If I use for xxx in list_base:, the loop processes the list one value at a time. To double the speed of this work, I'm creating a list of pairs so that two values are handled at once, and calling multiprocessing.
Basic example
Code 1 (main_code.py):
import api_values

if __name__ == '__main__':
    list_base = ['a','b','c','d']
    api_values.main(list_base)
Code 2 (api_values.py):
import multiprocessing
import datetime

def add_hour(x):
    return str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M')

def main(list_base):
    a = list_base
    a_pairs = [a[i:i+2] for i in range(0, len(a)-1, 2)]
    if (len(a) % 2) != 0:
        a_pairs.append([a[-1]])
    final_list = []
    for a, b in a_pairs:
        mp_1 = multiprocessing.Process(target=add_hour, args=(a,))
        mp_2 = multiprocessing.Process(target=add_hour, args=(b,))
        mp_1.start()
        mp_2.start()
        mp_1.join()
        mp_2.join()
        final_list.append(mp_1)
        final_list.append(mp_2)
    print(final_list)
When I print final_list, it contains values like this:
[
<Process name='Process-1' pid=9564 parent=19136 stopped exitcode=0>,
<Process name='Process-2' pid=5400 parent=19136 stopped exitcode=0>,
<Process name='Process-3' pid=13396 parent=19136 stopped exitcode=0>,
<Process name='Process-4' pid=5132 parent=19136 stopped exitcode=0>
]
I couldn't get to the return values I want from calling the add_hour(x) function.
I found some answers in this question:
How can I recover the return value of a function passed to multiprocessing.Process?
But I couldn't adapt it to my scenario, where I need the multiprocessing inside a function rather than directly under if __name__ == '__main__':
When I try to use it, it always generates errors related to where the code has to sit in the structure. I would like some help to see how to use it for my need.
Note:
These are basic examples; my real use is to extract data from an API that allows a maximum of two simultaneous calls.
Additional code:
Following @Timus's comment (You might want to look into a Pool and .apply_async), I came up with the code below. It seems to work, but I don't know if it is reliable, whether any improvement is necessary, or whether this option is the best one; feel free to address that in an answer:
import multiprocessing
import datetime

final_list = []

def foo_pool(x):
    return str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')

def log_result(result):
    final_list.append(result)

def main(list_base):
    pool = multiprocessing.Pool()
    a = list_base
    a_pairs = [a[i:i+2] for i in range(0, len(a)-1, 2)]
    if (len(a) % 2) != 0:
        a_pairs.append([a[-1]])
    for a, b in a_pairs:
        pool.apply_async(foo_pool, args=(a,), callback=log_result)
        pool.apply_async(foo_pool, args=(b,), callback=log_result)
    pool.close()
    pool.join()
    print(final_list)
You don't have to use a callback: Pool.apply_async() returns an AsyncResult object, which has a .get() method to retrieve the result of the submitted call. Extension of your attempt:
import time
import multiprocessing
import datetime
from os import getpid

def foo_pool(x):
    print(getpid())
    time.sleep(2)
    return str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')

def main(list_base):
    a = list_base
    a_pairs = [a[i:i+2] for i in range(0, len(a)-1, 2)]
    if (len(a) % 2) != 0:
        a_pairs.append([a[-1]])
    final_list = []
    with multiprocessing.Pool(processes=2) as pool:
        for a, b in a_pairs:
            res_1 = pool.apply_async(foo_pool, args=(a,))
            res_2 = pool.apply_async(foo_pool, args=(b,))
            final_list.extend([res_1.get(), res_2.get()])
    print(final_list)

if __name__ == '__main__':
    list_base = ['a','b','c','d']
    start = time.perf_counter()
    main(list_base)
    end = time.perf_counter()
    print(end - start)
I have added the print(getpid()) to foo_pool to show that you're actually using different processes. And I've used time to illustrate that, despite the time.sleep(2) in foo_pool, the overall duration of main is roughly 2 seconds per pair rather than 2 seconds per call.
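A hedged variant of the same idea (a sketch, not from the original answer): because res_1.get() blocks before the next pair is submitted, you can also submit every item up front and only collect afterwards. The two-concurrent-call limit is still respected, since processes=2 means the pool never runs more than two calls at once:

import multiprocessing
import datetime

def foo_pool(x):
    return str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')

def main(list_base):
    with multiprocessing.Pool(processes=2) as pool:
        # Submit everything first; the pool itself caps concurrency at 2.
        async_results = [pool.apply_async(foo_pool, args=(item,)) for item in list_base]
        # Collect in submission order.
        final_list = [res.get() for res in async_results]
    print(final_list)

if __name__ == '__main__':
    main(['a', 'b', 'c', 'd'])

This also removes the need to pair up the inputs manually.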
I think you need shared strings between processes. They can be obtained from multiprocessing.Manager().
Your api_values.py should look like this:
import multiprocessing
import datetime
from ctypes import c_wchar_p

def add_hour(x, ret_str):
    ret_str.value = str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M')

def main(list_base):
    a = list_base
    a_pairs = [a[i:i+2] for i in range(0, len(a)-1, 2)]
    if (len(a) % 2) != 0:
        a_pairs.append([a[-1]])
    final_list = []
    manager = multiprocessing.Manager()
    for a, b in a_pairs:
        ret_str_a = manager.Value(c_wchar_p, "")
        ret_str_b = manager.Value(c_wchar_p, "")
        mp_1 = multiprocessing.Process(target=add_hour, args=(a, ret_str_a))
        mp_2 = multiprocessing.Process(target=add_hour, args=(b, ret_str_b))
        mp_1.start()
        mp_2.start()
        mp_1.join()
        mp_2.join()
        final_list.append(ret_str_a.value)
        final_list.append(ret_str_b.value)
    print(final_list)
Source: How to share a string amongst multiple processes using Managers() in Python?
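For completeness, a variant of the same Manager idea that collects all results in one shared dict keyed by the input value, instead of one Value per string; this is a hedged sketch, not part of the answer above, and it assumes the inputs are unique since they are used as dictionary keys:

import multiprocessing
import datetime

def add_hour(x, results):
    results[x] = str(x) + ' - ' + datetime.datetime.now().strftime('%d/%m/%Y %H:%M')

def main(list_base):
    manager = multiprocessing.Manager()
    results = manager.dict()  # shared between parent and children
    for i in range(0, len(list_base), 2):
        # At most two processes run at a time, matching the API limit.
        procs = [multiprocessing.Process(target=add_hour, args=(x, results))
                 for x in list_base[i:i + 2]]
        for p in procs:
            p.start()
        for p in procs:
            p.join()
    final_list = [results[x] for x in list_base]
    print(final_list)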

Why does python multiprocessing script slow down after a while?

I read an old question Why does this python multiprocessing script slow down after a while? and many others before posting this one. They do not answer the problem I'm having.
IDEA OF THE SCRIPT.
The script generates arrays, 256x256, in a serialised loop. Elements of an array are calculated one by one from a list that contains dictionaries with the relevant params, one dictionary per array element (256x256 in total per list). The list is the way for me to enable parallel calculations.
THE PROBLEM.
In the beginning, the generation of the data speeds up from a dozen or so seconds down to a few seconds. Then, after a few iterations, it starts slowing down by a fraction of a second with each new array generated, to the point where it takes forever to calculate anything.
Additional info.
I am using a pool.map function. After making a few small changes to identify which element is being calculated, I also tried using map_async. Unfortunately, it is slower because I need to init the pool each time I finish calculating an array.
When using the pool.map, I init the pool once before anything starts. In this way, I hope to save time initializing the pool in comparison to map_async.
CPU shows low usage, up to ~18%.
In my case, the hard drive isn't a bottleneck. All the data necessary for calculations is in RAM, and I don't save data to disk; everything stays in RAM.
I also checked if the problem persists if I use a different number of cores, 2-24. No changes either.
I made some additional tests by running and terminating a pool (a) each time an array is generated, (b) every 10 arrays. I noticed that in each case execution of the code slows down compared to the previous pool's execution time, i.e. if the previous one slowed down to 5 s, the next one will take 5.X s, and so on. The only time the execution doesn't slow down is when I run the code serially.
Working env: Windows 10, Python 3.7, conda 4.8.2, Spyder 4.
THE QUESTION: Why multiprocessing slows down after a while in the case where only CPU & RAM are involved (no hard-drive slowdown)? Any idea?
UPDATED CODE:
import multiprocessing as mp
from tqdm import tqdm
import numpy as np
import random

def wrapper_(arg):
    return tmp.generate_array_elements(
        self=arg['self'],
        nu1=arg['nu1'],
        nu2=arg['nu2'],
        innt=arg['innt'],
        nu1exp=arg['nu1exp'],
        nu2exp=arg['nu2exp'],
        ii=arg['ii'],
        jj=arg['jj'],
        llp=arg['self'].llp,
        rr=arg['self'].rr,
    )

class tmp:
    def __init__(self, multiprocessing, length, n_of_arrays):
        self.multiprocessing = multiprocessing
        self.inshape = (length, length)
        self.length = length
        self.ll_len = n_of_arrays
        self.num_cpus = 8
        self.maxtasksperchild = 10000
        self.rr = 0

    """original function is different, modified to return something"""
    """for the example purpose, lp is not relevant here but in general is"""
    def get_ll(self, lp):
        return [random.sample((range(self.length)), int(np.random.random()*12)+1) for ii in range(self.ll_len)]

    """original function is different, modified to return something"""
    def get_ip(self): return np.random.random()

    """original function is different, modified to return something"""
    def get_op(self): return np.random.random(self.length)

    """original function is different, modified to return something"""
    def get_innt(self, nu1, nu2, ip):
        return nu1*nu2/ip

    """original function is different, modified to return something"""
    def __get_pp(self, nu1):
        return np.exp(nu1)

    """dummy function for the example purpose"""
    def dummy_function(self):
        """do important stuff"""
        return

    """dummy function for the example purpose"""
    def dummy_function_2(self, result):
        """do important stuff"""
        return np.reshape(result, self.inshape)

    """dummy function for the example purpose"""
    def dummy_function_3(self):
        """do important stuff"""
        return

    """original function is different, modified to return something"""
    """for the example purpose, lp is not relevant here but in general is"""
    def get_llp(self, ll, lp):
        return [{'a': np.random.random(), 'b': np.random.random()} for ii in ll]

    """NOTE, lp is not used here for the example purpose but
    in the original code it's a very important variable containing
    relevant data for calculations"""
    def generate(self, lp={}):
        """create a list that is used for the creation of the 2-D array"""
        """providing here a dummy pp param to get_ll"""
        ll = self.get_ll(lp)
        ip = self.get_ip()
        self.op = self.get_op()
        """length of args_tmp = self.length * self.length = 256 * 256"""
        args_tmp = [
            {'self': self,
             'nu1': nu1,
             'nu2': nu2,
             'ii': ii,
             'jj': jj,
             'innt': np.abs(self.get_innt(nu1, nu2, ip)),
             'nu1exp': np.exp(1j*nu1*ip),
             'nu2exp': np.exp(1j*nu2*ip),
             } for ii, nu1 in enumerate(self.op) for jj, nu2 in enumerate(self.op)]
        pool = {}
        """Create a pool of CPU workers"""
        if self.multiprocessing:
            pool = mp.Pool(self.num_cpus, maxtasksperchild=self.maxtasksperchild)
        """number of arrays is equal to len of ll, here 300"""
        for ll_ in tqdm(ll):
            """Generate data"""
            self.__generate(ll_, lp, pool, args_tmp)
        if self.multiprocessing:
            pool.terminate()

    def __generate(self, ll, lp, pool={}, args_tmp=[]):
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        self.dummy_function()
        self.llp = self.get_llp(ll, lp)
        """originally the value is taken from lp"""
        self.rr = self.rr
        if self.multiprocessing and pool:
            result = pool.map(wrapper_, args_tmp)
        else:
            result = [wrapper_(arg) for arg in args_tmp]
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        result = self.dummy_function_2(result)

    """original function is different"""
    def generate_array_elements(self, nu1, nu2, llp, innt, nu1exp, nu2exp, ii=0, jj=0, rr=0):
        if rr == 1 and self.inshape[0] - 1 - jj < ii:
            return 0
        elif rr == -1 and ii > jj:
            return 0
        elif rr == 0:
            """do nothing"""
        ll1 = []
        ll2 = []
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        self.dummy_function_3()
        for kk, ll in enumerate(llp):
            ll1.append(
                self.__get_pp(nu1) *
                nu1*nu2*nu1exp**ll['a']*np.exp(1j*np.random.random())
            )
            ll2.append(
                self.__get_pp(nu2) *
                nu1*nu2*nu2exp**ll['b']*np.exp(1j*np.random.random())
            )
        t1 = sum(ll1)
        t2 = sum(ll2)
        result = innt*np.abs(t1 - t2)
        return result

g = tmp(False, 256, 300)
g.generate()
It is hard to tell what is going on in your algorithm. I don't know a lot about multiprocessing, but it is probably safer to stick with functions and avoid passing self down into the pooled processes, which is what happens when you pass args_tmp to wrapper_ in pool.map(). More generally, try to reduce how much data is passed between the parent and child processes. Below I try to move the generation of the lp list into the pool workers to prevent passing excessive data.
Lastly, although I don't think it matters in this example code, you should either be cleaning up the pool after use or using the pool via a with statement.
I rewrote some of your code to try things out, and this seems faster, but I'm not 100% sure it adheres to your algorithm. Some of the variable names are hard to distinguish.
This runs a lot faster for me, but it is hard to tell whether it is producing your results accurately. My conclusion, if it is accurate, is that the extra data passing was significantly slowing down the pool workers.
#main.py
if __name__ == '__main__':
    import os
    import sys
    file_dir = os.path.dirname(__file__)
    sys.path.append(file_dir)
    from tmp import generate_1

    parallel = True
    generate_1(parallel)

#tmp.py
import multiprocessing as mp
import numpy as np
import random
from tqdm import tqdm
from itertools import starmap

def wrapper_(arg):
    return arg['self'].generate_array_elements(
        nu1=arg['nu1'],
        nu2=arg['nu2'],
        ii=arg['ii'],
        jj=arg['jj'],
        lp=arg['self'].lp,
        nu1exp=arg['nu1exp'],
        nu2exp=arg['nu2exp'],
        innt=arg['innt']
    )

def generate_1(parallel):
    """create a list that is used to the creation of 2-D array"""
    il = np.random.random(256)
    """generating params for parallel data generation"""
    """some params are also calculated here to speed up the calculation process
    because they are always the same so they can be calculated just once"""
    """this code creates a list of 256*256 elements"""
    args_tmp = [
        {
            'nu1': nu1,
            'nu2': nu2,
            'ii': ii,
            'jj': jj,
            'innt': np.random.random()*nu1+np.random.random()*nu2,
            'nu1exp': np.exp(1j*nu1),
            'nu2exp': np.exp(1j*nu2),
        } for ii, nu1 in enumerate(il) for jj, nu2 in enumerate(il)]
    """init pool"""
    """get list of arrays to generate"""
    ip_list = [random.sample((range(256)), int(np.random.random()*12)+1) for ii in range(300)]
    map_args = [(idx, ip, args_tmp) for idx, ip in enumerate(ip_list)]
    """separate function to do other important things"""
    if parallel:
        with mp.Pool(8, maxtasksperchild=10000) as pool:
            result = pool.starmap(start_generate_2, map_args)
    else:
        result = starmap(start_generate_2, map_args)
    # Wrap iterator in list call.
    return list(result)

def start_generate_2(idx, ip, args_tmp):
    print('starting {idx}'.format(idx=idx))
    runner = Runner()
    result = runner.generate_2(ip, args_tmp)
    print('finished {idx}'.format(idx=idx))
    return result

class Runner():
    def generate_2(self, ip, args_tmp):
        """NOTE, the method is much more extensive and uses other methods of the class"""
        """so it must remain a method of the class that is not static!"""
        self.lp = [{'a': np.random.random(), 'b': np.random.random()} for ii in ip]
        """this part creates 1-D array of the length of args_tmp, that's 256*256"""
        result = map(wrapper_, [dict(args, self=self) for args in args_tmp])
        """it's then reshaped to 2-D array"""
        result = np.reshape(list(result), (256, 256))
        return result

    def generate_array_elements(self, nu1, nu2, ii, jj, lp, nu1exp, nu2exp, innt):
        """doing heavy calc"""
        """here is something else"""
        if ii > jj: return 0
        ll1 = []
        ll2 = []
        for kk, ll in enumerate(lp):
            ll1.append(nu1*nu2*nu1exp**ll['a']*np.exp(1j*np.random.random()))
            ll2.append(nu1*nu2*nu2exp**ll['b']*np.exp(1j*np.random.random()))
        t1 = sum(ll1)
        t2 = sum(ll2)
        result = innt*np.abs(t1 - t2)
        return result
I'm adding a generic template to show an architecture where you split the preparation of the shared args away from the task runner and still use classes. The strategy here is: do not create too many tasks (300 seems faster than trying to split them down to 64000), and don't pass too much data to each task. The interface of launch_task should be kept as simple as possible; in my refactoring of your code it is equivalent to start_generate_2.
import multiprocessing
from itertools import starmap

class Launcher():
    def __init__(self, parallel):
        self.parallel = parallel

    def generate_shared_args(self):
        return [(i, j) for i, j in enumerate(range(300))]

    def launch(self):
        shared_args = self.generate_shared_args()
        if self.parallel:
            with multiprocessing.Pool(8) as pool:
                result = pool.starmap(launch_task, shared_args)
        else:
            result = starmap(launch_task, shared_args)
        # Wrap in list to resolve iterable.
        return list(result)

def launch_task(i, j):
    task = Task(i, j)
    return task.run()

class Task():
    def __init__(self, i, j):
        self.i = i
        self.j = j

    def run(self):
        return self.i + self.j

if __name__ == '__main__':
    parallel = True
    launcher = Launcher(parallel)
    print(launcher.launch())
There is a warning about the cleanup of pool in the pool documentation here: https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool
The first item discusses avoiding shared state and specifically large amounts of data.
https://docs.python.org/3/library/multiprocessing.html#programming-guidelines
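As a minimal sketch of the cleanup those docs are warning about (either pattern avoids leaving worker processes around; the work function here is a hypothetical placeholder):

import multiprocessing as mp

def work(x):
    return x * x

if __name__ == '__main__':
    # Pattern 1: the context manager terminates the pool on exit.
    with mp.Pool(8) as pool:
        results = pool.map(work, range(100))

    # Pattern 2: explicit close() then join() when you manage the pool yourself.
    pool = mp.Pool(8)
    try:
        results = pool.map(work, range(100))
    finally:
        pool.close()
        pool.join()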
Ian Wilson's suggestions were very helpful and one of them resolved the issue. That's why his answer is marked as the correct one.
As he suggested, it's better to call the pool on a smaller number of tasks. So instead of calling pool.map for each of the N arrays over its 256*256 elements (N*256*256 tasks in total), I now call pool.map on the function that calculates a whole array, so just N tasks. The array calculation inside the function is done serially.
I'm still sending self as a param because it's needed in the function, but it doesn't have any noticeable impact on the performance.
That small change speeds up the calculation of an array from 7-15 s to 1.5 it/s-2 s/it!
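Distilled to its essence (a hedged sketch with dummy math and hypothetical names, not the original code): each pool task now builds one whole array serially, so the pool only ever sees N coarse tasks instead of N*256*256 tiny ones. The full listing follows under CURRENT CODE below.

import multiprocessing as mp
import numpy as np
from tqdm import tqdm

def build_array(seed):
    # One task = one full 256x256 array, computed serially inside the worker.
    rng = np.random.default_rng(seed)
    return rng.random((256, 256))

if __name__ == '__main__':
    n_arrays = 300
    with mp.Pool(8) as pool:
        arrays = []
        for arr in tqdm(pool.imap_unordered(build_array, range(n_arrays)), total=n_arrays):
            arrays.append(arr)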
CURRENT CODE:
import multiprocessing as mp
import tqdm
import numpy as np
import random

def wrapper_(arg):
    return tmp.generate_array_elements(
        self=arg['self'],
        nu1=arg['nu1'],
        nu2=arg['nu2'],
        innt=arg['innt'],
        nu1exp=arg['nu1exp'],
        nu2exp=arg['nu2exp'],
        ii=arg['ii'],
        jj=arg['jj'],
        llp=arg['self'].llp,
        rr=arg['self'].rr,
    )

"""NEW WRAPPER HERE"""
"""Sending self doesn't have a bad impact on the performance, at least I don't complain :)"""
def generate_js_(arg):
    tmp._tmp__generate(arg['self'], arg['ll'], arg['lp'], arg['pool'], arg['args_tmp'])

class tmp:
    def __init__(self, multiprocessing, length, n_of_arrays):
        self.multiprocessing = multiprocessing
        self.inshape = (length, length)
        self.length = length
        self.ll_len = n_of_arrays
        self.num_cpus = 8
        self.maxtasksperchild = 10000
        self.rr = 0

    """original function is different, modified to return something"""
    """for the example purpose, lp is not relevant here but in general is"""
    def get_ll(self, lp):
        return [random.sample((range(self.length)), int(np.random.random()*12)+1) for ii in range(self.ll_len)]

    """original function is different, modified to return something"""
    def get_ip(self): return np.random.random()

    """original function is different, modified to return something"""
    def get_op(self): return np.random.random(self.length)

    """original function is different, modified to return something"""
    def get_innt(self, nu1, nu2, ip):
        return nu1*nu2/ip

    """original function is different, modified to return something"""
    def __get_pp(self, nu1):
        return np.exp(nu1)

    """dummy function for the example purpose"""
    def dummy_function(self):
        """do important stuff"""
        return

    """dummy function for the example purpose"""
    def dummy_function_2(self, result):
        """do important stuff"""
        return np.reshape(result, self.inshape)

    """dummy function for the example purpose"""
    def dummy_function_3(self):
        """do important stuff"""
        return

    """original function is different, modified to return something"""
    """for the example purpose, lp is not relevant here but in general is"""
    def get_llp(self, ll, lp):
        return [{'a': np.random.random(), 'b': np.random.random()} for ii in ll]

    """NOTE, lp is not used here for the example purpose but
    in the original code it's a very important variable containing
    relevant data for calculations"""
    def generate(self, lp={}):
        """create a list that is used for the creation of the 2-D array"""
        """providing here a dummy pp param to get_ll"""
        ll = self.get_ll(lp)
        ip = self.get_ip()
        self.op = self.get_op()
        """length of args_tmp = self.length * self.length = 256 * 256"""
        args_tmp = [
            {'self': self,
             'nu1': nu1,
             'nu2': nu2,
             'ii': ii,
             'jj': jj,
             'innt': np.abs(self.get_innt(nu1, nu2, ip)),
             'nu1exp': np.exp(1j*nu1*ip),
             'nu2exp': np.exp(1j*nu2*ip),
             } for ii, nu1 in enumerate(self.op) for jj, nu2 in enumerate(self.op)]
        pool = {}
        """MAJOR CHANGE IN THIS PART AND BELOW"""
        map_args = [{'self': self, 'idx': (idx, len(ll)), 'll': ll_, 'lp': lp, 'pool': pool, 'args_tmp': args_tmp} for idx, ll_ in enumerate(ll)]
        if self.multiprocessing:
            pool = mp.Pool(self.num_cpus, maxtasksperchild=self.maxtasksperchild)
            for _ in tqdm.tqdm(pool.imap_unordered(generate_js_, map_args), total=len(map_args)):
                pass
            pool.close()
            pool.join()
        else:
            for map_arg in tqdm.tqdm(map_args):
                generate_js_(map_arg)

    def __generate(self, ll, lp, pool={}, args_tmp=[]):
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        self.dummy_function()
        self.llp = self.get_llp(ll, lp)
        """originally the value is taken from lp"""
        self.rr = self.rr
        """REMOVED PARALLEL CALL HERE"""
        result = [wrapper_(arg) for arg in args_tmp]
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        result = self.dummy_function_2(result)

    """original function is different"""
    def generate_array_elements(self, nu1, nu2, llp, innt, nu1exp, nu2exp, ii=0, jj=0, rr=0):
        if rr == 1 and self.inshape[0] - 1 - jj < ii:
            return 0
        elif rr == -1 and ii > jj:
            return 0
        elif rr == 0:
            """do nothing"""
        ll1 = []
        ll2 = []
        """In the original code there are plenty of other things done here
        using the class' methods; they are not shown for the example purpose"""
        self.dummy_function_3()
        for kk, ll in enumerate(llp):
            ll1.append(
                self.__get_pp(nu1) *
                nu1*nu2*nu1exp**ll['a']*np.exp(1j*np.random.random())
            )
            ll2.append(
                self.__get_pp(nu2) *
                nu1*nu2*nu2exp**ll['b']*np.exp(1j*np.random.random())
            )
        t1 = sum(ll1)
        t2 = sum(ll2)
        result = innt*np.abs(t1 - t2)
        return result

g = tmp(False, 256, 300)
g.generate()
Thank you Ian, again.

Parallelization within a python object

I am working on a simulation where I need to compute an expensive numerical integral at many different time points. Each integrand is a function of the time it is sampling up to, so I must evaluate each of the points independently. Because each integral is independent of all others, this can be implemented in an embarrassingly parallel fashion.
I would like to run this on an HPC cluster, so I have attempted to parallelize this process using mpi4py; however, my current implementation causes each processor to do the entire calculation (including the scattering to other cores) rather than have only the for loop inside of the object parallelized. As written, with n cores this takes n times as long as with one core (not a good sign...).
Because the only step which takes any amount of time is the computation itself, I would like everything except that specific for loop to run on the root node.
Below is a pseudo-code reduction of my current implementation:
import numpy as np
from mpi4py import MPI

COMM = MPI.COMM_WORLD

class Integrand:

    def __init__(self, t_max, dt, **kwargs):
        self.t_max = t_max
        self.dt = dt
        self.time_sample = np.arange(0, self.t_max, self.dt)
        self.function_args = kwargs
        self.final_result = np.empty_like(self.time_sample)

    def do_integration(self):
        if COMM.rank == 0:
            times_partitioned = split(self.time_sample, COMM.size)  # split() partitions the times across ranks
        else:
            times_partitioned = None

        times_partitioned = COMM.scatter(times_partitioned, root=0)

        results = np.empty(times_partitioned.shape, dtype=complex)

        for counter, t in enumerate(times_partitioned):
            results[counter] = computation(self, t, **self.function_args)

        results = MPI.COMM_WORLD.gather(results, root=0)

        if COMM.rank == 0:
            # inter-leaf back together
            for i in range(COMM.size):
                self.final_result[i::COMM.size] = results[i]

if __name__ == '__main__':
    kwargs_set = [kwargs1, kwargs2, kwargs3, ..., kwargsN]
    for kwargs in kwargs_set:
        integrand_object = Integrand(**kwargs)
        integrand_object.do_integration()
        save_and_plot_results(integrand_object.final_result)
A simple way to parallelize this problem without drastically changing how the class is called/used is to make use of a decorator. The decorator (shown below) makes it so that rather than creating the same object on every core, each core creates an object with the chunk of the time steps it needs to evaluate. After they have all been evaluated it gathers their results and returns a single object with the full result to one core. This particular implementation changes the class functionality slightly by forcing evaluation of the integral at creation time.
from functools import wraps
import numpy as np
from mpi4py import MPI

COMM = MPI.COMM_WORLD

def parallelize_integrand(integral_class):
    def split(container, count):
        return [container[_i::count] for _i in range(count)]

    @wraps(integral_class)
    def wrapper(*args, **kwargs):
        int_object = integral_class(*args, **kwargs)
        time_sample_total = int_object.time_sample
        if COMM.rank == 0:
            split_time = split(time_sample_total, COMM.size)
            final_result = np.empty_like(int_object.result)
        else:
            split_time = None
        split_time = COMM.scatter(split_time, root=0)
        int_object.time_sample = split_time
        int_object.do_integration()
        result = int_object.result
        result = COMM.gather(result, root=0)
        if COMM.rank == 0:
            for i in range(COMM.size):
                final_result[i::COMM.size] = result[i]
            int_object.time_sample = time_sample_total
            int_object.result = final_result
        return int_object
    return wrapper

@parallelize_integrand
class Integrand:
    def __init__(self, t_max, dt, **kwargs):
        self.t_max = t_max
        self.dt = dt
        self.time_sample = np.arange(0, self.t_max, self.dt)
        self.kwargs = kwargs
        self.result = np.empty_like(self.time_sample)

    def do_integration(self):
        for counter, t in enumerate(self.time_sample):
            self.result[counter] = computation(self, t, **self.kwargs)

if __name__ == '__main__':
    kwargs_set = [kwargs1, kwargs2, kwargs3, ..., kwargsN]
    for kwargs in kwargs_set:
        integrand_object = Integrand(**kwargs)
        save_and_plot_results(integrand_object.result)

Not getting a sorted list in python

I have a list of process objects (user-defined) that I want to sort to get the most memory-intensive processes at any given moment.
But reverse-sorting with sorted is not yielding the required result.
My code:
import psutil as pu
import time

class proc:
    def __init__(self, pid, pname, pmem):
        self.pid = pid
        self.pname = pname
        self.pmem = int(pmem)
    # def __lt__(self,other):
    #     return self.pmem<other.pmem
    # def __repr__(self):
    #     return str(self.pmem)+"\t"+self.pname

if __name__ == "__main__":
    meg = 1024*1024
    gig = meg*1024
    while True:
        print(pu.cpu_count())
        print(pu.cpu_percent())
        print("{:.3f} GB".format(pu.virtual_memory().used/gig))
        x = []
        for p in pu.pids():
            pro = pu.Process(pid=p)
            # print(pro.memory_info()[0])
            # print(pro.memory_info()[1])
            x.append(proc(pid=p, pname=pro.name(), pmem=pro.memory_info()[0]))
        sorted(x, key=lambda x: x.pmem, reverse=True)
        for i in x:
            print(str(i.pmem)+'\t'+i.pname)
        time.sleep(5)
Output:
http://pastebin.com/7Pz5Yn7A
You should use sort instead of sorted:
x.sort(key=lambda item: item.pmem, reverse=True)
sort sorts the existing list in place; sorted creates a new one.
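In other words, the original loop discards the sorted list that sorted() returns. A short sketch of the two working options, using the same proc objects as the question:

# Option 1: sort in place (no new list is created).
x.sort(key=lambda item: item.pmem, reverse=True)

# Option 2: keep sorted(), but rebind the name to the new list it returns.
x = sorted(x, key=lambda item: item.pmem, reverse=True)

for i in x:
    print(str(i.pmem) + '\t' + i.pname)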

Returning two values from pandas.rolling_apply

I am using pandas.rolling_apply to fit data to a distribution and get a value from it, but I need it to also report a rolling goodness of fit (specifically, the p-value). Currently I'm doing it like this:
import pandas as pd
from scipy.stats import genextreme, kstest

def func(sample):
    fit = genextreme.fit(sample)
    return genextreme.isf(0.9, *fit)

def p_value(sample):
    fit = genextreme.fit(sample)
    return kstest(sample, 'genextreme', fit)[1]

values = pd.rolling_apply(data, 30, func)
p_values = pd.rolling_apply(data, 30, p_value)
results = pd.DataFrame({'values': values, 'p_value': p_values})
The problem is that I have a lot of data, and the fit function is expensive, so I don't want to call it twice for every sample. What I'd rather do is something like this:
def func(sample):
    fit = genextreme.fit(sample)
    value = genextreme.isf(0.9, *fit)
    p_value = kstest(sample, 'genextreme', fit)[1]
    return {'value': value, 'p_value': p_value}

results = pd.rolling_apply(data, 30, func)
Where results is a DataFrame with two columns. If I try to run this, I get an exception:
TypeError: a float is required. Is it possible to achieve this, and if so, how?
I had a similar problem and solved it by using a member function of a separate helper class during apply. That member function returns a single value, as required, but I store the other calculation results as members of the class and can use them afterwards.
Simple Example:
class CountCalls:
    def __init__(self):
        self.counter = 0

    def your_function(self, window):
        retval = f(window)  # f is your expensive calculation (placeholder)
        self.counter = self.counter + 1
        return retval       # apply still needs a single value back

TestCounter = CountCalls()

pandas.Series.rolling(your_seriesOrDataframeColumn, window=your_window_size).apply(TestCounter.your_function)

print(TestCounter.counter)
Assume your function f returns a tuple of two values v1, v2. Then you can return v1 and assign it to column column_v1 of your dataframe. The second value v2 you simply accumulate in a Series series_val2 within the helper class. Afterwards you just assign that series as a new column of your dataframe, as sketched below.
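A hedged sketch of that description, with f, df and the 'data' column as hypothetical stand-ins for your expensive fit function and input frame (column_v1 and series_val2 are the names used above); it assumes no missing values, so apply is called exactly once per full window:

import pandas as pd

class TwoValueHelper:
    def __init__(self):
        self.series_val2 = []  # accumulates the second return value

    def your_function(self, window):
        v1, v2 = f(window)  # f returns two values; apply only accepts one
        self.series_val2.append(v2)
        return v1

helper = TwoValueHelper()
df['column_v1'] = df['data'].rolling(window=30).apply(helper.your_function)
# Attach the second value afterwards, aligned to the rows that produced it.
df['column_v2'] = pd.Series(helper.series_val2, index=df.index[29:])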
I had a similar problem before. Here's my solution for it:
from collections import deque

class your_multi_output_function_class:
    def __init__(self):
        self.deque_2 = deque()
        self.deque_3 = deque()

    def f1(self, window):
        self.k = somefunction(window)  # somefunction returns three values (placeholder)
        self.deque_2.append(self.k[1])
        self.deque_3.append(self.k[2])
        return self.k[0]

    def f2(self, window):
        return self.deque_2.popleft()

    def f3(self, window):
        return self.deque_3.popleft()

func = your_multi_output_function_class()

output = your_pandas_object.rolling(window=10).agg(
    {'a': func.f1, 'b': func.f2, 'c': func.f3}
)
I used and loved @yi-yu's answer so I made it generic:
from collections import deque
from functools import partial

def make_class(func, dim_output):

    class your_multi_output_function_class:
        def __init__(self, func, dim_output):
            assert dim_output >= 2
            self.func = func
            self.deques = {i: deque() for i in range(1, dim_output)}

        def f0(self, *args, **kwargs):
            k = self.func(*args, **kwargs)
            for queue in sorted(self.deques):
                self.deques[queue].append(k[queue])
            return k[0]

        def accessor(self, index, *args, **kwargs):
            return self.deques[index].popleft()

    klass = your_multi_output_function_class(func, dim_output)

    for i in range(1, dim_output):
        f = partial(klass.accessor, i)  # bind the instance and the output index
        setattr(klass, 'f' + str(i), f)

    return klass
and given a function f of a pandas Series (windowed but not necessarily) returning n values, you use it this way:
rolling_func = make_class(f, n)
# dict to map the function's outputs to new columns. Eg:
agger = {'output_' + str(i): getattr(rolling_func, 'f' + str(i)) for i in range(n)}
windowed_series.agg(agger)
I also had the same issue. I solved it by generating a global data frame and feeding it from the rolling function. In the following example script, I generate random input data. Then, with a single rolling apply function, I calculate the min, the max and the mean.
import pandas as pd
import numpy as np

global outputDF
global index

def myFunction(array):
    global index
    global outputDF
    # Some random operation
    outputDF['min'][index] = np.nanmin(array)
    outputDF['max'][index] = np.nanmax(array)
    outputDF['mean'][index] = np.nanmean(array)
    index += 1
    # Returning a useless variable
    return 0

if __name__ == "__main__":
    global outputDF
    global index

    # A random window size
    windowSize = 10

    # Preparing some random input data
    inputDF = pd.DataFrame({'randomValue': [np.nan] * 500})
    for i in range(len(inputDF)):
        inputDF['randomValue'].values[i] = np.random.rand()

    # Pre-allocate memory
    outputDF = pd.DataFrame({'min': [np.nan] * len(inputDF),
                             'max': [np.nan] * len(inputDF),
                             'mean': [np.nan] * len(inputDF)
                             })

    # Set the starting index (due to the window size)
    d = (windowSize - 1) / 2
    index = int(np.floor(d))

    # Do the rolling apply here
    inputDF['randomValue'].rolling(window=windowSize, center=True).apply(myFunction, args=())

    assert index + int(np.ceil(d)) == len(inputDF), 'Length mismatch'

    outputDF.index = inputDF.index

    # Optional: Clean the nulls
    outputDF.dropna(inplace=True)

    print(outputDF)
