Python - apply_async doesn't execute function

Hi, I'm trying to use multiprocessing to speed up my code. However, apply_async doesn't work for me. I tried a simple example:
from multiprocessing.pool import Pool

t = [0, 1, 2, 3, 4, 5]

def cube(x):
    t[x] = x**3

pool = Pool(processes=4)
for i in range(6):
    pool.apply_async(cube, args=(i, ))
for x in t:
    print(x)
It does not really change t as I would expect.
My real code is like:
from multiprocessing.pool import Pool

def func(a, b, c, d):
    # some calculations
    # save result to files
    # no return value

lt = # list of possible values of a
# set values to b, c, d
p = Pool()
for i in lt:
    p.apply_async(func, args=(i, b, c, d, ))
Where are the problems here?
Thank you!
Update: Thanks to the comments and answers, now I understand why my simple example won't work. However, I'm still having trouble with my real code. I have checked that my func does not rely on any global variable, so it does not seem to be the same problem as in my example code.
As suggested, I added a return value to my func, now my code is:
f = Flux("reactor")
d = Detector("Ge")
mv = arange(-6, 1.5, 0.5)
p = Pool()
lt = ["uee", "dee"]
for i in lt:
    re = p.apply_async(res, args=(i, d, f, mv, ))
    print(re.get())
p.close()
p.join()
Now I get the following error:
Traceback (most recent call last):
  File "/Users/Shu/Documents/Programming/Python/Research/debug.py", line 35, in <module>
    print(re.get())
  File "/usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 608, in get
    raise self._value
  File "/usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 385, in _handle_tasks
    put(task)
  File "/usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "/usr/local/Cellar/python3/3.6.0/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object 'Flux.__init__.<locals>.<lambda>'
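A note on that traceback: everything passed to apply_async is pickled before it is sent to a worker process, and the message says pickling fails because the Flux object carries a lambda defined inside Flux.__init__; pickle cannot serialize lambdas or other locally defined functions. A minimal sketch of the usual workaround, assuming (hypothetically, since the Flux class is not shown) that the lambda computes some spectrum value:

# hypothetical illustration -- the real Flux class is not shown in the question
def reactor_spectrum(e):
    return e**2  # whatever the lambda was computing

class Flux:
    def __init__(self, source):
        self.source = source
        # instead of: self.spectrum = lambda e: e**2
        self.spectrum = reactor_spectrum  # module-level functions pickle fine

With no lambda stored on the instance, Flux objects can be pickled and passed to apply_async.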

EDIT: the first example you provided will not work for a simple reason: processes do not share memory. Therefore, the change t[x] = x**3 happens only in the worker process, leaving the values of the list t in the parent unchanged.
You need to actually return the value from the computation and build a new list from that.
def cube(x):
    return x**3

t = [0, 1, 2, 3, 4, 5]

p = Pool()
t = p.map(cube, t)
print(t)
If, as you claim for the second example, the results are not supposed to be returned but to be stored independently in files, and this does not happen, I'd recommend checking the return value of your function to see whether the function itself is raising an exception. Get the actual results and see what happens:
p = Pool()
for i in lt:
    res = p.apply_async(func, args=(i, b, c, d, ))
    print(res.get())  # this will raise an exception if it happens within func
p.close()  # do not accept any more tasks
p.join()   # wait for the completion of all scheduled jobs
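One caveat about the loop above: calling res.get() right after each apply_async waits for that task to finish before the next one is submitted, so the tasks effectively run one at a time. A sketch that keeps the tasks running in parallel while still surfacing exceptions (same func, lt, b, c, d as above) is to collect the AsyncResult objects first and call get() afterwards:

p = Pool()
async_results = [p.apply_async(func, args=(i, b, c, d)) for i in lt]
p.close()
p.join()
for res in async_results:
    print(res.get())  # re-raises here any exception raised inside func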

The script exits too soon; try adding this code at the end of your script (though calling pool.close() followed by pool.join(), as shown above, is the more robust way to wait):
import time
time.sleep(3)

Related

running a function in parallel in Python

I'm trying to run a function like f in parallel in Python but have two problems:
When using map, function f is not applied to all permuted tuples of arrays a and b.
When trying to use Pool, I get the following error:
TypeError: '<=' not supported between instances of 'tuple' and 'int'
def f(n, m):
    x = n * m
    return x

a = (1, 2, 3, 4, 5)
b = (3, 4, 7, 8, 9)

result = map(f, a, b)
print(list(result))

# now trying parallel computing
from multiprocessing import Pool

pool = Pool(processes=4)
print(*pool.map(f, a, b))
For your #1 issue I didn't make any changes and got the expected result from map(). You seem to have an incorrect assumption of how it works, but you didn't provide expected vs. actual results for your example.
For #2 to return the same answers as #1, you need starmap() instead of map() for this multiprocessing use, and you need to zip() the argument lists so that each call gets one set of arguments. If you're on an OS that doesn't fork (and, for portability, even if you're not), run global code only in the main process, not in a spawned process, by using the documented if __name__ == '__main__': idiom:
from multiprocessing import Pool

def f(n, m):
    x = n * m
    return x

if __name__ == '__main__':
    a = (1, 2, 3, 4, 5)
    b = (3, 4, 7, 8, 9)

    result = map(f, a, b)
    print(list(result))

    # now trying parallel computing
    pool = Pool(processes=4)
    print(*pool.starmap(f, zip(a, b)))
Output:
[3, 8, 21, 32, 45]
3 8 21 32 45
If you actually want permutations as mentioned in #1, use itertools.starmap or pool.starmap with itertools.product(a,b) as parameters instead.
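For reference, a minimal sketch of that permutation variant (same f, a and b as above; the Cartesian product gives one call per (n, m) pair):

from itertools import product
from multiprocessing import Pool

def f(n, m):
    return n * m

if __name__ == '__main__':
    a = (1, 2, 3, 4, 5)
    b = (3, 4, 7, 8, 9)
    pool = Pool(processes=4)
    # 25 results here: one per pair in product(a, b)
    print(pool.starmap(f, product(a, b)))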

Python: calling inner() from outer()

I have looked around on SO and surprisingly not found an answer to this question. I assume this is because normally inner/nested functions are used for something in particular (eg. maintaining an environment variable, factories) as opposed to something trivial like I'm trying to use them for. In any case, I can't seem to find any information on how to properly call an inner function from an outer function without having to declare inner() above outer() in the file. The problem is from this problem on HackerRank (https://www.hackerrank.com/challenges/circular-array-rotation/problem).
def circularArrayRotation(a, k, queries):
    def rotateArrayRightCircular(arr: list, iterations: int) -> list:
        """
        Perform a 'right circular rotation' on an array for number of iterations.
        Note: function actually moves last 'iterations' elements of array to front of array.
        >>> rotateArrayRightCircular([0,1,2], 1)
        [2,0,1]
        >>> rotateArrayRightCircular([0,1,2,3,4,5], 3)
        [3,4,5,0,1,2]
        >>> rotateArrayRightCircular([0,1,2,3,4,5], 6)
        [0,1,2,3,4,5]
        """
        return arr[-1 * iterations:] + arr[0:-1 * iterations]

    k = k % len(a)
    a = rotateArrayRightCircular(a, k)
    res = []
    for n in queries:
        res.append(a[n])
    return res
The code above does what I want it to, but it's somehow inelegant to me that I have to put the inner function call after the inner function definition. Various errors with different attempts:
# trying 'self.inner()'
Traceback (most recent call last):
  File "solution.py", line 52, in <module>
    result = circularArrayRotation(a, k, queries)
  File "solution.py", line 13, in circularArrayRotation
    a = self.rotateArrayRightCircular(a, k)
NameError: name 'self' is not defined

# Removing 'self' and leaving the definition of inner() after the call to inner()
Traceback (most recent call last):
  File "solution.py", line 52, in <module>
    result = circularArrayRotation(a, k, queries)
  File "solution.py", line 13, in circularArrayRotation
    a = rotateArrayRightCircular(a, k)
UnboundLocalError: local variable 'rotateArrayRightCircular' referenced before assignment
Any idea how I could include def inner() after the call to inner() without throwing an error?
As a function body is executed from top to bottom, and an inner function only comes into existence when its def statement is executed, what you want is just not possible.
You could put the function before the outer one, making it an outer (module-level) function itself, possibly adding some parameters (not necessary here). (BTW, it looks so generic that other parts of the code might want to use it as well, so why not make it outer?)
But otherwise, you are stuck. It is essentially the same situation as in
def f():
    print(a)  # a doesn't exist yet, so this is an error

a = 4
Well, you could do it this way:
def circularArrayRotation(a, k, queries):
    def inner_code():
        nonlocal a, k  # a and k are reassigned below, so they must be declared nonlocal
        k = k % len(a)
        a = rotateArrayRightCircular(a, k)
        # BTW, instead of the following, you could just do
        # return [a[n] for n in queries]
        res = []
        for n in queries:
            res.append(a[n])
        return res

    def rotateArrayRightCircular(arr: list, iterations: int) -> list:
        """
        Perform a 'right circular rotation' on an array for number of iterations.
        Note: function actually moves last 'iterations' elements of array to front of array.
        >>> rotateArrayRightCircular([0,1,2], 1)
        [2,0,1]
        >>> rotateArrayRightCircular([0,1,2,3,4,5], 3)
        [3,4,5,0,1,2]
        >>> rotateArrayRightCircular([0,1,2,3,4,5], 6)
        [0,1,2,3,4,5]
        """
        return arr[-1 * iterations:] + arr[0:-1 * iterations]

    return inner_code()
but I don't see that you gain anything from it.
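For completeness, a sketch of the first suggestion (moving the helper before the outer function at module level; the names are taken from the question):

def rotateArrayRightCircular(arr: list, iterations: int) -> list:
    """Move the last 'iterations' elements of arr to the front."""
    return arr[-1 * iterations:] + arr[0:-1 * iterations]

def circularArrayRotation(a, k, queries):
    k = k % len(a)
    a = rotateArrayRightCircular(a, k)
    return [a[n] for n in queries]

Since both functions are defined before circularArrayRotation is ever called, the order of the two def statements in the file no longer matters.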
This is not possible in Python, but it is possible in other languages like JavaScript and PHP, where it is called function hoisting.

Error in use of python multiprocessing module with generator function.

Could someone explain what is wrong with the code below?
from multiprocessing import Pool

def sq(x):
    yield x**2

p = Pool(2)
n = p.map(sq, range(10))
I am getting the following error:
MaybeEncodingError                        Traceback (most recent call last)
 in ()
      5 p = Pool(2)
      6
----> 7 n = p.map(sq, range(10))

/home/devil/anaconda3/lib/python3.4/multiprocessing/pool.py in map(self, func, iterable, chunksize)
    258         in a list that is returned.
    259         '''
--> 260         return self._map_async(func, iterable, mapstar, chunksize).get()
    261
    262     def starmap(self, func, iterable, chunksize=None):

/home/devil/anaconda3/lib/python3.4/multiprocessing/pool.py in get(self, timeout)
    606             return self._value
    607         else:
--> 608             raise self._value
    609
    610     def _set(self, i, obj):

MaybeEncodingError: Error sending result: '[, ]'. Reason: 'TypeError("can't pickle generator objects",)'
Many thanks in advance.
You have to use a function, not a generator, here. That means: change yield to return to convert sq into a regular function. Pool can't work with generators.
Moreover, when trying to create a working version on Windows, I had a strange repeating error message.
Attempt to start a new process before the current process
has finished its bootstrapping phase.
This probably means that you are on Windows and you have
forgotten to use the proper idiom in the main module:
if __name__ == '__main__':
Literally quoting the comment I got, since it's self-explanatory: "the error on Windows is because each process spawns a new Python process which interprets the Python file, etc., so everything outside the if-main block is executed again."
So, to be portable, you have to use the if __name__ == "__main__": guard when running this module:
from multiprocessing import Pool

def sq(x):
    return x**2

if __name__ == "__main__":
    p = Pool(2)
    n = p.map(sq, range(10))
    print(n)
Result:
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Edit: if you don't want to store the values beforehand, you can use imap:
n = p.imap(sq, range(10))
n is now a generator object. To consume the values (and activate the actual processing), I force iteration through a list and I get the same result as above:
print(list(n))
Note that the documentation indicates that imap is much slower than map.

Running multiprocessing on two different functions in Python 2.7

I have 2 different functions that I want to use multiprocessing for: makeFakeTransactions and elasticIndexing. The function makeFakeTransactions returns a list of dictionaries, which is then added to the async_results list. So essentially, async_results is a list of lists. I want to use this list of lists as input for the elasticIndexing function, but I must wait for the first p.apply_async to finish first before I use the list of lists. How do I ensure that the first batch of multiprocessing is finished before I initiate the next one?
Also, when I run the program as is, it skips the second p.apply_async and just terminates. Do I have to declare a separate multiprocessing.Pool variable to do another multiprocessing operation?
store_num = 1
process_number = 6
num_transactions = 10
p = multiprocessing.Pool(process_number)
async_results = [p.apply_async(makeFakeTransactions, args = (store_num, num_transactions,)) for store_num in xrange(1, 10, 5)]
results = [ar.get() for ar in async_results]
async_results = [p.apply_async(elasticIndexing, args = (result_list,)) for result_list in results]
EDIT:
I tried using p.join() after async_results, but it gives this error:
Traceback (most recent call last):
  File "C:\Users\workspace\Proj\test.py", line 210, in <module>
    p.join()
  File "C:\Python27\lib\multiprocessing\pool.py", line 460, in join
    assert self._state in (CLOSE, TERMINATE)
AssertionError
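Two things follow from the Pool API here: ar.get() blocks until its task has finished, so the first batch is already complete by the time the results list exists, and the AssertionError occurs because join() may only be called after close() (or terminate()). A sketch along those lines, reusing the names from the question (the same pool can be reused for the second batch):

p = multiprocessing.Pool(process_number)

# first batch: each ar.get() blocks, so 'results' is complete before moving on
async_results = [p.apply_async(makeFakeTransactions, args=(store_num, num_transactions,)) for store_num in xrange(1, 10, 5)]
results = [ar.get() for ar in async_results]

# second batch on the same pool; wait for it before the script exits
async_results = [p.apply_async(elasticIndexing, args=(result_list,)) for result_list in results]
p.close()  # no more tasks will be submitted to this pool
p.join()   # blocks until all submitted tasks have finished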

Pass multiple parameters to concurrent.futures.Executor.map?

The concurrent.futures.Executor.map takes a variable number of iterables from which the function given is called. How should I call it if I have a generator that produces tuples that are normally unpacked in place?
The following doesn't work because each of the generated tuples is given as a different argument to map:
args = ((a, b) for (a, b) in c)
for result in executor.map(f, *args):
    pass
Without the generator, the desired arguments to map might look like this:
executor.map(
    f,
    (i[0] for i in args),
    (i[1] for i in args),
    ...,
    (i[N] for i in args),
)
One argument that is repeated, one argument in c
from itertools import repeat
for result in executor.map(f, repeat(a), c):
    pass
Need to unpack items of c, and can unpack c
from itertools import izip  # Python 2; on Python 3, use the built-in zip instead
for result in executor.map(f, *izip(*c)):
    pass
Need to unpack items of c, can't unpack c
Change f to take a single argument and unpack the argument in the function.
If each item in c has a variable number of members, or you're calling f only a few times:
executor.map(lambda args, f=f: f(*args), c)
It defines a new function that unpacks each item from c and calls f. Using a default argument for f in the lambda makes f local inside the lambda and so reduces lookup time.
If you've got a fixed number of arguments, and you need to call f a lot of times:
from collections import deque

def itemtee(iterable, n=2):
    def gen(it=iter(iterable), items=deque(), next=next):
        popleft = items.popleft
        extend = items.extend
        while True:
            if not items:
                extend(next(it))
            yield popleft()
    return [gen()] * n

executor.map(f, *itemtee(c, n))
Where n is the number of arguments to f. This is adapted from itertools.tee.
You need to remove the * on the map call:
args = ((a, b) for b in c)
for result in executor.map(f, args):
    pass
This will call f once for each item in args, where f should accept one parameter.
If you want f to accept two parameters you can use a lambda call like:
args = ((a, b) for b in c)
for result in executor.map(lambda p: f(*p), args):  # (*p) does the unpacking part
    pass
You can use currying to create a new function via functools.partial in Python:
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def some_func(param1, param2):
    # some code
    pass

# currying: the repeated 'a' argument is bound to some_func once
func = partial(some_func, a)

with ThreadPoolExecutor() as executor:
    executor.map(func, list_of_args)
    ...
If more than one of the parameters is repeated, you can pass them all to partial:
func = partial(some_func, a, b, c)
So suppose you have a function which takes 3 arguments and all the 3 arguments are dynamic and keep on changing with every call. For example:
def multiply(a, b, c):
    print(a * b * c)
To call this multiple times using threading, I would first create a list of tuples where each tuple is a version of a,b,c:
arguments = [(1,2,3), (4,5,6), (7,8,9), ....]
We know that concurrent.futures's map function accepts the target function as its first argument and the list of arguments for each call as its second argument. Therefore, you might make a call like this:
for _ in executor.map(multiply, arguments):  # Error
    pass
But this will give you an error saying the function expected 3 arguments but got only 1. To solve this problem, we create a helper function:
def helper(numbers):
    multiply(numbers[0], numbers[1], numbers[2])
Now, we can call this function using executor as follow:
with ThreadPoolExecutor() as executor:
    for _ in executor.map(helper, arguments):
        pass
That should give you the desired results.
Here's a code snippet showing how to send multiple arguments to a function with ThreadPoolExecutor:
import concurrent.futures

def hello(first_name: str, last_name: str) -> None:
    """Prints a friendly hello with first name and last name"""
    print('Hello %s %s!' % (first_name, last_name))

def main() -> None:
    """Examples showing how to use ThreadPoolExecutor and executor.map
    sending multiple arguments to a function"""

    # Example 1: Sending multiple arguments using tuples
    # Define tuples with sequential arguments to be passed to hello()
    args_names = (
        ('Bruce', 'Wayne'),
        ('Clark', 'Kent'),
        ('Diana', 'Prince'),
        ('Barry', 'Allen'),
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Using lambda, unpacks the tuple (*f) into hello(*args)
        executor.map(lambda f: hello(*f), args_names)

    print()

    # Example 2: Sending multiple arguments using dict with named keys
    # Define dicts with arguments as key names to be passed to hello()
    kwargs_names = (
        {'first_name': 'Bruce', 'last_name': 'Wayne'},
        {'first_name': 'Clark', 'last_name': 'Kent'},
        {'first_name': 'Diana', 'last_name': 'Prince'},
        {'first_name': 'Barry', 'last_name': 'Allen'},
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Using lambda, unpacks the dict (**f) into hello(**kwargs)
        executor.map(lambda f: hello(**f), kwargs_names)

if __name__ == '__main__':
    main()
Let's say you have data in a data frame and you want to pass its first two columns (image_1 and image_2 below) to a function which will read the images, predict the features, then calculate the difference and return the difference value.
Note: you can have any scenario as per your requirement, and you can define the function accordingly.
The code snippet below takes these two columns as arguments and passes them to the thread-pool mechanism (also showing a progress bar):
'''function that will give the difference of two numpy feature matrices'''
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def getDifference(image_1_loc, image_2_loc, esp=1e-7):
    arr1 = ''' read 1st image and extract feature '''
    arr2 = ''' read 2nd image and extract feature '''
    diff = arr1.ravel() - arr2.ravel() + esp
    return diff

'''Using ThreadPoolExecutor from concurrent.futures with multiple arguments'''
with ThreadPoolExecutor() as executor:
    result = np.array(
        list(tqdm(
            executor.map(lambda x: getDifference(*x),
                         [(i, j) for i, j in df[['image_1', 'image_2']].values]),
            total=len(df)
        ))
    )
For ProcessPoolExecutor.map():
Similar to map(func, *iterables) except:
the iterables are collected immediately rather than lazily;
func is executed asynchronously and several calls to func may be made
concurrently.
Therefore, the usage of ProcessPoolExecutor.map() is the same as that of Python's built-in map(). Here are the docs:
Return an iterator that applies function to every item of iterable,
yielding the results. If additional iterable arguments are passed,
function must take that many arguments and is applied to the items
from all iterables in parallel.
Conclusion: pass the parameters as separate iterables to map().
Try running the following snippet under Python 3, and it will become quite clear:
from concurrent.futures import ProcessPoolExecutor

def f(a, b):
    print(a + b)

with ProcessPoolExecutor() as pool:
    pool.map(f, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), (0, 1, 2))
# 0, 2, 4

array = [(i, i) for i in range(3)]
with ProcessPoolExecutor() as pool:
    pool.map(f, *zip(*array))
# 0, 2, 4
I have seen so many answers here, but none of them is as straightforward as using lambda expressions:
def foo(x, y):
    pass
Want to call the above method 10 times, each time with the same values xVal and yVal?
with concurrent.futures.ThreadPoolExecutor() as executor:
    for _ in executor.map(lambda _: foo(xVal, yVal), range(0, 10)):
        pass
This works for me:
from concurrent.futures import ThreadPoolExecutor

def concurrent_function(function, list):
    with ThreadPoolExecutor() as executor:
        executor.map(function, list)

def concurrent_multiply(args={'a': 1, 'b': 2}):
    print(args['a'] * args['b'])

concurrent_function(concurrent_multiply, [{'a': 1, 'b': 1},
                                          {'a': 2, 'b': 2},
                                          {'a': 3, 'b': 3}])
A simple utility that I use all the time is below.
########### Start of Utility Code ###########
import os
import sys
import traceback

from concurrent import futures
from functools import partial

def catch(fn):
    def wrap(*args, **kwargs):
        result = None
        try:
            result = fn(*args, **kwargs)
        except Exception as err:
            type_, value_, traceback_ = sys.exc_info()
            return None, (
                args,
                "".join(traceback.format_exception(type_, value_, traceback_)),
            )
        else:
            return result, (args, None)

    return wrap

def top_level_wrap(fn, arg_tuple):
    args, kwargs = arg_tuple
    return fn(*args, **kwargs)

def create_processes(fn, values, handle_error, handle_success):
    cores = os.cpu_count()
    max_workers = 2 * cores + 1
    to_exec = partial(top_level_wrap, fn)
    with futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        for result, error in executor.map(to_exec, values):
            args, tb = error
            if tb is not None:
                handle_error(args, tb)
            else:
                handle_success(result)

########### End of Utility Code ###########
Example usage -
######### Start of example usage ###########
import time

@catch
def fail_when_5(val):
    time.sleep(val)
    if val == 5:
        raise Exception("Error - val was 5")
    else:
        return f"No error val is {val}"

def handle_error(args, tb):
    print("args is", args)
    print("TB is", tb)

def top_level(val, val_2, test=None, test2="ok"):
    print(val_2, test, test2)
    return fail_when_5(val)

handle_success = print

if __name__ == "__main__":
    # SHAPE -> ( (args, kwargs), (args, kwargs), ... )
    values = tuple(
        ((x, x + 1), {"test": f"t_{x+2}", "test2": f"t_{x+3}"}) for x in range(10)
    )
    create_processes(top_level, values, handle_error, handle_success)

######### End of example usage ###########
