Pass a different initarg to each worker in ProcessPoolExecutor - python

In [5]: def fn(x):
   ...:     os.environ["var_{}".format(x)] = x
   ...:

[PYFLYBY] import os

In [6]: def gn(x):
   ...:     return os.environ["var_{}".format(x)]
   ...:

In [7]: a = ["1", "2", "3"]

In [8]: with concurrent.futures.ProcessPoolExecutor(max_workers=3, initializer=fn, initargs=a) as e:
   ...:     r = e.map(gn, a)
   ...:
Exception in initializer:
Traceback (most recent call last):
  File "/opt/python/python-3.7/lib64/python3.7/concurrent/futures/process.py", line 226, in _process_worker
    initializer(*initargs)
TypeError: fn() takes 1 positional argument but 3 were given
Exception in initializer:
Traceback (most recent call last):
  File "/opt/python/python-3.7/lib64/python3.7/concurrent/futures/process.py", line 226, in _process_worker
    initializer(*initargs)
TypeError: fn() takes 1 positional argument but 3 were given
Exception in initializer:
Traceback (most recent call last):
  File "/opt/python/python-3.7/lib64/python3.7/concurrent/futures/process.py", line 226, in _process_worker
    initializer(*initargs)
TypeError: fn() takes 1 positional argument but 3 were given
So basically, I want a[0] to be passed to the first worker, a[1] to the second, and so on. Is there any way to accomplish this? Right now the entire a is being passed to fn in every worker, which is what causes this error.

Your example is not entirely correct, but as for the question:
You can pass a multiprocessing.Queue to the initializer function, put worker-specific data into it, and call queue.get() once in each worker process:
import os
import concurrent.futures
import multiprocessing
import time

def fn(q):
    x = q.get()
    os.environ["var_x"] = x

def gn(i):
    time.sleep(0.5)
    return f"pid={os.getpid()} var_x={os.environ['var_x']}\n"

q = multiprocessing.Queue()
a = ["1", "2", "3"]

with concurrent.futures.ProcessPoolExecutor(max_workers=3, initializer=fn, initargs=(q,)) as e:
    [q.put(i) for i in a]
    print(*e.map(gn, a))
Output:
pid=1218 var_x=1
pid=1219 var_x=2
pid=1220 var_x=3
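The trick works because each worker process runs the initializer exactly once, so each worker pops exactly one item off the queue. For completeness, here is the same idea with an if __name__ == '__main__': guard, which is required on platforms that spawn rather than fork workers (e.g. Windows); this is a lightly adapted sketch of the code above, not a verified cross-platform test:

import concurrent.futures
import multiprocessing
import os
import time

def fn(q):
    # each worker process runs the initializer exactly once,
    # so each worker consumes exactly one item from the queue
    os.environ["var_x"] = q.get()

def gn(i):
    time.sleep(0.5)
    return f"pid={os.getpid()} var_x={os.environ['var_x']}"

if __name__ == "__main__":
    q = multiprocessing.Queue()
    a = ["1", "2", "3"]
    for i in a:
        q.put(i)
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=3, initializer=fn, initargs=(q,)) as e:
        print(*e.map(gn, a), sep="\n")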

Related

Use multiprocess in class with python version == 3.9

I am trying to use multiprocessing in a class in the following code:
import concurrent.futures
import numpy as np
import pandas as pd

class test:
    def __init__(self):
        return

    global calc_corr
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        corr = df_tmp.corr().iloc[0, 1]
        return corr

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()
I am using a @staticmethod because it is not related to the class; it's just a computing tool. But running the code raises the following error:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
  File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
    t.aa()
  File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
    print(f.result())
  File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
    return self.__get_result()
  File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
    raise self._exception
  File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object

Process finished with exit code 1
Can anyone help me fix this?
I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
to
results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]
it seemed to work fine. I'm not actually sure why what you wrote doesn't work, but hopefully this will.
Note: removing the tuple around the arguments is unrelated to this issue, but it was causing another issue afterwards.
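For reference, here is roughly what the working version described above looks like; this is a sketch reconstructed from the description, not the asker's verified code:

import concurrent.futures
import numpy as np
import pandas as pd

class test:
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        return df_tmp.corr().iloc[0, 1]

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # no "global calc_corr"; submit via self.calc_corr and
            # pass the arguments unpacked instead of as one tuple
            results = [executor.submit(self.calc_corr, i, df1, df2)
                       for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    test().aa()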

How to pass stacktrace between processes in Python?

I'm trying to create a Python decorator which takes a function with args and kwargs, executes it in a new process, shuts the process down, and returns whatever the function returned, re-raising the same exception if one occurred.
For now, my decorator handles functions fine if they raise no exceptions, but it fails to provide the traceback. How do I pass it back to the parent process?
from functools import wraps
from multiprocessing import Process, Queue
import sys

def process_wrapper(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # queue for communicating between parent and child processes
        q = Queue()

        def func_to_q(_q: Queue, *_args, **_kwargs):
            # do the same as func, but put result into the queue. Also put
            # there an exception if any.
            try:
                _res = func(*_args, **_kwargs)
                _q.put(_res)
            except:
                _q.put(sys.exc_info())

        # start another process and wait for it to join
        p = Process(target=func_to_q, args=(q,) + args, kwargs=kwargs)
        p.start()
        p.join()
        # get result from the queue and return it, or raise if it's an exception
        res = q.get(False)
        if isinstance(res, tuple) and isinstance(res[0], Exception):
            raise res[1].with_traceback(res[2])
        else:
            return res
    return wrapper

if __name__ == '__main__':
    @process_wrapper
    def ok():
        return 'ok'

    @process_wrapper
    def trouble():
        def inside():
            raise UserWarning
        inside()

    print(ok())
    print(trouble())
I expect the result to be something like:
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 44, in trouble
    inside()
  File "/temp.py", line 43, in inside
    raise UserWarning
UserWarning
Process finished with exit code 1
But it seems the child process cannot put the stack trace into the queue, and I get the following:
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 26, in wrapper
    res = q.get(False)
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 107, in get
    raise Empty
queue.Empty
Process finished with exit code 1
Also, if the child puts only the exception itself into the queue, _q.put(sys.exc_info()[1]), the parent gets it from there and raises it, but with a new stack trace (note the missing call to inside()):
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 28, in wrapper
    raise res
UserWarning
Process finished with exit code 1
Take a look at multiprocessing/pool.py and the stringification hack it uses for sending exceptions to the parent. You can use multiprocessing.pool.ExceptionWithTraceback from there.
Here is just enough code to demonstrate the basic principle:
from multiprocessing import Process, Queue
from multiprocessing.pool import ExceptionWithTraceback

def worker(outqueue):
    try:
        result = (True, 1 / 0)  # will raise ZeroDivisionError
    except Exception as e:
        e = ExceptionWithTraceback(e, e.__traceback__)
        result = (False, e)
    outqueue.put(result)

if __name__ == '__main__':
    q = Queue()
    p = Process(target=worker, args=(q,))
    p.start()
    success, value = q.get()
    p.join()
    if success:
        print(value)
    else:
        raise value  # raise again
Output:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/home/...", line 7, in worker
    result = (True, 1 / 0)  # will raise ZeroDivisionError
ZeroDivisionError: division by zero
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/...", line 23, in <module>
    raise value
ZeroDivisionError: division by zero
Process finished with exit code 1
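Applied to the decorator from the question, the same trick could look roughly like this. This is a sketch: ExceptionWithTraceback is a private helper of multiprocessing.pool, so it may change between Python versions, and the nested target function requires the fork start method, as in the question's original code:

from functools import wraps
from multiprocessing import Process, Queue
from multiprocessing.pool import ExceptionWithTraceback

def process_wrapper(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        q = Queue()

        def func_to_q(_q, *_args, **_kwargs):
            try:
                _q.put((True, func(*_args, **_kwargs)))
            except Exception as e:
                # wrap so the traceback survives pickling
                _q.put((False, ExceptionWithTraceback(e, e.__traceback__)))

        p = Process(target=func_to_q, args=(q,) + args, kwargs=kwargs)
        p.start()
        success, res = q.get()  # get before join() to avoid blocking on a full pipe
        p.join()
        if success:
            return res
        raise res
    return wrapper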

Class variable in multiprocessing - python

Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
    return fun(*args)
  File "C:/...temp_obj_output_standard.py", line 14, in print_name
    print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/...temp_obj_output_standard.py", line 31, in <module>
    output = [p.get() for p in results]
  File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
    output = [p.get() for p in results]
  File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
    raise self._value
NameError: name 'Some_class' is not defined

Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have them, and I don't think the processes should conflict over them. Am I missing something?
Any suggestions on how to troubleshoot this? Don't worry about run_dill_encoded and apply_async; I am using this solution until I can compile multiprocess on Python 3.x.

KeyError: 0 using multiprocessing in python

I have the following code, in which I try to call a function compute_cluster that does some computations and writes the results to a txt file (each process writes its results to a different txt file independently). However, when I run the following code:
from multiprocessing import Pool

# compute_cluster(...) is defined earlier in RMSD_calc.py

def main():
    p = Pool(19)
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

if __name__ == "__main__":
    main()
it crashes with the following errors:
File "RMSD_calc.py", line 124, in <module>
main()
File "RMSD_calc.py", line 120, in main
p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 225, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 522, in get
raise self._value
KeyError: 0
When I searched online for the meaning of "KeyError: 0", I didn't find anything helpful, so any suggestion as to why this error happens is highly appreciated.
KeyError happens in compute_cluster() in a child process and p.map() reraises it for you in the parent:
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])
Output
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 9, in <module>
    p.map(f, [None])
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 227, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 528, in get
    raise self._value
KeyError: 0
To see the full traceback, catch the exception in the child process:
import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed" % (args,))

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])
Output
ERROR:root:f(None) failed
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 10, in f_mp
    return f(args)
  File "raise-exception-in-child.py", line 6, in f
    d[0]  # <-- raises KeyError
KeyError: 0
It shows that d[0] caused the exception.
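If you want the parent to still fail loudly after logging the child-side traceback, a small variation (a sketch along the same lines) logs and then re-raises:

import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        # log the full traceback in the child, then re-raise so the
        # parent's p.map() still sees the failure
        logging.exception("f(%r) failed", args)
        raise

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])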

"Uncatching" an exception in python

How should I "rethrow" an exception? That is, suppose:
I try something in my code, and unfortunately it fails.
I try some "clever" workaround, which happens to also fail this time.
If I throw the exception from the (failing) workaround, it's going to be pretty darn confusing for the user, so I think it may be best to rethrow the original exception (?), with the descriptive traceback it comes with (about the actual problem)...
Note: the motivating example for this is calling np.log(np.array(['1'], dtype=object)), where NumPy tries a witty workaround and gives an AttributeError (it's "really" a TypeError).
One way I can think of is just to re-call the offending function, but this seems dodgy (for one thing, the original function may theoretically behave differently the second time it's called):
Okay this is one awful example, but here goes...
def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()
Suppose I did this:
try:
    a()
except:
    # attempt witty workaround
    g()
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-4-c76b7509b315> in <module>()
3 except:
4 # attempt witty workaround
----> 5 g()
6
<ipython-input-2-e641f2f9a7dc> in g()
4
5 def g():
----> 6 raise Exception("coconut")
7
8
Exception: coconut
Well, the problem doesn't really lie with the coconut at all, but the sparrow:
try:
    a()
except:
    # attempt witty workaround
    try:
        g()
    except:
        # workaround failed, I want to rethrow the exception from calling a()
        a()  # ideally don't want to call a() again
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-4-e641f2f9a7dc> in <module>()
19 except:
20 # workaround failed, I want to rethrow the exception from calling a()
---> 21 a() # ideally don't want to call a() again
<ipython-input-3-e641f2f9a7dc> in a()
8
9 def a():
---> 10 f()
11
12
<ipython-input-1-e641f2f9a7dc> in f()
1 def f():
----> 2 raise Exception("sparrow")
3
4
5 def g():
Exception: sparrow
Is there a standard way to deal with this, or am I thinking about it completely wrong?
If you want to make it appear to the end user that you never called g(), then you need to store the traceback from the first error, call the second function, and then throw the original with the original traceback (otherwise, in Python 2, a bare raise re-raises the second exception rather than the first). The problem is that there is no 2/3-compatible way to raise with a traceback, so you have to wrap the Python 2 version in an exec statement (since it's a SyntaxError in Python 3).
Here's a function that lets you do that (I added this to the pandas codebase recently):
import sys

if sys.version_info[0] >= 3:
    def raise_with_traceback(exc, traceback=Ellipsis):
        if traceback == Ellipsis:
            _, _, traceback = sys.exc_info()
        raise exc.with_traceback(traceback)
else:
    # this version of raise is a syntax error in Python 3
    exec("""
def raise_with_traceback(exc, traceback=Ellipsis):
    if traceback == Ellipsis:
        _, _, traceback = sys.exc_info()
    raise exc, None, traceback
""")

raise_with_traceback.__doc__ = (
    """Raise exception with existing traceback.
If traceback is not passed, uses sys.exc_info() to get traceback."""
)
And then you can use it like this (I also changed the Exception types for clarity).
def f():
    raise TypeError("sparrow")

def g():
    raise ValueError("coconut")

def a():
    f()

try:
    a()
except TypeError as e:
    import sys
    # save the traceback from the original exception
    _, _, tb = sys.exc_info()
    try:
        # attempt witty workaround
        g()
    except:
        raise_with_traceback(e, tb)
And in Python 2, you only see a() and f():
Traceback (most recent call last):
  File "test.py", line 40, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 31, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
But in Python 3, it still notes there was an additional exception too, because you are raising within its except clause [which flips the order of the errors and makes it much more confusing for the user]:
Traceback (most recent call last):
  File "test.py", line 38, in <module>
    g()
  File "test.py", line 25, in g
    raise ValueError("coconut")
ValueError: coconut

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "test.py", line 40, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 6, in raise_with_traceback
    raise exc.with_traceback(traceback)
  File "test.py", line 31, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
If you absolutely want it to look like the g() Exception never happened in both Python 2 and Python 3, you need to check that you are out of the except clause first:
try:
    a()
except TypeError as e:
    import sys
    # save the traceback from the original exception
    _, _, tb = sys.exc_info()
    handled = False
    try:
        # attempt witty workaround
        g()
        handled = True
    except:
        pass
    if not handled:
        raise_with_traceback(e, tb)
Which gets you the following traceback in Python 2:
Traceback (most recent call last):
  File "test.py", line 56, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 43, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
And this traceback in Python 3:
Traceback (most recent call last):
  File "test.py", line 56, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 6, in raise_with_traceback
    raise exc.with_traceback(traceback)
  File "test.py", line 43, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
It does add an additional non-useful line of traceback that shows the raise exc.with_traceback(traceback) to the user, but it is relatively clean.
Here is something totally nutty that I wasn't sure would work, but it works in both Python 2 and 3. (It does, however, require the exception-raising code to be encapsulated in a function...)
def f():
    print("Fail!")
    raise Exception("sparrow")

def g():
    print("Workaround fail.")
    raise Exception("coconut")

def a():
    f()

def tryhard():
    ok = False
    try:
        a()
        ok = True
    finally:
        if not ok:
            try:
                g()
                return  # "cancels" sparrow Exception by returning from finally
            except:
                pass
>>> tryhard()
Fail!
Workaround fail.
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 4, in tryhard
  File "<stdin>", line 2, in a
  File "<stdin>", line 3, in f
Exception: sparrow
Which is the correct exception and the right stack trace, and with no hackery.
>>> def g(): print "Worked around." # workaround is successful in this case
>>> tryhard()
Fail!
Worked around.
>>> def f(): print "Success!" # normal method works
>>> tryhard()
Success!
Ian Bicking has a nice primer on re-raising.
As a corollary, my rule is to only catch exceptions that the code knows how to deal with; very few methods actually meet this rule. For example, if I'm reading a file and an IOError is thrown, there is very little that method could reasonably do.
As a corollary to that, catching exceptions in "main" is reasonable if you can return to a good state and you don't just want to dump the user out; this really only applies to interactive programs.
The relevant section from the primer is the update:
try:
    a()
except:
    exc_info = sys.exc_info()
    try:
        g()
    except:
        # If this happens, it clobbers exc_info,
        # which is why we had to save it above
        import traceback
        print >> sys.stderr, "Error in revert_stuff():"
        # py3: print("Error in revert_stuff():", file=sys.stderr)
        traceback.print_exc()
    raise exc_info[0], exc_info[1], exc_info[2]
In Python 3, the final raise could be written as:
ei = exc_info[1]
ei.__traceback__ = exc_info[2]
raise ei
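or, equivalently, using the exception's with_traceback method:
raise exc_info[1].with_traceback(exc_info[2])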
In Python 3 (specifically tested on 3.3.2), this all works better; there's no need to save sys.exc_info. Don't re-raise the original exception within the second exception handler. Just note whether the 2nd attempt failed and, if so, re-raise the original in the scope of the original handler, like so:
#!python3
try:
    a()
except Exception:
    g_failed = False
    try:
        g()
    except Exception:
        g_failed = True
    if g_failed:
        raise
Python 3 output correctly raising "sparrow" and showing traceback through a() and f():
Traceback (most recent call last):
  File "x3.py", line 13, in <module>
    a()
  File "x3.py", line 10, in a
    f()
  File "x3.py", line 4, in f
    raise Exception("sparrow")
Exception: sparrow
However, the same script on Python 2 incorrectly raises "coconut" and shows only g():
Traceback (most recent call last):
  File "x3.py", line 17, in <module>
    g()
  File "x3.py", line 7, in g
    raise Exception("coconut")
Exception: coconut
Here are the modifications to make Python 2 work correctly:
#!python2
import sys
try:
    a()
except Exception:
    exc = sys.exc_info()
    try:
        g()
    except Exception:
        raise exc[0], exc[1], exc[2]  # Note: doesn't care that it is nested.
Now Python 2 correctly shows "sparrow" and both a() and f() traceback:
Traceback (most recent call last):
  File "x2.py", line 14, in <module>
    a()
  File "x2.py", line 11, in a
    f()
  File "x2.py", line 5, in f
    raise Exception("sparrow")
Exception: sparrow
Capture the error in your except clause, then manually re-raise it later. Capture the traceback, and reprint it via the traceback module.
import sys
import traceback

def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()

try:
    print "trying a"
    a()
except Exception as e:
    print sys.exc_info()
    (_, _, tb) = sys.exc_info()
    print "trying g"
    try:
        g()
    except:
        print "\n".join(traceback.format_tb(tb))
        raise e
In Python 3, within a function, this can be done in a very slick way, following up the answer from @Mark Tolonen, who uses a boolean. You can't do this outside a function because there's no way to break out of the outer try statement: the function is needed for the return.
#!python3
def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()

def h():
    try:
        a()
    except:
        try:
            g()
            return  # Workaround succeeded!
        except:
            pass  # Oh well, that didn't work.
        raise  # Re-raises *first* exception.

h()
This results in:
Traceback (most recent call last):
  File "uc.py", line 23, in <module>
    h()
  File "uc.py", line 14, in h
    a()
  File "uc.py", line 10, in a
    f()
  File "uc.py", line 4, in f
    raise Exception("sparrow")
Exception: sparrow
...and if instead g succeeds:
def g(): pass
...then it doesn't raise an exception.
try:
    1/0  # will raise ZeroDivisionError
except Exception as first:
    try:
        x/1  # will raise NameError
    except Exception as second:
        raise first  # will re-raise ZeroDivisionError
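Note that in Python 3 this re-raise happens inside the second except clause, so the traceback will also show the NameError as implicit context ("During handling of the above exception, another exception occurred"). If you want only the original error displayed, raise ... from None (PEP 409) suppresses that context; a minimal variant of the snippet above:

try:
    1/0  # will raise ZeroDivisionError
except Exception as first:
    try:
        x/1  # will raise NameError
    except Exception:
        # "from None" suppresses the implicit NameError context,
        # so only the ZeroDivisionError traceback is shown
        raise first from None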
