Parallel Python not able to take arguments properly - python

I have just started using Parallel Python (pp) in Python 3, and I am currently having trouble submitting an object's method together with its arguments.
Is it possible that an argument cannot be a list? I could not find anyone with the same error message as mine, so I am confused.
import pp, numpy

class myobj:
    def __init__(self):
        """some code"""

    def myfunc(self, data, n):
        return [data[numpy.random.randint(0, N)] for i in range(N)]

if __name__ == "__main__":
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers)
    proc = myobj()
    data = [[1, 2, 3], [4, 5, 6]]
    N = 2
    results = []
    for i in range(10):
        f = job_server.submit(proc.myfunc, (data, N), modules=('numpy',))
        results.append(f)
    for f in results:
        val = f()
        print(val)
A fatal error has occured during the function execution
Traceback (most recent call last):
File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 94, in run
__fname, __fobjs = self.t.creceive(preprocess)
File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/transport.py", line 128, in creceive
self.rcache[hash1] = tuple(map(preprocess, (msg, )))[0]
File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in preprocess
fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in <listcomp>
fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
File "<string>", line 1
gging(self, dataset, N):
^
SyntaxError: invalid syntax
None
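The list argument is unlikely to be the problem: the traceback shows ppft re-compiling source code it extracted for the submitted callable and failing on a truncated def line (gging(self, dataset, N):), which points at source extraction for the bound method going wrong. A minimal sketch of a common workaround, assuming a module-level function is acceptable here, submits a plain function instead of an instance method. Note that it uses the parameter n inside the function rather than the global N, since globals from the parent process do not exist in the worker:
import pp
import numpy

def myfunc(data, n):
    # plain module-level function: ppft can extract and recompile its source
    return [data[numpy.random.randint(0, n)] for i in range(n)]

if __name__ == "__main__":
    job_server = pp.Server(ppservers=())
    data = [[1, 2, 3], [4, 5, 6]]
    N = 2
    jobs = [job_server.submit(myfunc, (data, N), modules=('numpy',))
            for i in range(10)]
    for f in jobs:
        print(f())
If the method really needs instance state, passing the instance in as an explicit argument keeps ppft's source extraction working on a plain function.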

Related

Use multiprocess in class with python version == 3.9

I am trying to use multiprocessing in a class in the following code:
import concurrent.futures

import numpy as np
import pandas as pd

class test:
    def __init__(self):
        return

    global calc_corr
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        corr = df_tmp.corr().iloc[0, 1]
        return corr

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()
I am using @staticmethod because the function is not related to the class; it's just a computing tool. But running the code raises the following error:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
t.aa()
File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
print(f.result())
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
return self.__get_result()
File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
obj = _ForkingPickler.dumps(obj)
File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
Process finished with exit code 1
Can anyone help me fix this?
I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing
results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
to
results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]
it seemed to work fine. I'm not actually sure why what you wrote doesn't work, but hopefully this will.
Note: removing the tuple around the arguments is unrelated to this issue, but it was causing another problem afterwards.
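Putting those two changes together, a minimal runnable sketch of the corrected version (same class, with the global and the argument tuple removed; the pandas/numpy imports are assumed) could look like this:
import concurrent.futures

import numpy as np
import pandas as pd

class test:
    @staticmethod
    def calc_corr(idx, df1, df2):
        # correlation between two 5-row windows, flattened column-wise
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        return df_tmp.corr().iloc[0, 1]

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # pass the arguments individually, not as one tuple
            results = [executor.submit(self.calc_corr, i, df1, df2)
                       for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    test().aa()
Here self.calc_corr resolves to the plain underlying function, which pickle can locate by its qualified name test.calc_corr, so the workers can import it.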

Class variable in multiprocessing - python

Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns this error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
return fun(*args)
File "C:/...temp_obj_output_standard.py", line 14, in print_name
print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/...temp_obj_output_standard.py", line 31, in <module>
output = [p.get() for p in results]
File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'Some_class' is not defined
Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have it, and I don't think the processes should conflict over it. Am I missing something?
Any suggestions on how to troubleshoot this? Do not worry about run_dill_encoded and apply_async; I am using this solution until I compile multiprocess on Python 3.x.
P.S. This is already enough, but Stack Overflow wants me to add more details; I'm not really sure what else to put.
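One workaround sketch, assuming the NameError comes from the dill-deserialized function looking up Some_class in the worker's global namespace (where it was never defined): reference the class through type(self) instead, so the lookup goes through the instance that was already shipped to the worker.
class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        # type(self) reaches the class through the instance dill sent over,
        # avoiding the missing global name Some_class in the worker
        print(type(self).class_var)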

How to obtain full stack trace after exception from function passed as parameter?

I have a wrapper function that takes another function as a parameter, catches any exception, and does something with it:
import sys

def exceptionCatchingWrapper(funcToCall, destForException, *args, **kwargs):
    try:
        r = funcToCall(*args, **kwargs)
    except:
        destForException["exc_info"] = sys.exc_info()
    else:
        return r
I realized that when an exception is caught, the stack trace taken from sys.exc_info() contains only information about exceptionCatchingWrapper() itself and nothing deeper. Is it possible to obtain the full stack trace after such a call, and if so, how?
import traceback

def a(x):
    b(x)

def b(x):
    x / 0

d = {}
exceptionCatchingWrapper(a, d, 10)
The traceback is stored in the dictionary:
>>> traceback.print_tb(d['exc_info'][2])
File "<stdin>", line 3, in exceptionCatchingWrapper
File "<stdin>", line 2, in a
File "<stdin>", line 2, in b
>>> traceback.print_exception(d['exc_info'][0],d['exc_info'][1],d['exc_info'][2])
Traceback (most recent call last):
File "<stdin>", line 3, in exceptionCatchingWrapper
File "<stdin>", line 2, in a
File "<stdin>", line 2, in b
ZeroDivisionError: integer division or modulo by zero
More information in the traceback module documentation.
Not sure if this is what you need, but these might be ways you can print the traceback:
import traceback

try:
    s += 1  # this doesn't exist yet
except:
    a = traceback.format_exc()
    print(a)
-or-
import traceback, sys

def DummyFunc2():
    s += 1  # this doesn't exist yet

def DummyFunc1():
    DummyFunc2()

try:
    DummyFunc1()
except:
    _, err, tb = sys.exc_info()
    tb_lines = traceback.extract_tb(tb)
    for idx, trace in enumerate(traceback.format_list(tb_lines)):
        print("[INDEX %d]\n%s" % (idx, trace))
    print(err)
output:
>>>
[INDEX 0]
File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 9, in <module>
DummyFunc1()
[INDEX 1]
File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 6, in DummyFunc1
DummyFunc2()
[INDEX 2]
File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 4, in DummyFunc2
s += 1 #this doesnt exist yet
local variable 's' referenced before assignment
>>>
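Both answers above are written for Python 2. In Python 3 the traceback travels with the exception object itself, so a minimal sketch of the same idea needs no sys.exc_info() bookkeeping:
import traceback

def DummyFunc2():
    s += 1  # this doesn't exist yet

def DummyFunc1():
    DummyFunc2()

try:
    DummyFunc1()
except Exception as err:
    # in Python 3 the traceback is attached to the exception object
    traceback.print_exception(type(err), err, err.__traceback__)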

Why does this implementation of multiprocessing.pool not work?

Here is the code I am using:
import os
from multiprocessing import Pool

import numpy as np
import sympy as sy

def initFunction(arg1, arg2):
    def funct(value):
        return arg1 * arg2 * value
    return funct

os.system("taskset -p 0xff %d" % os.getpid())
pool = Pool(processes=4)
t = np.linspace(0, 1, 10e3)
a, b, c, d, e, f, g, h = sy.symbols('a,b,c,d,e,f,g,h', commutative=False)
arg1 = sy.Matrix([[a, b], [c, d]])
arg2 = sy.Matrix([[e, f], [g, h]])
myFunct = initFunction(arg1, arg2)
m3 = map(myFunct, t)       # this works
m4 = pool.map(myFunct, t)  # this does NOT work
The error I'm getting is:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 540, in runfile
execfile(filename, namespace)
File "/home/justin/Research/mapTest.py", line 46, in <module>
m4 = pool.map(myFunct,t)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
So what does this error mean and how can I multiprocess this map function?
Objects that you pass between processes when using multiprocessing must be importable from the __main__ module, so that they can be unpickled in the child. Nested functions, like funct, are not importable from __main__, so you get that error. You can achieve what you're trying to do by using functools.partial instead:
from multiprocessing import Pool
from functools import partial

def funct(arg1, arg2, value):
    return arg1 * arg2 * value

if __name__ == "__main__":
    t = [1, 2, 3, 4]
    arg1 = 4
    arg2 = 5
    pool = Pool(processes=4)
    func = partial(funct, arg1, arg2)
    m4 = pool.map(func, t)
    print(m4)
Output:
[20, 40, 60, 80]
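As a side note, Python 3.3+ also offers Pool.starmap, which unpacks argument tuples itself; a sketch of the same example without partial:
from multiprocessing import Pool

def funct(arg1, arg2, value):
    return arg1 * arg2 * value

if __name__ == "__main__":
    with Pool(processes=4) as pool:
        # each tuple is unpacked into funct(arg1, arg2, value)
        m4 = pool.starmap(funct, [(4, 5, v) for v in [1, 2, 3, 4]])
    print(m4)  # [20, 40, 60, 80]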

KeyError: 0 using multiprocessing in python

I have the following code, in which I call a function compute_cluster that does some computations and writes the results to a txt file (each process writes its results to a different txt file independently). However, when I run the following code:
from multiprocessing import Pool

def main():
    p = Pool(19)
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

if __name__ == "__main__":
    main()
it crashes with the following error:
File "RMSD_calc.py", line 124, in <module>
main()
File "RMSD_calc.py", line 120, in main
p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 225, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 522, in get
raise self._value
KeyError: 0
When I searched online for the meaning of "KeyError: 0", I didn't find anything helpful, so any suggestions about why this error happens are highly appreciated.
The KeyError happens inside compute_cluster() in a child process, and p.map() re-raises it for you in the parent:
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])
Output
Traceback (most recent call last):
File "raise-exception-in-child.py", line 9, in <module>
p.map(f, [None])
File "/usr/lib/python2.7/multiprocessing/pool.py", line 227, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 528, in get
raise self._value
KeyError: 0
To see the full traceback, catch the exception in the child process:
import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed" % (args,))

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])
Output
ERROR:root:f(None) failed
Traceback (most recent call last):
File "raise-exception-in-child.py", line 10, in f_mp
return f(args)
File "raise-exception-in-child.py", line 6, in f
d[0] # <-- raises KeyError
KeyError: 0
It shows that d[0] caused the exception.
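If the parent should still fail when a child does, a small variation of f_mp (a sketch) logs the full traceback and then re-raises:
def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed", args)
        raise  # re-raise so p.map() in the parent still sees the failure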
