I have just started using Parallel Python (pp) in Python 3, and I am having trouble submitting an object's method together with its arguments.
Is it possible that an argument cannot be a list? I could not find anyone with the same error message, so I am confused.
import pp, numpy

class myobj:
    def __init__(self):
        """some code"""

    def myfunc(self, data, N):
        return [data[numpy.random.randint(0, N)] for i in range(N)]
if __name__ == "__main__":
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers)
    proc = myobj()
    data = [[1, 2, 3], [4, 5, 6]]
    N = 2
    results = []
    for i in range(10):
        f = job_server.submit(proc.myfunc, (data, N), modules=('numpy',))
        results.append(f)
    for f in results:
        val = f()
        print(val)
A fatal error has occured during the function execution
Traceback (most recent call last):
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 94, in run
    __fname, __fobjs = self.t.creceive(preprocess)
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/transport.py", line 128, in creceive
    self.rcache[hash1] = tuple(map(preprocess, (msg, )))[0]
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in preprocess
    fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in <listcomp>
    fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
  File "<string>", line 1
    gging(self, dataset, N):
        ^
SyntaxError: invalid syntax
None
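As a quick way to test whether a list argument is itself the problem, here is a minimal sketch that submits a plain module-level function instead of a bound method; it is an illustrative experiment, not a confirmed fix:

import pp
import numpy

def sample(data, N):
    # Same body as myfunc, but as a top-level function.
    return [data[numpy.random.randint(0, N)] for i in range(N)]

if __name__ == "__main__":
    job_server = pp.Server(ppservers=())
    job = job_server.submit(sample, ([[1, 2, 3], [4, 5, 6]], 2), modules=('numpy',))
    print(job())  # if this prints a list, list arguments are fine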
I am trying to use multiprocessing in a class in the following code:
import concurrent.futures
import numpy as np
import pandas as pd

class test:
    def __init__(self):
        return

    global calc_corr
    @staticmethod
    def calc_corr(idx, df1, df2):
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        corr = df_tmp.corr().iloc[0, 1]
        return corr

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    t = test()
    t.aa()
I am using a @staticmethod because it is not related to the class; it's just a computing tool. But using it raises the following error when running the code:
D:\anaconda3\python.exe C:/Users/jonas/Desktop/728_pj/test.py
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
  File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\jonas\Desktop\728_pj\test.py", line 31, in <module>
    t.aa()
  File "C:\Users\jonas\Desktop\728_pj\test.py", line 26, in aa
    print(f.result())
  File "D:\anaconda3\lib\concurrent\futures\_base.py", line 438, in result
    return self.__get_result()
  File "D:\anaconda3\lib\concurrent\futures\_base.py", line 390, in __get_result
    raise self._exception
  File "D:\anaconda3\lib\multiprocessing\queues.py", line 245, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "D:\anaconda3\lib\multiprocessing\reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
TypeError: cannot pickle 'staticmethod' object

Process finished with exit code 1
Can anyone help me fix this?
I think it is somehow caused by the staticmethod being declared as global. When I tried removing the global calc_corr line and changing

results = [executor.submit(calc_corr, (i, df1, df2)) for i in range(20)]

to

results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]

it seemed to work fine. I'm not actually sure why what you wrote doesn't work, but hopefully this will; see the sketch below.
Note: removing the tuple around the arguments is unrelated to this issue, but it was causing another issue afterwards.
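Putting the two changes together, a minimal sketch of the corrected class (assuming the same pandas/numpy/concurrent.futures setup as the question):

import concurrent.futures
import numpy as np
import pandas as pd

class test:
    @staticmethod
    def calc_corr(idx, df1, df2):
        # Correlate two flattened 5-row windows of the input frames.
        arr1 = df1.iloc[idx:idx+5, :].values.flatten('F')
        arr2 = df2.iloc[idx:idx+5, :].values.flatten('F')
        df_tmp = pd.DataFrame([arr1, arr2]).T
        df_tmp.dropna(how='any', inplace=True)
        return df_tmp.corr().iloc[0, 1]

    def aa(self):
        df1 = pd.DataFrame(np.random.normal(size=(100, 6)))
        df2 = pd.DataFrame(np.random.normal(size=(100, 6)))
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Pass the arguments individually, not as a single tuple.
            results = [executor.submit(self.calc_corr, i, df1, df2) for i in range(20)]
            for f in concurrent.futures.as_completed(results):
                print(f.result())

if __name__ == '__main__':
    test().aa()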
Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns the following error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
    return fun(*args)
  File "C:/...temp_obj_output_standard.py", line 14, in print_name
    print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/...temp_obj_output_standard.py", line 31, in <module>
    output = [p.get() for p in results]
  File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
    output = [p.get() for p in results]
  File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
    raise self._value
NameError: name 'Some_class' is not defined

Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have the variable, and I don't think the processes should conflict over it. Am I missing something?
Any suggestions on how to troubleshoot it? Do not worry about run_dill_encoded and apply_async; I am using this solution until I compile multiprocess on Python 3.x.
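One thing worth checking (a sketch of a possible cause, not a confirmed diagnosis): print_name looks up the module-level name Some_class, and that global name may not exist in the worker process after dill reconstructs the function there. Referring to the class through the instance avoids the global lookup:

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        # type(self) reaches the class object directly, without needing
        # the name 'Some_class' to exist in the child's namespace.
        print(type(self).class_var)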
I have a wrapper function that takes another function as a parameter, catches any exception, and does something with it:
import sys

def exceptionCatchingWrapper(funcToCall, destForException, *args, **kwargs):
    try:
        r = funcToCall(*args, **kwargs)
    except:
        destForException["exc_info"] = sys.exc_info()
    else:
        return r
I realized that when an exception is caught, the stack trace taken from sys.exc_info() contains only information about exceptionCatchingWrapper() itself and nothing deeper. Is it possible to obtain the full stack trace after such a call, and if so, how?
import traceback

def a(x):
    b(x)

def b(x):
    x / 0

d = {}
exceptionCatchingWrapper(a, d, 10)
The traceback is stored in the dictionary:
>>> traceback.print_tb(d['exc_info'][2])
  File "<stdin>", line 3, in exceptionCatchingWrapper
  File "<stdin>", line 2, in a
  File "<stdin>", line 2, in b
>>> traceback.print_exception(d['exc_info'][0], d['exc_info'][1], d['exc_info'][2])
Traceback (most recent call last):
  File "<stdin>", line 3, in exceptionCatchingWrapper
  File "<stdin>", line 2, in a
  File "<stdin>", line 2, in b
ZeroDivisionError: integer division or modulo by zero
More information in the traceback module documentation.
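If, instead of printing it, you want to re-raise the stored exception later with its full traceback, here is a minimal sketch (assuming Python 3, with d populated by exceptionCatchingWrapper as above):

exc_type, exc_value, exc_tb = d['exc_info']
if exc_value is not None:
    # Re-raise the original exception with the original traceback attached,
    # so the frames from a() and b() are preserved for the caller.
    raise exc_value.with_traceback(exc_tb)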
Not sure if this is what you need, but here are two ways you can print the traceback:
import traceback

try:
    s += 1 #this doesnt exist yet
except:
    a = traceback.format_exc()
    print a
-or-
import traceback, sys

def DummyFunc2():
    s += 1 #this doesnt exist yet

def DummyFunc1():
    DummyFunc2()

try:
    DummyFunc1()
except:
    _, err, tb = sys.exc_info()
    tb_lines = traceback.extract_tb(tb)
    for idx, trace in enumerate(traceback.format_list(tb_lines)):
        print "[INDEX %d]\n%s" % (idx, trace)
    print err
Output:
>>>
[INDEX 0]
  File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 9, in <module>
    DummyFunc1()
[INDEX 1]
  File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 6, in DummyFunc1
    DummyFunc2()
[INDEX 2]
  File "C:/Python27/Lib/site-packages/xy/printtrace.py", line 4, in DummyFunc2
    s += 1 #this doesnt exist yet
local variable 's' referenced before assignment
>>>
Here is the code I am using:
import os
import numpy as np
import sympy as sy
from multiprocessing import Pool

def initFunction(arg1, arg2):
    def funct(value):
        return arg1 * arg2 * value
    return funct

os.system("taskset -p 0xff %d" % os.getpid())
pool = Pool(processes=4)
t = np.linspace(0, 1, 10e3)
a, b, c, d, e, f, g, h = sy.symbols('a,b,c,d,e,f,g,h', commutative=False)
arg1 = sy.Matrix([[a, b], [c, d]])
arg2 = sy.Matrix([[e, f], [g, h]])
myFunct = initFunction(arg1, arg2)
m3 = map(myFunct, t)       # this works
m4 = pool.map(myFunct, t)  # this does NOT work
The error I'm getting is:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 540, in runfile
    execfile(filename, namespace)
  File "/home/justin/Research/mapTest.py", line 46, in <module>
    m4 = pool.map(myFunct,t)
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
    raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
So what does this error mean and how can I multiprocess this map function?
Objects that you pass between processes when using multiprocessing must be importable from the __main__ module, so that they can be unpickled in the child. Nested functions, like funct, are not importable from __main__, so you get that error. You can achieve what you're trying to do by using functools.partial instead:
from multiprocessing import Pool
from functools import partial

def funct(arg1, arg2, value):
    return arg1 * arg2 * value

if __name__ == "__main__":
    t = [1, 2, 3, 4]
    arg1 = 4
    arg2 = 5
    pool = Pool(processes=4)
    func = partial(funct, arg1, arg2)
    m4 = pool.map(func, t)
    print(m4)
Output:
[20, 40, 60, 80]
I have the following code, in which I try to call a function compute_cluster that does some computations and writes the results to a txt file (each process writes its results to a different txt file independently). However, when I run the following code:
def main():
    p = Pool(19)
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

if __name__ == "__main__":
    main()
it crashes with the following error:
File "RMSD_calc.py", line 124, in <module>
main()
File "RMSD_calc.py", line 120, in main
p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 225, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 522, in get
raise self._value
KeyError: 0
When I searched online for the meaning of "KeyError: 0", I didn't find anything helpful, so any suggestions as to why this error happens are highly appreciated.
The KeyError happens in compute_cluster() in a child process, and p.map() re-raises it for you in the parent:
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])
Output
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 9, in <module>
    p.map(f, [None])
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 227, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 528, in get
    raise self._value
KeyError: 0
To see the full traceback, catch the exception in the child process:
import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed" % (args,))

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])
Output
ERROR:root:f(None) failed
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 10, in f_mp
    return f(args)
  File "raise-exception-in-child.py", line 6, in f
    d[0] # <-- raises KeyError
KeyError: 0
It shows that d[0] caused the exception.
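The same wrapper pattern can be applied directly to the question's code; a sketch, assuming compute_cluster is defined as in the question:

import logging
from multiprocessing import Pool

def compute_cluster_logged(args):
    try:
        return compute_cluster(args)  # the asker's function, defined elsewhere
    except Exception:
        # Log the full traceback from inside the child process.
        logging.exception("compute_cluster(%r) failed" % (args,))

def main():
    p = Pool(19)
    p.map(compute_cluster_logged, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

The full traceback then appears in the worker's log output and points at the exact line inside compute_cluster that raises the KeyError.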