Why does this implementation of multiprocessing.pool not work? - python

Here is the code I am using:
import os
import numpy as np
import sympy as sy
from multiprocessing import Pool

def initFunction(arg1, arg2):
    def funct(value):
        return arg1 * arg2 * value
    return funct

os.system("taskset -p 0xff %d" % os.getpid())
pool = Pool(processes=4)
t = np.linspace(0, 1, 10e3)
a,b,c,d,e,f,g,h = sy.symbols('a,b,c,d,e,f,g,h', commutative=False)
arg1 = sy.Matrix([[a,b],[c,d]])
arg2 = sy.Matrix([[e,f],[g,h]])
myFunct = initFunction(arg1, arg2)
m3 = map(myFunct, t)       # this works
m4 = pool.map(myFunct, t)  # this does NOT work
The error I'm getting is:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 540, in runfile
    execfile(filename, namespace)
  File "/home/justin/Research/mapTest.py", line 46, in <module>
    m4 = pool.map(myFunct,t)
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
    raise self._value
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
So what does this error mean and how can I multiprocess this map function?

Objects that you pass between processes with multiprocessing must be picklable so they can be reconstructed in the child, and plain functions are pickled by reference, i.e. by module and name. A nested function like funct cannot be looked up by name at the top level of its module, so you get that error. You can achieve what you're trying to do by wrapping a module-level function in a functools.partial instead:
from multiprocessing import Pool
from functools import partial

def funct(arg1, arg2, value):
    return arg1 * arg2 * value

if __name__ == "__main__":
    t = [1, 2, 3, 4]
    arg1 = 4
    arg2 = 5
    pool = Pool(processes=4)
    func = partial(funct, arg1, arg2)
    m4 = pool.map(func, t)
    print(m4)
Output:
[20, 40, 60, 80]
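The same pattern should carry over to the original sympy code, since a partial pickles as long as the function it wraps is defined at module level. This is only a sketch, and it assumes sympy Matrix and Symbol objects pickle cleanly (they normally do):
from multiprocessing import Pool
from functools import partial
import numpy as np
import sympy as sy

def funct(arg1, arg2, value):
    # Module-level, so the pool can pickle it by reference.
    return arg1 * arg2 * value

if __name__ == "__main__":
    t = np.linspace(0, 1, 10000)
    a, b, c, d, e, f, g, h = sy.symbols('a,b,c,d,e,f,g,h', commutative=False)
    arg1 = sy.Matrix([[a, b], [c, d]])
    arg2 = sy.Matrix([[e, f], [g, h]])
    pool = Pool(processes=4)
    m4 = pool.map(partial(funct, arg1, arg2), t)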


Calling a python script within another python script with args

Current Implementation which needs optimization
import subprocess

childprocess = subprocess.Popen(
    ['python',
     '/full_path_to_directory/called_script.py',
     'arg1',
     'arg2'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
returnVal = childprocess.communicate()[0]
print(returnVal)
Is this the correct way to call another script (called_script.py) that sits in the current working directory?
Is there a better way to call the other script? I tried importing the script instead, but it gives me the error below.
called_script.py
def func(arg1, arg2, arg3):
    # doSomething
    # sys.out.write(returnVal)

if __name__ == "__main__":
    func(arg1, arg2, arg3)
Implementation 2 (throws an exception and errors out)
caller_script.py
Both of them are under the same path (i.e. /home/bin)
import called_script
returnVal = called_script.func(arg1,arg2,arg3)
print(returnVal)
Output:
nullNone
Traceback (most recent call last):
  File "/path_to_caller/caller_script.py", line 89, in <module>
    l.simple_bind_s(binddn, pw)
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
    msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
    return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
    result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not None
Another alternative I tried also gave me an error:
Implementation 3 (throws an exception and errors out)
caller_script.py
import ldap
import subprocess
import sys

returnVal = subprocess.call(['python', 'called_script.py', 'arg1', 'arg2'])
print(returnVal)
l = ldap.initialize(cp.get('some_config_ref', 'some_url'))
try:
    l.protocol_version = ldap.VERSION3
    l.simple_bind_s(binddn, returnVal)
except ldap.INVALID_CREDENTIALS:
    sys.stderr.write("Your username or password is incorrect.")
    sys.exit(1)
except ldap.LDAPError, e:
    if type(e.message) == dict and e.message.has_key('xyz'):
        sys.stderr.write(e.message['xyz'])
    else:
        sys.stderr.write(e)
    sys.exit(1)
Output:
returnVal0
Traceback (most recent call last):
  File "./path_to_script/caller_script.py", line 88, in <module>
    l.simple_bind_s(binddn, pw)
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
    msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
    return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
  File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
    result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not int
Here is an example of calling a function from another file: you pass one value, a list that can hold an arbitrary amount of numbers, and you get back the sum. Make sure the two files are in the same directory, or you will need the full path. The func in your called_script.py does not allow you to pass a value as written.
called_script.py
def add_many(list_add):
    the_sum = sum(list_add)
    return the_sum
caller_script.py
import called_script

a_list = [1, 2, 3, 4]
the_sum = called_script.add_many(a_list)
print(the_sum)
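If you really do need a separate process rather than an import, here is a sketch of the subprocess route. It assumes the child reads its arguments from sys.argv and writes its result to stdout; the sys.argv handling is my addition, not from the question:
called_script.py
import sys

def add_many(list_add):
    return sum(list_add)

if __name__ == "__main__":
    # Command-line arguments arrive as strings, so convert them first.
    numbers = [int(x) for x in sys.argv[1:]]
    sys.stdout.write(str(add_many(numbers)))
caller_script.py
import subprocess

# check_output runs the child and returns whatever it wrote to stdout.
out = subprocess.check_output(['python', 'called_script.py', '1', '2', '3', '4'])
print(out)  # prints b'10' on Python 3; decode and convert as needed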

Parallel Python not able to take arguments properly

I have just started using Parallel Python (pp) in Python 3, and I am having trouble submitting the arguments of an object's method.
Is it possible that an argument cannot be a list? I could not find anyone with the same error message as mine, so I am confused.
import pp, numpy

class myobj:
    def __init__(self):
        """some code"""
    def myfunc(self, data, n):
        return [data[numpy.random.randint(0, N)] for i in range(N)]

if __name__ == "__main__":
    ppservers = ()
    job_server = pp.Server(ppservers=ppservers)
    proc = myobj()
    data = [[1, 2, 3], [4, 5, 6]]
    N = 2
    results = []
    for i in range(10):
        f = job_server.submit(proc.myfunc, (data, N), modules=('numpy',))
        results.append(f)
    for f in results:
        val = f()
        print(val)
A fatal error has occured during the function execution
Traceback (most recent call last):
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 94, in run
    __fname, __fobjs = self.t.creceive(preprocess)
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/transport.py", line 128, in creceive
    self.rcache[hash1] = tuple(map(preprocess, (msg, )))[0]
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in preprocess
    fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
  File "/anaconda3/envs/mvi/lib/python3.6/site-packages/ppft/__main__.py", line 60, in <listcomp>
    fobjs = [compile(fsource, '<string>', 'exec') for fsource in fsources]
  File "<string>", line 1
    gging(self, dataset, N):
                           ^
SyntaxError: invalid syntax
None
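The truncated def line in the traceback (gging(self, dataset, N):) suggests ppft garbled the extracted source of the bound method while shipping it to a worker: pp submits functions by sending their source to be re-compiled, and bound methods of classes defined in __main__ seem to trip it up. A workaround that usually sidesteps this is to submit a plain module-level function instead (a sketch using the names from the question):
import pp, numpy

def myfunc(data, N):
    # Module-level function: pp can extract and recompile its source cleanly.
    return [data[numpy.random.randint(0, N)] for i in range(N)]

if __name__ == "__main__":
    job_server = pp.Server(ppservers=())
    data = [[1, 2, 3], [4, 5, 6]]
    N = 2
    jobs = [job_server.submit(myfunc, (data, N), modules=('numpy',))
            for i in range(10)]
    for job in jobs:
        print(job())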

program with python, but got an error _pickle.PicklingError

I wrote the code below:
import random, time, queue
from multiprocessing.managers import BaseManager

task_queue = queue.Queue()
result_queue = queue.Queue()

class QueueManager(BaseManager):
    pass

QueueManager.register('get_task_queue', callable=lambda: task_queue)
QueueManager.register('get_result_queue', callable=lambda: result_queue)
manager = QueueManager(address=('', 5000), authkey=b'abd')
manager.start()
task = manager.get_task_queue()
result = manager.get_result_queue()
for i in range(10):
    n = random.randint(0, 10000)
    print('Put task %d...' % n)
    task.put(n)
print('Try get result...')
for i in range(10):
    r = result.get(timeout=10)
    print('Result: %s' % r)
manager.shutdown()
print('master exit.')
but when it runs, I receive this error:
Traceback (most recent call last):
  File "D:/PycharmProjects/test/task_master.py", line 23, in <module>
    manager.start()
  File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\managers.py", line 479, in start
    self._process.start()
  File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\process.py", line 105, in start
    self._popen = self._Popen(self)
  File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\context.py", line 313, in _Popen
    return Popen(process_obj)
  File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\reduction.py", line 59, in dump
    ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function <lambda> at 0x03A67C48>: attribute lookup <lambda> on __main__ failed

Process finished with exit code 1
I found an answer on the site 讨论 - 廖雪峰的官方网站 (the discussion board of Liao Xuefeng's official website):
Step 1:
Don't use a lambda in QueueManager.register; you have to pass a named, module-level function instead, for example:
def return_task_queue():
    global task_queue
    return task_queue

QueueManager.register('get_task_queue', callable=return_task_queue)
Step 2:
You have to give an explicit IP address when you create the QueueManager, for example:
QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
Step 3:
You have to put all the code that uses the QueueManager, tasks, and results inside a function, for example:
def test():
    QueueManager.register('get_task_queue', callable=return_task_queue)
    QueueManager.register('get_result_queue', callable=return_result_queue)
    manager = QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
    manager.start()
    ....
and you have to call that test function from the main guard, for example:
if __name__ == '__main__':
    test()
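Putting the three steps together, a minimal sketch of the corrected master script might look like this. It assumes return_result_queue mirrors return_task_queue (the answer only hints at it), and everything that touches the manager lives inside test() so Windows' spawn start method can re-import the module safely:
import random, queue
from multiprocessing.managers import BaseManager

task_queue = queue.Queue()
result_queue = queue.Queue()

def return_task_queue():
    return task_queue

def return_result_queue():
    return result_queue

class QueueManager(BaseManager):
    pass

def test():
    QueueManager.register('get_task_queue', callable=return_task_queue)
    QueueManager.register('get_result_queue', callable=return_result_queue)
    manager = QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
    manager.start()
    task = manager.get_task_queue()
    result = manager.get_result_queue()
    for i in range(10):
        n = random.randint(0, 10000)
        print('Put task %d...' % n)
        task.put(n)
    print('Try get result...')
    for i in range(10):
        # Still needs a separate worker process to produce results.
        r = result.get(timeout=10)
        print('Result: %s' % r)
    manager.shutdown()
    print('master exit.')

if __name__ == '__main__':
    test()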

Pass a function as argument to a process target with Pool.map()

I'm developing software to benchmark some Python scripts using different methods (single thread, multiple threads, multiple processes). So I need to execute the same function (with the same arguments, and so on) in different processes.
How can I pass the function to execute as an argument to a process target?
What I currently understand is that a plain reference to a function cannot work, because the referenced function is not visible to the other processes; that is why I tried a custom manager for the shared memory.
Here is a simplified version of the code:
#!/bin/python
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat

class FunctionManager(BaseManager):
    pass

def maFunction(a, b):
    print(a + b)

def threadedFunction(f_i_args):
    (f, i, args) = f_i_args
    f(*args)

FunctionManager.register('Function', maFunction)

myManager = FunctionManager()
myManager.start()

myManager.Function(0, 0)                   # Test 1
threadedFunction((maFunction, 0, (1, 1)))  # Test 2

p = Pool()
args = zip(repeat(myManager.Function), range(10), repeat(2, 2))
p.map(threadedFunction, args)  # Does not work
p.join()
myManager.shutdown()
The current pickling error at p.map() is the following:
2
0
Traceback (most recent call last):
  File "./test.py", line 27, in <module>
    p.map(threadedFunction, args) # Does not work
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 260, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
    raise self._value
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 385, in _handle_tasks
    put(task)
  File "/usr/lib/python3.5/multiprocessing/connection.py", line 206, in send
    self._send_bytes(ForkingPickler.dumps(obj))
  File "/usr/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'weakref'>: attribute lookup weakref on builtins failed
I got a slightly different error from running your code. Your key problem, I think, is that you pass a function to FunctionManager.register() instead of a class. I also had to remove your zip and build the argument list manually to make it work, but you can probably fix that; this is just an example.
The following code works and does something, using your exact structure. I would do this a bit differently and not use BaseManager, but I assume you have your reasons.
#!/usr/bin/python3.5
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat

class FunctionManager(BaseManager):
    pass

class maClass(object):
    def __init__(self):
        pass
    def maFunction(self, a, b):
        print(a + b)

def threadedFunction(f_i_args):
    (f, i, args) = f_i_args
    f(*args)

FunctionManager.register('Foobar', maClass)

myManager = FunctionManager()
myManager.start()
foobar = myManager.Foobar()

foobar.maFunction(0, 0)                           # Test 1
threadedFunction((foobar.maFunction, 0, (1, 1)))  # Test 2

p = Pool()
#args = list(zip(repeat(foobar.maFunction), range(10), repeat(2, 2)))
args = []
for i in range(10):
    args.append([foobar.maFunction, i, (i, 2)])
p.map(threadedFunction, args)  # Does now work
p.close()
p.join()
myManager.shutdown()
Or did I misunderstand your problem completely?
Hannu
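A side note on the underlying mechanics: module-level functions are pickled by reference (module name plus function name), so for a benchmarking harness it is often enough to pass the function object itself through Pool.map, with no manager at all. A minimal sketch of that approach:
from multiprocessing import Pool

def maFunction(a, b):
    return a + b

def threadedFunction(f_i_args):
    # Unpack (function, index, args) and call the function.
    (f, i, args) = f_i_args
    return f(*args)

if __name__ == "__main__":
    args = [(maFunction, i, (i, 2)) for i in range(10)]
    with Pool() as p:
        print(p.map(threadedFunction, args))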

Class variable in multiprocessing - python

Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns this error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
    return fun(*args)
  File "C:/...temp_obj_output_standard.py", line 14, in print_name
    print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/...temp_obj_output_standard.py", line 31, in <module>
    output = [p.get() for p in results]
  File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
    output = [p.get() for p in results]
  File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
    raise self._value
NameError: name 'Some_class' is not defined

Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have one, and I don't think the processes should conflict over it. Am I missing something?
Any suggestions on how to troubleshoot it? Do not worry about run_dill_encoded and apply_async; I am using this solution until I can compile multiprocess on Python 3.x.
P.S. This is already enough detail, but Stack Overflow wants me to add more; I'm not really sure what else to put.
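One thing worth trying, assuming the problem is that dill serializes print_name by value and the worker then cannot resolve the global name Some_class inside its body, is to reach the class through the instance rather than through the global name (a sketch, not a guaranteed fix):
def print_name(self):
    print("we are in object " + self.name)
    # type(self) avoids looking up Some_class as a global in the worker.
    print(type(self).class_var)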
