Python: threading + curve_fit: null argument to internal routine

I'm having some problems with the following code, which is supposed to do Gaussian fits using threads:
from PIL import Image
import numpy as np
from scipy.optimize import curve_fit
import threading

class myThread(threading.Thread):
    def __init__(self, index):
        threading.Thread.__init__(self)
        self.index = index

    def run(self):
        for i in np.arange(n_Bild.shape[1]):
            curve_fit(self.gauss, x_x, Intensitaet[self.index, ...], p0=(Intensitaet[self.index, i], i, 1, 0))

    def gauss(self, x, a, b, c, d):
        return a * np.exp(-(x-b) ** 2 / (2 * c ** 2)) + d

Bild = Image.open("test.bmp")
n_Bild = np.asarray(Bild)
Intensitaet = np.zeros((n_Bild.shape[0], n_Bild.shape[1]), dtype=np.uint32)
Intensitaet += n_Bild[..., 0]
Intensitaet += n_Bild[..., 1]
Intensitaet += n_Bild[..., 2]
x_x = np.arange(n_Bild.shape[1])  # pixels along the "x" axis

threads = []

# Create new threads
thread0 = myThread(0)
thread1 = myThread(1)

# Add threads to thread list
threads.append(thread0)
threads.append(thread1)

# Start new threads
thread0.start()
thread1.start()

# Wait for all threads to complete
for t in threads:
    t.join()
print "finished"
If I run my program I get an error:
SystemError: null argument to internal routine
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Anaconda\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "G:/DropBox/Daten/Dropbox/Uni/Bachelorarbeit/Python/ThreadTest.py", line 12, in run
curve_fit(self.gauss, x_x, Intensitaet[self.index, ...], p0=(Intensitaet[self.index, i], i, 1, 0))
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 533, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 378, in leastsq
gtol, maxfev, epsfcn, factor, diag)
error: Internal error constructing argument list.
If I only run one thread instead of two, the program works fine, but I have no idea what I'm doing wrong.
Thanks for your help.

I believe that leastsq() is not thread-safe, so you need to either use a threading.Lock() around your calls to curve_fit() (which might defeat your purpose) or use multiprocessing.
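If you go the multiprocessing route, a minimal sketch might look like the following, assuming the goal is still one Gaussian fit per row of the intensity array. The helper fit_row and the synthetic test data are made up for this example; gauss has to live at module level so it can be pickled for the worker processes.
from multiprocessing import Pool

import numpy as np
from scipy.optimize import curve_fit

def gauss(x, a, b, c, d):
    return a * np.exp(-(x - b) ** 2 / (2 * c ** 2)) + d

def fit_row(row):
    # Fit one row independently in its own worker process.
    x = np.arange(row.size)
    i = int(np.argmax(row))
    popt, _ = curve_fit(gauss, x, row, p0=(row[i], i, 1, 0))
    return popt

if __name__ == '__main__':
    # Synthetic stand-in for Intensitaet: four rows, each a shifted Gaussian peak.
    x = np.arange(100)
    Intensitaet = np.array([gauss(x, 200.0, 30.0 + 10.0 * k, 2.0, 10.0) for k in range(4)])

    # One worker per fit; the non-thread-safe MINPACK routine is never entered
    # twice at once inside a single interpreter.
    pool = Pool(processes=2)
    params = pool.map(fit_row, Intensitaet)
    pool.close()
    pool.join()
    print(params)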

Related

concurrent.futures error when using imports from another file?

I have a piece of toy code which does some dummy work to test parallelization. The code works fine as it is, but it fails if I try to import a class from another file. It gives me the error BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
#from Geometry import *   # <- this line causes the code to break
import concurrent.futures
import itertools
import random
import time

class ConProc:
    def dummy(self, param):
        time.sleep(random.random() * 3)  # simulate a longer job
        return param[0] * param[1]

    def main(self):
        ht_iterator = range(4)
        wt_iterator = range(5)
        paramlist = list(itertools.product(ht_iterator, wt_iterator))

        with concurrent.futures.ProcessPoolExecutor() as executor:
            ret = executor.map(self.dummy, paramlist)
            for result in ret:
                print(result)

if __name__ == '__main__':
    cp = ConProc()
    cp.main()
Contents of Geometry.py:
import math
import numpy as np

class vector(np.ndarray):
    def __new__(cls, input_array):
        obj = np.asarray(input_array).view(cls)
        return obj

    def __array_finalize__(self, obj):
        if obj is None: return

class ray(vector):
    pass

class sphere:
    def __init__(self, center, radius, material):
        self.center = center
        self.radius = radius
        self.material = material

    def intersects(self, ray, ray_direction):
        # import pdb; pdb.set_trace()
        sphere_to_ray = ray - self.center
        b = np.dot(2*ray_direction, sphere_to_ray)
        c = np.dot(sphere_to_ray, sphere_to_ray) - self.radius*self.radius
        disc = b * b - 4 * c
        if disc >= 0:
            dist = (-b - math.sqrt(disc)) / 2
            if dist > 0:
                return dist
        return None

    def normal(self, hit_pos):
        return (hit_pos - self.center) / np.linalg.norm(hit_pos - self.center)
I find this problem puzzling because this error occurs even if I don't actually use anything from Geometry.
Sometimes I also get this error BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anymore
Additional Info:
Stack-trace -
Traceback (most recent call last):
File "C:\Users\test_conc.py", line 42, in <module>
test = cp.main()
File "C:\Users\test_conc.py", line 35, in main
for result in ret:
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\process.py", line 476, in _chain_from_iterable_of_lists
for element in iterable:
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 586, in result_iterator
yield fs.pop().result()
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 432, in result
return self.__get_result()
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
raise self._exception
BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

Proper way to share a list between processes?

I want to set up two subprocesses: subprocess1 keeps generating data (as lists), and subprocess2 is in charge of processing the data sent from subprocess1.
I used multiprocessing.Manager().list() to create a shared list. But this is the error it reports:
FileNotFoundError: [WinError 2]
Code
I simplified the code as below:
PS: it needs to be run in a terminal.
import multiprocessing as mp
import random
import time

def generator(a, b, tick):  # simulating data collection; a list is generated at random and passed to another shared list
    counter = 0
    while True:
        time.sleep(1)
        a.append([random.uniform(1,5), random.uniform(1,5), random.uniform(1,5), random.uniform(1,5)])
        counter += 1
        print('generate says', a[:])
        if counter % 5 == 0:
            b.append(a[:])
            tick.value = 1  # Telling 'printer' func to print.
            for _ in a:
                a.remove(_)

def printer(b, tick):  # simulating data processing; only printing data received from the 'generator' func here
    while True:
        time.sleep(1)
        if tick.value == 1:
            time.sleep(1)
            print('printer says', b[:])
            tick.value = 0
            for _ in b:
                b.remove(_)

if __name__ == '__main__':
    tick = mp.Value('i', 0)
    a = mp.Manager().list()
    b = mp.Manager().list()
    p1 = mp.Process(target=generator, args=(a, b, tick))
    p2 = mp.Process(target=printer, args=(b, tick))
    p1.start()
    p2.start()
Error
Traceback (most recent call last):
File "d:\miniconda\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "d:\miniconda\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "D:\Program Files (x86)\onedrive\nilm\pi\redd\niubi.py", line 9, in generater
a.append([random.uniform(1,5),random.uniform(1,5),random.uniform(1,5),random.uniform(1,5)])
File "<string>", line 2, in append
File "d:\miniconda\lib\multiprocessing\managers.py", line 792, in _callmethod
self._connect()
File "d:\miniconda\lib\multiprocessing\managers.py", line 779, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
File "d:\miniconda\lib\multiprocessing\connection.py", line 490, in Client
c = PipeClient(address)
File "d:\miniconda\lib\multiprocessing\connection.py", line 691, in PipeClient
_winapi.WaitNamedPipe(address, 1000)
FileNotFoundError: [WinError 2] The system cannot find the file specified.
There were a few things to fix, but the primary issue is that you need to call Process.join, as seen below. Without it, the main process exits right after starting the children, the Manager's server process shuts down with it, and the children then fail with FileNotFoundError when they next try to reach the shared lists.
import multiprocessing as mp
import random
import time

...  # generator and printer definitions are unchanged

if __name__ == '__main__':
    manager = mp.Manager()  # Create one instance of the manager and reuse it
    a = manager.list()
    b = manager.list()
    tick = mp.Value('i', 0)
    p1 = mp.Process(target=generator, args=(a, b, tick))
    p2 = mp.Process(target=printer, args=(b, tick))
    p1.start()
    p2.start()
    p1.join()  # Join, to ensure p1 and p2 end
    p2.join()

Python multiprocessing blocks indefinitely in waiter.acquire()

Can someone explain why this code blocks and never completes?
I've followed a couple of examples for multiprocessing and I've written some very similar code that does not get blocked. But, obviously, I cannot see the difference between that working code and the code below. Everything sets up fine, I think. It gets all the way to .get(), but none of the processes ever finish.
The problem is that python3 blocks indefinitely in waiter.acquire(), which you can tell by interrupting it and reading the backtrace.
$ python3 ./try415.py
^CTraceback (most recent call last):
File "./try415.py", line 43, in <module>
ps = [ res.get() for res in proclist ]
File "./try415.py", line 43, in <listcomp>
ps = [ res.get() for res in proclist ]
File "/usr/lib64/python3.6/multiprocessing/pool.py", line 638, in get
self.wait(timeout)
File "/usr/lib64/python3.6/multiprocessing/pool.py", line 635, in wait
self._event.wait(timeout)
File "/usr/lib64/python3.6/threading.py", line 551, in wait
signaled = self._cond.wait(timeout)
File "/usr/lib64/python3.6/threading.py", line 295, in wait
waiter.acquire()
KeyboardInterrupt
Here's the code
from multiprocessing import Pool
from scipy import optimize
import numpy as np

def func(t, a, b, c):
    return 0.5*a*t**2 + b*t + c

def funcwrap(t, params):
    return func(t, *params)

def fitWithErr(procid, yFitValues, simga, func, p0, args, bounds):
    np.random.seed()  # force new seed
    randomDelta = np.random.normal(0., sigma, len(yFitValues))
    randomdataY = yFitValues + randomDelta
    errfunc = lambda p, x, y: func(p, x) - y
    optResult = optimize.least_squares(errfunc, p0, args=args, bounds=bounds)
    return optResult.x

def fit_bootstrap(function, datax, datay, p0, bounds, aprioriUnc):
    errfunc = lambda p, x, y: function(x, p) - y
    optResult = optimize.least_squares(errfunc, x0=p0, args=(datax, datay), bounds=bounds)
    pfit = optResult.x
    residuals = optResult.fun
    fity = function(datax, pfit)

    numParallelProcesses = 2**2  # should be equal to number of ALUs
    numTrials = 2**2  # this many random data sets are generated and fitted
    trialParameterList = list()
    for i in range(0, numTrials):
        trialParameterList.append( [i, fity, aprioriUnc, function, p0, (datax, datay), bounds] )

    with Pool(processes=numParallelProcesses) as pool:
        proclist = [ pool.apply_async(fitWithErr, args) for args in trialParameterList ]

    ps = [ res.get() for res in proclist ]
    ps = np.array(ps)
    mean_pfit = np.mean(ps, 0)
    return mean_pfit

if __name__ == '__main__':
    x = np.linspace(0, 3, 2000)
    p0 = [-9.81, 1., 0.]
    y = funcwrap(x, p0)
    bounds = [ (-20, -1., -1E-6), (20, 3, 1E-6) ]
    fit_bootstrap(funcwrap, x, y, p0, bounds=bounds, aprioriUnc=0.1)
Sorry for giving a wrong answer earlier; it was irresponsible of me not to verify it. Here is my answer.
with Pool(processes=numParallelProcesses) as pool:
This line is the problem: leaving the with block calls the pool's exit function, not close. Here is the __exit__ body:
def __exit__(self, exc_type, exc_val, exc_tb):
    self.terminate()
All of the worker processes are terminated and never finish executing.
Second, this code:
ps = [ res.get() for res in proclist ]
passes no timeout parameter. Here is the get function body:
def get(self, timeout=None):
    self.wait(timeout)
    if not self.ready():
        raise TimeoutError
    if self._success:
        return self._value
    else:
        raise self._value
With no timeout it waits forever for results that will never arrive from the terminated workers. That's why it hangs.
You need to change
with Pool(processes=numParallelProcesses) as pool:
    proclist = [ pool.apply_async(fitWithErr, args) for args in trialParameterList ]
to:
pool = Pool(processes=numParallelProcesses)
proclist = [ pool.apply_async(fitWithErr, args) for args in trialParameterList ]
pool.close()
Indent
After all that, it was just that I didn't realize some code that was supposed to be in the with clause wasn't. (Besides some typos and other bugs, which I've now fixed.) Intermezzo strikes again!
Thanks to Snowy for making me go through it a different way until I found my error; it was just not clear what I intended to do. Snowy's code is perfectly valid and equivalent. However, for the record, timeout is not necessary. And, more importantly, with is perfectly valid for Pool if you use it correctly, as shown in the very first paragraph of the Python 3.6.6 multiprocessing documentation, which is where I got it. I just messed it up somehow. The code I was trying to write was simply:
with Pool(processes=numParallelProcesses) as pool:
    proclist = [ pool.apply_async(fitWithErr, args) for args in trialParameterList ]
    ps = [ res.get() for res in proclist ]
ps = np.array(ps)
mean_pfit = np.mean(ps, 0)
Works like I expected.

In Python, what will happen if I pass a normal variable to a function and use apply_async to execute it with multiple processes?

I've run into some behavior of Python multiprocessing which I cannot understand...
For example:
from multiprocessing import Pool
import time
import sys

def f(x):
    time.sleep(10)
    print(x)
    return x * x

def f2(x, f):
    time.sleep(10)
    print(x, file=f)
    return x * x

if __name__ == '__main__':
    p = Pool(5)
    for t in range(10):
        p.apply_async(f, args=(t,))
    p.close()
    p.join()  # Here it blocks and prints the numbers, which is normal.

    p = Pool(5)
    for t in range(10):
        p.apply_async(f2, args=(t, sys.stdout))
    p.close()
    p.join()  # Here it does not block and nothing happens (no output at all)...
The output is:
3
1
0
2
4
5
9
6
7
8
I know that we have to use something like shared variables to pass to the function when using multiprocessing and apply_async, but what will happen if I pass a normal variable to a function used in apply_async?
The multiprocessing.Pool executes your logic in a separate process. If the logic raises an exception, the Pool returns it to the caller.
In your code you are not collecting the output of your functions, therefore you don't notice the real issue.
Try to modify your code as follows:
p = Pool(5)
for t in range(10):
task = p.apply_async(f2, args=(t, sys.stdout))
task.get()
You will then get the actual exception which was raised within f2:
Traceback (most recent call last):
File "asd.py", line 24, in <module>
p.apply_async(f2, args=(t, sys.stdout)).get()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib/python3.5/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/usr/lib/python3.5/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot serialize '_io.TextIOWrapper' object
It turns out that sys.stdout is not picklable. Which, in this case, is not an issue, as sys.stdout is unique per process. You can avoid passing it to the function and just use it as-is within f2.
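A minimal sketch of that suggestion, assuming plain standard output is all you need from the file argument: drop the parameter and let each worker write to its own sys.stdout.
from multiprocessing import Pool
import sys
import time

def f2(x):
    time.sleep(1)
    print(x, file=sys.stdout)  # each worker process writes to its own sys.stdout
    return x * x

if __name__ == '__main__':
    p = Pool(5)
    tasks = [p.apply_async(f2, args=(t,)) for t in range(10)]
    print([t.get() for t in tasks])  # calling get() also surfaces any exception raised in a worker
    p.close()
    p.join()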

Pass a function as argument to a process target with Pool.map()

I'm developing software to benchmark some Python scripts using different methods (single-threaded, multi-threaded, multi-process), so I need to execute the same function (with the same arguments, etc.) in different processes.
How can I pass the function to execute as an argument to a process target?
What I currently understand is that a plain reference to a function cannot work, because the referenced function is not visible to the other processes; that's why I tried a custom manager for the shared memory.
Here is simplified code:
#!/bin/python
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat

class FunctionManager(BaseManager):
    pass

def maFunction(a, b):
    print(a + b)

def threadedFunction(f_i_args):
    (f, i, args) = f_i_args
    f(*args)

FunctionManager.register('Function', maFunction)
myManager = FunctionManager()
myManager.start()

myManager.Function(0, 0)  # Test 1
threadedFunction((maFunction, 0, (1, 1)))  # Test 2

p = Pool()
args = zip(repeat(myManager.Function), range(10), repeat(2, 2))
p.map(threadedFunction, args)  # Does not work
p.join()
myManager.shutdown()
The current pickling error at p.map() is the following:
2
0
Traceback (most recent call last):
File "./test.py", line 27, in <module>
p.map(threadedFunction, args) # Does not work
File "/usr/lib/python3.5/multiprocessing/pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib/python3.5/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/usr/lib/python3.5/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'weakref'>: attribute lookup weakref on builtins failed
I got a somewhat different error from running your code. Your key problem, I think, is that you pass a function to FunctionManager.register() instead of a class. I also had to remove your zip and build the argument list manually to make it work, but you can probably fix that; this is just an example.
The following code works and does something using your exact structure. I would do this a bit differently and not use BaseManager, but I assume you have your reasons.
#!/usr/bin/python3.5
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat

class FunctionManager(BaseManager):
    pass

class maClass(object):
    def __init__(self):
        pass

    def maFunction(self, a, b):
        print(a + b)

def threadedFunction(f_i_args):
    (f, i, args) = f_i_args
    f(*args)

FunctionManager.register('Foobar', maClass)
myManager = FunctionManager()
myManager.start()
foobar = myManager.Foobar()

foobar.maFunction(0, 0)  # Test 1
threadedFunction((foobar.maFunction, 0, (1, 1)))  # Test 2

p = Pool()
# args = list(zip(repeat(foobar.maFunction), range(10), repeat(2, 2)))
args = []
for i in range(10):
    args.append([foobar.maFunction, i, (i, 2)])

p.map(threadedFunction, args)  # Does now work
p.close()
p.join()
myManager.shutdown()
Or did I misunderstand your problem completely?
Hannu
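For what it's worth, here is a minimal sketch of the simpler route hinted at above, assuming the functions to benchmark can be defined at module level: module-level functions are pickled by reference, so no BaseManager is needed. The names benchMe and run_with_args are made up for this example.
from multiprocessing import Pool

def benchMe(a, b):
    # stand-in for a function you want to benchmark
    return a + b

def run_with_args(f_args):
    # unpack a (function, args) pair and call it inside the worker process
    f, args = f_args
    return f(*args)

if __name__ == '__main__':
    tasks = [(benchMe, (i, 2)) for i in range(10)]
    p = Pool()
    print(p.map(run_with_args, tasks))
    p.close()
    p.join()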
