I have a piece of toy code which does some dummy work to test parallelization. The code works fine as it is, but it fails if I try to import a class from another file. It gives me the error BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
#from Geometry import * <- this line causes the code to break
import concurrent.futures
import itertools
import random
import time
class ConProc:
def dummy(self, param):
time.sleep(random.random() * 3) # simulate a longer job
return param[0] * param[1]
def main(self):
ht_iterator = range(4)
wt_iterator = range(5)
paramlist = list(itertools.product(ht_iterator, wt_iterator))
with concurrent.futures.ProcessPoolExecutor() as executor:
ret = executor.map(self.dummy, paramlist)
for result in ret:
print(result)
if __name__ == '__main__':
cp = ConProc()
cp.main()
Contents of Geometry.py-
import math
import numpy as np
class vector(np.ndarray):
def __new__(cls, input_array):
obj = np.asarray(input_array).view(cls)
return obj
def __array_finalize__(self, obj):
if obj is None: return
class ray(vector):
pass
class sphere:
def __init__(self, center, radius, material):
self.center = center
self.radius = radius
self.material = material
def intersects(self, ray, ray_direction):
# import pdb; pdb.set_trace()
sphere_to_ray = ray - self.center
b = np.dot(2*ray_direction, sphere_to_ray)
c = np.dot(sphere_to_ray, sphere_to_ray) - self.radius*self.radius
disc = b * b - 4 * c
if disc >= 0:
dist = (-b - math.sqrt(disc)) / 2
if dist > 0:
return dist
return None
def normal(self, hit_pos):
return (hit_pos - self.center) / np.linalg.norm(hit_pos - self.center)
I find this problem puzzling because this error occurs even if I don't actually use anything from Geometry.
Sometimes I also get this error BrokenProcessPool: A child process terminated abruptly, the process pool is not usable anymore
Additional Info:
Stack-trace -
Traceback (most recent call last):
File "C:\Users\test_conc.py", line 42, in <module>
test = cp.main()
File "C:\Users\test_conc.py", line 35, in main
for result in ret:
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\process.py", line 476, in _chain_from_iterable_of_lists
for element in iterable:
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 586, in result_iterator
yield fs.pop().result()
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 432, in result
return self.__get_result()
File "C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
raise self._exception
BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
Related
I am trying to calculate some values of satellites, the data-generation takes quite long so I want to implement this using multiprocessing.
The problem is that I get this error from pyEphem, TypeError: can't pickle ephem.EarthSatellite objects. The pyEphem objects are not used in the functions that I want to parallelize.
This is an example file of my code (minimized).
This is my main file:
main.py
import ephem
import numpy
import math
import multiprocessing as mp
from SampleSats import Sats
GPS_Satellites = []
SFrames = 1
TLE = ["GPS BIIR-3 (PRN 11)",
"1 25933U 99055A 18090.43292845 -.00000054 00000-0 00000+0 0 9994",
"2 25933 51.8367 65.0783 0165007 100.2058 316.9161 2.00568927135407"]
# PRN TLE file from CelesTrak
GPS_Satellites.append(Sats(TLE))
Position = ephem.Observer()
Position.date = '2018/3/31 00:00' # 1st January 2018 at 00:00 UTC
Position.lon, Position.lat = "36.845663", "-37.161123" # Coordinates for desired Position
# Calculate Satellites
for Frames in range(SFrames):
print("Generate Signals for Time: ", Position.date)
for Sats in GPS_Satellites: # par
Sats.compute(Position)
if ((float(repr(Sats.ephemeris.alt)) * 180 / math.pi) < 5) or ( # Calculate angle above horizon
(float(repr(Sats.ephemeris.alt)) * 180 / math.pi) > 90):
Sats.visible = 0
else:
Sats.visible = 1
with mp.Pool() as pool:
for value, obj in zip(pool.map(Sats.genSignal, GPS_Satellites), GPS_Satellites):
obj.Signal = value
Position.date = Position.date + 6*ephem.second # 1 Subframe is 6 seconds long
This is the Sats class that i wrote:
sats.py:
import ephem
import numpy
class Sats:
"""Save Satellites as Objects"""
def __init__(self, tle):
""":param tle: Two Line Element for ephemeris data also used to get PRN Number from name"""
self.ephemeris = ephem.readtle(tle[0], tle[1], tle[2])
self.visible = 1
self.subframes = 0
self.CAseq = [x for x in range(1023)]
self.Out = []
self.Signal = numpy.zeros(int(300*20*1023), dtype=numpy.int8)
def compute(self, pos):
self.ephemeris.compute(pos)
self.Out.append(numpy.arange(0, 299, 1))
self.subframes += 1
def calcData(self, bit, prn):
return (self.Out[self.subframes - 1].item(0)[0][bit] + self.CAseq[prn]) % 2
def genSignal(self):
if(self.visible == 1):
for bit in range(300): # 1 Subframe is 300 Bit long
for x in range(20): # The PRN Sequence reoccurs every ms -> 20 times per pit
for prn in range(1023): # length of the prn sequence
self.Signal[bit*x*prn] = (-1 if (self.calcData(bit, prn))==0 else 1)
else:
self.Signal = numpy.zeros(300*20*1023)
return self.Signal
Traceback:
Traceback (most recent call last):
File "C:/Users/PATH_TO_PROJECT/SampleTest.py", line 33, in <module>
for value, obj in zip(pool.map(Sats.genSignal, GPS_Satellites), GPS_Satellites):
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 266, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 644, in get
raise self._value
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 424, in _handle_tasks
put(task)
File "C:\Program Files\Python36\lib\multiprocessing\connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Program Files\Python36\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: can't pickle ephem.EarthSatellite objects
The reason is something like this... when you try to pickle a function, it can attempt to pickle globals(), so whatever you have in your global namespace is also pickled (just in case your function has a reference to something in globals() -- yes, that's unexpected, but that's how it is). So, an easy fix is to isolate the function you want to pickle in another file -- in this case, put the multiprocessing stuff in one file and the other code in another file... so there's less in globals() for the pickler to struggle with. Another thing that might help is to use multiprocess instead of multiprocessing -- multiprocess uses the dill serializer instead of pickle, so you have a better chance of serializing objects that will be sent across the workers in the Pool.
I'm trying to run pyalgotrade's event profiler. I'm using custom data, it works when I run it with the default stratergy/predicate 'BuyOnGap' however when I try and run it with a simple custom strategy it throw the error:
Traceback (most recent call last):
File "C:\Users\David\Desktop\Python\Coursera\Computational Finance\Week2\PyAlgoTrade\Bitfinex\FCT\FCT_single_event_test.py", line 43, in <module>
main(True)
File "C:\Users\David\Desktop\Python\Coursera\Computational Finance\Week2\PyAlgoTrade\Bitfinex\FCT\FCT_single_event_test.py", line 35, in main
eventProfiler.run(feed, True)
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 215, in run
disp.run()
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 102, in run
eof, eventsDispatched = self.__dispatch()
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 90, in __dispatch
if self.__dispatchSubject(subject, smallestDateTime):
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 68, in __dispatchSubject
ret = subject.dispatch() is True
File "C:\Python27\lib\site-packages\pyalgotrade\feed\__init__.py", line 105, in dispatch
self.__event.emit(dateTime, values)
File "C:\Python27\lib\site-packages\pyalgotrade\observer.py", line 59, in emit
handler(*args, **kwargs)
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 172, in __onBars
eventOccurred = self.__predicate.eventOccurred(instrument, self.__feed[instrument])
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 89, in eventOccurred
raise NotImplementedError()
NotImplementedError
My code is:
from pyalgotrade import eventprofiler
from pyalgotrade.technical import stats
from pyalgotrade.technical import roc
from pyalgotrade.technical import ma
from pyalgotrade.barfeed import csvfeed
class single_event_strat( eventprofiler.Predicate ):
def __init__(self,feed):
self.__returns = {} # CLASS ATTR
for inst in feed.getRegisteredInstruments():
priceDS = feed[inst].getAdjCloseDataSeries() # STORE: priceDS ( a temporary representation )
self.__returns[inst] = roc.RateOfChange( priceDS, 1 )
# CALC: ATTR <- Returns over the adjusted close values, consumed priceDS
#( could be expressed as self.__returns[inst] = roc.RateOfChange( ( feed[inst].getAdjCloseDataSeries() ), 1 ),
#but would be less readable
def eventOccoured( self, instrument, aBarDS):
if (aBarDS[-1].getVolume() > 10 and aBarDS[-1].getClose() > 5 ):
return True
else:
return False
def main(plot):
feed = csvfeed.GenericBarFeed(0)
feed.addBarsFromCSV('FCT', "FCT_daily_converted.csv")
predicate = single_event_strat(feed)
eventProfiler = eventprofiler.Profiler( predicate, 5, 5)
eventProfiler.run(feed, True)
results = eventProfiler.getResults()
print "%d events found" % (results.getEventCount())
if plot:
eventprofiler.plot(results)
if __name__ == "__main__":
main(True)
What does this error mean ?
Does anyone know what's wrong and how to fix it ?
Here is a link to the eventprofiler code:
http://pastebin.com/QD220VQb
As a bonus does anyone know where I can find examples of the profiler being used? other that the example pyalgotrade gives, seen here
I think you just made a spelling mistake in eventOccurred method definition
def eventOccoured( self, instrument, aBarDS):
should be replaced by
def eventOccurred( self, instrument, aBarDS):
I'm trying to write in the same shared array in a parallel processing python script.
When I do it outside a class, in a normal script, everything works right. But when I try to do it through a class (using the same code), I get the
Runtime Error: SynchronizedArray objects should only be shared between processes through inheritance.
My script is the following (without a class):
import numpy
import ctypes
from multiprocessing import Pool, Array, cpu_count
n = 2
total_costs_matrix_base = Array(ctypes.c_double, n*n)
total_costs_matrix = numpy.ctypeslib.as_array(
total_costs_matrix_base.get_obj())
total_costs_matrix = total_costs_matrix.reshape(n,n)
def set_total_costs_matrix( i, j, def_param = total_costs_matrix_base):
total_costs_matrix[i,j] = i * j
if __name__ == "__main__":
pool = Pool(processes=cpu_count())
iterable = []
for i in range(n):
for j in range(i+1,n):
iterable.append((i,j))
pool.starmap(set_total_costs_matrix, iterable)
total_costs_matrix.dump('some/path/to/file')
That script works well. The one that doesn't is the following (which uses a class):
import numpy
import ctypes
from multiprocessing import Pool, Array, cpu_count
class CostComputation(object):
"""Computes the cost matrix."""
def __init__(self):
self.n = 2
self.total_costs_matrix_base = Array(ctypes.c_double, self.n*self.n)
self.total_costs_matrix = numpy.ctypeslib.as_array(
self.total_costs_matrix_base.get_obj())
self.total_costs_matrix = self.total_costs_matrix.reshape(self.n,self.n)
def set_total_costs_matrix(self, i, j, def_param = None):
def_param = self.total_costs_matrix_base
self.total_costs_matrix[i,j] = i * j
def write_cost_matrix(self):
pool = Pool(processes=cpu_count())
iterable = []
for i in range(self.n):
for j in range(i+1,self.n):
iterable.append((i,j))
pool.starmap(self.set_total_costs_matrix, iterable)
self.total_costs_matrix.dump('some/path/to/file')
After this, I would call write_cost_matrix from another file, after creating an instance of CostComputation.
I read this answer but still couldn't solve my problem.
I'm using Python 3.4.2 in a Mac OSX Yosemite 10.10.4.
EDIT
When using the class CostComputation, the script I'm using is:
from cost_computation import CostComputation
cc = CostComputation()
cc.write_costs_matrix()
The whole error is:
Traceback (most recent call last):
File "app.py", line 65, in <module>
cc.write_cost_matrix()
File "/path/to/cost_computation.py", line 75, in write_cost_matrix
pool.starmap(self.set_total_costs_matrix, iterable)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 599, in get
raise self._value
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 383, in _handle_tasks
put(task)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/sharedctypes.py", line 192, in __reduce__
assert_spawning(self)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/context.py", line 347, in assert_spawning
' through inheritance' % type(obj).__name__
RuntimeError: SynchronizedArray objects should only be shared between processes through inheritance
Try creating a second class which contains the shared data only. Then use that class object in your main class.
I want to run something like this:
from multiprocessing import Pool
import time
import random
class Controler(object):
def __init__(self):
nProcess = 10
pages = 10
self.__result = []
self.manageWork(nProcess,pages)
def BarcodeSearcher(x):
return x*x
def resultCollector(self,result):
self.__result.append(result)
def manageWork(self,nProcess,pages):
pool = Pool(processes=nProcess)
for pag in range(pages):
pool.apply_async(self.BarcodeSearcher, args = (pag, ), callback = self.resultCollector)
print self.__result
if __name__ == '__main__':
Controler()
but the code result the error :
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Python26\lib\threading.py", line 522, in __bootstrap_inner
self.run()
File "C:\Python26\lib\threading.py", line 477, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\python26\lib\multiprocessing\pool.py", line 225, in _handle_tasks
put(task)
PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
I've seen the posts (post1 , post2) to solve my problem. I'm looking for something like Mike McKerns solution in the second post but without using pathos.
This works, using copy_reg, as suggested by Alex Martelli in the first link you provided:
import copy_reg
import types
import multiprocessing
def _pickle_method(m):
if m.im_self is None:
return getattr, (m.im_class, m.im_func.func_name)
else:
return getattr, (m.im_self, m.im_func.func_name)
copy_reg.pickle(types.MethodType, _pickle_method)
class Controler(object):
def __init__(self):
nProcess = 10
pages = 10
self.__result = []
self.manageWork(nProcess, pages)
def BarcodeSearcher(self, x):
return x*x
def resultCollector(self, result):
self.__result.append(result)
def manageWork(self, nProcess, pages):
pool = multiprocessing.Pool(processes=nProcess)
for pag in range(pages):
pool.apply_async(self.BarcodeSearcher, args=(pag,),
callback=self.resultCollector)
pool.close()
pool.join()
print(self.__result)
if __name__ == '__main__':
Controler()
I have got some problems using the following code, which is supposed to do gaussian fits using threads:
from PIL import Image
import numpy as np
from scipy.optimize import curve_fit
import threading
class myThread (threading.Thread):
def __init__(self, index):
threading.Thread.__init__(self)
self.index = index
def run(self):
for i in np.arange(n_Bild.shape[1]):
curve_fit(self.gauss, x_x, Intensitaet[self.index, ...], p0=(Intensitaet[self.index, i], i, 1, 0))
def gauss(self, x, a, b, c, d):
return a * np.exp(-(x-b) ** 2 / (2 * c ** 2)) + d
Bild = Image.open("test.bmp")
n_Bild = np.asarray(Bild)
Intensitaet = np.zeros((n_Bild.shape[0], n_Bild.shape[1]), dtype=np.uint32)
Intensitaet += n_Bild[..., ..., 0]
Intensitaet += n_Bild[..., ..., 1]
Intensitaet += n_Bild[..., ..., 2]
x_x = np.arange(n_Bild.shape[1]) #Pixel auf "x"-Achse
threads = []
# Create new threads
thread0 = myThread(0)
thread1 = myThread(1)
# Add threads to thread list
threads.append(thread0)
threads.append(thread1)
# Start new Threads
thread0.start()
thread1.start()
# Wait for all threads to complete
for t in threads:
t.join()
print "finished"
If I run my programm I get an error:
SystemError: null argument to internal routine
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Anaconda\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "G:/DropBox/Daten/Dropbox/Uni/Bachelorarbeit/Python/ThreadTest.py", line 12, in run
curve_fit(self.gauss, x_x, Intensitaet[self.index, ...], p0=(Intensitaet[self.index, i], i, 1, 0))
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 533, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "C:\Anaconda\lib\site-packages\scipy\optimize\minpack.py", line 378, in leastsq
gtol, maxfev, epsfcn, factor, diag)
error: Internal error constructing argument list.#
If I only run one thread instead of two, the programm works fine, but I have no idea what i'm doing wrong.
Thanks for your help.
I believe that leastsq() is not threadsafe, and you need to either use a threading.Lock() around your calls to curve_fit() (which might defeat your purpose) or use multiprocessing.