Shared arrays in multiprocessing Python - python

I'm trying to write in the same shared array in a parallel processing python script.
When I do it outside a class, in a normal script, everything works right. But when I try to do it through a class (using the same code), I get the
RuntimeError: SynchronizedArray objects should only be shared between processes through inheritance.
My script is the following (without a class):
import numpy
import ctypes
from multiprocessing import Pool, Array, cpu_count
n = 2

# Shared buffer of n*n C doubles. It is created at module level, so the
# worker function below reaches it as a global instead of receiving it as a
# pickled task argument.
total_costs_matrix_base = Array(ctypes.c_double, n * n)
# NumPy view over the shared buffer, reshaped to an n x n matrix.
total_costs_matrix = numpy.ctypeslib.as_array(
    total_costs_matrix_base.get_obj()).reshape(n, n)


def set_total_costs_matrix(i, j, def_param=total_costs_matrix_base):
    """Store ``i * j`` in cell ``(i, j)`` of the shared cost matrix.

    ``def_param`` is never read in the body; its default merely binds the
    shared ``Array`` to the function at definition time.
    """
    total_costs_matrix[i, j] = i * j


if __name__ == "__main__":
    # One task per index pair of the strict upper triangle (i < j).
    iterable = [(i, j) for i in range(n) for j in range(i + 1, n)]
    # The context manager releases the worker processes once the map is done.
    with Pool(processes=cpu_count()) as pool:
        pool.starmap(set_total_costs_matrix, iterable)
    total_costs_matrix.dump('some/path/to/file')
That script works well. The one that doesn't is the following (which uses a class):
import numpy
import ctypes
from multiprocessing import Pool, Array, cpu_count
class CostComputation(object):
    """Computes the cost matrix."""

    def __init__(self):
        self.n = 2
        # Shared buffer of n*n C doubles plus an n x n NumPy view over it.
        self.total_costs_matrix_base = Array(ctypes.c_double, self.n * self.n)
        self.total_costs_matrix = numpy.ctypeslib.as_array(
            self.total_costs_matrix_base.get_obj()).reshape(self.n, self.n)

    def set_total_costs_matrix(self, i, j, def_param=None):
        """Store ``i * j`` in cell ``(i, j)`` of the cost matrix."""
        # Kept from the original attempt; this local is never read.
        def_param = self.total_costs_matrix_base
        self.total_costs_matrix[i, j] = i * j

    def write_cost_matrix(self):
        """Fill the strict upper triangle via a worker pool, then dump it."""
        iterable = [(i, j)
                    for i in range(self.n)
                    for j in range(i + 1, self.n)]
        with Pool(processes=cpu_count()) as pool:
            # NOTE: passing the bound method makes the pool pickle ``self``,
            # including its SynchronizedArray attribute. This is the call
            # that raises the RuntimeError reported for this class.
            pool.starmap(self.set_total_costs_matrix, iterable)
        self.total_costs_matrix.dump('some/path/to/file')
After this, I would call write_cost_matrix from another file, after creating an instance of CostComputation.
I read this answer but still couldn't solve my problem.
I'm using Python 3.4.2 in a Mac OSX Yosemite 10.10.4.
EDIT
When using the class CostComputation, the script I'm using is:
from cost_computation import CostComputation

cc = CostComputation()
# The method is named write_cost_matrix (singular "cost") in the class.
cc.write_cost_matrix()
The whole error is:
Traceback (most recent call last):
File "app.py", line 65, in <module>
cc.write_cost_matrix()
File "/path/to/cost_computation.py", line 75, in write_cost_matrix
pool.starmap(self.set_total_costs_matrix, iterable)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 599, in get
raise self._value
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/pool.py", line 383, in _handle_tasks
put(task)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/sharedctypes.py", line 192, in __reduce__
assert_spawning(self)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/context.py", line 347, in assert_spawning
' through inheritance' % type(obj).__name__
RuntimeError: SynchronizedArray objects should only be shared between processes through inheritance

Try creating a second class which contains the shared data only. Then use that class object in your main class.

Related

Sharing dictionary over multiprocesses (TypeError: cannot pickle 'weakref' object)

I want to create a class Storage where each object has a dictionary orderbooks as a property.
I want to write on orderbooks from the main process by invoking the method write, but I want to defer this action to another process and ensuring that the dictionary orderbooks is accessible from the main process.
To do so, I create a Manager() that I pass in when constructing the object and that is used to notify the processes about changes to the dictionary. My code is the following:
from multiprocessing import Process, Manager
class Storage():
    """Holds an ``orderbooks`` dict obtained from the manager it is given."""

    def __init__(self, manager):
        # NOTE: the manager is stored on the instance. ``write`` uses a bound
        # method as the Process target, so the whole instance (manager
        # included) has to be pickled when the child process is spawned.
        self.manager = manager
        self.orderbooks = self.manager.dict()

    def store_value(self, el):
        """Set ``orderbooks[el[0]]`` to ``el[1]``."""
        self.orderbooks[el[0]] = el[1]

    def write(self, el: list):
        """Start a child process that runs ``store_value(el)``."""
        p = Process(target=self.store_value, args=(el,))
        p.start()


if __name__ == '__main__':
    manager = Manager()
    book1 = Storage(manager)
    book1.write([0, 1])
However, when I run this code, I get the following error
Traceback (most recent call last):
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 21, in <module>
book1.write([0,1])
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 13, in write
p.start()
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
What is wrong with my code?
Per Aaron's posted comment:
from multiprocessing import Process, Manager
class Storage():
    """Wraps a dict-like ``orderbooks`` object supplied by the caller."""

    def __init__(self, orderbooks):
        # Only the managed dict is kept; no manager object is stored here.
        self.orderbooks = orderbooks

    def store_value(self, el):
        """Set ``orderbooks[el[0]]`` to ``el[1]``."""
        self.orderbooks[el[0]] = el[1]

    def write(self, el: list):
        """Run ``store_value(el)`` in a child process and wait for it."""
        p = Process(target=self.store_value, args=(el,))
        p.start()
        # Ensure we do not return until store_value has
        # completed updating the dictionary:
        p.join()


if __name__ == '__main__':
    manager = Manager()
    orderbooks = manager.dict()
    book1 = Storage(orderbooks)
    book1.write([0, 1])
    print(orderbooks)
Prints:
{0: 1}

Python 3.9: multiprocessing process start() got an error| TypeError: cannot pickle 'weakref' object

I'm trying to decrease running time by using multiprocessing.
I got a weird error TypeError: cannot pickle 'weakref' object
I'm not quite sure why this error occurs, because I use the same approach in another program and it runs normally. Can someone explain why this error occurs?
I already followed this solution, but it did not work for me.
import multiprocessing
from scipy import stats
import numpy as np
import pandas as pd
class T_TestFeature:
    """Runs a t-test ("Virginia" vs. the rest) per feature column."""

    def __init__(self, data, classes):
        self.data = data
        self.classes = classes
        # NOTE: the manager is stored on the instance. ``preform`` uses a
        # bound method as the Process target, so the instance (manager
        # included) must be pickled when the start method is spawn.
        self.manager = multiprocessing.Manager()
        self.pval = self.manager.list()

    def preform(self):
        """Start one process per column index 0..9 and wait for all of them."""
        process = []
        for i in range(10):
            process.append(
                multiprocessing.Process(target=self.t_test, args=(i,)))
        for p in process:
            p.start()
        for p in process:
            p.join()

    def t_test(self, k):
        """Append the p-value of the unequal-variance t-test for column *k*.

        Rows whose class label equals "Virginia" form the first sample and
        all other rows form the second.
        """
        index_samples = np.array(self.data)[:, k]
        rs1 = [index_samples[i] for i in range(len(index_samples))
               if self.classes[i] == "Virginia"]
        rs2 = [index_samples[i] for i in range(len(index_samples))
               if self.classes[i] != "Virginia"]
        self.pval.append(stats.ttest_ind(rs1, rs2, equal_var=False).pvalue)
def main():
    """Load the spreadsheet, run the per-column t-tests, print the p-values."""
    df = pd.read_excel("/Users/xxx/Documents/Project/src/flattened.xlsx")
    # Transposed values are the data; the column labels are the class labels.
    flattened = df.values.T
    y = df.columns
    result = T_TestFeature(flattened, y)
    result.preform()
    print(result.pval)


if __name__ == "__main__":
    main()
Traceback (most recent call last):
File "/Users/xxx/Documents/Project/src/t_test.py", line 41, in <module>
main()
File "/Users/xxx/Documents/Project/src/t_test.py", line 37, in main
result.preform()
File "/Users/xxx/Documents/Project/src/t_test.py", line 21, in preform
p.start()
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Here is a simpler way to reproduce your issue:
from multiprocessing import Manager, Process
class A:
    """Minimal reproduction: an instance that stores a Manager."""

    def __init__(self):
        # Storing the manager on the instance is what the reproduction is
        # about: the instance is pickled when ``a.start`` is the target.
        self.manager = Manager()

    def start(self):
        print("started")


if __name__ == "__main__":
    a = A()
    proc = Process(target=a.start)
    proc.start()
    proc.join()
You cannot pickle instances containing manager objects, because they contain a reference to the manager process they started (therefore, in general, you can't pickle instances containing objects of class Process).
A simple fix would be to not store the manager. It will automatically be garbage collected once no references to the managed list remain:
def __init__(self, data, classes):
    self.data = data
    self.classes = classes
    # Keep the manager in a local only, so the instance has no manager
    # attribute; only the managed list is stored.
    manager = multiprocessing.Manager()
    self.pval = manager.list()

How to assign a value to a sliced output signal?

I'm a beginner with myhdl.
I try to translate the following Verilog code to MyHDL:
module ModuleA(data_in, data_out, clk);
    input data_in;
    output reg data_out;
    input clk;

    // Register data_in into data_out on every rising edge of clk.
    always @(posedge clk) begin
        data_out <= data_in;
    end
endmodule
module ModuleB(data_in, data_out, clk);
    input [1:0] data_in;
    output [1:0] data_out;
    input clk;

    // One ModuleA instance per bit of the 2-bit buses.
    ModuleA instance1(data_in[0], data_out[0], clk);
    ModuleA instance2(data_in[1], data_out[1], clk);
endmodule
Currently, I have this code:
import myhdl
@myhdl.block
def ModuleA(data_in, data_out, clk):
    """Drive ``data_out`` from ``data_in`` on each rising edge of ``clk``."""

    @myhdl.always(clk.posedge)
    def logic():
        data_out.next = data_in

    return myhdl.instances()
@myhdl.block
def ModuleB(data_in, data_out, clk):
    """Instantiate one ModuleA per bit of the 2-bit signals."""
    # data_in(i) / data_out(i) are bit slices of the parent signals; both
    # instances therefore drive the same underlying data_out signal.
    instance1 = ModuleA(data_in(0), data_out(0), clk)
    instance2 = ModuleA(data_in(1), data_out(1), clk)
    return myhdl.instances()
# Create signals (two 2-bit vectors and a boolean clock)
data_in = myhdl.Signal(myhdl.intbv()[2:])
data_out = myhdl.Signal(myhdl.intbv()[2:])
clk = myhdl.Signal(bool())
# Instantiate the DUT
dut = ModuleB(data_in, data_out, clk)
# Convert the DUT to Verilog
dut.convert()
But it doesn't work because signal slicing produces a read-only shadow signal (cf. MEP-105).
So, what is the right way in MyHDL to have a writable slice of a signal?
Edit:
This is the error I get
$ python demo.py
Traceback (most recent call last):
File "demo.py", line 29, in <module>
dut.convert()
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/_block.py", line 342, in convert
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_toVerilog.py", line 177, in __call__
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_analyze.py", line 170, in _analyzeGens
File "/usr/lib/python3.6/ast.py", line 253, in visit
return visitor(node)
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_analyze.py", line 1072, in visit_Module
File "/home/killruana/.local/share/virtualenvs/myhdl_sandbox-dYpBu4o5/lib/python3.6/site-packages/myhdl-0.10-py3.6.egg/myhdl/conversion/_misc.py", line 148, in raiseError
myhdl.ConversionError: in file demo.py, line 4:
Signal has multiple drivers: data_out
You can use an intermediate list of Signal(bool()) as placeholder.
@myhdl.block
def ModuleB(data_in, data_out, clk):
    """Instantiate one ModuleA per input bit via intermediate bit signals."""
    # One writable boolean signal per bit; each ModuleA drives its own.
    tsig = [myhdl.Signal(bool(0)) for _ in range(len(data_in))]
    instances = []
    for i in range(len(data_in)):
        instances.append(ModuleA(data_in(i), tsig[i], clk))

    # A single combinational process copies the bits into data_out, so
    # data_out is assigned from one place only.
    @myhdl.always_comb
    def assign():
        for i in range(len(data_out)):
            data_out.next[i] = tsig[i]

    return myhdl.instances()
A quick (probably unsatisfying) comment is that the intbv is treated as a single entity that can't have multiple drivers. Two references that might help shed some light:
http://jandecaluwe.com/hdldesign/counting.html
http://docs.myhdl.org/en/stable/manual/structure.html#converting-between-lists-of-signals-and-bit-vectors

How do I pickle pyEphem objects for multiprocessing?

I am trying to calculate some values of satellites, the data-generation takes quite long so I want to implement this using multiprocessing.
The problem is that I get this error from pyEphem, TypeError: can't pickle ephem.EarthSatellite objects. The pyEphem objects are not used in the functions that I want to parallelize.
This is an example file of my code (minimized).
This is my main file:
main.py
import ephem
import numpy
import math
import multiprocessing as mp
from SampleSats import Sats
GPS_Satellites = []
SFrames = 1
TLE = ["GPS BIIR-3 (PRN 11)",
       "1 25933U 99055A 18090.43292845 -.00000054 00000-0 00000+0 0 9994",
       "2 25933 51.8367 65.0783 0165007 100.2058 316.9161 2.00568927135407"]
# PRN TLE file from CelesTrak
GPS_Satellites.append(Sats(TLE))
Position = ephem.Observer()
Position.date = '2018/3/31 00:00'  # 31 March 2018 at 00:00 UTC
Position.lon, Position.lat = "36.845663", "-37.161123"  # Coordinates for desired Position
# Calculate Satellites
for Frames in range(SFrames):
    print("Generate Signals for Time: ", Position.date)
    # NOTE(review): the loop variable rebinds the name ``Sats``, so after
    # this loop ``Sats`` is the last satellite instance, not the class.
    for Sats in GPS_Satellites:  # par
        Sats.compute(Position)
        if ((float(repr(Sats.ephemeris.alt)) * 180 / math.pi) < 5) or (  # Calculate angle above horizon
                (float(repr(Sats.ephemeris.alt)) * 180 / math.pi) > 90):
            Sats.visible = 0
        else:
            Sats.visible = 1
    with mp.Pool() as pool:
        # Each satellite object is sent to a worker, so it must be picklable.
        for value, obj in zip(pool.map(Sats.genSignal, GPS_Satellites), GPS_Satellites):
            obj.Signal = value
    Position.date = Position.date + 6*ephem.second  # 1 Subframe is 6 seconds long
This is the Sats class that I wrote:
sats.py:
import ephem
import numpy
class Sats:
    """Save Satellites as Objects"""

    def __init__(self, tle):
        """:param tle: Two Line Element for ephemeris data also used to get PRN Number from name"""
        self.ephemeris = ephem.readtle(tle[0], tle[1], tle[2])
        self.visible = 1
        self.subframes = 0
        self.CAseq = [x for x in range(1023)]
        self.Out = []
        self.Signal = numpy.zeros(int(300*20*1023), dtype=numpy.int8)

    def compute(self, pos):
        """Update the ephemeris for *pos* and record one more subframe."""
        self.ephemeris.compute(pos)
        self.Out.append(numpy.arange(0, 299, 1))
        self.subframes += 1

    def calcData(self, bit, prn):
        """Combine one bit of the latest subframe with one PRN chip, mod 2."""
        # NOTE(review): ``.item(0)`` yields a scalar for the arrays appended
        # in compute(), so the indexing that follows looks inconsistent with
        # this minimized data -- confirm against the full program.
        return (self.Out[self.subframes - 1].item(0)[0][bit] + self.CAseq[prn]) % 2

    def genSignal(self):
        """Return the signal array: computed if visible, else all zeros."""
        if self.visible == 1:
            for bit in range(300):  # 1 Subframe is 300 Bit long
                for x in range(20):  # The PRN Sequence reoccurs every ms -> 20 times per bit
                    for prn in range(1023):  # length of the prn sequence
                        # NOTE(review): the index is the product bit*x*prn,
                        # which maps many (bit, x, prn) triples to the same
                        # slot -- confirm this is the intended layout.
                        self.Signal[bit*x*prn] = (-1 if (self.calcData(bit, prn)) == 0 else 1)
        else:
            self.Signal = numpy.zeros(300*20*1023)
        return self.Signal
Traceback:
Traceback (most recent call last):
File "C:/Users/PATH_TO_PROJECT/SampleTest.py", line 33, in <module>
for value, obj in zip(pool.map(Sats.genSignal, GPS_Satellites), GPS_Satellites):
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 266, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 644, in get
raise self._value
File "C:\Program Files\Python36\lib\multiprocessing\pool.py", line 424, in _handle_tasks
put(task)
File "C:\Program Files\Python36\lib\multiprocessing\connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Program Files\Python36\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: can't pickle ephem.EarthSatellite objects
The reason is something like this... when you try to pickle a function, it can attempt to pickle globals(), so whatever you have in your global namespace is also pickled (just in case your function has a reference to something in globals() -- yes, that's unexpected, but that's how it is). So, an easy fix is to isolate the function you want to pickle in another file -- in this case, put the multiprocessing stuff in one file and the other code in another file... so there's less in globals() for the pickler to struggle with. Another thing that might help is to use multiprocess instead of multiprocessing -- multiprocess uses the dill serializer instead of pickle, so you have a better chance of serializing objects that will be sent across the workers in the Pool.

Pass a function as argument to a process target with Pool.map()

I'm developing software to benchmark some Python scripts using different methods (single-threaded, multi-threaded, multi-process). So I need to execute the same function (with the same arguments, etc.) in different processes.
How to pass the function to execute as argument to a process target ?
What I currently understand is that a reference to a function cannot work because the function referenced is not visible for other processes, that's why I tried with a custom manager for the shared memory.
Here a simplified code:
#!/bin/python
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat
class FunctionManager(BaseManager):
    """BaseManager subclass used as the registry for shared callables."""
    pass
def maFunction(a, b):
    """Print the sum of *a* and *b*."""
    print(a + b)
def threadedFunction(f_i_args):
    """Unpack an ``(f, i, args)`` triple and call ``f(*args)``.

    The middle element is unpacked but not used.
    """
    (f, i, args) = f_i_args
    f(*args)
# Register the plain function under the name 'Function' and start the manager.
FunctionManager.register('Function', maFunction)
myManager = FunctionManager()
myManager.start()
myManager.Function(0, 0) # Test 1
threadedFunction((maFunction, 0, (1, 1))) # Test 2
p = Pool()
# NOTE(review): repeat(2, 2) yields the int 2 exactly twice, so zip() stops
# after two tuples and the third element is an int, not an argument tuple.
args = zip(repeat(myManager.Function), range(10), repeat(2, 2))
p.map(threadedFunction, args) # Does not work
# NOTE(review): join() is called without a preceding close()/terminate().
p.join()
myManager.shutdown()
The current pickling error at "p.map()" is the following :
2
0
Traceback (most recent call last):
File "./test.py", line 27, in <module>
p.map(threadedFunction, args) # Does not work
File "/usr/lib/python3.5/multiprocessing/pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib/python3.5/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/usr/lib/python3.5/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'weakref'>: attribute lookup weakref on builtins failed
I got a bit different error from running your code. Your key problem I think is that you pass a function to FunctionManager.register() instead of a class. I also had to remove your zip to make it work and create a list manually, but this you can probably fix. This is just an example.
The following code works and does something using your exact structure. I would do this a bit differently and not use BaseManager, but I assume you have your reasons.
#!/usr/bin/python3.5
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from itertools import repeat
class FunctionManager(BaseManager):
    """BaseManager subclass on which the shared class is registered."""
    pass
class maClass(object):
    """Stateless holder for ``maFunction`` so it can be registered as a class."""

    def __init__(self):
        pass

    def maFunction(self, a, b):
        """Print the sum of *a* and *b*."""
        print(a + b)
def threadedFunction(f_i_args):
    """Unpack an ``(f, i, args)`` triple and call ``f(*args)``.

    The middle element is unpacked but not used.
    """
    (f, i, args) = f_i_args
    f(*args)
# Register the class (not a bare function) and create one shared instance.
FunctionManager.register('Foobar', maClass)
myManager = FunctionManager()
myManager.start()
foobar = myManager.Foobar()
foobar.maFunction(0, 0) # Test 1
threadedFunction((foobar.maFunction, 0, (1, 1))) # Test 2
p = Pool()
#args = list(zip(repeat(foobar.maFunction), range(10), repeat(2, 2)))
# One [callable, index, argument-tuple] task per i in 0..9.
args = [[foobar.maFunction, i, (i, 2)] for i in range(10)]
p.map(threadedFunction, args) # Does now work
# close() before join(): no more tasks will be submitted to the pool.
p.close()
p.join()
myManager.shutdown()
Or did I misunderstand your problem completely?
Hannu

Categories