How to use multiprocessing with inheritance in Python?

I'm trying to speed up the running time of my script with multiprocessing. When I tried the same multiprocessing code with simpler tasks, like resizing images in different directories, it worked well. But with the code below it runs without producing any output or raising any errors, and I was wondering what the reason could be.
I was also wondering how I could use multiprocessing with this code; maybe inheritance is the problem?
import multiprocessing
from scipy import ndimage

class Skeleton:
    def __init__(self, path, **kwargs):
        if type(path) is str:
            self.path = path
            self.inputStack = loadStack(self.path).astype(bool)
        if kwargs != {}:
            aspectRatio = kwargs["aspectRatio"]
            self.inputStack = ndimage.interpolation.zoom(self.inputStack, zoom=aspectRatio,
                                                         order=2, prefilter=False)

    def setThinningOutput(self, mode="reflect"):
        # Thinning output
        self.skeletonStack = get_thinned(self.inputStack, mode)

    def setNetworkGraph(self, findSkeleton=False):
        # Network graph of the crowded-region-removed output
        self.skeletonStack = self.inputStack
        self.graph = get_networkx_graph_from_array(self.skeletonStack)

    def setPrunedSkeletonOutput(self):
        # Prune unnecessary segments in the crowded-regions-removed skeleton
        self.setNetworkGraph(findSkeleton=True)
        self.outputStack = pr.getPrunedSkeleton(self.skeletonStack, self.graph)
        saveStack(self.outputStack, self.path + "pruned/")

class Trabeculae(Skeleton):
    pass

def TrabeculaeY(path):
    path_mrb01_square = Trabeculae(path)
    path_mrb01_square.setPrunedSkeletonOutput()

if __name__ == '__main__':
    path1 = (r' ')
    path2 = (r' ')
    path3 = (r' ')

    the_list = []
    the_list.append(path1)
    the_list.append(path2)
    the_list.append(path3)

    for i in range(0, len(the_list)):
        p1 = multiprocessing.Process(target=TrabeculaeY, args=(the_list[i],))
        p1.start()
        p1.join()

Inheritance is not a problem for multiprocessing.
You must not join() the processes inside the loop: joining there makes the loop wait until p1 has finished its work before starting the next one, so the processes run sequentially instead of in parallel.
Instead, start all processes in a loop, then wait for all of them in a second loop, like this:
if __name__ == '__main__':
    path1 = (r' ')
    path2 = (r' ')
    path3 = (r' ')

    the_list = []
    the_list.append(path1)
    the_list.append(path2)
    the_list.append(path3)

    started_processes = []
    for i in range(0, len(the_list)):
        p1 = multiprocessing.Process(target=TrabeculaeY, args=(the_list[i],))
        p1.start()
        started_processes.append(p1)

    for p in started_processes:
        p.join()
Full code I used for testing:
import multiprocessing

class Skeleton:
    def __init__(self, path, **kwargs):
        self.path = path

    def setThinningOutput(self, mode="reflect"):
        pass

    def setNetworkGraph(self, findSkeleton=False):
        pass

    def setPrunedSkeletonOutput(self):
        print(self.path)

class Trabeculae(Skeleton):
    pass

def TrabeculaeY(path: str):
    path_mrb01_square = Trabeculae(path)
    path_mrb01_square.setPrunedSkeletonOutput()

if __name__ == '__main__':
    the_list = [r'1', r'2', r'3']

    started_processes = []
    for path in the_list:
        # args must be a tuple of arguments, hence (path,) rather than path
        process = multiprocessing.Process(target=TrabeculaeY, args=(path,))
        process.start()
        started_processes.append(process)

    for process in started_processes:
        process.join()
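As an aside, the same fan-out/join pattern can be written more compactly with a process pool. A minimal sketch, reusing a stand-in for TrabeculaeY like the one in the test code above:

import multiprocessing

def TrabeculaeY(path):
    print(path)  # stand-in for the real skeleton work

if __name__ == '__main__':
    the_list = [r'1', r'2', r'3']
    # Pool.map starts the workers, distributes the paths, and joins them for us.
    with multiprocessing.Pool(processes=3) as pool:
        pool.map(TrabeculaeY, the_list)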

Related

Use generator to iterate through data from a multiprocess

I would like to perform the following below using multiprocessing instead of subprocess.Popen, because I cannot pass objects using Popen. I know my simple example below does not use or pass objects, but that is what I want to do.
Sample code is:
main.py
import subprocess

class ProcReader():
    def __init__(self, python_file):
        self.proc = subprocess.Popen(['python', python_file], stdout=subprocess.PIPE)

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            line = self.proc.stdout.readline()
            if not line:
                raise StopIteration
            return line

if __name__ == "__main__":
    r1 = ProcReader("test1.py")
    r2 = ProcReader("test2.py")
    r3 = ProcReader("test3.py")

    for l1, l2, l3 in zip(r1, r2, r3):
        d1 = l1.decode('utf-8').strip().split(",")
        d2 = l2.decode('utf-8').strip().split(",")
        d3 = l3.decode('utf-8').strip().split(",")
        print(f"{d1[0]}:{d1[1]},{d2[0]}:{d2[1]},{d3[0]}:{d3[1]}")
test#.py
for x in range(10):
    print("test1,{}".format(x))
My sample code is in Python 3, but I would like an equivalent, using multiprocessing, in Python 2.7. Should the equivalent also read from stdout? Or should it use a queue and just have a worker reading from the queue?
Update:
My example using multiprocessing:
import time
from multiprocessing import Process, Queue

def writer1(queue):
    for x in range(10):
        time.sleep(1)
        queue.put("test1,{}".format(x))

def writer2(queue):
    for x in range(10):
        time.sleep(2)
        queue.put("test2,{}".format(x))

def writer3(queue):
    for x in range(10):
        queue.put("test3,{}".format(x))

if __name__ == '__main__':
    q1 = Queue()
    q2 = Queue()
    q3 = Queue()

    writer_1 = Process(target=writer1, args=(q1,))
    writer_1.daemon = True
    writer_1.start()

    writer_2 = Process(target=writer2, args=(q2,))
    writer_2.daemon = True
    writer_2.start()

    writer_3 = Process(target=writer3, args=(q3,))
    writer_3.daemon = True
    writer_3.start()

    while True:
        msg1 = q1.get()
        msg2 = q2.get()
        msg3 = q3.get()
        if msg1 and msg2 and msg3:
            d1 = msg1.strip().split(",")
            d2 = msg2.strip().split(",")
            d3 = msg3.strip().split(",")
            print("{}:{},{}:{},{}:{}".format(d1[0], d1[1],
                                             d2[0], d2[1],
                                             d3[0], d3[1]))
        else:
            break
I didn't realize q1.get() blocks until something is there; I added the sleep calls to verify this. Also, how do I check that a process is done writing? It seems to just wait at the end.
To adapt your second example for my comment about sentinel objects, maybe you're looking for something like
import os
import time
from multiprocessing import Process, Queue

def writer(queue):
    value = os.getpid()
    for x in range(10):
        time.sleep(0.1)
        queue.put("{},{}".format(value, x))
    queue.put(None)

def spawn_process():
    q = Queue()
    p = Process(target=writer, args=(q,))
    p.daemon = True
    p.start()
    return (p, q)

if __name__ == "__main__":
    processes_and_queues = [spawn_process() for x in range(3)]
    processes, queues = zip(*processes_and_queues)

    live_queues = list(queues)
    while live_queues:
        messages = []
        for queue in live_queues:
            message = queue.get()
            if message is None:
                live_queues.remove(queue)
            messages.append(message)
        if len(messages) == len(processes):
            print(messages)
It outputs (e.g.)
['51748,0', '51749,0', '51750,0']
['51748,1', '51749,1', '51750,1']
['51748,2', '51749,2', '51750,2']
['51748,3', '51749,3', '51750,3']
['51748,4', '51749,4', '51750,4']
['51748,5', '51749,5', '51750,5']
['51748,6', '51749,6', '51750,6']
['51748,7', '51749,7', '51750,7']
['51748,8', '51749,8', '51750,8']
['51748,9', '51749,9', '51750,9']
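One follow-up worth noting: each writer puts its None sentinel only after its last message, so once the loop above ends all queues have been drained. Appending a join at the end of the __main__ block above then confirms the writers have exited, a small sketch:

    for p in processes:
        p.join()  # each writer has already sent its sentinel, so this returns promptly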

Python multiprocessing's Pool constants

I'm a beginner with the multiprocessing module in Python and I want to use concurrent execution ONLY for my def func. Moreover, I'm using some constants in my code and I have a problem with them.
The code is (Python 3.6.8):
from multiprocessing import Pool

FIRST_COUNT = 10
print("Enter your path")
PATH = input()

some_list = []
for i in range(10000):
    some_list.append(i)

def func(some_list):
    .....

if __name__ == "__main__":
    chunks = [some_list[i::4] for i in range(4)]
    pool = Pool(processes=4)
    pool.map(func, chunks)
When I try to start this program, I see the message Enter your path 5 times, and 5 times I need to input my path, i.e. this code executes 1 + 4 times (once per process). I want to use FIRST_COUNT, PATH and some_list as constants, and use multiprocessing only for func. How can I do this? Please help me.
You should put the code inside if __name__ == "__main__": to execute it only once:
if __name__ == "__main__":
FIRST_COUNT = 10
PATH = input("Enter your path: ")
some_list = list(range(10000))
#some_list = []
#for i in range(10000):
# some_list.append(i)
chunks = [some_list[i::4] for i in range(4)]
pool = Pool(processes=4)
results = pool.map(func, chunks)
print(results)
If you want to use FIRST_COUNT and PATH, then it is better to send them to func as arguments.
You will have to create tuples with FIRST_COUNT and PATH in chunks:
chunks = [(FIRST_COUNT, PATH, some_list[i::4]) for i in range(4)]
and the function will have to receive the tuple and unpack it:
def func(args):
    first_count, path, some_list = args
Working example
from multiprocessing import Pool

def func(args):
    first_count, path, some_list = args
    result = sum(some_list)
    print(first_count, path, result)
    return result

if __name__ == "__main__":
    FIRST_COUNT = 10
    PATH = input("Enter your path: ")

    some_list = list(range(10000))
    #some_list = []
    #for i in range(10000):
    #    some_list.append(i)

    chunks = [(FIRST_COUNT, PATH, some_list[i::4]) for i in range(4)]

    pool = Pool(processes=4)
    all_results = pool.map(func, chunks)
    print('all results:', all_results)
EDIT: You can also use starmap() instead of map()
all_results = pool.starmap(func, chunks)
and then you can define the function without unpacking arguments:
def func(first_count, path, some_list):
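For completeness, a runnable sketch of the starmap() variant, mirroring the working example above:

from multiprocessing import Pool

def func(first_count, path, some_list):
    # starmap() unpacks each tuple into separate arguments for us
    result = sum(some_list)
    print(first_count, path, result)
    return result

if __name__ == "__main__":
    FIRST_COUNT = 10
    PATH = input("Enter your path: ")
    some_list = list(range(10000))

    chunks = [(FIRST_COUNT, PATH, some_list[i::4]) for i in range(4)]
    pool = Pool(processes=4)
    all_results = pool.starmap(func, chunks)
    print('all results:', all_results)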

Process containing object method doesn't recognize edit to object

I have the following situation: process = Process(target=sample_object.run). I would then like to edit a property of the sample_object: sample_object.edit_property(some_other_object).
class sample_object:
    def __init__(self):
        self.storage = []

    def edit_property(self, some_other_object):
        self.storage.append(some_other_object)

    def run(self):
        while True:
            if len(self.storage) != 0:
                print "1"
                # I know it's an infinite loop. It's just an example.
_______________________________________________________
from multiprocessing import Process
from sample import sample_object
from sample2 import some_other_object

class driver:
    if __name__ == "__main__":
        samp = sample_object()
        proc = Process(target=samp.run)
        proc.start()

        while True:
            some = some_other_object()
            samp.edit_property(some)
            # I know it's an infinite loop
The previous code never prints "1". How would I connect the Process to the sample_object so that an edit made to the object whose method the Process is calling is recognized by the process? In other words, is there a way to get run() to recognize the change in sample_object?
Thank you.
You can use multiprocessing.Manager to share Python data structures between processes.
from multiprocessing import Process, Manager

class A(object):
    def __init__(self, storage):
        self.storage = storage

    def add(self, item):
        self.storage.append(item)

    def run(self):
        while True:
            if self.storage:
                print 1

if __name__ == '__main__':
    manager = Manager()
    storage = manager.list()

    a = A(storage)
    p = Process(target=a.run)
    p.start()

    for i in range(10):
        a.add({'id': i})

    p.join()

Shared variable in Python Process subclass

I was wondering if it would be possible to create some sort of static set in a Python Process subclass to keep track of the types of processes that are currently running asynchronously.
class showError(Process):
    # Define some form of shared set that is shared by all Processes
    displayed_errors = set()

    def __init__(self, file_name, error_type):
        super(showError, self).__init__()
        self.error_type = error_type

    def run(self):
        if error_type not in displayed_errors:
            displayed_errors.add(error_type)
            message = 'Please try again. ' + str(self.error_type)
            winsound.MessageBeep(-1)
            result = win32api.MessageBox(0, message, 'Error', 0x00001000)
            if result == 0:
                displayed_errors.discard(error_type)
That way, when I create/start multiple showError processes with the same error_type, subsequent error windows will not be created. So how can we define this shared set?
You can use a multiprocessing.Manager.dict (there's no set object available, but you can use a dict in the same way) and share that between all your subprocesses.
import multiprocessing as mp

if __name__ == "__main__":
    m = mp.Manager()
    displayed_errors = m.dict()
    subp = showError("some filename", "some error type", displayed_errors)
Then change showError.__init__ to accept the shared dict:
def __init__(self, file_name, error_type, displayed_errors):
    super(showError, self).__init__()
    self.error_type = error_type
    self.displayed_errors = displayed_errors
Then this:
displayed_errors.add(error_type)
Becomes:
self.displayed_errors[error_type] = 1
And this:
displayed_errors.discard(error_type)
Becomes:
try:
    del self.displayed_errors[error_type]
except KeyError:
    pass
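Putting those pieces together, a sketch of the full adapted class (the winsound/win32api calls are kept from the question and assume Windows; note there is still a small race window between the membership check and the insert):

import winsound
import win32api
import multiprocessing as mp
from multiprocessing import Process

class showError(Process):
    def __init__(self, file_name, error_type, displayed_errors):
        super(showError, self).__init__()
        self.error_type = error_type
        self.displayed_errors = displayed_errors  # shared Manager dict

    def run(self):
        if self.error_type not in self.displayed_errors:
            self.displayed_errors[self.error_type] = 1
            message = 'Please try again. ' + str(self.error_type)
            winsound.MessageBeep(-1)
            result = win32api.MessageBox(0, message, 'Error', 0x00001000)
            if result == 0:
                try:
                    del self.displayed_errors[self.error_type]
                except KeyError:
                    pass

if __name__ == "__main__":
    m = mp.Manager()
    displayed_errors = m.dict()
    subp = showError("some filename", "some error type", displayed_errors)
    subp.start()
    subp.join()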

change object value in threads using python

I am very new to Python, thus am possibly asking a simple question.
I am writing multiprocess code with Python:
from multiprocessing import Process
from multiprocessing import Queue

class myClass(object):
    def __init__(self):
        self.__i = 0
        self.__name = 'rob'
        return

    def target_func(self, name, q):
        self.__name = name
        print 'Hello', self.__name
        self.__i += 1
        print self.__i
        q.put([self.__i, self.__name])
        return

    def name(self):
        return self.__name

    def i(self):
        return self.__i

if __name__ == '__main__':
    mc = myClass()
    q = Queue()

    p = Process(target=mc.target_func, args=('bob', q,))
    p.start()
    ret = q.get()
    p.join()

    p2 = Process(target=mc.target_func, args=('tom', q,))
    p2.start()
    ret = q.get()
    p2.join()
I expect the print out should be
Hello bob
1
Hello tom
2
But actually, the print out is
Hello bob
1
Hello tom
1 <------------------ Why it's not 2?
May I know what I am doing wrong?
Many thanks.
target_func is called in a separate process. mc is copied into each subprocess; it is not shared between processes.
Using Thread instead, you will get the expected result. For safety you should use a lock; I omitted it in the following code (but see the sketch after it).
from threading import Thread
from Queue import Queue
....
if __name__ == '__main__':
    mc = myClass()
    q = Queue()

    p = Thread(target=mc.target_func, args=('bob', q,))
    p.start()
    ret = q.get()
    p.join()

    p2 = Thread(target=mc.target_func, args=('tom', q,))
    p2.start()
    ret = q.get()
    p2.join()
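A sketch of the lock just mentioned; keeping it inside the class guards the counter against interleaved updates from concurrent target_func calls:

from threading import Lock

class myClass(object):
    def __init__(self):
        self.__i = 0
        self.__name = 'rob'
        self.__lock = Lock()

    def target_func(self, name, q):
        with self.__lock:  # serialize updates to the shared counter
            self.__name = name
            self.__i += 1
            q.put([self.__i, self.__name])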
Processes don't share memory, unlike threads. The name __i in the second process refers to a different variable, whose initial value was copied from the original process when you launched the subprocess.
You can use the Value or Array data types to transfer information from one process to another, or you can use a Queue to push data from the subprocess back to the original. All of these classes are included in the multiprocessing module:
http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue
http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value
http://docs.python.org/2/library/multiprocessing.html#multiprocessing.Array
The value of the variable is still the same because each process you create gets a full copy of the parent process's memory space, including a copy of the mc class instance you created earlier. Hence, when you modify the instance variable of mc from within each process, it does not affect the variable in your main process. Here's a more concise example of this behavior:
from multiprocessing import Process

class A(object):
    def __init__(self):
        self.var = 1
        print "Initialized class: ", self

    def test(self):
        print self
        print "Variable value:", self.var
        self.var += 1

if __name__ == '__main__':
    a = A()

    p1 = Process(target=a.test)
    # Creates a copy of the current memory space and will print "Variable value: 1"
    p1.start()

    p2 = Process(target=a.test)
    # Will still print "Variable value: 1"
    p2.start()
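For contrast, a sketch using multiprocessing.Value (one of the types linked above), which places the counter in shared memory so the increment is visible across processes:

from multiprocessing import Process, Value

class A(object):
    def __init__(self):
        # 'i' = a C int living in shared memory, not in each copy of A
        self.var = Value('i', 1)

    def test(self):
        with self.var.get_lock():  # guard the read-modify-write
            print("Variable value: {}".format(self.var.value))
            self.var.value += 1

if __name__ == '__main__':
    a = A()
    p1 = Process(target=a.test)
    p1.start()
    p1.join()   # prints "Variable value: 1"
    p2 = Process(target=a.test)
    p2.start()
    p2.join()   # prints "Variable value: 2" because the Value is shared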
