I'm launching 3 processes and I want them to put a string into a shared array, at the index corresponding to the process (i).
Look at the code below, the output generated is:
['test 0', None, None]
['test 1', 'test 1', None]
['test 2', 'test 2', 'test 2']
Why does 'test 0' get overwritten by 'test 1', and 'test 1' by 'test 2'?
What I want is (order is not important):
['test 0', None, None]
['test 0', 'test 1', None]
['test 0', 'test 1', 'test 2']
The code:
#!/usr/bin/env python
import multiprocessing
from multiprocessing import Value, Lock, Process, Array
import ctypes
from ctypes import c_int, c_char_p
class Consumer(multiprocessing.Process):
def __init__(self, task_queue, result_queue, arr, lock):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.arr = arr
self.lock = lock
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
if next_task is None:
self.task_queue.task_done()
break
answer = next_task(arr=self.arr, lock=self.lock)
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, i):
self.i = i
def __call__(self, arr=None, lock=None):
with lock:
arr[self.i] = "test %d" % self.i
print arr[:]
def __str__(self):
return 'ARC'
def run(self):
print 'IN'
if __name__ == '__main__':
tasks = multiprocessing.JoinableQueue()
results = multiprocessing.Queue()
arr = Array(ctypes.c_char_p, 3)
lock = multiprocessing.Lock()
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]
for w in consumers:
w.start()
for i in xrange(3):
tasks.put(Task(i))
for i in xrange(num_consumers):
tasks.put(None)
I'm running Python 2.7.3 (Ubuntu).
This problem seems similar to this one. There, J.F. Sebastian speculated that the assignment to arr[i] points arr[i] to a memory address that was only meaningful to the subprocess making the assignment. The other subprocesses retrieve garbage when looking at that address.
There are at least two ways to avoid this problem. One is to use a multiprocessing Manager list:
import multiprocessing as mp
class Consumer(mp.Process):
def __init__(self, task_queue, result_queue, lock, lst):
mp.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.lock = lock
self.lst = lst
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
if next_task is None:
self.task_queue.task_done()
break
answer = next_task(lock = self.lock, lst = self.lst)
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, i):
self.i = i
def __call__(self, lock, lst):
with lock:
lst[self.i] = "test {}".format(self.i)
print([lst[i] for i in range(3)])
if __name__ == '__main__':
tasks = mp.JoinableQueue()
results = mp.Queue()
manager = mp.Manager()
lst = manager.list(['']*3)
lock = mp.Lock()
num_consumers = mp.cpu_count() * 2
consumers = [Consumer(tasks, results, lock, lst) for i in xrange(num_consumers)]
for w in consumers:
w.start()
for i in xrange(3):
tasks.put(Task(i))
for i in xrange(num_consumers):
tasks.put(None)
tasks.join()
Another way is to use a shared array with a fixed size, such as mp.Array('c', 10):
import multiprocessing as mp
class Consumer(mp.Process):
def __init__(self, task_queue, result_queue, arr, lock):
mp.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.arr = arr
self.lock = lock
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
if next_task is None:
self.task_queue.task_done()
break
answer = next_task(arr = self.arr, lock = self.lock)
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, i):
self.i = i
def __call__(self, arr, lock):
with lock:
arr[self.i].value = "test {}".format(self.i)
print([a.value for a in arr])
if __name__ == '__main__':
tasks = mp.JoinableQueue()
results = mp.Queue()
arr = [mp.Array('c', 10) for i in range(3)]
lock = mp.Lock()
num_consumers = mp.cpu_count() * 2
consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]
for w in consumers:
w.start()
for i in xrange(3):
tasks.put(Task(i))
for i in xrange(num_consumers):
tasks.put(None)
tasks.join()
I speculate that the reason why this works when mp.Array(ctypes.c_char_p, 3) does not is that mp.Array('c', 10) has a fixed size, so the memory address never changes, while mp.Array(ctypes.c_char_p, 3) has a variable size, so the memory address might change when arr[i] is assigned a bigger string.
Perhaps this is what the docs are warning about when they state:
Although it is possible to store a pointer in shared memory remember
that this will refer to a location in the address space of a specific
process. However, the pointer is quite likely to be invalid in the
context of a second process and trying to dereference the pointer from
the second process may cause a crash.
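For completeness, here is a minimal sketch of the fixed-size approach without the queue/consumer machinery; the 10-byte slot width is an assumption, so pick whatever maximum string length you need:
import multiprocessing as mp

def fill(arr, lock, i):
    with lock:
        arr[i].value = "test %d" % i  # must fit within the slot's fixed byte width

if __name__ == '__main__':
    # one fixed-size byte buffer per slot; the 10-byte width is an assumption
    arr = [mp.Array('c', 10) for _ in range(3)]
    lock = mp.Lock()
    procs = [mp.Process(target=fill, args=(arr, lock, i)) for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print([a.value for a in arr])  # -> ['test 0', 'test 1', 'test 2']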
Related
Is there a Pool class for worker threads, similar to the multiprocessing module's Pool class?
I like, for example, the easy way to parallelize a map function:
def long_running_func(p):
c_func_no_gil(p)
p = multiprocessing.Pool(4)
xs = p.map(long_running_func, range(100))
However, I would like to do it without the overhead of creating new processes.
I know about the GIL. However, in my use case, the function will be an IO-bound C function for which the Python wrapper will release the GIL before the actual function call.
Do I have to write my own threading pool?
I just found out that there actually is a thread-based Pool interface in the multiprocessing module; however, it is somewhat hidden and not properly documented.
It can be imported via
from multiprocessing.pool import ThreadPool
It is implemented using a dummy Process class wrapping a Python thread. This thread-based Process class can be found in multiprocessing.dummy, which is mentioned briefly in the docs. This dummy module supposedly provides the whole multiprocessing interface based on threads.
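As a quick illustration of that interface (the square function is just a placeholder task), ThreadPool exposes the same map-style API as the process-based Pool:
from multiprocessing.pool import ThreadPool

def square(x):  # placeholder task; any IO-bound callable works the same way
    return x * x

pool = ThreadPool(4)  # 4 worker threads
print(pool.map(square, range(10)))  # [0, 1, 4, ..., 81]
pool.close()
pool.join()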
In Python 3 you can use concurrent.futures.ThreadPoolExecutor, i.e.:
from concurrent.futures import ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=10)
a = executor.submit(my_function)
See the docs for more info and examples.
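A slightly fuller sketch, using the executor as a context manager and collecting the results (my_function here is an assumed placeholder):
from concurrent.futures import ThreadPoolExecutor

def my_function(x):  # placeholder IO-bound task
    return x * 2

with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(my_function, i) for i in range(5)]
    results = [f.result() for f in futures]  # result() blocks until each future is done
print(results)  # [0, 2, 4, 6, 8]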
Yes, and it seems to have (more or less) the same API.
import multiprocessing
def worker(lnk):
....
def start_process():
.....
....
if(PROCESS):
pool = multiprocessing.Pool(processes=POOL_SIZE, initializer=start_process)
else:
pool = multiprocessing.pool.ThreadPool(processes=POOL_SIZE,
initializer=start_process)
pool.map(worker, inputs)
....
For something very simple and lightweight (slightly modified from here):
from Queue import Queue
from threading import Thread
class Worker(Thread):
"""Thread executing tasks from a given tasks queue"""
def __init__(self, tasks):
Thread.__init__(self)
self.tasks = tasks
self.daemon = True
self.start()
def run(self):
while True:
func, args, kargs = self.tasks.get()
try:
func(*args, **kargs)
except Exception, e:
print e
finally:
self.tasks.task_done()
class ThreadPool:
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads):
self.tasks = Queue(num_threads)
for _ in range(num_threads):
Worker(self.tasks)
def add_task(self, func, *args, **kargs):
"""Add a task to the queue"""
self.tasks.put((func, args, kargs))
def wait_completion(self):
"""Wait for completion of all the tasks in the queue"""
self.tasks.join()
if __name__ == '__main__':
from random import randrange
from time import sleep
delays = [randrange(1, 10) for i in range(100)]
def wait_delay(d):
print 'sleeping for (%d)sec' % d
sleep(d)
pool = ThreadPool(20)
for i, d in enumerate(delays):
pool.add_task(wait_delay, d)
pool.wait_completion()
To support callbacks on task completion you can just add the callback to the task tuple.
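For instance, here is a sketch of a Worker variant that pulls a four-element tuple off the queue and invokes the callback with the function's return value; the tuple layout and the callback signature are assumptions. add_task would then put (func, args, kargs, callback) tuples on the queue.
from Queue import Queue
from threading import Thread

class CallbackWorker(Thread):
    """Like Worker above, but each task carries an optional on_done callback."""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        while True:
            func, args, kargs, on_done = self.tasks.get()
            try:
                result = func(*args, **kargs)
                if on_done is not None:
                    on_done(result)  # assumed signature: on_done(result)
            except Exception, e:
                print e
            finally:
                self.tasks.task_done()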
To use a thread pool in Python you can use this import:
from multiprocessing.dummy import Pool as ThreadPool
and then use it like this:
pool = ThreadPool(threads)
results = pool.map(service, tasks)
pool.close()
pool.join()
return results
Here, threads is the number of threads you want, and tasks is the list of tasks that will be mapped onto the service function.
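A concrete sketch of that pattern (the service function and the tasks list are placeholders):
from multiprocessing.dummy import Pool as ThreadPool

def service(task):  # placeholder for the real IO-bound work
    return task.upper()

tasks = ['a', 'b', 'c']
pool = ThreadPool(3)
results = pool.map(service, tasks)
pool.close()
pool.join()
print(results)  # ['A', 'B', 'C']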
Yes, there is a thread pool similar to the multiprocessing Pool; however, it is somewhat hidden and not properly documented. You can import it the following way:
from multiprocessing.pool import ThreadPool
Here is a simple example:
def test_multithread_stringio_read_csv(self):
# see gh-11786
max_row_range = 10000
num_files = 100
bytes_to_df = [
'\n'.join(
['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
).encode() for j in range(num_files)]
files = [BytesIO(b) for b in bytes_to_df]
# read all files in many threads
pool = ThreadPool(8)
results = pool.map(self.read_csv, files)
first_result = results[0]
for result in results:
tm.assert_frame_equal(first_result, result)
Here's the result I finally ended up using. It's a modified version of the classes by dgorissen above.
File: threadpool.py
from queue import Queue, Empty
import threading
from threading import Thread
class Worker(Thread):
_TIMEOUT = 2
""" Thread executing tasks from a given tasks queue. Thread is signalable,
to exit
"""
def __init__(self, tasks, th_num):
Thread.__init__(self)
self.tasks = tasks
self.daemon, self.th_num = True, th_num
self.done = threading.Event()
self.start()
def run(self):
while not self.done.is_set():
try:
func, args, kwargs = self.tasks.get(block=True,
timeout=self._TIMEOUT)
try:
func(*args, **kwargs)
except Exception as e:
print(e)
finally:
self.tasks.task_done()
except Empty as e:
pass
return
def signal_exit(self):
""" Signal to thread to exit """
self.done.set()
class ThreadPool:
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads, tasks=[]):
self.tasks = Queue(num_threads)
self.workers = []
self.done = False
self._init_workers(num_threads)
for task in tasks:
self.tasks.put(task)
def _init_workers(self, num_threads):
for i in range(num_threads):
self.workers.append(Worker(self.tasks, i))
def add_task(self, func, *args, **kwargs):
"""Add a task to the queue"""
self.tasks.put((func, args, kwargs))
def _close_all_threads(self):
""" Signal all threads to exit and lose the references to them """
for workr in self.workers:
workr.signal_exit()
self.workers = []
def wait_completion(self):
"""Wait for completion of all the tasks in the queue"""
self.tasks.join()
def __del__(self):
self._close_all_threads()
def create_task(func, *args, **kwargs):
return (func, args, kwargs)
To use the pool:
from random import randrange
from time import sleep
delays = [randrange(1, 10) for i in range(30)]
def wait_delay(d):
print('sleeping for (%d)sec' % d)
sleep(d)
pool = ThreadPool(20)
for i, d in enumerate(delays):
pool.add_task(wait_delay, d)
pool.wait_completion()
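The create_task helper pairs with the tasks= constructor argument, so the queue can also be pre-loaded. A small sketch, assuming the classes above live in threadpool.py as stated:
from random import randrange
from time import sleep
from threadpool import ThreadPool, create_task  # the module defined above

def wait_delay(d):
    print('sleeping for (%d)sec' % d)
    sleep(d)

delays = [randrange(1, 10) for i in range(5)]
preloaded = [create_task(wait_delay, d) for d in delays]
pool = ThreadPool(5, tasks=preloaded)  # workers start consuming the queue right away
pool.wait_completion()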
Another way is to submit the work to a thread pool executor:
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
for i in range(10):
a = executor.submit(arg1, arg2,....)
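Filling in the placeholders, a runnable version might look like this (do_work and the submitted values are assumptions):
import concurrent.futures
import os

def do_work(i):  # placeholder task
    return i * i

cpus = os.cpu_count() or 4
with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
    futures = [executor.submit(do_work, i) for i in range(10)]
    for f in concurrent.futures.as_completed(futures):
        print(f.result())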
The overhead of creating the new processes is minimal, especially when it's just 4 of them. I doubt this is a performance hot spot of your application. Keep it simple, optimize where you have to and where profiling results point to.
There is no built-in thread-based pool. However, it can be very quick to implement a producer/consumer queue with the Queue class.
From:
https://docs.python.org/2/library/queue.html
from threading import Thread
from Queue import Queue
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = Queue()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
If you don't mind executing someone else's code, here's mine:
Note: There is a lot of extra code you may want to remove [added for better clarification and to demonstrate how it works].
Note: Python naming conventions were used for method names and variable names instead of camelCase.
Working procedure:
The MultiThread class initializes the requested number of thread instances, which share a lock, a work queue, an exit flag and a results list.
Each SingleThread is started by MultiThread as it creates the instances.
We can add work items using MultiThread (it takes care of the locking).
The SingleThread workers process the work queue, holding the lock while they access it.
Once your work is done, you can destroy all threads via the shared boolean value.
Here, the work can be anything. It can automatically import (uncomment the import line) and process a module using the given arguments.
Results are appended to the shared results list and can be retrieved with get_results.
Code:
import threading
import queue
class SingleThread(threading.Thread):
def __init__(self, name, work_queue, lock, exit_flag, results):
threading.Thread.__init__(self)
self.name = name
self.work_queue = work_queue
self.lock = lock
self.exit_flag = exit_flag
self.results = results
def run(self):
# print("Coming %s with parameters %s", self.name, self.exit_flag)
while not self.exit_flag:
# print(self.exit_flag)
self.lock.acquire()
if not self.work_queue.empty():
work = self.work_queue.get()
module, operation, args, kwargs = work.module, work.operation, work.args, work.kwargs
self.lock.release()
print("Processing : " + operation + " with parameters " + str(args) + " and " + str(kwargs) + " by " + self.name + "\n")
# module = __import__(module_name)
result = str(getattr(module, operation)(*args, **kwargs))
print("Result : " + result + " for operation " + operation + " and input " + str(args) + " " + str(kwargs))
self.results.append(result)
else:
self.lock.release()
# process_work_queue(self.work_queue)
class MultiThread:
def __init__(self, no_of_threads):
self.exit_flag = bool_instance()
self.queue_lock = threading.Lock()
self.threads = []
self.work_queue = queue.Queue()
self.results = []
for index in range(0, no_of_threads):
thread = SingleThread("Thread" + str(index+1), self.work_queue, self.queue_lock, self.exit_flag, self.results)
thread.start()
self.threads.append(thread)
def add_work(self, work):
self.queue_lock.acquire()
self.work_queue._put(work)
self.queue_lock.release()
def destroy(self):
self.exit_flag.value = True
for thread in self.threads:
thread.join()
def get_results(self):
return self.results
class Work:
def __init__(self, module, operation, args, kwargs={}):
self.module = module
self.operation = operation
self.args = args
self.kwargs = kwargs
class SimpleOperations:
def sum(self, *args):
return sum([int(arg) for arg in args])
@staticmethod
def mul(a, b, c=0):
return int(a) * int(b) + int(c)
class bool_instance:
def __init__(self, value=False):
self.value = value
def __setattr__(self, key, value):
if key != "value":
raise AttributeError("Only value can be set!")
if not isinstance(value, bool):
raise AttributeError("Only True/False can be set!")
self.__dict__[key] = value
# super.__setattr__(key, bool(value))
def __bool__(self):
return self.value
if __name__ == "__main__":
multi_thread = MultiThread(5)
multi_thread.add_work(Work(SimpleOperations(), "mul", [2, 3], {"c":4}))
while True:
data_input = input()
if data_input == "":
pass
elif data_input == "break":
break
else:
work = data_input.split()
multi_thread.add_work(Work(SimpleOperations(), work[0], work[1:], {}))
multi_thread.destroy()
print(multi_thread.get_results())
I have an object:
from multiprocessing import Pool
import time
class ASYNC(object):
def __init__(self, THREADS=[]):
print('do')
pool = Pool(processes=len(THREADS))
self.THREAD_POOL = {}
thread_index = 0
for thread_ in THREADS:
self.THREAD_POOL[thread_index] = {
'thread': thread_['thread'],
'args': thread_['args'],
'callback': thread_['callback']
}
self.THREAD_POOL[thread_index]['running'] = True
pool.apply_async(self.run, [thread_index], callback=thread_['callback'])
thread_index += 1
def run(self, thread_index):
print('enter')
while(self.THREAD_POOL[thread_index]['running']):
print("loop")
self.THREAD_POOL[thread_index]['thread'](self.THREAD_POOL[thread_index])#HERE
time.sleep(1)
self.THREAD_POOL[thread_index]['running'] = False
def wait_for_finish(self):
for pool in self.THREAD_POOL:
while(self.THREAD_POOL[pool]['running']):
print("sleep" + str(self.THREAD_POOL[pool]['running']))
time.sleep(1)
def x(pool):#HERE
print(str(pool))
if(pool['args'][0] >= 15):
pool['running'] = False
pool['args'][0] += 1
def y(str):
print("done")
A = ASYNC([{'thread': x, 'args':[10], 'callback':y}])
print("start")
A.wait_for_finish()
I am having issues passing self.THREAD_POOL[thread_index] as reference to def x(pool)
I need x(pool) to change the value of the variable in the object.
If I check the value in wait_for_finish, the object is not changed.
Passing an object by reference (tested and works properly):
x = {"1":"one", "2","two"}
def test(a):
a["1"] = "ONE"
print(x["1"])#outputs ONE as expected
This means that dictionaries in Python are passed by reference; so why, in my code, is it being passed by value?
SOLUTION
@DevShark
from multiprocessing import Process, Value, Array
def f(n, a):
n.value = 3.1415927
for i in range(len(a)):
a[i] = -a[i]
if __name__ == '__main__':
num = Value('d', 0.0)
arr = Array('i', range(10))
p = Process(target=f, args=(num, arr))
p.start()
p.join()
print num.value
print arr[:]
According to the documentation, you should not do this unless absolutely needed, so I decided not to use it. https://docs.python.org/2/library/multiprocessing.html#multiprocessing.JoinableQueue
Instead I will be doing:
from multiprocessing import Pool
import time
class ASYNC(object):
def __init__(self, THREADS=[]):
print('do')
pool = Pool(processes=len(THREADS))
self.THREAD_POOL = {}
thread_index = 0
for thread_ in THREADS:
self.THREAD_POOL[thread_index] = {
'thread': thread_['thread'],
'args': thread_['args'],
'callback': thread_['callback']
}
self.THREAD_POOL[thread_index]['running'] = True
pool.apply_async(self.run, [thread_index], callback=thread_['callback'])
thread_index += 1
def run(self, thread_index):
print('enter')
while(self.THREAD_POOL[thread_index]['running']):
print("loop")
self.THREAD_POOL[thread_index]['thread'](thread_index)
time.sleep(1)
self.THREAD_POOL[thread_index]['running'] = False
def wait_for_finish(self):
for pool in self.THREAD_POOL:
while(self.THREAD_POOL[pool]['running']):
print("sleep" + str(self.THREAD_POOL[pool]['running']))
time.sleep(1)
def x(index):
global A
pool = A.THREAD_POOL[index]
print(str(pool))
if(pool['args'][0] >= 15):
pool['running'] = False
pool['args'][0] += 1
def y(str):
print("done")
A = ASYNC([{'thread': x, 'args':[10], 'callback':y}])
print("start")
A.wait_for_finish()
You are running your function in a different process. That's the way multiprocessing works. Therefore it does not matter what you do with the object, modifications will not be seen in other processes.
To share data between processes, see the docs, as someone noted in a comment.
Data can be stored in a shared memory map using Value or Array.
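For example, here is a minimal sketch of sharing a 'running' flag through a multiprocessing Manager dict, so that a change made in the worker is visible to the parent (the key name is just illustrative):
import multiprocessing as mp

def worker(shared):
    shared['running'] = False  # this mutation happens in the child process

if __name__ == '__main__':
    manager = mp.Manager()
    shared = manager.dict({'running': True})  # proxy object, visible to all processes
    p = mp.Process(target=worker, args=(shared,))
    p.start()
    p.join()
    print(shared['running'])  # False: the parent sees the child's change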
I want to use Python's multiprocessing module in a class, which itself uses subprocesses so as not to block the main call.
The minimal example looks like this:
import multiprocessing as mp
class mpo():
def __init__(self):
cpu = mp.cpu_count()
self.Pool = mp.Pool(processes = 2)
self.alive = True
self.p = mp.Process(target = self.sub,args=())
def worker():
print 'Alive'
def sub(self):
print self.alive
for i in range(2):
print i
self.Pool.apply_async(self.worker, args=())
print 'done'
self.Pool.close()
# self.Pool.join()
I commented the last line out, as it raises an AssertionError (can only join a child process).
When I do:
m =mpo()
m.p.start()
The output is
True
0
1
done
My main question is: why is the print statement in the worker never reached?
Update:
The updated code looks like this.
import multiprocessing as mp
class mpo():
def __init__(self):
cpu = mp.cpu_count()
self.alive = True
self.p = mp.Process(target = self.sub,args=())
self.result=[]
def worker(self):
self.result.append(1)
print 'Alive'
def sub(self):
print self.alive
Pool = mp.Pool(processes = 2)
for i in range(2):
print i
Pool.apply_async(self.worker, args=())
print 'done'
Pool.close()
Pool.join()
The pool no longer has to be inherited, as it is created in the subprocess. Instead of the print statement, the result is appended to the calling object, and the pool is properly joined. Nevertheless, no result shows up.
So I think this may be a simple example of what you are looking for:
import multiprocessing as mp
def worker(arg):
#print 'Alive'+str(arg)
return "Alive and finished {0}".format(arg)
class mpo():
def __init__(self):
cpu = mp.cpu_count()
self.alive = True
self.pool = mp.Pool(processes = 2)
def sub(self,arguments):
self.results=self.pool.map_async(worker, arguments)
return self.results
if __name__=="__main__":
s=mpo()
s.sub(range(10))
print s.results.get()
Additionally you can call
self.results.ready()
to find out whether the processes have finished their work. You do not have to put this inside of another process because the map_async call does not block the rest of your program.
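For example, building on the mpo class above (the polling interval is arbitrary):
import time

if __name__ == "__main__":
    s = mpo()
    res = s.sub(range(10))
    while not res.ready():  # the main program stays responsive; poll when convenient
        time.sleep(0.1)
    print res.get()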
EDIT:
Concerning your comment, I do not really see the value of putting the calculation in a separate process, because the function is already running in separate processes (in the pool). You only add complexity by nesting it in another subprocess, but it is possible:
import multiprocessing as mp
def worker(arg):
#print 'Alive'+str(arg)
return "Alive and finished {0}".format(arg)
class mpo():
def __init__(self):
cpu = mp.cpu_count()
self.alive = True
self.pool = mp.Pool(processes = 2)
def sub(self,arguments):
self.results=self.pool.map_async(worker, arguments)
return self.results
def run_calculation(q):
s=mpo()
results=s.sub(range(10))
q.put(results.get())
queue=mp.Queue()
proc=mp.Process(target=run_calculation,args=(queue,))
proc.start()
proc.join()
queue.get()
For the following code, I expect the output of dr.hello to be 10, since 10 spawned processes each call updateHello once to increment dr.hello. But it is 0 instead; what is the reason, and how do I change it?
from multiprocessing import Process, Lock
class myWorker:
def __init__(self, lock, driver, i):
self.idx=i
self.driver=driver
self.lock=lock
def run(self):
self.driver.updateHello(self.lock,self.idx)
class driver:
hello=0
def __init__(self):
self.lock=Lock()
def workerrun(self,lock, i):
worker1=myWorker(lock,self,i)
worker1.run()
def run(self):
D=[Process(target=self.workerrun,args=(self.lock,i)) for i in range(10)]
for d in D:
d.start()
for d in D:
d.join()
def updateHello(self,l,i):
l.acquire()
self.hello+=1
print "update from",i
l.release()
if __name__=='__main__':
dr=driver()
dr.run()
print dr.hello
I think you need a multiprocessing.Value for a shared variable:
from multiprocessing import Process, Lock, Value
class myWorker:
def __init__(self, lock, driver, i):
self.idx=i
self.driver=driver
self.lock=lock
def run(self):
self.driver.updateHello(self.lock,self.idx)
class driver:
hello = Value("i",lock=True) # create shared variable of type int
hello.value = 0
def __init__(self):
self.lock=Lock()
def workerrun(self,lock, i):
worker1=myWorker(lock,self,i)
worker1.run()
def run(self):
D=[Process(target=self.workerrun,args=(self.lock,i)) for i in range(10)]
for d in D:
d.start()
for d in D:
d.join()
def updateHello(self,l,i):
with self.lock: # acquire lock
driver.hello.value += 1
print ("update from", i)
# release lock
if __name__=='__main__':
dr=driver()
dr.run()
print(driver.hello.value)
IPython output:
if __name__=='__main__':
dr=driver()
dr.run()
print(driver.hello.value)
## -- End pasted text --
('update from', 0)
('update from', 1)
('update from', 2)
('update from', 5)
('update from', 6)
('update from', 7)
('update from', 4)
('update from', 3)
('update from', 8)
('update from', 9)
10
This works now; I did it by changing Padraic's solution from a class variable to an instance variable:
from multiprocessing import Process, Lock, Value
class myWorker:
def __init__(self, lock, driver, i):
self.idx=i
self.driver=driver
self.lock=lock
def run(self):
self.driver.updateHello(self.lock,self.idx)
class driver(object):
#hello = Value("i",lock=True) # create shared variable of type int
#hello.value = 0
def __init__(self):
self.lock=Lock()
self.hello=Value("i",0)
def workerrun(self,lock, i):
worker1=myWorker(lock,self,i)
worker1.run()
def run(self):
D=[Process(target=self.workerrun,args=(self.lock,i)) for i in range(10)]
for d in D:
d.start()
for d in D:
d.join()
def updateHello(self,l,i):
with self.lock: # acquire lock
self.hello.value += 1
print ("update from", i)
# release lock
if __name__=='__main__':
dr=driver()
dr.run()
print(dr.hello.value)
I'm using a Python program to compute the average value of a list of floats.
The program logic is as follows:
The program is started with some arguments.
A list "hostgroups" is created.
A for-in loop over the list "hostgroups" starts a function worker(hostgroup, var1, var2, var3, ...).
Inside the worker function, two variables are built from some of the worker's input variables.
4a. Inside the worker a subworker-function is called with some input variables of the worker
4b. The subworker returns some new variables
4c. Back in the worker
4d. Some things are done.
4e. At last, a final function is called in the worker with some variables.
So far, so fine!
My next step is to set up multiprocessing... who can help?
UPDATE:
Here is my actual approach:
class Consumer(multiprocessing.Process):
def __init__(self, task_queue, result_queue):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
if next_task is None:
# Poison pill means shutdown
print '%s: Exiting' % proc_name
self.task_queue.task_done()
break
print '%s: %s' % (proc_name, next_task)
answer = next_task()
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, hostgroup, lock):
self.hostgroup = hostgroup
self.lock = lock
def __call__(self):
print 'Doing something fancy for %s!' % self.hostgroup
try:
lock.acquire()
worker(self.hostgroup,hostsfile,mod_inputfile,outputdir,testmode,backup_dir,start_time,end_time,rrdname,unit,yesterday,now_epoch,rrd_interval,rrd_heartbeat,name)
finally:
lock.release()
def __str__(self):
return 'str %s' % self.hostgroup
if __name__ == '__main__':
lock = multiprocessing.Lock()
# Establish communication queues
tasks = multiprocessing.JoinableQueue()
results = multiprocessing.Queue()
# Start consumers
num_consumers = multiprocessing.cpu_count() * 2
print 'Creating %d consumers' % num_consumers
consumers = [ Consumer(tasks, results)
for i in xrange(num_consumers) ]
for w in consumers:
w.start()
# Enqueue jobs
for hostgroup in hostgroups:
tasks.put(Task(hostgroup,lock))
# Add a poison pill for each consumer
for i in xrange(num_consumers):
tasks.put(None)
# Wait for all of the tasks to finish
tasks.join()
---> Fine so far! But no locking seems to happen; all the results are the same....
Why is lock.acquire() not working?
I find multiprocessing.Pool to be much easier to use than the Queue class. The basic setup is:
from multiprocessing import Pool
p = Pool(processes=<number of processes>)
p.map(function, [a, b, c])
This will call function(a), function(b), and function(c) in independent processes.
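Applied to your case, a minimal sketch might look like this (compute_average stands in for your worker logic, and the hostgroup names are made up):
from multiprocessing import Pool

def compute_average(hostgroup):
    # placeholder for your worker(): gather the floats for this hostgroup
    values = [1.0, 2.0, 3.0]
    return hostgroup, sum(values) / len(values)

if __name__ == '__main__':
    hostgroups = ['web', 'db', 'cache']  # assumed example values
    pool = Pool(processes=4)
    results = pool.map(compute_average, hostgroups)
    pool.close()
    pool.join()
    print(results)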