Is there a Pool class for worker threads, similar to the multiprocessing module's Pool class?
I like, for example, the easy way to parallelize a map function:
def long_running_func(p):
c_func_no_gil(p)
p = multiprocessing.Pool(4)
xs = p.map(long_running_func, range(100))
However, I would like to do it without the overhead of creating new processes.
I know about the GIL. However, in my use case, the function will be an IO-bound C function for which the Python wrapper will release the GIL before the actual function call.
Do I have to write my own threading pool?
I just found out that there actually is a thread-based Pool interface in the multiprocessing module; however, it is somewhat hidden and not properly documented.
It can be imported via
from multiprocessing.pool import ThreadPool
It is implemented using a dummy Process class wrapping a python thread. This thread-based Process class can be found in multiprocessing.dummy which is mentioned briefly in the docs. This dummy module supposedly provides the whole multiprocessing interface based on threads.
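For example, a minimal sketch using it the same way as the process-based Pool (io_bound is just a placeholder for the GIL-releasing call from the question):

from multiprocessing.pool import ThreadPool

def io_bound(x):
    # placeholder for the real GIL-releasing C call
    return x * x

pool = ThreadPool(4)                      # 4 worker threads instead of 4 processes
results = pool.map(io_bound, range(100))
pool.close()
pool.join()
print(results[:5])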
In Python 3 you can use concurrent.futures.ThreadPoolExecutor, i.e.:
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=10)
a = executor.submit(my_function)
See the docs for more info and examples.
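For example, a short self-contained sketch (the fetch function and URL list are made up for illustration):

from concurrent.futures import ThreadPoolExecutor

def fetch(url):
    # stand-in for an IO-bound task
    return len(url)

urls = ['https://example.com/%d' % i for i in range(10)]

with ThreadPoolExecutor(max_workers=10) as executor:
    # map() returns results in input order; the with-block waits for completion
    results = list(executor.map(fetch, urls))

print(results)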
Yes, and it seems to have (more or less) the same API.
import multiprocessing
import multiprocessing.pool

def worker(lnk):
    ....

def start_process():
    .....

....
if PROCESS:
    pool = multiprocessing.Pool(processes=POOL_SIZE,
                                initializer=start_process)
else:
    pool = multiprocessing.pool.ThreadPool(processes=POOL_SIZE,
                                           initializer=start_process)

pool.map(worker, inputs)
....
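A complete, runnable sketch of the same switch between a process pool and a thread pool (the worker function, pool size, and inputs are invented for illustration):

import multiprocessing
from multiprocessing.pool import ThreadPool

USE_PROCESSES = False      # flip to True to switch back to processes
POOL_SIZE = 4

def worker(n):
    # stand-in for the real per-item work
    return n * n

def start_process():
    # runs once in every worker (process or thread) when the pool starts
    print('initializing worker')

if __name__ == '__main__':
    if USE_PROCESSES:
        pool = multiprocessing.Pool(processes=POOL_SIZE, initializer=start_process)
    else:
        pool = ThreadPool(processes=POOL_SIZE, initializer=start_process)
    print(pool.map(worker, range(10)))
    pool.close()
    pool.join()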
For something very simple and lightweight (slightly modified from here):
from Queue import Queue
from threading import Thread
class Worker(Thread):
"""Thread executing tasks from a given tasks queue"""
def __init__(self, tasks):
Thread.__init__(self)
self.tasks = tasks
self.daemon = True
self.start()
def run(self):
while True:
func, args, kargs = self.tasks.get()
try:
func(*args, **kargs)
except Exception, e:
print e
finally:
self.tasks.task_done()
class ThreadPool:
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads):
self.tasks = Queue(num_threads)
for _ in range(num_threads):
Worker(self.tasks)
def add_task(self, func, *args, **kargs):
"""Add a task to the queue"""
self.tasks.put((func, args, kargs))
def wait_completion(self):
"""Wait for completion of all the tasks in the queue"""
self.tasks.join()
if __name__ == '__main__':
from random import randrange
from time import sleep
delays = [randrange(1, 10) for i in range(100)]
def wait_delay(d):
print 'sleeping for (%d)sec' % d
sleep(d)
pool = ThreadPool(20)
for i, d in enumerate(delays):
pool.add_task(wait_delay, d)
pool.wait_completion()
To support callbacks on task completion you can just add the callback to the task tuple.
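For example, a minimal sketch of that change in Python 3 syntax (the callback keyword and the usage at the bottom are invented for illustration):

from queue import Queue
from threading import Thread

class Worker(Thread):
    """Thread executing tasks from a given tasks queue, invoking an
    optional callback with each task's return value."""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        while True:
            func, args, kwargs, callback = self.tasks.get()
            try:
                result = func(*args, **kwargs)
                if callback is not None:
                    callback(result)
            except Exception as e:
                print(e)
            finally:
                self.tasks.task_done()

class ThreadPool:
    """Pool of threads consuming (func, args, kwargs, callback) tuples."""
    def __init__(self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def add_task(self, func, *args, callback=None, **kwargs):
        self.tasks.put((func, args, kwargs, callback))

    def wait_completion(self):
        self.tasks.join()

# usage: print each result as soon as its task finishes
pool = ThreadPool(4)
for n in range(10):
    pool.add_task(pow, n, 2, callback=print)
pool.wait_completion()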
To use the thread pool in Python, you can use this import:
from multiprocessing.dummy import Pool as ThreadPool
and then use it like this:
pool = ThreadPool(threads)
results = pool.map(service, tasks)
pool.close()
pool.join()
return results
Here, threads is the number of threads you want and tasks is the list of tasks that will be mapped to the service function.
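Wrapping the snippet into a complete function, a minimal sketch (assuming service is any callable and tasks a list of its inputs):

from multiprocessing.dummy import Pool as ThreadPool

def run_in_threads(service, tasks, threads=4):
    """Map service over tasks using a pool of worker threads."""
    pool = ThreadPool(threads)
    try:
        results = pool.map(service, tasks)
    finally:
        pool.close()
        pool.join()
    return results

# usage with a trivial service function
print(run_in_threads(len, ['a', 'bb', 'ccc']))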
Yes, there is a thread-based pool similar to the multiprocessing Pool; however, it is somewhat hidden and not properly documented. You can import it in the following way:
from multiprocessing.pool import ThreadPool
Here is a simple example:
def test_multithread_stringio_read_csv(self):
# see gh-11786
max_row_range = 10000
num_files = 100
bytes_to_df = [
'\n'.join(
['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
).encode() for j in range(num_files)]
files = [BytesIO(b) for b in bytes_to_df]
# read all files in many threads
pool = ThreadPool(8)
results = pool.map(self.read_csv, files)
first_result = results[0]
for result in results:
tm.assert_frame_equal(first_result, result)
Here's the result I finally ended up using. It's a modified version of the classes by dgorissen above.
File: threadpool.py
from queue import Queue, Empty
import threading
from threading import Thread
class Worker(Thread):
    """Thread executing tasks from a given tasks queue. The thread can be
    signalled to exit.
    """
    _TIMEOUT = 2
def __init__(self, tasks, th_num):
Thread.__init__(self)
self.tasks = tasks
self.daemon, self.th_num = True, th_num
self.done = threading.Event()
self.start()
def run(self):
while not self.done.is_set():
try:
func, args, kwargs = self.tasks.get(block=True,
timeout=self._TIMEOUT)
try:
func(*args, **kwargs)
except Exception as e:
print(e)
finally:
self.tasks.task_done()
except Empty as e:
pass
return
def signal_exit(self):
""" Signal to thread to exit """
self.done.set()
class ThreadPool:
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads, tasks=[]):
self.tasks = Queue(num_threads)
self.workers = []
self.done = False
self._init_workers(num_threads)
for task in tasks:
self.tasks.put(task)
def _init_workers(self, num_threads):
for i in range(num_threads):
self.workers.append(Worker(self.tasks, i))
def add_task(self, func, *args, **kwargs):
"""Add a task to the queue"""
self.tasks.put((func, args, kwargs))
def _close_all_threads(self):
""" Signal all threads to exit and lose the references to them """
for workr in self.workers:
workr.signal_exit()
self.workers = []
def wait_completion(self):
"""Wait for completion of all the tasks in the queue"""
self.tasks.join()
def __del__(self):
self._close_all_threads()
def create_task(func, *args, **kwargs):
return (func, args, kwargs)
To use the pool:
from random import randrange
from time import sleep
delays = [randrange(1, 10) for i in range(30)]
def wait_delay(d):
print('sleeping for (%d)sec' % d)
sleep(d)
pool = ThreadPool(20)
for i, d in enumerate(delays):
pool.add_task(wait_delay, d)
pool.wait_completion()
Another way is to submit the work to a ThreadPoolExecutor:
import concurrent.futures

cpus = 4  # desired number of worker threads
with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
    for i in range(10):
        # the first argument to submit() must be the callable itself
        a = executor.submit(my_function, arg1, arg2, ...)
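A complete sketch of that pattern (the task function and inputs are invented), collecting results as they finish:

import concurrent.futures

def task(n):
    # stand-in for an IO-bound call
    return n * n

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(task, n) for n in range(10)]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())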
The overhead of creating the new processes is minimal, especially when it's just 4 of them. I doubt this is a performance hot spot of your application. Keep it simple, optimize where you have to and where profiling results point to.
There is no built-in thread-based pool. However, it can be very quick to implement a producer/consumer queue with the Queue class.
From:
https://docs.python.org/2/library/queue.html
from threading import Thread
from Queue import Queue
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = Queue()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
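If you also need results back from the workers, one common extension of this pattern (a sketch, not from the linked docs; shown in Python 3 with a made-up process function) is a second queue that the workers write into:

from queue import Queue
from threading import Thread

def process(item):
    # stand-in for real work
    return item * 2

def worker(in_q, out_q):
    while True:
        item = in_q.get()
        out_q.put(process(item))
        in_q.task_done()

in_q, out_q = Queue(), Queue()
for _ in range(4):
    Thread(target=worker, args=(in_q, out_q), daemon=True).start()

for item in range(10):
    in_q.put(item)
in_q.join()                    # block until every item has been processed

results = [out_q.get() for _ in range(10)]
print(results)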
If you don't mind executing someone else's code, here's mine:
Note: There is a lot of extra code you may want to remove [added for clarification and to demonstrate how it works].
Note: Python naming conventions were used for method names and variable names instead of camelCase.
Working procedure:
The MultiThread class initializes the requested number of thread instances, sharing a lock, a work queue, an exit flag, and a results list.
Each SingleThread is started by MultiThread as soon as it is created.
Work is added through MultiThread (it takes care of the locking).
The SingleThreads process the work queue, holding the lock while they access it.
Once your work is done, you can destroy all threads via the shared boolean value.
Here, work can be anything. It can even automatically import (uncomment the import line) and process a module using the given arguments.
Results are appended to the shared results list and can be retrieved with get_results().
Code:
import threading
import queue
class SingleThread(threading.Thread):
def __init__(self, name, work_queue, lock, exit_flag, results):
threading.Thread.__init__(self)
self.name = name
self.work_queue = work_queue
self.lock = lock
self.exit_flag = exit_flag
self.results = results
def run(self):
# print("Coming %s with parameters %s", self.name, self.exit_flag)
while not self.exit_flag:
# print(self.exit_flag)
self.lock.acquire()
if not self.work_queue.empty():
work = self.work_queue.get()
module, operation, args, kwargs = work.module, work.operation, work.args, work.kwargs
self.lock.release()
print("Processing : " + operation + " with parameters " + str(args) + " and " + str(kwargs) + " by " + self.name + "\n")
# module = __import__(module_name)
result = str(getattr(module, operation)(*args, **kwargs))
print("Result : " + result + " for operation " + operation + " and input " + str(args) + " " + str(kwargs))
self.results.append(result)
else:
self.lock.release()
# process_work_queue(self.work_queue)
class MultiThread:
def __init__(self, no_of_threads):
self.exit_flag = bool_instance()
self.queue_lock = threading.Lock()
self.threads = []
self.work_queue = queue.Queue()
self.results = []
for index in range(0, no_of_threads):
thread = SingleThread("Thread" + str(index+1), self.work_queue, self.queue_lock, self.exit_flag, self.results)
thread.start()
self.threads.append(thread)
def add_work(self, work):
self.queue_lock.acquire()
        self.work_queue.put(work)
self.queue_lock.release()
def destroy(self):
self.exit_flag.value = True
for thread in self.threads:
thread.join()
def get_results(self):
return self.results
class Work:
def __init__(self, module, operation, args, kwargs={}):
self.module = module
self.operation = operation
self.args = args
self.kwargs = kwargs
class SimpleOperations:
def sum(self, *args):
return sum([int(arg) for arg in args])
    @staticmethod
def mul(a, b, c=0):
return int(a) * int(b) + int(c)
class bool_instance:
def __init__(self, value=False):
self.value = value
def __setattr__(self, key, value):
if key != "value":
raise AttributeError("Only value can be set!")
if not isinstance(value, bool):
raise AttributeError("Only True/False can be set!")
self.__dict__[key] = value
# super.__setattr__(key, bool(value))
def __bool__(self):
return self.value
if __name__ == "__main__":
multi_thread = MultiThread(5)
multi_thread.add_work(Work(SimpleOperations(), "mul", [2, 3], {"c":4}))
while True:
data_input = input()
if data_input == "":
pass
elif data_input == "break":
break
else:
work = data_input.split()
multi_thread.add_work(Work(SimpleOperations(), work[0], work[1:], {}))
multi_thread.destroy()
print(multi_thread.get_results())
I have a requirement to create child processes, receive results using a Future, and then kill some of them when required.
For this I have subclassed the multiprocessing.Process class and return a Future object from the start() method.
The problem is that I am not able to receive the result in the cb() function, as it never gets called.
Please help/suggest whether this can be done in some other way, or point out what I am missing in my current implementation.
Following is my current approach:
from multiprocessing import Process, Queue
from concurrent.futures import _base
import threading
from time import sleep
def foo(x,q):
print('result {}'.format(x*x))
result = x*x
sleep(5)
q.put(result)
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = _base.Future()
def run(self):
q = Queue()
worker_thread = threading.Thread(target=self.target, args=(self.args+ (q,)))
worker_thread.start()
r = q.get(block=True)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
def start(self):
f = _base.Future()
run_thread = threading.Thread(target=self.run)
run_thread.start()
return f
def cb(future):
print('received result in callback {}'.format(future))
def main():
p1 = MyProcess(target=foo, args=(2,))
f = p1.start()
f.add_done_callback(fn=cb)
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
In your start method you create a new Future which you then return. This is a different future than the one you set the result on; the returned future is simply never used. Try:
def start(self):
run_thread = threading.Thread(target=self.run)
run_thread.start()
return self.f
However, there are more problems with your code. You override the start method of the process, replacing it with execution on a worker thread, thereby actually bypassing multiprocessing. Also, you shouldn't import the _base module; that is an implementation detail, as indicated by the leading underscore. You should import concurrent.futures.Future (it's the same class, but through the public API).
This really uses multiprocessing:
from multiprocessing import Process, Queue
from concurrent.futures import Future
import threading
from time import sleep
def foo(x,q):
print('result {}'.format(x*x))
result = x*x
sleep(5)
q.put(result)
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = Future()
def run(self):
q = Queue()
worker_thread = threading.Thread(target=self.target, args=(self.args+ (q,)))
worker_thread.start()
r = q.get(block=True)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
def cb(future):
print('received result in callback {}: {}'.format(future, future.result()))
def main():
p1 = MyProcess(target=foo, args=(2,))
p1.f.add_done_callback(fn=cb)
p1.start()
p1.join()
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
And since you're already in a new process at that point, spawning a worker thread to execute your target function shouldn't really be necessary; you could just execute your target function directly instead. Also, should the target function raise an exception, you wouldn't know about it: your callback will only be called on success. So if you fix that as well, you're left with:
from multiprocessing import Process
from concurrent.futures import Future
import threading
from time import sleep
def foo(x):
print('result {}'.format(x*x))
result = x*x
sleep(5)
return result
class MyProcess(Process):
def __init__(self, target, args):
super().__init__()
self.target = target
self.args = args
self.f = Future()
def run(self):
try:
r = self.target(*self.args)
print('setting result {}'.format(r))
self.f.set_result(result=r)
print('done setting result')
except Exception as ex:
self.f.set_exception(ex)
def cb(future):
print('received result in callback {}: {}'.format(future, future.result()))
def main():
p1 = MyProcess(target=foo, args=(2,))
p1.f.add_done_callback(fn=cb)
p1.start()
p1.join()
sleep(10)
if __name__ == '__main__':
main()
print('Main thread dying')
This is basically what a ProcessPoolExecutor does.
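For comparison, roughly the same behaviour using the standard ProcessPoolExecutor (a sketch; foo mirrors the example function above):

from concurrent.futures import ProcessPoolExecutor
from time import sleep

def foo(x):
    result = x * x
    sleep(5)
    return result

def cb(future):
    print('received result in callback {}: {}'.format(future, future.result()))

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=1) as executor:
        f = executor.submit(foo, 2)
        f.add_done_callback(cb)   # runs in the parent once the result arrives
    print('Main process dying')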
I have a list of input data and would like to process it in parallel, but processing each item takes time, as network IO is involved. CPU usage is not a problem.
I would not like to have the overhead of additional processes, since I have a lot of things to process at a time and do not want to set up inter-process communication.
# the parallel execution equivalent of this?
import time
input_data = [1,2,3,4,5,6,7]
input_processor = time.sleep
results = map(input_processor, input_data)
The code I am using makes use of twisted.internet.defer so a solution involving that is fine as well.
You can easily define Worker threads that work in parallel till a queue is empty.
from threading import Thread
from collections import deque
import time
# Create a new class that inherits from Thread
class Worker(Thread):
def __init__(self, inqueue, outqueue, func):
'''
A worker that calls func on objects in inqueue and
pushes the result into outqueue
runs until inqueue is empty
'''
self.inqueue = inqueue
self.outqueue = outqueue
self.func = func
super().__init__()
    # override the run method; this is started when
    # you call worker.start()
def run(self):
while self.inqueue:
data = self.inqueue.popleft()
print('start')
result = self.func(data)
self.outqueue.append(result)
print('finished')
def test(x):
time.sleep(x)
return 2 * x
if __name__ == '__main__':
data = 12 * [1, ]
queue = deque(data)
result = deque()
# create 3 workers working on the same input
workers = [Worker(queue, result, test) for _ in range(3)]
# start the workers
for worker in workers:
worker.start()
# wait till all workers are finished
for worker in workers:
worker.join()
print(result)
As expected, this runs in about 4 seconds: 12 one-second tasks shared between 3 workers take 4 rounds.
One could also write a simple Pool class to get rid of the noise in the main function:
from threading import Thread
from collections import deque
import time
class Pool():
def __init__(self, n_threads):
self.n_threads = n_threads
def map(self, func, data):
inqueue = deque(data)
result = deque()
workers = [Worker(inqueue, result, func) for i in range(self.n_threads)]
for worker in workers:
worker.start()
for worker in workers:
worker.join()
return list(result)
class Worker(Thread):
def __init__(self, inqueue, outqueue, func):
'''
A worker that calls func on objects in inqueue and
pushes the result into outqueue
runs until inqueue is empty
'''
self.inqueue = inqueue
self.outqueue = outqueue
self.func = func
super().__init__()
    # override the run method; this is started when
    # you call worker.start()
def run(self):
while self.inqueue:
data = self.inqueue.popleft()
print('start')
result = self.func(data)
self.outqueue.append(result)
print('finished')
def test(x):
time.sleep(x)
return 2 * x
if __name__ == '__main__':
data = 12 * [1, ]
pool = Pool(6)
result = pool.map(test, data)
print(result)
You can use the multiprocessing module. Without knowing more about how you want to process the data, you can use a pool of workers:
import multiprocessing as mp
import time

input_processor = time.sleep
core_num = mp.cpu_count()
pool = mp.Pool(processes=core_num)

# apply_async takes the callable and its arguments separately
results = [pool.apply_async(input_processor, (i,)) for i in range(1, 7 + 1)]
result_final = [p.get() for p in results]

for n in range(7):
    print(n + 1, result_final[n])
The above keeps track of the order each task is done. It also does not allow the processes to talk to each other.
Edited:
To call this as a function, you should pass in the input data and the number of processors:
import numpy as np

def parallel_map(processor_count, input_data):
    pool = mp.Pool(processes=processor_count)
    results = [pool.apply_async(input_processor, (i,)) for i in input_data]
    result_final = np.array([p.get() for p in results])
    result_data = np.vstack((input_data, result_final))
    return result_data
I assume you are using Twisted. In that case, you can launch multiple deferreds and wait for the completion of all of them using DeferredList:
http://twistedmatrix.com/documents/15.4.0/core/howto/defer.html#deferredlist
If input_processor is a non-blocking call (returns deferred):
from twisted.internet import defer

def main():
    input_data = [1,2,3,4,5,6,7]
    input_processor = asyn_function

    requests = []
    for entry in input_data:
        requests.append(defer.maybeDeferred(input_processor, entry))

    deferredList = defer.DeferredList(requests, consumeErrors=True)
    deferredList.addCallback(gotResults)
    return deferredList

def gotResults(results):
    for (success, value) in results:
        if success:
            print 'Success:', value
        else:
            print 'Failure:', value.getErrorMessage()
In case input_processor is a long/blocking function, you can use deferToThread instead of maybeDeferred:
from twisted.internet import defer, threads

def main():
    input_data = [1,2,3,4,5,6,7]
    input_processor = syn_function

    requests = []
    for entry in input_data:
        requests.append(threads.deferToThread(input_processor, entry))

    deferredList = defer.DeferredList(requests, consumeErrors=True)
    deferredList.addCallback(gotResults)
    return deferredList
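For completeness, a runnable sketch of the blocking variant with the reactor setup that the snippets above assume (the blocking_task function is made up; Python 3 print syntax):

import time
from twisted.internet import defer, reactor, threads

def blocking_task(x):
    # stand-in for a blocking network call
    time.sleep(1)
    return 2 * x

def got_results(results):
    for success, value in results:
        print('Success:' if success else 'Failure:', value)
    reactor.stop()

def main():
    requests = [threads.deferToThread(blocking_task, x) for x in [1, 2, 3, 4]]
    dl = defer.DeferredList(requests, consumeErrors=True)
    dl.addCallback(got_results)

reactor.callWhenRunning(main)
reactor.run()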
Is there an easy way to track the CPU time of not only a Process but of any child processes launched by it?
I tried sub-classing multiprocessing.Process to time an arbitrary function, like:
import time
from multiprocessing import Process
class TimedProcess(Process):
daemon = True
def __init__(self, *args, **kwargs):
super(TimedProcess, self).__init__(*args, **kwargs)
self.t0 = time.clock()
    @property
def duration_seconds(self):
return time.clock() - self.t0
p = TimedProcess(target=my_long_running_func)
p.start()
while p.is_alive():
print p.duration_seconds
time.sleep(1)
However, when I tried to time functions involving scikit-learn or other code involving C extensions or sub-processes, I found my duration_seconds would often report 0, or just a few seconds, even though the code would run for hours. How would I fix this?
Your code almost prints the CPU time, but you're calling time.clock() in the parent process instead of the child process. By using multiprocessing.Pipe, you can pass the values from the child to the parent process:
import time
from threading import Thread
from multiprocessing import Process, Pipe
class TimedProcess(Process):
daemon = True
def __init__(self, *args, **kwargs):
super(TimedProcess, self).__init__(*args, **kwargs)
self.parent_conn, self.child_conn = Pipe()
self.child_finished = False
self._duration = 0.0
def get_duration(self):
if not self.child_finished:
self.parent_conn.send(None)
result = self.parent_conn.recv()
if result == 'done':
self.child_finished = True
else:
self._duration = result
return self._duration
def run(self):
try:
t0 = time.clock()
Thread(target=self._run).start()
while True:
request = self.child_conn.recv()
self.child_conn.send(time.clock() - t0)
if request == 'stop':
break
finally:
self.child_conn.send('done')
def _run(self):
try:
super(TimedProcess, self).run()
finally:
self.parent_conn.send('stop')
p = TimedProcess(target=my_long_running_func)
p.start()
while p.is_alive():
time.sleep(1)
print p.get_duration()
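As a side note, not part of the approach above: if you only need the children's total CPU time after they have exited and been joined, the standard library exposes it directly on Unix (a minimal Python 3 sketch):

import os
import resource
from multiprocessing import Process

def burn_cpu():
    sum(i * i for i in range(10 ** 7))

p = Process(target=burn_cpu)
p.start()
p.join()  # the child must be waited for before its times are accounted

# user/system CPU time consumed by waited-for children
times = os.times()
print('children user: %.2fs, system: %.2fs'
      % (times.children_user, times.children_system))

usage = resource.getrusage(resource.RUSAGE_CHILDREN)
print('children CPU (getrusage): %.2fs' % (usage.ru_utime + usage.ru_stime))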
I was under the impression that Python passes objects by reference, so logically (I thought), passing a dictionary to a thread's worker function would allow me to set new key-value pairs that would be preserved after the worker function had returned. Unfortunately, it seems as though I am wrong!
Without further ado, here's a test case:
from Queue import Queue
from threading import Thread
def work_fn(dictionary, dfield):
dictionary[dfield] = True
class Worker(Thread):
"""Thread executing tasks from a given tasks queue"""
def __init__(self, tasks):
Thread.__init__(self)
self.tasks = tasks
self.daemon = True
self.start()
def run(self):
while True:
func, args, kargs = self.tasks.get()
try:
func(*args, **kargs)
except Exception, e:
print e
self.tasks.task_done()
class ThreadPool(object):
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads):
self.tasks = Queue(num_threads)
for i in range(num_threads):
Worker(self.tasks)
def add_task(self, func, *args, **kargs):
"""Add a task to the queue"""
self.tasks.put((func, args, kargs))
def wait_completion(self):
"""Wait for completion of all the tasks in the queue"""
self.tasks.join()
if __name__ == '__main__':
pool = ThreadPool(4)
data = [{} for _ in range(10)]
for i, d in enumerate(data):
pool.add_task(work_fn, d, str(i))
pool.wait_completion()
for d in data:
print d.keys()
What is actually going on here?
What should I be doing differently?
I don't see where you are waiting for the tasks to finish before printing the results. It seems like you need to do the collection of results in a subsequent loop after spawning the jobs.