Make Python generator run in background

Right now I have some code that does roughly the following
def generator():
while True:
value = do_some_lengthy_IO()
yield value
def model():
for datapoint in generator():
do_some_lengthy_computation(datapoint)
Right now, the I/O and the computation happen serially. Ideally they should run concurrently (the generator having the next value ready) since they share nothing but the value being passed. I started looking into this and got very confused by the multiprocessing, threading, and async options, and could not get a minimal working example going. Also, since some of these seem to be recent features, note that I am using Python 3.6.

I ended up figuring it out. The simplest way is to use the multiprocessing package and a pipe to communicate with the child process. I wrote a wrapper that can take any generator:
import time
import multiprocessing

def bg(gen):
    def _bg_gen(gen, conn):
        while conn.recv():
            try:
                conn.send(next(gen))
            except StopIteration:
                conn.send(StopIteration)
                return

    parent_conn, child_conn = multiprocessing.Pipe()
    p = multiprocessing.Process(target=_bg_gen, args=(gen, child_conn))
    p.start()

    parent_conn.send(True)
    while True:
        parent_conn.send(True)
        x = parent_conn.recv()
        if x is StopIteration:
            return
        else:
            yield x

def generator(n):
    for i in range(n):
        time.sleep(1)
        yield i

# This takes 2s/iteration
for i in generator(100):
    time.sleep(1)

# This takes 1s/iteration
for i in bg(generator(100)):
    time.sleep(1)
The only thing missing right now is that for infinite generators the process is never killed, but that can easily be added by doing a parent_conn.send(False).
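For completeness, the same idea can also be sketched with a plain thread and a bounded queue, which avoids pickling the yielded values between processes. This is a minimal sketch of my own, not part of the answer above; it assumes do_some_lengthy_IO releases the GIL while it waits (most blocking I/O does), and the sentinel object and prefetch size are arbitrary choices:

import queue
import threading

def bg_thread(gen, prefetch=1):
    # Run `gen` in a daemon thread; keep at most `prefetch` items ready.
    q = queue.Queue(maxsize=prefetch)
    sentinel = object()  # marks exhaustion of the generator

    def _worker():
        for item in gen:
            q.put(item)
        q.put(sentinel)

    threading.Thread(target=_worker, daemon=True).start()
    while True:
        item = q.get()
        if item is sentinel:
            return
        yield item

# usage is identical to bg(), e.g. for i in bg_thread(generator(100)): ...

Since the computation in model() holds the GIL, the multiprocessing version above remains the safer default when both sides are CPU-heavy; the thread variant only helps when the producer is genuinely I/O-bound.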

Related

Fill a Queue with Objects from several data loaders using multiprocessing

I am working on a machine learning input pipeline. I wrote a data loader that reads data from a large .hdf file and returns slices, which takes roughly 2 seconds per slice. Therefore I would like to use a queue that takes in objects from several data loaders and can return single objects from the queue via a next function (like a generator). Furthermore, the processes that fill the queue should somehow run in the background, refilling the queue whenever it is not full. I cannot get it to work properly. With a single data loader it "worked", but gave me the same slices four times.
import multiprocessing as mp

class Queue_Generator():
    def __init__(self, data_loader_list):
        self.pool = mp.Pool(4)
        self.data_loader_list = data_loader_list
        self.queue = mp.Queue(maxsize=16)
        self.pool.map(self.fill_queue, self.data_loader_list)

    def fill_queue(self, gen):
        self.queue.put(next(gen))

    def __next__(self):
        yield self.queue.get()
What I get from this:
NotImplementedError: pool objects cannot be passed between processes or pickled
Thanks in advance
Your specific error means that you cannot keep a pool as part of your class while passing class methods to that pool. What I would suggest is the following:
import multiprocessing as mp
from queue import Empty

class QueueGenerator(object):
    def __init__(self, data_loader_list):
        self.data_loader_list = data_loader_list
        self.queue = mp.Queue(maxsize=16)

    def __iter__(self):
        processes = list()
        for _ in range(4):
            pr = mp.Process(target=fill_queue, args=(self.queue, self.data_loader_list))
            pr.start()
            processes.append(pr)
        return self

    def __next__(self):
        try:
            # The timeout should have a value, otherwise your loop will never stop.
            # Make it something that gives your processes enough time to update the
            # queue, but not so long that your program freezes for an extended period
            # after all information is processed.
            return self.queue.get(timeout=1)
        except Empty:
            raise StopIteration

# have fill_queue as a separate function
def fill_queue(queue, gen):
    while True:
        try:
            value = next(gen)
            queue.put(value)
        except StopIteration:  # assumes the given data_loader_list is an iterator
            break
    print('stopping')


gen = iter(range(70))
qg = QueueGenerator(gen)

for val in qg:
    print(val)

# test if it works several times:
for val in qg:
    print(val)
The next issue for you to solve, I think, is to make data_loader_list something that provides new information in every separate process. Since you have not given any information about that, I can't help you there. The above does, however, give you a way to have the processes fill your queue, which is then handed out as an iterator.
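As a rough illustration of that last point, here is a sketch (mine, not tested against your data) where each worker process gets its own data loader and a sentinel signals when every worker is done. The class and helper names are illustrative; it assumes a fork start method (the Linux default), because generator objects cannot be pickled for spawn, and it assumes the loaders never yield None:

import multiprocessing as mp

SENTINEL = None  # assumes the loaders never yield None themselves

def fill_queue(queue, gen):
    for value in gen:
        queue.put(value)
    queue.put(SENTINEL)  # tell the consumer this worker is finished

class MultiLoaderQueue:
    def __init__(self, data_loader_list):
        self.queue = mp.Queue(maxsize=16)
        self.processes = [mp.Process(target=fill_queue, args=(self.queue, loader))
                          for loader in data_loader_list]
        self._finished = 0

    def __iter__(self):
        for pr in self.processes:
            pr.start()
        return self

    def __next__(self):
        while True:
            value = self.queue.get()
            if value is SENTINEL:
                self._finished += 1
                if self._finished == len(self.processes):
                    raise StopIteration
                continue
            return value

# usage sketch: four loaders, each reading a different part of the file
# (make_loader is a hypothetical factory)
# qg = MultiLoaderQueue([make_loader(part) for part in range(4)])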
Not quite sure why you are yielding in __next__; that doesn't look right to me. __next__ should return a value, not a generator object.
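To illustrate that point with a throwaway example of my own (a plain list stands in for the queue here):

class ReturnsValues:
    def __init__(self, items):
        self._it = iter(items)
    def __iter__(self):
        return self
    def __next__(self):
        return next(self._it)  # hands back the item itself

class YieldsGenerators:
    def __init__(self, items):
        self._it = iter(items)
    def __iter__(self):
        return self
    def __next__(self):
        # 'yield' turns __next__ into a generator function, so each call
        # returns a new, unstarted generator object instead of an item
        yield next(self._it)

print(list(ReturnsValues([1, 2, 3])))           # [1, 2, 3]
print(next(iter(YieldsGenerators([1, 2, 3]))))  # <generator object ...>, not 1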
Here is a simple way that you can return the results of parallel functions as a generator. It may or may not meet your specific requirements but can be tweaked to suit. It will keep on processing data_loader_list until it is exhausted. This may use a lot of memory compared to keeping, for example, 4 items in a Queue at all times.
import multiprocessing as mp

def read_lines(data_loader):
    from time import sleep
    sleep(2)
    return f'did something with {data_loader}'

def make_gen(data_loader_list):
    with mp.Pool(4) as pool:
        for result in pool.imap(read_lines, data_loader_list):
            yield result

if __name__ == '__main__':
    data_loader_list = [i for i in range(15)]
    result_generator = make_gen(data_loader_list)
    print(type(result_generator))
    for i in result_generator:
        print(i)
Using imap means that the results can be processed as they are produced. map and map_async would block in the for loop until all results were ready. See this question for more.
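A small timing sketch of that difference (my own example; the sleep just stands in for slow work):

import multiprocessing as mp
import time

def slow_square(x):
    time.sleep(1)
    return x * x

if __name__ == '__main__':
    with mp.Pool(4) as pool:
        # imap: results stream back as soon as each worker finishes one
        for r in pool.imap(slow_square, range(8)):
            print('imap result', r)   # first line appears after roughly 1 second

        # map: blocks until the whole result list is ready
        print('map results', pool.map(slow_square, range(8)))  # ~2 second pause, then everything at once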

How to make a Python generator execution asynchronous?

I have a piece of of code that looks like this:
def generator():
    while True:
        result = very_long_computation()
        yield result

def caller():
    g = generator()
    for i in range(n):
        element = next(g)
        another_very_long_computation()
Basically, I'd like to overlap the execution of very_long_computation() and another_very_long_computation() as much as possible.
Is there a simple way to make the generator asynchronous? I'd like the generator to start computing the next iteration of the while loop right after result has been yielded, so that (ideally) the next result is ready to be yielded before the successive next() call in caller().
There is no simple way, especially since you've got very_long_computation and another_very_long_computation instead of very_slow_io. Even if you moved generator into its own thread, you'd be limited by CPython's global interpreter lock, preventing any performance benefit.
You could move the work into a worker process, but the multiprocessing module isn't the drop-in replacement for threading it likes to pretend to be. It's full of weird copy semantics, unintuitive restrictions, and platform-dependent behavior, as well as just having a lot of communication overhead.
If you've got I/O along with your computation, it's fairly simple to shove the generator's work into its own thread to at least get some work done during the I/O:
from queue import Queue
import threading

def worker(queue, n):
    gen = generator()
    for i in range(n):
        queue.put(next(gen))

def caller():
    queue = Queue()
    worker_thread = threading.Thread(target=worker, args=(queue, n))  # pass the function via target=
    worker_thread.start()
    for i in range(n):
        element = queue.get()
        another_very_long_computation()
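If you would rather the worker thread stay only one result ahead, instead of running arbitrarily far in front of the consumer and holding many results in memory, the same code works with a bounded queue. This is a minor variation on the sketch above, with the maxsize and the daemon flag being my own choices:

from queue import Queue
import threading

def worker(queue, n):
    gen = generator()
    for i in range(n):
        queue.put(next(gen))   # blocks once the queue already holds maxsize items

def caller():
    queue = Queue(maxsize=1)   # producer keeps at most one result ready
    worker_thread = threading.Thread(target=worker, args=(queue, n), daemon=True)
    worker_thread.start()
    for i in range(n):
        element = queue.get()
        another_very_long_computation()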

Subprocess not thread safe, alternatives?

I'm using Python 2.7 and do not have the option of upgrading or back-porting subprocess32. I am using subprocess in a threaded environment in which it usually works fine; however, sometimes the subprocess creation does not return and the thread hangs. Even strace does not work when it hangs, so I get no feedback.
E.g. this line can cause a hang (the data returned is small, so it is not a pipe issue):
process = subprocess.Popen(cmd,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT)
I have subsequently read that subprocess is not thread safe in Python 2.7 and that "various issues" were fixed in newer versions. I am using multiple threads that call subprocess.
I have reproduced the problem with the following code (an illustrative example, not my actual code), which starts numerous threads, each with its own subprocess:
import os, time, threading, sys
from subprocess import Popen

i = 0

class Process:
    def __init__(self, args):
        self.args = args

    def run(self):
        global i
        retcode = -1
        try:
            self.process = Popen(self.args)
            i += 1
            if i == 10:
                sys.stdout.write("Complete\n")
            while self.process.poll() is None:
                time.sleep(1.0)
            retcode = self.process.returncode
        except:
            sys.stdout.write("ERROR\n")
        return retcode

def main():
    processes = [Process(["/bin/cat"]) for _ in range(10)]

    # start all processes
    for p in processes:
        t = threading.Thread(target=Process.run, args=(p,))
        t.daemon = True
        t.start()

    sys.stdout.write("all threads started\n")

    # wait for Ctrl+C
    while True:
        time.sleep(1.0)

main()
This will often result in one or more of the subprocess calls never returning. Does anybody have more information on this, or a solution/alternative?
I am thinking of using the deprecated commands.getoutput instead, but I do not know whether that is thread safe. It certainly seems to work correctly for the code above.
If the bulk of what your threads do is just waiting on subprocesses, you can accomplish this much more effectively with coroutines. In Python 2 you would implement this with generators, so the necessary changes to the run function are:
- replace time.sleep(1.0) with yield to pass control to another routine
- replace return retcode with self.retcode = retcode or similar, since generators can't return a value before Python 3.3 (a sketch of the adjusted method follows)
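A sketch of what the adjusted run method could look like after those two changes (this is my reading of the suggestion, not the answerer's code; it reuses Popen, sys, and the global i from the snippet above):

    def run(self):
        global i
        retcode = -1
        try:
            self.process = Popen(self.args)
            i += 1
            if i == 10:
                sys.stdout.write("Complete\n")
            while self.process.poll() is None:
                yield                    # hand control back to the scheduling loop
            retcode = self.process.returncode
        except:
            sys.stdout.write("ERROR\n")
        self.retcode = retcode           # generators can't return a value before 3.3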
Then the main function could be something like this:
def main():
    processes = [Process(["/bin/cat"]) for _ in range(10)]

    # since p.run() is a generator this doesn't run any of the code yet
    routines = [p.run() for p in processes]

    while routines:
        # iterate in reverse so we can remove routines while iterating without skipping any
        for routine in reversed(routines):
            try:
                next(routine)  # continue the routine to the next yield
            except StopIteration:
                # this routine has finished, we no longer need to check it
                routines.remove(routine)
This is intended to give you a place to start from; I'd recommend adding print statements around the yields, or stepping through it in pythontutor, to better understand the order of execution.
This has the benefit of never having any threads waiting for anything, just one thread doing a section of processing at a time, which can be much more efficient than many idling threads.

Creating a timeout function in Python with multiprocessing

I'm trying to create a timeout function in Python 2.7.11 (on Windows) with the multiprocessing library.
My basic goal is to return one value if the function times out and the actual value if it doesn't timeout.
My approach is the following:
from multiprocessing import Process, Manager

def timeoutFunction(puzzleFileName, timeLimit):
    manager = Manager()
    returnVal = manager.list()

    # Create worker function
    def solveProblem(return_val):
        return_val[:] = doSomeWork(puzzleFileName)  # doSomeWork() returns a list

    p = Process(target=solveProblem, args=[returnVal])
    p.start()
    p.join(timeLimit)
    if p.is_alive():
        p.terminate()
        returnVal = ['Timeout']

    return returnVal
And I call the function like this:
if __name__ == '__main__':
    print timeoutFunction('example.txt', 600)
Unfortunately this doesn't work and I receive some sort of EOF error in pickle.py
Can anyone see what I'm doing wrong?
Thanks in advance,
Alexander
Edit: doSomeWork() is not an actual function. Just a filler for some other work I do. That work is not done in parallel and does not use any shared variables. I'm only trying to run a single function and have it possibly timeout.
You can use the Pebble library for this.
from pebble import concurrent
from concurrent.futures import TimeoutError

TIMEOUT_IN_SECONDS = 10

@concurrent.process(timeout=TIMEOUT_IN_SECONDS)
def function(foo, bar=0):
    return foo + bar

future = function(1, bar=2)

try:
    result = future.result()  # blocks until results are ready or timeout
except TimeoutError as error:
    print "Function took longer than %d seconds" % error.args[1]
    result = 'timeout'
The documentation has more complete examples.
The library will terminate the function if it times out, so you don't need to worry about I/O or CPU being wasted.
EDIT:
If you're doing an assignment, you can still look at its implementation.
Short example:
from multiprocessing import Pipe, Process

def worker(pipe, function, args, kwargs):
    try:
        results = function(*args, **kwargs)
    except Exception as error:
        results = error
    pipe.send(results)

# Pipe(duplex=False) returns a (receive, send) pair: the parent polls the
# receiving end, the worker writes to the sending end.
reader, writer = Pipe(duplex=False)
process = Process(target=worker, args=(writer, function, args, kwargs))
process.start()

if reader.poll(timeout=5):   # a result arrived within the timeout
    results = reader.recv()
else:                        # still running: terminate the worker
    process.terminate()
    process.join()
    results = 'timeout'
Pebble provides a neat API, takes care of corner cases and uses more robust mechanisms. Yet this is more or less what it does under the hood.
The problem seems to have been that the function solveProblem was defined inside my outer function. Python doesn't seem to like that. Once I moved it outside it worked fine.
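For anyone hitting the same pickle error, a sketch of that restructuring (the worker moved to module level so multiprocessing can pickle it on Windows; doSomeWork is still just a placeholder, and the exact shape of the return value is my choice):

from multiprocessing import Process, Manager

def solveProblem(puzzleFileName, return_val):
    return_val[:] = doSomeWork(puzzleFileName)  # doSomeWork() returns a list

def timeoutFunction(puzzleFileName, timeLimit):
    manager = Manager()
    returnVal = manager.list()

    p = Process(target=solveProblem, args=(puzzleFileName, returnVal))
    p.start()
    p.join(timeLimit)
    if p.is_alive():
        p.terminate()
        return ['Timeout']
    return list(returnVal)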
I'll mark noxdafox's answer as the accepted answer, since implementing the Pebble solution led me to this fix.
Thanks all!

asyncio tasks getting unexpectedly deferred

I've been trying to learn a bit about asyncio, and I'm having some unexpected behavior. I've set up a simple fibonacci server that supports multiple connections using streams. The fib calculation is written recursively, so I can simulate long running calculations by entering in a large number. As expected, long running calculations block I/O until the long running calculation completes.
Here's the problem though. I rewrote the fibonacci function to be a coroutine. I expected that by yielding from each recursion, control would fall back to the event loop, and awaiting I/O tasks would get a chance to execute, and that you'd even be able to run multiple fib calculations concurrently. This however doesn't seem to be the case.
Here's the code:
import asyncio

@asyncio.coroutine
def fib(n):
    if n < 1:
        return 1
    a = yield from fib(n-1)
    b = yield from fib(n-2)
    return a + b

@asyncio.coroutine
def fib_handler(reader, writer):
    print('Connection from : {}'.format(writer.transport.get_extra_info('peername')))
    while True:
        req = yield from reader.readline()
        if not req:
            break
        print(req)
        n = int(req)
        result = yield from fib(n)
        writer.write('{}\n'.format(result).encode('ascii'))
        yield from writer.drain()
    writer.close()
    print("Closed")

def server(address):
    loop = asyncio.get_event_loop()
    fib_server = asyncio.start_server(fib_handler, *address, loop=loop)
    fib_server = loop.run_until_complete(fib_server)
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        print('closing...')
    fib_server.close()
    loop.run_until_complete(fib_server.wait_closed())
    loop.close()

server(('', 25000))
This server runs perfectly well if you netcat to port 25000 and start entering in numbers. However if you start a long running calculation (say 35), no other calculations will run until the first completes. In fact, additional connections won't even be processed.
I know that the event loop is feeding back the yields from recursive fib calls, so control has to be falling all the way down. But I thought that the loop would process the other calls in the I/O queues (such as spawning a second fib_handler) before "trampolining" back to the fib function.
I'm sure I must be misunderstanding something or that there is some kind of bug I'm overlooking but I can't for the life of me find it.
Any insight you can provide will be much appreciated.
The first issue is that you're calling yield from fib(n) inside of fib_handler. Including yield from means that fib_handler will block until the call to fib(n) is complete, which means it can't handle any input you provide while fib is running. You would have this problem even if all you did was I/O inside of fib. To fix this, you should use asyncio.async(fib(n)) (or preferably, asyncio.ensure_future(fib(n)), if you have a new enough version of Python) to schedule fib with the event loop, without actually blocking fib_handler. From there, you can use Future.add_done_callback to write the result to the client when it's ready:
import asyncio
from functools import partial
from concurrent.futures import ProcessPoolExecutor

@asyncio.coroutine
def fib(n):
    if n < 1:
        return 1
    a = yield from fib(n-1)
    b = yield from fib(n-2)
    return a + b

def do_it(writer, result):
    writer.write('{}\n'.format(result.result()).encode('ascii'))
    asyncio.async(writer.drain())

@asyncio.coroutine
def fib_handler(reader, writer):
    print('Connection from : {}'.format(writer.transport.get_extra_info('peername')))
    executor = ProcessPoolExecutor(4)
    loop = asyncio.get_event_loop()
    while True:
        req = yield from reader.readline()
        if not req:
            break
        print(req)
        n = int(req)
        result = asyncio.async(fib(n))
        # Write the result to the client when fib(n) is done.
        result.add_done_callback(partial(do_it, writer))
    writer.close()
    print("Closed")
That said, this change alone still won't completely fix the problem; while it will allow multiple clients to connect and issue commands concurrently, a single client will still get synchronous behavior. This happens because when you call yield from coro() directly on a coroutine function, control isn't given back to the event loop until coro() (or another coroutine called by coro) actually executes some non-blocking I/O. Otherwise, Python will just execute coro without yielding control. This is a useful performance optimization, since giving control to the event loop when your coroutine isn't actually going to do blocking I/O is a waste of time, especially given Python's high function call overhead.
In your case, fib never does any I/O, so once you call yield from fib(n-1) inside of fib itself, the event loop never gets to run again until it's done recursing, which will block fib_handler from reading any subsequent input from the client until the call to fib is done. Wrapping all your calls to fib in asyncio.async guarantees that control is given to the event loop each time you make a yield from asyncio.async(fib(...)) call. When I made this change, in addition to using asyncio.async(fib(n)) in fib_handler, I was able to process multiple inputs from a single client concurrently. Here's the full example code:
import asyncio
from functools import partial
from concurrent.futures import ProcessPoolExecutor

@asyncio.coroutine
def fib(n):
    if n < 1:
        return 1
    a = yield from fib(n-1)
    b = yield from fib(n-2)
    return a + b

def do_it(writer, result):
    writer.write('{}\n'.format(result.result()).encode('ascii'))
    asyncio.async(writer.drain())

@asyncio.coroutine
def fib_handler(reader, writer):
    print('Connection from : {}'.format(writer.transport.get_extra_info('peername')))
    executor = ProcessPoolExecutor(4)
    loop = asyncio.get_event_loop()
    while True:
        req = yield from reader.readline()
        if not req:
            break
        print(req)
        n = int(req)
        result = asyncio.async(fib(n))
        result.add_done_callback(partial(do_it, writer))
    writer.close()
    print("Closed")
Input/Output on client-side:
dan#dandesk:~$ netcat localhost 25000
35 # This was input
4 # This was input
8 # output
24157817 # output
Now, even though this works, I wouldn't use this implementation, since it's doing a bunch of CPU-bound work in a single-threaded program that also wants to serve I/O in that same thread. This isn't going to scale very well, and won't have ideal performance. Instead, I'd recommend using loop.run_in_executor to run the calls to fib in a background process, which allows the asyncio thread to run at full capacity, and also allows us to scale the calls to fib across multiple cores:
import asyncio
from functools import partial
from concurrent.futures import ProcessPoolExecutor

def fib(n):
    if n < 1:
        return 1
    a = fib(n-1)
    b = fib(n-2)
    return a + b

def do_it(writer, result):
    writer.write('{}\n'.format(result.result()).encode('ascii'))
    asyncio.async(writer.drain())

@asyncio.coroutine
def fib_handler(reader, writer):
    print('Connection from : {}'.format(writer.transport.get_extra_info('peername')))
    executor = ProcessPoolExecutor(8)  # 8 processes in the pool
    loop = asyncio.get_event_loop()
    while True:
        req = yield from reader.readline()
        if not req:
            break
        print(req)
        n = int(req)
        result = loop.run_in_executor(executor, fib, n)
        result.add_done_callback(partial(do_it, writer))
    writer.close()
    print("Closed")
