How can I call xtail by tornado.proces.Subprocess?
import subprocess
from tornado.ioloop import IOLoop
from tornado import gen
from tornado import process
class Reader(object):
def __init__(self, xwatch_path, max_idle=600, ioloop=None):
self.xwatch_path = xwatch_path
self.ioloop = ioloop
self.max_idle = max_idle
#gen.coroutine
def call_subprocess(self, cmd, stdin_data=None, stdin_async=False):
stdin = STREAM if stdin_async else subprocess.PIPE
sub_process = process.Subprocess(
cmd, stdin=stdin, stdout=STREAM, stderr=STREAM, io_loop=self.ioloop
)
if stdin_data:
if stdin_async:
yield gen.Task(sub_process.stdin.write, stdin_data)
else:
sub_process.stdin.write(stdin_data)
if stdin_async or stdin_data:
sub_process.stdin.close()
result, error = yield [
gen.Task(sub_process.stdout.read_until, '\n'),
gen.Task(sub_process.stderr.read_until, '\n')
]
print result
raise gen.Return((result, error))
#gen.coroutine
def popen(self):
while True:
result, error = yield self.call_subprocess(['xtail', self.xwatch_path])
print result, error
def read_log(ioloop):
access_reader = AccessLogReader(
'/home/vagrant/logs')
ioloop.add_callback(access_reader.popen)
def main():
ioloop = IOLoop.instance()
read_log(ioloop)
ioloop.start()
if __name__ == '__main__':
main()
I would like to collect a few of the log changes in the log folder, ready to use xtail multiple folders to collect logs, and then I develop the environment for debugging.
I use Vim to modify the ~/log/123.txt file, but I can't see the output.
The statement
result, error = yield [
gen.Task(sub_process.stdout.read_until, '\n'),
gen.Task(sub_process.stderr.read_until, '\n')
]
reads one line of the process's standard output and one line of standard error, and blocks until it has read both lines. If xtail only writes to one of the two streams, this will never complete.
You probably want to read in a loop (note that gen.Task is not necessary):
#gen.coroutine
def read_from_stream(stream):
try:
while True:
line = yield stream.read_until('\n')
print(line)
except StreamClosedError:
return
If you care about the difference between stdout and stderr, read from them separately. This will print lines from each stream as they arrive, and stop when both streams are closed:
yield [read_from_stream(sub_process.stdout), read_from_stream(sub_process.stderr)]
If you don't, merge them by passing stdout=STREAM, stderr=subprocess.STDOUT when creating the subprocess, and only read from sub_process.stdout.
Related
I'm using asyncio subprocess to execute a subcommand. I want to see the long-running process and save the content at the same time to a buffer for later use. Furthermore, I found this related question (Getting live output from asyncio subprocess), but it mainly centers around the use case for ssh.
The asyncio subprocess docs have an example for reading the output line-by-line, which goes into the direction of what I want to achieve. (https://docs.python.org/3/library/asyncio-subprocess.html#examples)
import asyncio
import sys
async def get_date():
code = 'import datetime; print(datetime.datetime.now())'
# Create the subprocess; redirect the standard output
# into a pipe.
proc = await asyncio.create_subprocess_exec(
sys.executable, '-c', code,
stdout=asyncio.subprocess.PIPE)
# Read one line of output.
data = await proc.stdout.readline()
line = data.decode('ascii').rstrip()
# Wait for the subprocess exit.
await proc.wait()
return line
date = asyncio.run(get_date())
print(f"Current date: {date}")
I adapted this example to the following:
async def subprocess_async(cmd, **kwargs):
cmd_list = shlex.split(cmd)
proc = await asyncio.create_subprocess_exec(
*cmd_list,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.STDOUT, **kwargs)
full_log = ""
while True:
buf = await proc.stdout.readline()
if not buf:
break
full_log += buf.decode()
print(f' {buf.decode().rstrip()}')
await proc.wait()
res = subprocess.CompletedProcess(cmd, proc.returncode, stdout=full_log.encode(), stderr=b'')
return res
The issue here is, that the proc.returncode value sometimes becomes None. I guess, I have a misunderstanding, how proc.wait() works and when it is safe to stop reading the output. How do I achieve continuous output using asyncio subprocess?
Your code is working fine as-is for me. What command are you trying to run that is causing your issue?
Two things I can think of to help, are
Instead of calling .wait() afterwards, set the returncode as the loop condition to keep running.
Don't wait for full line returns in case the program is like ffmpeg where it will do some tricks to paste over itself in console and not actually send newline characters.
Example code:
import asyncio, shlex, subprocess, sys
async def subprocess_async(cmd, **kwargs):
cmd_list = shlex.split(cmd)
proc = await asyncio.create_subprocess_exec(
*cmd_list,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.STDOUT,
**kwargs)
full_log = b""
while proc.returncode is None:
buf = await proc.stdout.read(20)
if not buf:
break
full_log += buf
sys.stdout.write(buf.decode())
res = subprocess.CompletedProcess(cmd, proc.returncode, stdout=full_log, stderr=b'')
return res
if __name__ == '__main__':
asyncio.run(subprocess_async("ffprobe -i video.mp4"))
I need to read the output of several asyncio tasks running concurrently.
These tasks are actually created using asyncio.create_subprocess_exec().
In the simplest form I would need to print stdout/stderr of a single process while accumulating lines in separate strings.
My current (working) code is:
async def run_command(*args, stdin=None, can_fail=False, echo=False):
"""
Run command asynchronously in subprocess.
Waits for command completion and returns return code, stdout and stdin
Example from:
http://asyncio.readthedocs.io/en/latest/subprocess.html
"""
# Create subprocess
try:
process = await asyncio.create_subprocess_exec(
*args,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
except (FileNotFoundError, OSError):
if not can_fail:
log.error("run_command(%s): Error FileNotFound", args)
return -1, '', 'File "%s" NotFound' % args[0]
# Status
log.debug("run_command(%s): pid=%s", args, process.pid)
# Wait for the subprocess to finish
stdout, stderr = await process.communicate(stdin)
# Progress
if process.returncode == 0:
log.debug("run_command(%s): ok: %s", process.pid, stdout.decode().strip())
else:
log.debug("run_command(%s): ko: %s", process.pid, stderr.decode().strip())
# Result
result = process.returncode, stdout.decode().strip(), stderr.decode().strip()
# Return stdout
return result
Problem with this code is I see nothing till process terminates; some of the spawned processes may take several minutes to complete and would print "interesting" info while executing. How can I print (or log) output as soon as it happens while capturing? (I am aware that omitting capture the underlying process would print, but I also need the capture)
I tried to do something along the lines:
_stdout = ''
while True:
data = process.stdout.readline()
if not data:
break
print(data)
_stdout += data.decode()
but I have no idea how to extend this to multiple streams (in this case just stdout/stderr, but potentially expanding to multiple programs). Is there something akin to select() call?
Any hint welcome
Is there something akin to select() call?
The answer to this must be yes, as asyncio is wholly built around a call to select(). However it's not always obvious how to translate that to a select on the level of streams. The thing to notice is that you shouldn't try to select the stream exactly - instead, start reading on the stream and rely on the ability to select the progress of the coroutines. The equivalent of select() would thus be to use asyncio.wait(return_when=FIRST_COMPLETED) to drive the reads in a loop.
An even more elegant alternative is to spawn separate tasks where each does its thing, and just let them run in parallel. The code is easier to understand than with a select, boiling down to a single call to gather, and yet under the hood asyncio performs exactly the kind of select() that was requested:
import asyncio, sys, io
async def _read_all(stream, echo):
# helper function to read the whole stream, optionally
# displaying data as it arrives
buf = io.BytesIO() # BytesIO is preferred to +=
while True:
chunk = await stream.read(4096)
if len(chunk) == 0:
break
buf.write(chunk)
if echo:
sys.stdout.buffer.write(chunk)
return buf.getvalue()
async def run_command(*args, stdin=None, echo=False):
process = await asyncio.create_subprocess_exec(
*args,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
if stdin is not None:
process.stdin.write(stdin)
process.stdin.close()
stdout, stderr = await asyncio.gather(
_read_all(process.stdout, echo),
_read_all(process.stderr, echo)
)
return process.returncode, stdout.decode().strip(), stderr.decode().strip()
Note that asyncio's write() is not a coroutine, it defaults to writing in the background, so we don't need to include the write among the coroutines we gather().
I'm taking my first foray into the python mutliprocessing module and I'm running into some problems. I'm very familiar with the threading module but I need to make sure the processes I'm executing are running in parallel.
Here's an outline of what I'm trying to do. Please ignore things like undeclared variables/functions because I can't paste my code in full.
import multiprocessing
import time
def wrap_func_to_run(host, args, output):
output.append(do_something(host, args))
return
def func_to_run(host, args):
return do_something(host, args)
def do_work(server, client, server_args, client_args):
server_output = func_to_run(server, server_args)
client_output = func_to_run(client, client_args)
#handle this output and return a result
return result
def run_server_client(server, client, server_args, client_args, server_output, client_output):
server_process = multiprocessing.Process(target=wrap_func_to_run, args=(server, server_args, server_output))
server_process.start()
client_process = multiprocessing.Process(target=wrap_func_to_run, args=(client, client_args, client_output))
client_process.start()
server_process.join()
client_process.join()
#handle the output and return some result
def run_in_parallel(server, client):
#set up commands for first process
server_output = client_output = []
server_cmd = "cmd"
client_cmd = "cmd"
process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd, server_output, client_output))
process_one.start()
#set up second process to run - but this one can run here
result = do_work(server, client, "some server args", "some client args")
process_one.join()
#use outputs above and the result to determine result
return final_result
def main():
#grab client
client = client()
#grab server
server = server()
return run_in_parallel(server, client)
if __name__ == "__main__":
main()
Here's the error I'm getting:
Error in sys.exitfunc:
Traceback (most recent call last):
File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/lib64/python2.7/multiprocessing/util.py", line 319, in _exit_function
p.join()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 143, in join
assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
I've tried a lot of different things to fix this but my feeling is that there's something wrong with the way I'm using this module.
EDIT:
So I created a file that will reproduce this by simulating the client/server and the work they do - Also I missed an important point which was that I was running this in unix. Another important bit of information was that do_work in my actual case involves using os.fork(). I was unable to reproduce the error without also using os.fork() so I'm assuming the problem is there. In my real world case, that part of the code was not mine so I was treating it like a black box (likely a mistake on my part). Anyways here's the code to reproduce -
#!/usr/bin/python
import multiprocessing
import time
import os
import signal
import sys
class Host():
def __init__(self):
self.name = "host"
def work(self):
#override - use to simulate work
pass
class Server(Host):
def __init__(self):
self.name = "server"
def work(self):
x = 0
for i in range(10000):
x+=1
print x
time.sleep(1)
class Client(Host):
def __init__(self):
self.name = "client"
def work(self):
x = 0
for i in range(5000):
x+=1
print x
time.sleep(1)
def func_to_run(host, args):
print host.name + " is working"
host.work()
print host.name + ": " + args
return "done"
def do_work(server, client, server_args, client_args):
print "in do_work"
server_output = client_output = ""
child_pid = os.fork()
if child_pid == 0:
server_output = func_to_run(server, server_args)
sys.exit(server_output)
time.sleep(1)
client_output = func_to_run(client, client_args)
# kill and wait for server to finish
os.kill(child_pid, signal.SIGTERM)
(pid, status) = os.waitpid(child_pid, 0)
return (server_output == "done" and client_output =="done")
def run_server_client(server, client, server_args, client_args):
server_process = multiprocessing.Process(target=func_to_run, args=(server, server_args))
print "Starting server process"
server_process.start()
client_process = multiprocessing.Process(target=func_to_run, args=(client, client_args))
print "Starting client process"
client_process.start()
print "joining processes"
server_process.join()
client_process.join()
print "processes joined and done"
def run_in_parallel(server, client):
#set up commands for first process
server_cmd = "server command for run_server_client"
client_cmd = "client command for run_server_client"
process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd))
print "Starting process one"
process_one.start()
#set up second process to run - but this one can run here
print "About to do work"
result = do_work(server, client, "server args from do work", "client args from do work")
print "Joining process one"
process_one.join()
#use outputs above and the result to determine result
print "Process one has joined"
return result
def main():
#grab client
client = Client()
#grab server
server = Server()
return run_in_parallel(server, client)
if __name__ == "__main__":
main()
If I remove the use of os.fork() in do_work I don't get the error and the code behaves like I would have expected it before (except for the passing of outputs which I've accepted as my mistake/misunderstanding). I can change the old code to not use os.fork() but I'd also like to know why this caused this problem and if there's a workable solution.
EDIT 2:
I started working on a solution that omits os.fork() before the accepted answer. Here's what I have with some tweaking to the amount of simulated work that can be done -
#!/usr/bin/python
import multiprocessing
import time
import os
import signal
import sys
from Queue import Empty
class Host():
def __init__(self):
self.name = "host"
def work(self, w):
#override - use to simulate work
pass
class Server(Host):
def __init__(self):
self.name = "server"
def work(self, w):
x = 0
for i in range(w):
x+=1
print x
time.sleep(1)
class Client(Host):
def __init__(self):
self.name = "client"
def work(self, w):
x = 0
for i in range(w):
x+=1
print x
time.sleep(1)
def func_to_run(host, args, w, q):
print host.name + " is working"
host.work(w)
print host.name + ": " + args
q.put("ZERO")
return "done"
def handle_queue(queue):
done = False
results = []
return_val = 0
while not done:
#try to grab item from Queue
tr = None
try:
tr = queue.get_nowait()
print "found element in queue"
print tr
except Empty:
done = True
if tr is not None:
results.append(tr)
for el in results:
if el != "ZERO":
return_val = 1
return return_val
def do_work(server, client, server_args, client_args):
print "in do_work"
server_output = client_output = ""
child_pid = os.fork()
if child_pid == 0:
server_output = func_to_run(server, server_args)
sys.exit(server_output)
time.sleep(1)
client_output = func_to_run(client, client_args)
# kill and wait for server to finish
os.kill(child_pid, signal.SIGTERM)
(pid, status) = os.waitpid(child_pid, 0)
return (server_output == "done" and client_output =="done")
def run_server_client(server, client, server_args, client_args, w, mq):
local_queue = multiprocessing.Queue()
server_process = multiprocessing.Process(target=func_to_run, args=(server, server_args, w, local_queue))
print "Starting server process"
server_process.start()
client_process = multiprocessing.Process(target=func_to_run, args=(client, client_args, w, local_queue))
print "Starting client process"
client_process.start()
print "joining processes"
server_process.join()
client_process.join()
print "processes joined and done"
if handle_queue(local_queue) == 0:
mq.put("ZERO")
def run_in_parallel(server, client):
#set up commands for first process
master_queue = multiprocessing.Queue()
server_cmd = "server command for run_server_client"
client_cmd = "client command for run_server_client"
process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd, 400000000, master_queue))
print "Starting process one"
process_one.start()
#set up second process to run - but this one can run here
print "About to do work"
#result = do_work(server, client, "server args from do work", "client args from do work")
run_server_client(server, client, "server args from do work", "client args from do work", 5000, master_queue)
print "Joining process one"
process_one.join()
#use outputs above and the result to determine result
print "Process one has joined"
return_val = handle_queue(master_queue)
print return_val
return return_val
def main():
#grab client
client = Client()
#grab server
server = Server()
val = run_in_parallel(server, client)
if val:
print "failed"
else:
print "passed"
return val
if __name__ == "__main__":
main()
This code has some tweaked printouts just to see exactly what is happening. I used a multiprocessing.Queue to store and share outputs across the processes and back into my main thread to be handled. I think this solves the python portion of my problem but there's still some issues in the code I'm working on. The only other thing I can say is that the equivalent to func_to_run involves sending a command over ssh and grabbing any err along with the output. For some reason, this works perfectly fine for a command that has a low execution time, but not well for a command that has a much larger execution time/output. I tried simulating this with the drastically different work values in my code here but haven't been able to reproduce similar results.
EDIT 3
Library code I'm using (again not mine) uses Popen.wait() for the ssh commands and I just read this:
Popen.wait()
Wait for child process to terminate. Set and return returncode attribute.
Warning This will deadlock when using stdout=PIPE and/or stderr=PIPE and the >child process generates enough output to a pipe such that it blocks waiting for >the OS pipe buffer to accept more data. Use communicate() to avoid that.
I adjusted the code to not buffer and just print as it is received and everything works.
I can change the old code to not use os.fork() but I'd also like to know why this caused this problem and if there's a workable solution.
The key to understanding the problem is knowing exactly what fork() does. CPython docs state "Fork a child process." but this presumes you understand the C library call fork().
Here's what glibc's manpage says about it:
fork() creates a new process by duplicating the calling process. The new process, referred to as the child, is an exact duplicate of the calling process, referred to as the parent, except for the following points: ...
It's basically as if you took your program and made a copy of its program state (heap, stack, instruction pointer, etc) with small differences and let it execute independent of the original. When this child process exits naturally, it will use exit() and that will trigger atexit() handlers registered by the multiprocessing module.
What can you do to avoid it?
omit os.fork(): use multiprocessing instead, like you are exploring now
probably effective: import multiprocessing after executing fork(), only in the child or parent as necessary.
use _exit() in the child (CPython docs state, "Note The standard way to exit is sys.exit(n). _exit() should normally only be used in the child process after a fork().")
https://docs.python.org/2/library/os.html#os._exit
In addition to the excellent solution from Cain, if you're facing the same situation as I was, where you can't control how the subprocesses are created, you can try to unregister the atexit function in your subprocesses to get rid of these messages:
import atexit
from multiprocessing.util import _exit_function
atexit.unregister(_exit_function)
ATTENTION: This may lead to leakage. For instance, if your subprocesses have their own children, they won't be cleared. So clearify your situation and test thoroughly afterwards.
It seems to me that you are threading it one time too many. I would not thread it from run_in_parallel, but simply calling run_server_client with the proper arguments, because they will thread inside.
I have a program that creates a subprocess within a thread, so that the thread can be constantly checking for specific output conditions (from either stdout or stderr), and call the appropriate callbacks, while the rest of the program continues. Here is a pared-down version of that code:
import select
import subprocess
import threading
def run_task():
command = ['python', 'a-script-that-outputs-lines.py']
proc = subprocess.Popen(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
while True:
ready, _, _ = select.select((proc.stdout, proc.stderr), (), (), .1)
if proc.stdout in ready:
next_line_to_process = proc.stdout.readline()
# process the output
if proc.stderr in ready:
next_line_to_process = proc.stderr.readline()
# process the output
if not ready and proc.poll() is not None:
break
thread = threading.Thread(target = run_task)
thread.run()
It works reasonably well, but I would like the thread to exit once two conditions are met: the running child process has finished, and all of the data in stdout and stderr has been processed.
The difficulty I have is that if my last condition is as it is above (if not ready and proc.poll() is not None), then the thread never exits, because once stdout and stderr's file descriptors are marked as ready, they never become unready (even after all of the data has been read from them, and read() would hang or readline() would return an empty string).
If I change that condition to just if proc.poll() is not None, then the loop exists when the program exits, and I can't guarantee that it's seen all of the data that needs to be processed.
Is this just the wrong approach, or is there a way to reliably determine when you've read all of the data that will ever be written to a file descriptor? Or is this an issue specific to trying to read from the stderr/stdout of a subprocess?
I have been trying this on Python 2.5 (running on OS X) and also tried select.poll() and select.epoll()-based variants on Python 2.6 (running on Debian with a 2.6 kernel).
select module is appropriate if you want to find out whether you can read from a pipe without blocking.
To make sure that you've read all data, use a simpler condition if proc.poll() is not None: break and call rest = [pipe.read() for pipe in [p.stdout, p.stderr]] after the loop.
It is unlikely that a subprocess closes its stdout/stderr before its shutdown therefore you could skip the logic that handles EOF for simplicity.
Don't call Thread.run() directly, use Thread.start() instead. You probably don't need the separate thread here at all.
Don't call p.stdout.readline() after the select(), it may block, use os.read(p.stdout.fileno(), limit) instead. Empty bytestring indicates EOF for the corresponding pipe.
As an alternative or in addition to you could make the pipes non-blocking using fcntl module:
import os
from fcntl import fcntl, F_GETFL, F_SETFL
def make_nonblocking(fd):
return fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | os.O_NONBLOCK)
and handle io/os errors while reading.
My eventual solution, as I mentioned above, was the following, in case this is helpful to anyone. I think it is the right approach, since I'm now 97.2% sure you can't do this with just select()/poll() and read():
import select
import subprocess
import threading
def run_task():
command = ['python', 'a-script-that-outputs-lines.py']
proc = subprocess.Popen(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
while True:
ready, _, _ = select.select((proc.stdout, proc.stderr), (), (), .1)
if proc.stdout in ready:
next_line_to_process = proc.stdout.readline()
if next_line_to_process:
# process the output
elif proc.returncode is not None:
# The program has exited, and we have read everything written to stdout
ready = filter(lambda x: x is not proc.stdout, ready)
if proc.stderr in ready:
next_line_to_process = proc.stderr.readline()
if next_line_to_process:
# process the output
elif proc.returncode is not None:
# The program has exited, and we have read everything written to stderr
ready = filter(lambda x: x is not proc.stderr, ready)
if proc.poll() is not None and not ready:
break
thread = threading.Thread(target = run_task)
thread.run()
You could do a raw os.read(fd, size) on the pipe's file descriptor instead of using readline(). This is a non-blocking operation which can also detect EOF (in that case it returns an empty string or byte object). You'd have to implement the line splitting and buffering yourself. Use something like this:
class NonblockingReader():
def __init__(self, pipe):
self.fd = pipe.fileno()
self.buffer = ""
def readlines(self):
data = os.read(self.fd, 2048)
if not data:
return None
self.buffer += data
if os.linesep in self.buffer:
lines = self.buffer.split(os.linesep)
self.buffer = lines[-1]
return lines[:-1]
else:
return []
I've been writing a small Python script that executes some shell commands using the subprocess module and a helper function:
import subprocess as sp
def run(command, description):
"""Runs a command in a formatted manner. Returns its return code."""
start=datetime.datetime.now()
sys.stderr.write('%-65s' % description)
s=sp.Popen(command, shell=True, stderr=sp.PIPE, stdout=sp.PIPE)
out,err=s.communicate()
end=datetime.datetime.now()
duration=end-start
status='Done' if s.returncode==0 else 'Failed'
print '%s (%d seconds)' % (status, duration.seconds)
The following lines reads the standard output and error:
s=sp.Popen(command, shell=True, stderr=sp.PIPE, stdout=sp.PIPE)
out,err=s.communicate()
As you can see, stdout and stderr are not used. Suppose that I want to write the output and error messages to a log file, in a formatted way, e.g.:
[STDOUT: 2011-01-17 14:53:55] <message>
[STDERR: 2011-01-17 14:53:56] <message>
My question is, what's the most Pythonic way to do it? I thought of three options:
Inherit the file object and override the write method.
Use a Delegate class which implements write.
Connect to the PIPE itself in some way.
UPDATE : reference test script
I'm checking the results with this script, saved as test.py:
#!/usr/bin/python
import sys
sys.stdout.write('OUT\n')
sys.stdout.flush()
sys.stderr.write('ERR\n')
sys.stderr.flush()
Any ideas?
1 and 2 are reasonable solutions, but overriding write() won't be enough.
The problem is that Popen needs file handles to attach to the process, so Python file objects doesn't work, they have to be OS level. To solve that you have to have a Python object that has a os level file handle. The only way I can think of solving that is to use pipes, so you have an os level file handle to write to. But then you need another thread that sits and polls that pipe for things to read in so it can log it. (So this is more strictly an implementation of 2, as it delegates to logging).
Said and done:
import io
import logging
import os
import select
import subprocess
import time
import threading
LOG_FILENAME = 'output.log'
logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)
class StreamLogger(io.IOBase):
def __init__(self, level):
self.level = level
self.pipe = os.pipe()
self.thread = threading.Thread(target=self._flusher)
self.thread.start()
def _flusher(self):
self._run = True
buf = b''
while self._run:
for fh in select.select([self.pipe[0]], [], [], 0)[0]:
buf += os.read(fh, 1024)
while b'\n' in buf:
data, buf = buf.split(b'\n', 1)
self.write(data.decode())
time.sleep(1)
self._run = None
def write(self, data):
return logging.log(self.level, data)
def fileno(self):
return self.pipe[1]
def close(self):
if self._run:
self._run = False
while self._run is not None:
time.sleep(1)
os.close(self.pipe[0])
os.close(self.pipe[1])
So that class starts a os level pipe that Popen can attach the stdin/out/error to for the subprocess. It also starts a thread that polls the other end of that pipe once a second for things to log, which it then logs with the logging module.
Possibly this class should implement more things for completeness, but it works in this case anyway.
Example code:
with StreamLogger(logging.INFO) as out:
with StreamLogger(logging.ERROR) as err:
subprocess.Popen("ls", stdout=out, stderr=err, shell=True)
output.log ends up like so:
INFO:root:output.log
INFO:root:streamlogger.py
INFO:root:and
INFO:root:so
INFO:root:on
Tested with Python 2.6, 2.7 and 3.1.
I would think any implementation of 1 and 3 would need to use similar techniques. It is a bit involved, but unless you can make the Popen command log correctly itself, I don't have a better idea).
I would suggest option 3, with the logging standard library package. In this case I'd say the other 2 were overkill.
1 and 2 won't work. Here's an implementation of the principle:
import subprocess
import time
FileClass = open('tmptmp123123123.tmp', 'w').__class__
class WrappedFile(FileClass):
TIMETPL = "%Y-%m-%d %H:%M:%S"
TEMPLATE = "[%s: %s] "
def __init__(self, name, mode='r', buffering=None, title=None):
self.title = title or name
if buffering is None:
super(WrappedFile, self).__init__(name, mode)
else:
super(WrappedFile, self).__init__(name, mode, buffering)
def write(self, s):
stamp = time.strftime(self.TIMETPL)
if not s:
return
# Add a line with timestamp per line to be written
s = s.split('\n')
spre = self.TEMPLATE % (self.title, stamp)
s = "\n".join(["%s %s" % (spre, line) for line in s]) + "\n"
super(WrappedFile, self).write(s)
The reason it doesn't work is that Popen never calls stdout.write. A wrapped file will work fine when we call its write method and will even be written to if passed to Popen, but the write will happen in a lower layer, skipping the write method.
This simple solution worked for me:
import sys
import datetime
import tempfile
import subprocess as sp
def run(command, description):
"""Runs a command in a formatted manner. Returns its return code."""
with tempfile.SpooledTemporaryFile(8*1024) as so:
print >> sys.stderr, '%-65s' % description
start=datetime.datetime.now()
retcode = sp.call(command, shell=True, stderr=sp.STDOUT, stdout=so)
end=datetime.datetime.now()
so.seek(0)
for line in so.readlines():
print >> sys.stderr,'logging this:', line.rstrip()
duration=end-start
status='Done' if retcode == 0 else 'Failed'
print >> sys.stderr, '%s (%d seconds)' % (status, duration.seconds)
REF_SCRIPT = r"""#!/usr/bin/python
import sys
sys.stdout.write('OUT\n')
sys.stdout.flush()
sys.stderr.write('ERR\n')
sys.stderr.flush()
"""
SCRIPT_NAME = 'refscript.py'
if __name__ == '__main__':
with open(SCRIPT_NAME, 'w') as script:
script.write(REF_SCRIPT)
run('python ' + SCRIPT_NAME, 'Reference script')
This uses Adam Rosenfield's make_async and read_async. Whereas my original answer used select.epoll and was thus Linux-only, it now uses select.select, which should work under Unix or Windows.
This logs output from the subprocess to /tmp/test.log as it occurs:
import logging
import subprocess
import shlex
import select
import fcntl
import os
import errno
def make_async(fd):
# https://stackoverflow.com/a/7730201/190597
'''add the O_NONBLOCK flag to a file descriptor'''
fcntl.fcntl(fd, fcntl.F_SETFL, fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)
def read_async(fd):
# https://stackoverflow.com/a/7730201/190597
'''read some data from a file descriptor, ignoring EAGAIN errors'''
try:
return fd.read()
except IOError, e:
if e.errno != errno.EAGAIN:
raise e
else:
return ''
def log_process(proc,stdout_logger,stderr_logger):
loggers = { proc.stdout: stdout_logger, proc.stderr: stderr_logger }
def log_fds(fds):
for fd in fds:
out = read_async(fd)
if out.strip():
loggers[fd].info(out)
make_async(proc.stdout)
make_async(proc.stderr)
while True:
# Wait for data to become available
rlist, wlist, xlist = select.select([proc.stdout, proc.stderr], [], [])
log_fds(rlist)
if proc.poll() is not None:
# Corner case: check if more output was created
# between the last call to read_async and now
log_fds([proc.stdout, proc.stderr])
break
if __name__=='__main__':
formatter = logging.Formatter('[%(name)s: %(asctime)s] %(message)s')
handler = logging.FileHandler('/tmp/test.log','w')
handler.setFormatter(formatter)
stdout_logger=logging.getLogger('STDOUT')
stdout_logger.setLevel(logging.DEBUG)
stdout_logger.addHandler(handler)
stderr_logger=logging.getLogger('STDERR')
stderr_logger.setLevel(logging.DEBUG)
stderr_logger.addHandler(handler)
proc = subprocess.Popen(shlex.split('ls -laR /tmp'),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
log_process(proc,stdout_logger,stderr_logger)