I have the following code and am trying to run it in IDLE on Linux.
import sys
from subprocess import PIPE, Popen
from threading import Thread

try:
    from Queue import Queue, Empty
except ImportError:
    from queue import Queue, Empty  # python 3.x

ON_POSIX = 'posix' in sys.builtin_module_names

def enqueue_output(out, queue):
    for line in iter(out.readline, b''):
        queue.put(line)
    out.close()

p = Popen(['youtube-dl', '-l', '-c', 'https://www.youtube.com/watch?v=utV1sdjr4PY'], stdout=PIPE, bufsize=1, close_fds=ON_POSIX)
q = Queue()
t = Thread(target=enqueue_output, args=(p.stdout, q))
t.daemon = True  # thread dies with the program
t.start()

# ... do other things here

# read line without blocking
while True:
    try:
        line = q.get_nowait()  # or q.get(timeout=.1)
    except Empty:
        pass
        #print('no output yet')
    else:  # got line
        print line
But it is always printing "no output yet".
Edit: I edited the code and it is working. But I have another problem: the download percentage is updated on a single line, but the code only reads it after the line is complete.
OK, let's put the comments in an answer.
import sys, os
from subprocess import PIPE, Popen
from time import sleep
import pty

master, slave = pty.openpty()
stdout = os.fdopen(master)

p = Popen(['youtube-dl', '-l', '-c', 'https://www.youtube.com/watch?v=AYlb-7TXMxM'], shell=False, stdout=slave, stderr=slave, close_fds=True)

while True:
    #line = stdout.readline().rstrip() - will strip the new line
    line = stdout.readline()
    if line != b'':
        sys.stdout.write("\r%s" % line)
        sys.stdout.flush()
    sleep(.1)
If you want a thread and a different while loop, I suggest wrapping this in a class and avoiding the queue (see the sketch below). The output is "unbuffered" - thanks #FilipMalckzak
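A minimal sketch of what that class could look like, reusing the pty approach above; the class name PtyReader is illustrative and not part of the original answer:

import os
import pty
import sys
from subprocess import Popen
from threading import Thread

class PtyReader(object):
    """Run a command on a pseudo-terminal and echo its output from a background thread."""

    def __init__(self, cmd):
        self.master, self.slave = pty.openpty()
        self.proc = Popen(cmd, shell=False, stdout=self.slave, stderr=self.slave, close_fds=True)
        self.thread = Thread(target=self._pump)
        self.thread.daemon = True  # thread dies with the program

    def start(self):
        self.thread.start()

    def _pump(self):
        # the child sees a terminal, so youtube-dl keeps rewriting its progress line;
        # read from the master end and echo each update on a single line
        stdout = os.fdopen(self.master)
        while self.proc.poll() is None:
            line = stdout.readline()
            if line:
                sys.stdout.write("\r%s" % line)
                sys.stdout.flush()

reader = PtyReader(['youtube-dl', '-l', '-c', 'https://www.youtube.com/watch?v=AYlb-7TXMxM'])
reader.start()
# ... the main thread is free to do other work here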
Related
I want to utilize subprocess Popen to call strace on Linux.
I also want to catch every line of output strace gives, in realtime if possible.
I came up with the following code for that, but for some reason I can't get it working. I'll only get the output AFTER I terminate the program.
from threading import Thread
from queue import Queue, Empty

pid = 1

def enqueue_output(out, queue):
    for line in iter(out.readline, b''):
        queue.put(line)
    out.close()

p = Popen(["strace", "-p", pid], stdout=subprocess.PIPE, bufsize=1)
q = Queue()
t = Thread(target=enqueue_output, args=(p.stdout, q))
t.daemon = True  # thread dies with the program
t.start()

try:
    line = q.get_nowait()
    print("Got it! " + line)
except Empty:
    pass
Here is a short working example:
Please note that:
strace writes to stderr (unless -o filename is given)
all arguments must be strings (or bytes), i.e. pid must be given as "1"
line buffering works only with universal newlines
you must be root to trace process 1
import subprocess
PID = 1
p = subprocess.Popen(
    ["strace", "-p", str(PID)],
    stdin=subprocess.DEVNULL, stderr=subprocess.PIPE,
    universal_newlines=True, bufsize=1)

for line in p.stderr:
    line = line.rstrip()
    print(line)
I have implemented a variant on the code in this question:
A non-blocking read on a subprocess.PIPE in Python
To try and read the output in real time from this dummy program test.py:
import time, sys
print "Hello there"
for i in range(100):
    time.sleep(0.1)
    sys.stdout.write("\r%d" % i)
    sys.stdout.flush()
print
print "Go now or I shall taunt you once again!"
The variation on the other question is that the calling program must read character by character, not line by line, as the dummy program test.py outputs progress indication all on one line by use of \r. So here it is:
import sys, time
from subprocess import PIPE, Popen
from threading import Thread

try:
    from Queue import Queue, Empty
except ImportError:
    from queue import Queue, Empty  # Python 3.x

ON_POSIX = 'posix' in sys.builtin_module_names

def enqueue_output(out, queue):
    while True:
        buffersize = 1
        data = out.read(buffersize)
        if not data:
            break
        queue.put(data)
    out.close()

p = Popen(sys.executable + " test.py", stdout=PIPE, bufsize=1, close_fds=ON_POSIX)
q = Queue()
t = Thread(target=enqueue_output, args=(p.stdout, q))
t.daemon = True  # Thread dies with the program
t.start()

while True:
    p.poll()
    if p.returncode:
        break
    # Read line without blocking
    try:
        char = q.get_nowait()
        time.sleep(0.1)
    except Empty:
        pass
    else:  # Got line
        sys.stdout.write(char)
        sys.stdout.flush()

print "left loop"
sys.exit(0)
Two problems with this:
It never exits - p.returncode never returns a value and the loop is not left. How can I fix it?
It's really slow! Is there a way to make it more efficient without increasing buffersize?
As #Markku K. pointed out, you should use bufsize=0 to read one byte at a time.
Your code doesn't require a non-blocking read. You can simplify it:
import sys
from functools import partial
from subprocess import Popen, PIPE
p = Popen([sys.executable, "test.py"], stdout=PIPE, bufsize=0)
for b in iter(partial(p.stdout.read, 1), b""):
    print b  # it should print as soon as `sys.stdout.flush()` is called
             # in the test.py
p.stdout.close()
p.wait()
Note: reading 1 byte at a time is very inefficient.
Also, in general, there could be a block-buffering issue that can sometimes be solved using the pexpect or pty modules, or the unbuffer, stdbuf, script command-line utilities.
For Python child processes you could use the -u flag to force unbuffering (of the binary layer) of the stdin, stdout, stderr streams.
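A minimal sketch of the -u idea, assuming the same test.py script as above (the byte-by-byte read mirrors the earlier snippet; -u and bufsize=0 are the only changes):

import sys
from subprocess import Popen, PIPE

# -u forces unbuffered stdin/stdout/stderr in the child (binary layer),
# so each flushed write reaches the pipe immediately
p = Popen([sys.executable, "-u", "test.py"], stdout=PIPE, bufsize=0)
for b in iter(lambda: p.stdout.read(1), b""):
    sys.stdout.write(b.decode())
    sys.stdout.flush()
p.stdout.close()
p.wait()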
I'm running into some difficulties getting output from a subprocess stdout pipe. I'm launching some third-party code via it, in order to extract log output. Up until a recent update of the third-party code, everything worked fine. After the update, Python has started blocking indefinitely and not actually showing any output. I can launch the third-party app manually and see its output fine.
A basic version of the code I'm using:
import subprocess, time
from threading import Thread
def enqueue_output(out):
    print "Hello from enqueue_output"
    for line in iter(out.readline, ''):
        line = line.rstrip("\r\n")
        print "Got %s" % line
    out.close()
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, bufsize=1)
thread = Thread(target=enqueue_output, args=(proc.stdout,))
thread.daemon = True
thread.start()
time.sleep(30)
This works perfectly if I substitute third_party.exe for this script:
import time, sys
while True:
    print "Test"
    sys.stdout.flush()
    time.sleep(1)
So I'm unclear as to what magic needs to be done to get this working with the original command.
These are all variants of the subprocess.Popen line I've tried with no success:
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, bufsize=0)
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, shell=True)
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, creationflags=subprocess.CREATE_NEW_CONSOLE)
si = subprocess.STARTUPINFO()
si.dwFlags = subprocess.STARTF_USESTDHANDLES | subprocess.STARTF_USESHOWWINDOW
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, startupinfo=si)
Edit 1:
I can't actually use .communicate() in this case. The app I'm launching remains running for long periods of time (days to weeks). The only way I could actually test .communicate() would be to kill the app shortly after it launches, which I don't feel would give me valid results.
Even the non-threaded version of this fails:
import subprocess, time
from threading import Thread
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print "App started, reading output..."
for line in iter(proc.stdout.readline, ''):
    line = line.rstrip("\r\n")
    print "Got: %s" % line
Edit 2:
Thanks to jdi, the following works okay:
import tempfile, time, subprocess
w = "test.txt"
f = open("test.txt","a")
p = subprocess.Popen("third_party.exe", shell=True, stdout=f,
stderr=subprocess.STDOUT, bufsize=0)
time.sleep(30)
with open("test.txt", 'r') as r:
for line in r:
print line
f.close()
First I would recommend that you simplify this example to make sure you can actually read anything. Remove the complication of the thread from the mix:
proc = subprocess.Popen("third_party.exe", stdout=subprocess.PIPE, bufsize=1)
print proc.communicate()
If that works, great. Then the problem is likely in how you are reading stdout directly, or possibly in your thread.
If this does not work, have you tried piping stderr to stdout as well?
proc = subprocess.Popen("third_party.exe",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, bufsize=1)
Update
Since you say communicate() is deadlocking, here is another approach you can try to see if it's a problem with the internal buffer of subprocess...
import tempfile
import subprocess
w = tempfile.NamedTemporaryFile()
p = subprocess.Popen('third_party.exe', shell=True, stdout=w,
                     stderr=subprocess.STDOUT, bufsize=0)
with open(w.name, 'r') as r:
    for line in r:
        print line
w.close()
args = ['svn', 'log', '-v']

def foo(info=''):
    import logging
    import subprocess
    import tempfile
    try:
        pipe = subprocess.Popen(args, bufsize=0,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    except Exception as e:
        logging.error(str(e))
        return False
    while 1:
        s = pipe.stdout.read()
        if s:
            print s,
        if pipe.returncode is None:
            pipe.poll()
        else:
            break
    if not 0 == pipe.returncode:
        return False
    return True

print foo()
This one should work, with no threads and no temp file magic.
I want to run many processes in parallel with the ability to read stdout at any time. How should I do it? Do I need to run a thread for each subprocess.Popen() call, or what?
You can do it in a single thread.
Suppose you have a script that prints lines at random times:
#!/usr/bin/env python
#file: child.py
import os
import random
import sys
import time
for i in range(10):
    print("%2d %s %s" % (int(sys.argv[1]), os.getpid(), i))
    sys.stdout.flush()
    time.sleep(random.random())
If you'd like to collect the output as soon as it becomes available, you could use select on POSIX systems, as #zigg suggested:
#!/usr/bin/env python
from __future__ import print_function
from select import select
from subprocess import Popen, PIPE
# start several subprocesses
processes = [Popen(['./child.py', str(i)], stdout=PIPE,
                   bufsize=1, close_fds=True,
                   universal_newlines=True)
             for i in range(5)]

# read output
timeout = 0.1  # seconds
while processes:
    # remove finished processes from the list (O(N**2))
    for p in processes[:]:
        if p.poll() is not None:  # process ended
            print(p.stdout.read(), end='')  # read the rest
            p.stdout.close()
            processes.remove(p)
    # wait until there is something to read
    rlist = select([p.stdout for p in processes], [], [], timeout)[0]
    # read a line from each process that has output ready
    for f in rlist:
        print(f.readline(), end='')  #NOTE: it can block
A more portable solution (that should work on Windows, Linux, OSX) can use reader threads for each process, see Non-blocking read on a subprocess.PIPE in python.
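A minimal sketch of that reader-thread variant, assuming the same child.py script as above; the reader function and the None sentinel are illustrative, not part of the linked answer:

#!/usr/bin/env python
from __future__ import print_function
import sys
from subprocess import Popen, PIPE
from threading import Thread
try:
    from Queue import Queue  # Python 2.x
except ImportError:
    from queue import Queue  # Python 3.x

def reader(pipe, queue):
    # one reader per child: funnel its lines into the shared queue, then signal EOF
    for line in iter(pipe.readline, b''):
        queue.put(line)
    pipe.close()
    queue.put(None)  # sentinel: this child is done

q = Queue()
processes = [Popen([sys.executable, 'child.py', str(i)], stdout=PIPE)
             for i in range(5)]
for p in processes:
    t = Thread(target=reader, args=(p.stdout, q))
    t.daemon = True  # threads die with the program
    t.start()

finished = 0
while finished < len(processes):
    line = q.get()      # blocks until some child produces a line
    if line is None:
        finished += 1   # one more child reached EOF
    else:
        sys.stdout.write(line.decode() if isinstance(line, bytes) else line)

for p in processes:
    p.wait()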
Here's an os.pipe()-based solution that works on Unix and Windows:
#!/usr/bin/env python
from __future__ import print_function
import io
import os
import sys
from subprocess import Popen
ON_POSIX = 'posix' in sys.builtin_module_names
# create a pipe to get data
input_fd, output_fd = os.pipe()
# start several subprocesses
processes = [Popen([sys.executable, 'child.py', str(i)], stdout=output_fd,
                   close_fds=ON_POSIX)  # close input_fd in children
             for i in range(5)]
os.close(output_fd)  # close unused end of the pipe

# read output line by line as soon as it is available
with io.open(input_fd, 'r', buffering=1) as file:
    for line in file:
        print(line, end='')

for p in processes:
    p.wait()
You can also collect stdout from multiple subprocesses concurrently using Twisted:
#!/usr/bin/env python
import sys
from twisted.internet import protocol, reactor
class ProcessProtocol(protocol.ProcessProtocol):
    def outReceived(self, data):
        print data,  # received chunk of stdout from child

    def processEnded(self, status):
        global nprocesses
        nprocesses -= 1
        if nprocesses == 0:  # all processes ended
            reactor.stop()

# start subprocesses
nprocesses = 5
for _ in xrange(nprocesses):
    reactor.spawnProcess(ProcessProtocol(), sys.executable,
                         args=[sys.executable, 'child.py'],
                         usePTY=True)  # can change how child buffers stdout
reactor.run()
See Using Processes in Twisted.
You don't need to run a thread for each process. You can peek at the stdout streams for each process without blocking on them, and only read from them if they have data available to read.
You do have to be careful not to accidentally block on them, though, if you're not intending to.
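A minimal sketch of that peek-then-read idea, assuming Python 3.5+ on POSIX and the child.py script from earlier; os.set_blocking puts each pipe in non-blocking mode so a read returns immediately when nothing is available:

import os
import sys
import time
from subprocess import Popen, PIPE

processes = [Popen([sys.executable, 'child.py', str(i)], stdout=PIPE)
             for i in range(5)]
for p in processes:
    os.set_blocking(p.stdout.fileno(), False)  # reads now return instead of waiting

while processes:
    for p in processes[:]:
        try:
            chunk = os.read(p.stdout.fileno(), 4096)  # raises if nothing is available
        except BlockingIOError:
            continue                                  # no data from this child right now
        if chunk:
            sys.stdout.buffer.write(chunk)
            sys.stdout.flush()
        else:                                         # b'' means EOF: child closed its stdout
            p.stdout.close()
            p.wait()
            processes.remove(p)
    time.sleep(0.05)  # avoid spinning the CPU between checks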
You can wait for the process to finish by polling process.poll(), and run other stuff concurrently:
import time
import sys
from subprocess import Popen, PIPE
def ex1() -> None:
    command = 'sleep 2.1 && echo "happy friday"'
    proc = Popen(command, shell=True, stderr=PIPE, stdout=PIPE)
    while proc.poll() is None:
        # do stuff here
        print('waiting')
        time.sleep(0.05)

    out, _err = proc.communicate()
    print(out, file=sys.stderr)
    sys.stderr.flush()
    assert proc.poll() == 0

ex1()
This snippet pings an IP address on Windows and gets an output line every 2 seconds. However, I found that the memory usage of the ping.exe process slowly increases after running it; if I deploy it to ping 1000 IPs in parallel, it soon hangs the server. I think it may be because of the stdout buffer. How can I clear the stdout or limit its size? Thanks!
...
proc = subprocess.Popen(['c:\windows\system32\ping.exe','127.0.0.1', '-l', '10000', '-t'],stdout=subprocess.PIPE, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
while True:
    time.sleep(2)
    os.kill(proc.pid, signal.CTRL_BREAK_EVENT)
    line = proc.stdout.readline()
ping is producing many more lines than you're reading due to the 2 second timeout between reads. I'd move the os.kill call into another thread, and use the main thread to read every line from proc.stdout:
import sys, os
import subprocess
import threading
import signal
import time

#Use ctrl-c and ctrl-break to terminate the script/ping
def sigbreak(signum, frame):
    import sys
    if proc.poll() is None:
        print('Killing ping...')
        proc.kill()
    sys.exit(0)

signal.signal(signal.SIGBREAK, sigbreak)
signal.signal(signal.SIGINT, sigbreak)

#executes in a separate thread
def run(pid):
    while True:
        time.sleep(2)
        try:
            os.kill(pid, signal.CTRL_BREAK_EVENT)
        except WindowsError:
            #quit the thread if ping is dead
            break

cmd = [r'c:\windows\system32\ping.exe', '127.0.0.1', '-l', '10000', '-t']
flags = subprocess.CREATE_NEW_PROCESS_GROUP
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, creationflags=flags)
threading.Thread(target=run, args=(proc.pid,)).start()

while True:
    line = proc.stdout.readline()
    if b'statistics' in line:
        #I don't know what you're doing with the ping stats.
        #I'll just print them.
        for n in range(4):
            encoding = getattr(sys.stdout, 'encoding', 'ascii')
            print(line.decode(encoding).rstrip())
            line = proc.stdout.readline()
        print()
Try ping.py instead of juggling with ping.exe.