Subprocess buffer overflow - Python

I use Python to test an already compiled binary. The idea is that:
I open the subprocess running a separate program (this process does nothing on its own and just listens for commands)
Then I send various commands to this subprocess (using mysubprocess.stdin.write())
Then, depending on my needs, I validate the output from the subprocess or ignore it
My problem is that sometimes I'd like to ignore the output. For example, I'd like to send 1M commands to the subprocess, ignore the results (to speed up the simulation) and check only the output of the last one.
However, it seems (this is my suspicion only!) that I have to consume the stdout buffer; otherwise it hangs forever... presumably because the OS pipe buffer is finite (around 64 KiB on Linux) and the child blocks on write once it fills up.
Here is an example:
from subprocess import Popen, PIPE

class Simulation:
    def __init__(self, path):
        self.proc = Popen([path], stdin=PIPE, stdout=PIPE)

    def executeCmd(self, cmd):
        self.proc.stdin.write(cmd.encode('utf-8'))
        self.proc.stdin.flush()
        output = ""
        line = ""
        while '</end>' not in line:
            line = self.proc.stdout.readline().decode('utf-8')
            output += line
        return output

    def executeCmd_IgnoreOutput(self, cmd):
        self.proc.stdin.write(cmd.encode('utf-8'))
        self.proc.stdin.flush()
        ## self.proc.stdout.read()  #< can't do that since the subprocess is still running and there is no EOF sign
        self.proc.stdout.flush()  #< does not clear the buffer :(

    def tearDown(self):
        self.proc.stdin.write("exit_command".encode('utf-8'))
        self.proc.stdin.flush()
        exit_code = self.proc.wait()

simulation = Simulation("\path\to\binary")
output = simulation.executeCmd("command")
# do something with the output
for i in range(1000000):
    simulation.executeCmd_IgnoreOutput("command")  # hangs after a few thousand iterations
simulation.tearDown()
executeCmd consumes the whole output (I cannot use read() since the subprocess is still running and there is no EOF at the end of the output). But this is very expensive - I have to iterate through all the lines...
So my prototype was to create executeCmd_IgnoreOutput, which doesn't consume the buffer. But it hangs after a few thousand iterations.
My questions are:
Maybe I made a mistake at the very beginning - is the subprocess package suitable for usage like this? Maybe I should use a different tool for such a purpose...
If so, then how can I clear the stdout buffer? (flush doesn't work in that case - it still hangs)
Or maybe it hangs for different reasons (any ideas?)

To drain the stdout pipe, make sure it isn't buffered, then read it in chunks until there's nothing to read:
import subprocess

class Simulation:
    def __init__(self, path):
        self.proc = subprocess.Popen(
            [path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            bufsize=0,  # unbuffered
        )

    def executeCmd(self, cmd):
        self.proc.stdin.write(cmd.encode("utf-8"))
        self.proc.stdin.flush()
        output = b""
        while True:
            data = self.proc.stdout.read(65536)
            if not data:
                break
            output += data
        return output

    def tearDown(self):
        self.executeCmd("exit_command")
        return self.proc.wait()
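Note that read() on a blocking pipe only returns an empty result at EOF, so the loop above blocks until the child closes its stdout. If the goal is merely to throw away whatever is currently sitting in the pipe, one option is to flip the descriptor into non-blocking mode first. A minimal sketch (POSIX; os.set_blocking only gained Windows pipe support in Python 3.12, and drain_stdout is a name made up here):

import os

def drain_stdout(proc):
    fd = proc.stdout.fileno()
    os.set_blocking(fd, False)       # switch the pipe to non-blocking mode
    try:
        while True:
            try:
                chunk = os.read(fd, 65536)
            except BlockingIOError:  # nothing buffered right now
                break
            if not chunk:            # EOF: the child closed its stdout
                break
    finally:
        os.set_blocking(fd, True)    # restore blocking mode for normal reads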

How can I write to a process's stdin and read from its stdout while it is still running?

Edit: I am running this on Windows, as the servers are administered from my main gaming rig.
I am trying to create a Minecraft server controller so that I can streamline the process of administering servers. I have been able to use subprocess to start and stop the server with no issues; however, when I try to get it to issue commands, it will not work unless I use subprocess.communicate(). The problem with that solution is that it causes the script to wait forever and eventually crash, leaving the server running. Here is the code for what I have.
import subprocess

class Server:
    def __init__(self, server_start_bat, dir):
        self.server_start_bat = server_start_bat
        self.dir = dir

    def start_server(self):
        self.server = subprocess.Popen(self.server_start_bat, cwd=self.dir, shell=True,
                                       stdin=subprocess.PIPE, universal_newlines=True, text=True)

    def stop_server(self):
        self.run_command('stop')
        self.server.communicate()

    def op_me(self):
        self.run_command('op Username')

    def clear_weather(self):
        self.run_command('weather clear 1000000')

    def mob_griefing_on(self):
        self.run_command('gamerule mobGriefing true')

    def mob_griefing_off(self):
        self.run_command('gamerule mobGriefing false')

    def set_time_day(self):
        self.run_command('time set day')

    def set_time_night(self):
        self.run_command('time set night')

    def run_command(self, command_text):
        self.server.stdin.write(f'{command_text}\n')
Edit: Check out Non-blocking read on a subprocess.PIPE in python
I was battling a very similar issue and it took me 3 days to get it all running.
The problem with subprocess.communicate is that you can only call it once, and the output seems to be given only once the subprocess terminates (in my experience; I could very well be wrong).
The question I had, which IMHO your question boils down to, was: how can I write to a process's stdin and read from its stdout while it is still running?
I ended up using pipes. Note that they have to be flushed if you write something smaller than BUFSIZE, which is 0x1000 in my case. See man pipe for more info.
Anyway, below is my code.
import time
import pty
import os
import subprocess

PIPE_BUFSIZE = 4096

_control = False

class Pipe:
    def __init__(self, flags=None, terminal=True):
        """Creates a Pipe you can easily write to and read from. Default is to open up a pseudo-terminal.
        If you supply flags, pipe2 is used."""
        if flags or not terminal:
            self._readfd, self._writefd = os.pipe2(flags)
        else:  # default
            self._readfd, self._writefd = pty.openpty()
        self.readobj = open(self._readfd, "rb", 0)
        self.writeobj = open(self._writefd, "wb", 0)

    def write(self, text):
        if isinstance(text, str):
            text = text.encode()
        result = self.writeobj.write(text)
        self.writeobj.flush()
        return result

    def read(self, n):
        if _control:  # I'm not using this anymore since the flush seems to be reliable
            controlstr = b"this_was_flushed"
            controllen = len(controlstr)
            self.writeobj.write(controlstr)
            time.sleep(.001)
            self.writeobj.flush()
            result = self.readobj.read(n + controllen)
            assert result[-controllen:] == controlstr
            return result[:-controllen]
        else:
            self.writeobj.flush()
            return self.readobj.read(n)

class ProcessIOWrapper:
    """Provides an easy way to redirect stdout and stderr using pipes.
    Write to the process's STDIN and read from STDOUT at any time!"""

    def __init__(self, args, inittext=None, redirect=True):
        # create three pseudo-terminals
        self.in_pipe = Pipe()
        self.out_pipe = Pipe()
        self.err_pipe = Pipe()
        # if we want to redirect, tell the subprocess to write to our pipe,
        # else it will print to the normal stdout
        if redirect:
            stdout_arg = self.out_pipe.writeobj
            stderr_arg = self.err_pipe.writeobj
        else:
            stdout_arg = None
            stderr_arg = None
        self.process = subprocess.Popen(args, stdin=self.in_pipe.readobj,
                                        stdout=stdout_arg, stderr=stderr_arg)

    def write(self, text):
        return self.in_pipe.write(text)

    def read(self, n, start=None):
        return self.out_pipe.read(n)
Small C program I used for testing (I used others, too):
#include <stdio.h>
#include <string.h>

int main() {
    puts("start\n");
    char buf[100] = "bufbufbuf";
    while (1) {
        if (fgets(buf, sizeof(buf), stdin) == NULL)
            break;                       /* EOF on stdin */
        buf[strcspn(buf, "\n")] = '\0';  /* strip the trailing newline */
        for (int i = 0; i < strlen(buf); i++)
            if (i % 2 == 0) buf[i] += 1;
        puts(buf);
    }
    return 0;
}
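For completeness, a minimal usage sketch (the binary name ./echo_test is made up here, assuming the C program above was compiled to it; reads may block until the child has produced output):

wrapper = ProcessIOWrapper(["./echo_test"])  # hypothetical binary name
print(wrapper.read(100))    # should contain the initial "start" line
wrapper.write("hello\n")    # send a line to the child's stdin
print(wrapper.read(100))    # read back the transformed line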

Non-blocking thread of subprocess stdout.PIPE stream with a queue on Windows still hangs

I am trying to work with a subprocess routine that spawns an interactive child process which expects user input. This process normally hangs immediately if I try to read its stdout stream directly.
I read through many solutions using fcntl, asynchronous operations, pexpect, and redirecting output to files. Although temporary log files should work, I don't want to go that route, as I would like to keep the process interactive within the Python interface. Of all of those, threads seemed to be the easiest and most straightforward way (I could not get pexpect to work properly, although it seemed to be a good option, too).
Indeed, when I implemented the following code (stolen from Non-blocking read on a subprocess.PIPE in python):
import os
import subprocess as sp
from threading import Thread
from queue import Queue, Empty

class App:
    def __init__(self):
        proc = sp.Popen(['app'], stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, encoding='utf8')
        out = NonBlockingStreamReader(proc.stdout)
        print(out.readline(1))

class NonBlockingStreamReader:
    def __init__(self, stream):
        self.s = stream
        self.q = Queue()

        def populateQueue(stream, queue):
            while True:
                line = stream.readline()
                if line:
                    queue.put(line)
                else:
                    raise UnexpectedEndOfStream

        self.t = Thread(target=populateQueue, args=(self.s, self.q))
        self.t.daemon = True
        self.t.start()

    def readline(self, timeout=None):
        try:
            return self.q.get(block=timeout is not None, timeout=timeout)
        except Empty:
            return None

class UnexpectedEndOfStream(Exception):
    pass
everything worked flawlessly. Well, the problem is that it worked on Linux only, even though the solution should be Windows-compatible.
When I try to run this implementation on Windows, the newly created thread hangs the moment it tries to execute stream.readline(), never gets to actually populate the queue, and thus the output of out.readline(1) read from the main thread is None.
How can I make this work on Windows?

Creating a minimal sandbox for running binary programs in Python3

I am trying to build a Python sandbox for running students' code in a minimal and safe environment. I intend to run it in a container and to limit its access to the resources of that container. So, I am currently designing the part of the sandbox that runs inside the container and handles access to the resources.
For now, my specification is to limit the amount of time and memory used by the process. I also need to be able to communicate with the process through stdin and to catch the retcode, stdout and stderr at the end of the execution.
Moreover, the program may enter an infinite loop and fill up the memory through stdout or stderr (I had one student's program that crashed my container because of that). So I also want to be able to limit the size of the recovered stdout and stderr (after a certain limit is reached, I can just kill the process and ignore the rest of the output; I do not care about this extra data, as it most likely comes from a buggy program and should be discarded).
For now, my sandbox catches almost everything, meaning that I can:
Set a timeout as I want;
Set a limit on the memory used by the process;
Feed the process through stdin (for now, a given string);
Get the final retcode, stdout and stderr.
Here is my current code (I tried to keep it small for the example):
MEMORY_LIMIT = 64 * 1024 * 1024
TIMEOUT_LIMIT = 5 * 60

__NR_FILE_NOT_FOUND = -1
__NR_TIMEOUT = -2
__NR_MEMORY_OUT = -3

def limit_memory(memory):
    import resource
    return lambda: resource.setrlimit(resource.RLIMIT_AS, (memory, memory))

def run_program(cmd, sinput='', timeout=TIMEOUT_LIMIT, memory=MEMORY_LIMIT):
    """Run the command line and output (ret, sout, serr)."""
    from subprocess import Popen, PIPE, TimeoutExpired
    try:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                     preexec_fn=limit_memory(memory))
    except FileNotFoundError:
        return (__NR_FILE_NOT_FOUND, "", "")
    sout, serr = "".encode("utf-8"), "".encode("utf-8")
    try:
        sout, serr = proc.communicate(sinput.encode("utf-8"), timeout=timeout)
        ret = proc.wait()
    except TimeoutExpired:
        ret = __NR_TIMEOUT
    except MemoryError:
        ret = __NR_MEMORY_OUT
    return (ret, sout.decode("utf-8"), serr.decode("utf-8"))

if __name__ == "__main__":
    ret, out, err = run_program(['./example.sh'], timeout=8)
    print("return code: %i\n" % ret)
    print("stdout:\n%s" % out)
    print("stderr:\n%s" % err)
The missing features are:
Set a limit on the size of stdout and stderr. I looked on the Web and saw several attempts, but none really works.
Attach a function to stdin rather than just a static string. The function should connect to the stdout and stderr pipes and return bytes to stdin.
Does anyone have an idea about that?
PS: I already looked at:
Non blocking reading from a subprocess output stream in Python;
Python subprocess with timeout and large output (>64K)
As I was saying, you can create your own buffers and write the STDOUT/STDERR to them, checking the size along the way. For convenience, you can write a small io.BytesIO wrapper to do the check for you, e.g.:
from io import BytesIO

# lets first create a size-controlled BytesIO buffer for convenience
class MeasuredStream(BytesIO):
    def __init__(self, maxsize=1024):  # lets use 1 KB as a default
        super(MeasuredStream, self).__init__()
        self.maxsize = maxsize
        self.length = 0

    def write(self, b):
        if self.length + len(b) > self.maxsize:  # o-oh, max size exceeded
            # write only up to maxsize, truncate the rest
            super(MeasuredStream, self).write(b[:self.maxsize - self.length])
            raise ValueError("Max size reached, excess data is truncated")
        # plenty of space left, write the bytes and increase the length
        self.length += super(MeasuredStream, self).write(b)
        return len(b)  # convention: return the written number of bytes
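A quick illustration of how the wrapper behaves (the sizes here are arbitrary, just for demonstration):

stream = MeasuredStream(maxsize=10)
stream.write(b"12345")           # fits: 5 of 10 bytes used
try:
    stream.write(b"6789012345")  # would total 15 bytes: truncated to 10, then raises
except ValueError as exc:
    print(exc)                   # Max size reached, excess data is truncated
stream.seek(0)
print(stream.read())             # b'1234567890'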
Mind you, if you intend to do truncation / seek & replace you'll have to account for those in your length, but this is enough for our purposes.
Anyway, now all you need to do is handle your own streams, and account for the possible ValueError from the MeasuredStream, instead of using Popen.communicate(). This, unfortunately, also means that you'll have to handle the timeout yourself. Something like:
from subprocess import Popen, PIPE, STDOUT, TimeoutExpired
import time

MEMORY_LIMIT = 64 * 1024 * 1024
TIMEOUT_LIMIT = 5 * 60
STDOUT_LIMIT = 1024 * 1024  # let's use 1 MB as a STDOUT limit

__NR_FILE_NOT_FOUND = -1
__NR_TIMEOUT = -2
__NR_MEMORY_OUT = -3
__NR_MAX_STDOUT_EXCEEDED = -4  # let's add a new return code

# a cross-platform monotonic clock for the timeout bookkeeping
get_timer = time.monotonic

def limit_memory(memory):
    import resource
    return lambda: resource.setrlimit(resource.RLIMIT_AS, (memory, memory))

def run_program(cmd, sinput='', timeout=TIMEOUT_LIMIT, memory=MEMORY_LIMIT):
    """Run the command line and output (ret, sout)."""
    try:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT,
                     preexec_fn=limit_memory(memory))
    except FileNotFoundError:
        return (__NR_FILE_NOT_FOUND, "")
    sout = MeasuredStream(STDOUT_LIMIT)  # store STDOUT in a measured stream
    start_time = get_timer()  # store a reference timer for our custom timeout
    try:
        proc.stdin.write(sinput.encode("utf-8"))  # write the input to STDIN
        proc.stdin.flush()  # flush the STDIN buffer
        while True:  # our main listener loop
            line = proc.stdout.readline()  # read a line from the STDOUT
            # use proc.stdout.read(buf_size) instead to handle your own buffer
            if line != b"":  # content collected...
                sout.write(line)  # write it to our stream
            elif proc.poll() is not None:  # process finished, nothing to do
                break
            # finally, check the current time progress...
            if get_timer() >= start_time + timeout:
                raise TimeoutExpired(proc.args, timeout)
        ret = proc.poll()  # get the return code
    except TimeoutExpired:
        proc.kill()  # we're no longer interested in the process, kill it
        ret = __NR_TIMEOUT
    except MemoryError:
        ret = __NR_MEMORY_OUT
    except ValueError:  # max buffer reached
        proc.kill()  # we're no longer interested in the process, kill it
        ret = __NR_MAX_STDOUT_EXCEEDED
    sout.seek(0)  # rewind the buffer
    return ret, sout.read().decode("utf-8")  # send the results back

if __name__ == "__main__":
    ret, out = run_program(['./example.sh'], timeout=8)
    print("return code: %i\n" % ret)
    print("stdout (stderr merged in):\n%s" % out)
There are two 'issues' with this, though, the first one being quite obvious: I'm piping the subprocess's STDERR to STDOUT, so the result will be a mix of the two. Since reading from the STDOUT and STDERR streams is a blocking operation, if you want to read them both separately you'll have to spawn two threads (and separately handle their ValueError exceptions when a stream size is exceeded). The second issue is that the subprocess's STDOUT can lock out the timeout check, since the check only runs after STDOUT actually flushes some data. This can also be solved by a separate timer thread that forcefully kills the process if the timeout is exceeded. In fact, that's exactly what Popen.communicate() does.
The principle of operation would essentially be the same: you'll just have to outsource the checks to separate threads and join everything back in the end. That's an exercise I'll leave to you ;) A rough starting point is sketched below.
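For illustration, a minimal sketch of what the per-stream reader threads could look like (an outline under the same assumptions as above, with proc created with stdout=PIPE and stderr=PIPE; not a finished implementation):

from threading import Thread

def read_stream(stream, sink, errors):
    # drain one pipe into its own MeasuredStream, recording an overflow if any
    try:
        for line in iter(stream.readline, b""):
            sink.write(line)  # raises ValueError once the size limit is hit
    except ValueError as exc:
        errors.append(exc)

# proc is assumed to be an existing Popen with both streams piped
sout, serr = MeasuredStream(STDOUT_LIMIT), MeasuredStream(STDOUT_LIMIT)
errors = []
threads = [Thread(target=read_stream, args=(proc.stdout, sout, errors)),
           Thread(target=read_stream, args=(proc.stderr, serr, errors))]
for t in threads:
    t.start()
# ... wait with proc.wait(timeout=...) (or a timer thread that kills proc), then:
for t in threads:
    t.join()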
As for your second missing feature, could you elaborate a bit more on what you have in mind?
It seems that this problem is more complex than it looks; I had a hard time discovering solutions on the Web and understanding them all.
In fact, the complexity of the problem comes from the fact that there are several ways to solve it. I explored three (threading, multiprocessing and asyncio).
Finally, I chose to use a separate thread to listen to the current subprocess and capture the output of the program. It seems to me to be the simplest, most portable and most efficient way to proceed.
So, the basic idea behind this solution is to create a thread that listens to stdout and stderr and gathers all the output. When you reach a limit, you just kill the process and return.
Here is a simplified version of my code:
from subprocess import Popen, PIPE, TimeoutExpired
from queue import Queue
from time import sleep
from threading import Thread

MAX_BUF = 35

def stream_reader(p, q, n):
    stdout_buf, stderr_buf = b'', b''
    while p.poll() is None:
        sleep(0.1)
        stdout_buf += p.stdout.read(n)
        stderr_buf += p.stderr.read(n)
        if (len(stdout_buf) > n) or (len(stderr_buf) > n):
            stdout_buf, stderr_buf = stdout_buf[:n], stderr_buf[:n]
            try:
                p.kill()
            except ProcessLookupError:
                pass
            break
    q.put((stdout_buf.decode('utf-8', errors="ignore"),
           stderr_buf.decode('utf-8', errors="ignore")))

# Main function
cmd = ['./example.sh']
proc = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)

q = Queue()
t_io = Thread(target=stream_reader, args=(proc, q, MAX_BUF,), daemon=True)
t_io.start()

# Running the process
try:
    proc.stdin.write(b'AAAAAAA')
    proc.stdin.close()
except IOError:
    pass

try:
    ret = proc.wait(timeout=20)
except TimeoutExpired:
    ret = -1  # Or whatever code you decide to give it.

t_io.join()
sout, serr = q.get()
print(ret, sout, serr)
You can attach whatever you want to the example.sh script that is run. Note that several pitfalls are avoided here to prevent deadlocks and broken code (I tested this script a bit). Yet, I am not totally sure of it, so do not hesitate to point out obvious errors or improvements.

Python subprocess cmd.exe closes after first command

I am working on a Python program which interacts with the cmd window.
I am using subprocess with PIPE.
If, for example, I write "dir" (via stdin), I use communicate() in order to get the response from cmd, and it does work.
The problem is that in a while True loop this doesn't work more than once; it seems like the subprocess closes itself.
Help me, please.
import subprocess

process = subprocess.Popen('cmd.exe', shell=False, stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE, stderr=None)
x = ""
while x != "x":
    x = raw_input("insert a command \n")
    process.stdin.write(x + "\n")
    o, e = process.communicate()
    print o
process.stdin.close()
The main problem is that trying to read from subprocess.PIPE deadlocks when the program is still running but there is nothing to read from stdout. communicate() avoids this by manually terminating the process.
A solution would be to put the piece of code that reads stdout in another thread, and then access it via a Queue, which allows for reliable sharing of data between threads by timing out instead of deadlocking.
The new thread will read standard out continuously, stopping when there is no more data.
Each line will be grabbed from the queue until a timeout is reached (no more data in the Queue); then the list of lines will be displayed on the screen.
This approach will work for non-interactive programs.
import subprocess
import threading
import Queue

def read_stdout(stdout, queue):
    while True:
        queue.put(stdout.readline())  # This hangs when there is no IO

process = subprocess.Popen('cmd.exe', shell=False, stdout=subprocess.PIPE, stdin=subprocess.PIPE)

q = Queue.Queue()
t = threading.Thread(target=read_stdout, args=(process.stdout, q))
t.daemon = True  # t stops when the main thread stops
t.start()

while True:
    x = raw_input("insert a command \n")
    if x == "x":
        break
    process.stdin.write(x + "\n")
    o = []
    try:
        while True:
            o.append(q.get(timeout=.1))
    except Queue.Empty:
        print ''.join(o)
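The code above is Python 2. A Python 3 sketch of the same thread-plus-queue idea (queue and raw_input were renamed, print is a function, and text=True gives string pipes; cmd.exe keeps this Windows-specific):

import subprocess
import threading
import queue

def read_stdout(stdout, q):
    for line in iter(stdout.readline, ''):  # blocks until a line arrives
        q.put(line)

process = subprocess.Popen('cmd.exe', shell=False, stdout=subprocess.PIPE,
                           stdin=subprocess.PIPE, text=True)

q = queue.Queue()
t = threading.Thread(target=read_stdout, args=(process.stdout, q), daemon=True)
t.start()

while True:
    x = input("insert a command \n")
    if x == "x":
        break
    process.stdin.write(x + "\n")
    process.stdin.flush()  # make sure the command actually reaches cmd.exe
    lines = []
    try:
        while True:
            lines.append(q.get(timeout=.1))
    except queue.Empty:
        print(''.join(lines))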

Checking to see if there is more data to read from a file descriptor using Python's select module

I have a program that creates a subprocess within a thread, so that the thread can constantly check for specific output conditions (from either stdout or stderr) and call the appropriate callbacks while the rest of the program continues. Here is a pared-down version of that code:
import select
import subprocess
import threading

def run_task():
    command = ['python', 'a-script-that-outputs-lines.py']
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    while True:
        ready, _, _ = select.select((proc.stdout, proc.stderr), (), (), .1)

        if proc.stdout in ready:
            next_line_to_process = proc.stdout.readline()
            # process the output

        if proc.stderr in ready:
            next_line_to_process = proc.stderr.readline()
            # process the output

        if not ready and proc.poll() is not None:
            break

thread = threading.Thread(target=run_task)
thread.run()
It works reasonably well, but I would like the thread to exit once two conditions are met: the running child process has finished, and all of the data in stdout and stderr has been processed.
The difficulty I have is that if my last condition is as it is above (if not ready and proc.poll() is not None), then the thread never exits, because once stdout's and stderr's file descriptors are marked as ready, they never become unready (even after all of the data has been read from them, when read() would hang and readline() would return an empty string).
If I change that condition to just if proc.poll() is not None, then the loop exits when the program exits, and I can't guarantee that it has seen all of the data that needs to be processed.
Is this just the wrong approach, or is there a way to reliably determine when you've read all of the data that will ever be written to a file descriptor? Or is this an issue specific to reading from the stderr/stdout of a subprocess?
I have been trying this on Python 2.5 (running on OS X) and also tried select.poll() and select.epoll()-based variants on Python 2.6 (running on Debian with a 2.6 kernel).
The select module is appropriate if you want to find out whether you can read from a pipe without blocking.
To make sure that you've read all data, use a simpler condition if proc.poll() is not None: break and call rest = [pipe.read() for pipe in [p.stdout, p.stderr]] after the loop.
It is unlikely that a subprocess closes its stdout/stderr before its shutdown; therefore you can skip the logic that handles EOF, for simplicity.
Don't call Thread.run() directly; use Thread.start() instead. You probably don't need the separate thread here at all.
Don't call p.stdout.readline() after the select(): it may block. Use os.read(p.stdout.fileno(), limit) instead. An empty bytestring indicates EOF for the corresponding pipe.
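Put together, the loop from the question might be restructured along these lines (a sketch based on the advice above; the 1024-byte limit is arbitrary):

import os
import select
import subprocess

command = ['python', 'a-script-that-outputs-lines.py']
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

while proc.poll() is None:
    ready, _, _ = select.select([proc.stdout, proc.stderr], [], [], .1)
    for pipe in ready:
        chunk = os.read(pipe.fileno(), 1024)  # won't block once select() reports ready
        if chunk:
            pass  # process the chunk (line splitting is up to you)

# the process has exited; drain whatever is left in both pipes
rest = [pipe.read() for pipe in (proc.stdout, proc.stderr)]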
As an alternative, or in addition, you could make the pipes non-blocking using the fcntl module:
import os
from fcntl import fcntl, F_GETFL, F_SETFL

def make_nonblocking(fd):
    return fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | os.O_NONBLOCK)
and handle IO/OS errors while reading.
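For example (a sketch assuming make_nonblocking() was applied to the descriptor; on Python 3 a non-blocking read with no data raises BlockingIOError):

import os

def read_available(fd, limit=1024):
    # returns b'' if nothing is buffered, None at EOF, data otherwise
    try:
        data = os.read(fd, limit)
    except BlockingIOError:  # nothing to read at the moment
        return b''
    return data if data else None  # os.read() returns b'' only at EOF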
My eventual solution, as I mentioned above, was the following, in case this is helpful to anyone. I think it is the right approach, since I'm now 97.2% sure you can't do this with just select()/poll() and read():
import select
import subprocess
import threading
def run_task():
command = ['python', 'a-script-that-outputs-lines.py']
proc = subprocess.Popen(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
while True:
ready, _, _ = select.select((proc.stdout, proc.stderr), (), (), .1)
if proc.stdout in ready:
next_line_to_process = proc.stdout.readline()
if next_line_to_process:
# process the output
elif proc.returncode is not None:
# The program has exited, and we have read everything written to stdout
ready = filter(lambda x: x is not proc.stdout, ready)
if proc.stderr in ready:
next_line_to_process = proc.stderr.readline()
if next_line_to_process:
# process the output
elif proc.returncode is not None:
# The program has exited, and we have read everything written to stderr
ready = filter(lambda x: x is not proc.stderr, ready)
if proc.poll() is not None and not ready:
break
thread = threading.Thread(target = run_task)
thread.run()
You could do a raw os.read(fd, size) on the pipe's file descriptor instead of using readline(). This is a non-blocking operation (assuming the descriptor has been made non-blocking) which can also detect EOF; in that case it returns an empty string or bytes object. You'd have to implement the line splitting and buffering yourself. Use something like this:
class NonblockingReader():
    def __init__(self, pipe):
        self.fd = pipe.fileno()
        self.buffer = ""

    def readlines(self):
        data = os.read(self.fd, 2048)
        if not data:
            return None  # EOF
        self.buffer += data
        if os.linesep in self.buffer:
            lines = self.buffer.split(os.linesep)
            self.buffer = lines[-1]  # keep the trailing partial line
            return lines[:-1]
        else:
            return []
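A possible usage sketch (hypothetical: proc is assumed to be an existing Popen with stdout=PIPE, and a real loop would also check proc.poll() so it can stop when the child exits silently):

import select

reader = NonblockingReader(proc.stdout)  # proc is assumed to exist
while True:
    ready, _, _ = select.select([proc.stdout], [], [], .1)
    if not ready:
        continue  # nothing yet; a real loop would also check proc.poll()
    lines = reader.readlines()
    if lines is None:  # EOF: the child closed its stdout
        break
    for line in lines:
        print(line)  # handle each complete line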
