Python subprocess timeout does not terminate process

I have a problem with terminating processes on timeout. Basically, I am running one Linux command in a for loop (the same command for a list of files):
for target in targets:
    try:
        result = subprocess.run(['filerunner', 'work', '-target=' + target],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=600)
        logging.info(result.stdout.decode('utf-8'))  # logging out to log file
        logging.info(result.stderr.decode('utf-8'))  # logging err to log file
    except subprocess.TimeoutExpired:
        logging.info('Operation failed due to process timeout (10 minutes).')
        # result.kill() tried with this to kill process manually
        time.sleep(1)
        pass
I tried a few things, but the processes are not being killed after the timeout expires. How can I do this?
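A pattern that often helps on Linux, sketched below under the assumption that filerunner spawns child processes of its own that outlive the direct child: start the command with Popen in its own process group (start_new_session=True) and kill the whole group on timeout, instead of relying on subprocess.run(), whose timeout only kills the immediate child. The function name run_with_timeout is just illustrative.

import os
import signal
import subprocess
import logging

def run_with_timeout(target, timeout=600):
    # start_new_session=True makes the child the leader of a new process group,
    # so killing that group also reaches any grandchildren it spawned
    proc = subprocess.Popen(['filerunner', 'work', '-target=' + target],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            start_new_session=True)
    try:
        out, err = proc.communicate(timeout=timeout)
        logging.info(out.decode('utf-8'))
        logging.info(err.decode('utf-8'))
    except subprocess.TimeoutExpired:
        logging.info('Operation failed due to process timeout (10 minutes).')
        # kill the entire process group, not just the immediate child
        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
        proc.communicate()  # reap the child and drain the pipes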

Related

Python subprocess.Popen() is not starting the subprocess properly

I have a Python test that starts a TCP server as a subprocess.
def test_client(self):
    if sys.platform.startswith('linux'):
        proc = subprocess.Popen([f'{self.bin_output_path}/CaptureUnitHalServer'], stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    else:
        proc = subprocess.Popen([f'{self.bin_output_path}/CaptureUnitHalServer'], stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    hal_access = HalAccess(port=12309)
    hal = CaptureUnitHal(hal_access)
    response = hal.test_scratch(TestScratchReq([1]))
    assert TestScratchCnf(verdict=True, return_value=5) == response
    print(f"\nRESPONSE: {response}\n")
    # The first proc.communicate() waits for the subprocess to finish. As the server runs forever,
    # a TimeoutExpired error is thrown; the second proc.communicate() in the exception handler
    # gets stdout and stderr from the server.
    try:
        outs, errs = proc.communicate(timeout=2)
        print(f'{outs.decode()}\n')
    except subprocess.TimeoutExpired:
        proc.kill()
        outs, errs = proc.communicate()
        print("CaptureUnitHalServer stdout:")
        print(f'{outs.decode()}\n')
        print("CaptureUnitHalServer stderr:")
        print(f'{errs.decode()}\n')
The hal is a simple TCP client that sends a test request (TestScratchReq) to the server and receives the response.
On Linux this works perfectly fine, but when I run it on Windows the code blocks forever at the line response = hal.test_scratch(TestScratchReq([1])).
This line calls
def send_and_receive(self, request: str) -> str:
    self._send(request)
    return self._receive()
and blocks in the socket.recv() call in self._receive()
data = self._socket.recv(1024).decode(encoding=self.MESSAGE_ENCODING) # blocking
So it seems like the server is not started properly as a subprocess on Windows when calling subprocess.Popen().
The following command shows the port as listening however:
Get-NetTCPConnection -State Listen | grep 12309
0.0.0.0 12309 0.0.0.0 0 Listen
I have a second implementation that is also working on Windows:
def test_client(self):
    daemon = Thread(target=self.server_thread, daemon=True, name='HalServer')
    daemon.start()
    hal_access = HalAccess(port=12309)
    hal = CaptureUnitHal(hal_access)
    response = hal.test_scratch(TestScratchReq([1]))
    print(response)
    assert TestScratchCnf(verdict=True, return_value=5) == response
    daemon.join()  # wait for daemon timeout

def server_thread(self):
    if sys.platform.startswith('linux'):
        result = subprocess.run([f'{self.bin_output_path}/CaptureUnitHalServer'], timeout=5, stdout=subprocess.PIPE)
    else:
        result = subprocess.run([f'{self.bin_output_path}/CaptureUnitHalServer'], timeout=5, creationflags=subprocess.CREATE_NEW_CONSOLE)
    print(result.stdout.decode())
    print(result.stderr.decode())
But it seems overcomplicated to me to have another Thread just to start the blocking subprocess.run() call, which throws a TimeoutExpired error after 5 seconds. Another problem with this solution is that I don't get the stdout and stderr of the subprocess: the following two lines of code never print anything, as the Thread is killed by the exception before it reaches them.
print(result.stdout.decode())
print(result.stderr.decode())
EDIT:
My question is: why is the Windows run of the first version of the code blocking? How does subprocess.Popen() differ between Linux and Windows? Is the subprocess not started properly in the Windows case?
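One possible cause, though it is not confirmed by the question: both stdout and stderr are PIPEs, and if CaptureUnitHalServer writes enough output on Windows before answering the TCP request, it can stall on a full pipe buffer while the test blocks on socket.recv(). A sketch that rules this out by draining both pipes in background threads; bin_output_path stands in for self.bin_output_path from the question, and the helper name _drain is hypothetical:

import subprocess
import threading

def _drain(pipe, sink):
    # keep reading so the child never stalls on a full pipe buffer
    for line in iter(pipe.readline, b''):
        sink.append(line)
    pipe.close()

proc = subprocess.Popen([f'{bin_output_path}/CaptureUnitHalServer'],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out_lines, err_lines = [], []
threading.Thread(target=_drain, args=(proc.stdout, out_lines), daemon=True).start()
threading.Thread(target=_drain, args=(proc.stderr, err_lines), daemon=True).start()

# ... run the HalAccess / CaptureUnitHal assertions here as before ...

proc.kill()
proc.wait()
print(b''.join(out_lines).decode())
print(b''.join(err_lines).decode())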

Subprocess process.stdout.readline() freezes the program

I am trying to make a Python program communicate with a Minecraft server I am hosting, but the problem is that reading the console output from the server freezes the program. I am using Popen and PIPE from subprocess, and whenever I use process.stdout.readline() it prints all the lines and, once it is done, it freezes the program, so I am not able to execute any more commands after that.
def start(cmd):
    try:
        process = Popen(cmd, stdin=PIPE, stdout=PIPE)
    except Exception as e:
        print(e)
    return process

process = start('java -Xmx1024M -Xms1024M -jar server.jar nogui')  # runs the Minecraft server with 1 GB of RAM
while True:
    print(process.stdout.readline())  # prints out every line in the server console
    readmail()  # the program checks email for commands, and enters the command into the console using stdin
I have tried this:
def start(cmd):
    try:
        process = Popen(['python', '-u', cmd], stdin=PIPE, stdout=PIPE)
    except Exception as e:
        print(e)
    return process
It fixes the issue with process.stdout.readline(), but it gives me an error because the command is meant to be called in cmd and not with python. Does anyone know how to fix the issue with readline()?
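A common way around the blocking readline(), sketched here with the same server command as above, is to read the child's stdout in a background thread that pushes lines onto a queue; the main loop then polls the queue and never blocks. The 0.1-second poll interval and the use of shell=True are assumptions, not taken from the question:

from subprocess import Popen, PIPE
from threading import Thread
from queue import Queue, Empty

def start(cmd):
    # shell=True so the command string is interpreted the way cmd would run it
    return Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)

def enqueue_output(pipe, queue):
    for line in iter(pipe.readline, b''):  # ends when the server closes its stdout
        queue.put(line)
    pipe.close()

process = start('java -Xmx1024M -Xms1024M -jar server.jar nogui')
lines = Queue()
Thread(target=enqueue_output, args=(process.stdout, lines), daemon=True).start()

while True:
    try:
        print(lines.get(timeout=0.1))  # print a line from the server console if one is waiting
    except Empty:
        pass                           # no output right now; carry on
    readmail()                         # as in the question: check email and feed commands to process.stdin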

Exception ignored error when adding a timeout for asyncio.subprocess.communicate() in Python 3

I'm trying to add a timeout when using asyncio.subprocess. I start a subprocess to execute some commands; if it takes too long, I want it to fail and terminate the process. Following the official documentation, I use asyncio.wait_for to implement it. Here is the sample code:
import asyncio

async def ping():
    cmd = 'ping localhost'
    proc = await asyncio.create_subprocess_shell(cmd,
        stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), 3)
    except asyncio.TimeoutError:
        print('Timeout')
        return 'Cancelled'
    finally:
        proc.terminate()
    return stdout.decode().strip()

def main():
    loop = asyncio.get_event_loop()  # on Windows, use another event loop to support subprocess
    task = asyncio.ensure_future(ping())
    result = loop.run_until_complete(task)
    print(f'the result is {result}')
    loop.close()

if __name__ == '__main__':
    main()
I can always get the result, whether it succeeds or times out, but I get an error in the console when running it.
On macOS, it looks like:
Exception ignored when trying to write to the signal wakeup fd:
BrokenPipeError: [Errno 32] Broken pipe
On Windows, it looks like: Event loop is closed, the task is destroyed but it is pending. Apologies, I can't remember it exactly; I'm testing on a Mac now.
I checked some answers and, referring to the documentation, wait_for raises a TimeoutError and cancels the wrapped coroutine. But because the task is not cancelled immediately, it will be cancelled on the next loop iteration, and by that point the event loop is already closed in my program.
So my question is: what is the proper way to handle this kind of issue? Should I care about this error, given that I always get the result I want?
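What usually makes warnings like this go away (a sketch against Python 3.7+, not a verified fix for the exact traceback above) is reaping the terminated child while the event loop is still running, and letting asyncio.run() shut the loop down cleanly instead of closing it by hand:

import asyncio

async def ping():
    proc = await asyncio.create_subprocess_shell(
        'ping localhost',
        stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), 3)
        return stdout.decode().strip()
    except asyncio.TimeoutError:
        proc.terminate()
        await proc.wait()  # reap the child while the loop is still running
        return 'Cancelled'

if __name__ == '__main__':
    result = asyncio.run(ping())  # asyncio.run() tears the loop down only after pending work completes
    print(f'the result is {result}')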

Unable to kill Python subprocess using process.kill() or process.terminate() or os.kill() or using psutil

Using Python, I am starting two subprocesses in parallel. One is an HTTP server (CustomSimpleHTTPServer.py); the other runs another program (navigationScript.py, a Python script generated by the Selenium IDE plugin that opens Firefox, navigates to a website and does some interactions). I want to stop the first subprocess (the HTTP server) when the second subprocess has finished executing.
The logic of my code is that the selenium script will open a website. The website will automatically make a few GET calls to my HTTP Server. After the selenium script is finished executing, the HTTP Server is supposed to be closed so that it can log all the captured requests in a file.
Here is my main Python code:
class Myclass(object):
    HTTPSERVERPROCESS = ""

    def startHTTPServer(self):
        print "********HTTP Server started*********"
        try:
            self.HTTPSERVERPROCESS = subprocess.Popen('python CustomSimpleHTTPServer.py', \
                shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except Exception as e:
            print "Exception captured while starting HTTP Server process: %s\n" % e

    def startNavigatingFromBrowser(self):
        print "********Opening firefox to start navigation*********"
        try:
            process = subprocess.Popen('python navigationScript.py', \
                shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            process.communicate()
            process.wait()
        except Exception as e:
            print "Exception captured starting Browser Navigation process : %s\n" % e
        try:
            if process.returncode == 0:
                print "HTTPSERVEPROCESS value: %s" % self.HTTPSERVERPROCESS.returncode
                print self.HTTPSERVERPROCESS
                #self.HTTPSERVERPROCESS.kill()
                #self.HTTPSERVERPROCESS.terminate()
                #self.kill(self.HTTPSERVERPROCESS.pid)
        except Exception as e:
            print "Exception captured while killing HTTP Server process : %s\n" % e

    def kill(self, proc_pid):
        process = psutil.Process(proc_pid)
        for proc in process.get_children(recursive=True):
            proc.kill()
        process.kill()

    def startCapture(self):
        print "********Starting Parallel execution of Server initiation and firefox navigation script*********"
        t1 = threading.Thread(target=self.startHTTPServer())
        t2 = threading.Thread(target=self.startNavigatingFromBrowser())
        t1.start()
        t2.start()
        t2.join()
Note: Execution starts by calling startCapture()
Here is the code for CustomSimpleHTTPServer.py, which is supposed to write the captured requests to logfile.txt upon termination:
import SimpleHTTPServer
import SocketServer

PORT = 5555

class MyHTTPHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    log_file = open('logfile.txt', 'w')

    def log_message(self, format, *args):
        self.log_file.write("%s - - [%s] %s\n" %
                            (self.client_address[0],
                             self.log_date_time_string(),
                             format % args))

Handler = MyHTTPHandler
httpd = SocketServer.TCPServer(("", PORT), Handler)
httpd.serve_forever()
When I use self.HTTPSERVERPROCESS.kill() or self.HTTPSERVERPROCESS.terminate() or os.kill(), I get the following in my terminal when running the main Python code:
********Starting Parallel execution of Server initiation and firefox navigation script*********
********SimpleHTTPServer started*********
********Opening firefox to start navigation*********
HTTPSERVEPROCESS value: <subprocess.Popen object at 0x1080f8410>
2459
Exception captured while killing HTTP Server process : [Errno 3] No such process
Process finished with exit code 0
And when I use self.kill(self.HTTPSERVERPROCESS.pid), I get the following in my terminal when running the main Python code:
********Starting Parallel execution of Server initiation and firefox navigation script*********
********SimpleHTTPServer started*********
********Opening firefox to start navigation*********
HTTPSERVEPROCESS value: <subprocess.Popen object at 0x1080f8410>
2459
Exception captured while killing HTTP Server process : 'Process' object has no attribute 'get_children'
Process finished with exit code 0
None of the following three calls is able to kill the HTTP server process:
self.HTTPSERVERPROCESS.kill()
self.HTTPSERVERPROCESS.terminate()
self.kill(self.HTTPSERVERPROCESS.pid)
I know that CustomSimpleHTTPServer.py is correct because when I run it separately, manually browse to the website, and then terminate the CustomSimpleHTTPServer.py script by hitting CTRL-C in the terminal, the logs are populated in logfile.txt.
What changes do I make to my code so that it works properly and logs are populated?
You should just use os.kill() to signal processes:
import os
import signal
...
os.kill(the_pid, signal.SIGTERM) # usually kills processes
os.kill(the_pid, signal.SIGKILL) # should always kill a process
Also, if you kill the parent process it also usually kills the children.
Update:
I made two small changes to the Server program:
Add a call to self.log_file.flush() to make sure log entries are flushed out to the log file.
Override allow_reuse_address so that you can reuse the same address shortly after terminating the server. (See this SO question)
File Server:
#!/usr/bin/env python
import SimpleHTTPServer
import SocketServer

PORT = 5555

class MyServer(SocketServer.TCPServer):
    allow_reuse_address = True

class MyHTTPHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    log_file = open('logfile.txt', 'w')

    def log_message(self, format, *args):
        self.log_file.write("%s - - [%s] %s\n" %
                            (self.client_address[0],
                             self.log_date_time_string(),
                             format % args))
        self.log_file.flush()

Handler = MyHTTPHandler
httpd = MyServer(("", PORT), Handler)
httpd.serve_forever()
Here is a simple navigation program (file Navigate):
#!/usr/bin/env python
import requests
import time

URL = "http://localhost:5555/"

def main():
    for x in range(5):
        print "x =", x
        r = requests.get(URL + "asd")
        time.sleep(1)
    print "Quitting"

main()
And here is the main program:
#!/usr/bin/env python
import os
import subprocess
import signal

def main():
    # start web server
    web = subprocess.Popen(["./Server"])
    print "web server pid:", web.pid

    # start navigator
    nav = subprocess.Popen(["./Navigate"])
    print "nav pid: ", nav.pid

    # wait for nav to exit
    nav.wait()
    print "done waiting for nav"

    print "killing web server"
    os.kill(web.pid, signal.SIGTERM)
    web.wait()
    print "server terminated"

main()
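If the server has to be started with shell=True, as in the original question, another option is to put the shell and everything it spawns into its own process group and signal the group, so the kill reaches the Python server behind the shell. A minimal sketch, assuming a POSIX system:

import os
import signal
import subprocess

# preexec_fn=os.setsid makes the shell the leader of a new process group
web = subprocess.Popen('python CustomSimpleHTTPServer.py', shell=True,
                       preexec_fn=os.setsid)

nav = subprocess.Popen('python navigationScript.py', shell=True)
nav.wait()

# signal the whole group: the shell plus the CustomSimpleHTTPServer.py it started
os.killpg(os.getpgid(web.pid), signal.SIGTERM)
web.wait()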

Python Paramiko timeout with long execution, need full output

There's lots of topics touching on part of the title, but nothing that quite satisfies the whole thing. I'm pushing a command on a remote server and need the full output after a long execution time, say 5 minutes or so. Using channel I was able to set a timeout, but when I read back stdout I got only a small portion of output. The solution seemed to be to wait for channel.exit_status_ready(). This worked on a successful call, but a failed call would never trigger the channel timeout. Having reviewed the docs, I theorize that's because the timeout only works on a read operation, and waiting for exit status doesn't qualify. Here's that attempt:
channel = ssh.get_transport().open_session()
channel.settimeout(timeout)
channel.exec_command(cmd)  # return on this is not reliable
while True:
    try:
        if channel.exit_status_ready():
            if channel.recv_ready():  # so use recv instead...
                output = channel.recv(1048576)
                break
            if channel.recv_stderr_ready():  # then check error
                error = channel.recv_stderr(1048576)
                break
    except socket.timeout:
        print("SSH channel timeout exceeded.")
        break
    except Exception:
        traceback.print_exc()
        break
Pretty, ain't it? Wish it worked.
My first attempt at a solution was to use time.time() to get a start, then check time.time() - start > timeout. This seems straightforward, but in my present version, I output time.time() - start with a fixed timeout that should trigger a break...and see differences that double and triple the timeout with no break occurring. To save space, I'll mention my third attempt, which I've rolled up with this one. I read on here about using select.select to wait for output, and noted in the documentation that there's a timeout there as well. As you'll see from the code below, I've mixed all three methods -- channel timeout, time.time timeout, and select timeout -- yet still have to kill the process. Here's the frankencode:
channel = ssh.get_transport().open_session()
channel.settimeout(timeout)
channel.exec_command(cmd)  # return on this is not reliable
print("{0}".format(cmd))
start = time.time()
while True:
    try:
        rlist, wlist, elist = select([channel], [], [],
                                     float(timeout))
        print("{0}, {1}, {2}".format(rlist, wlist, elist))
        if rlist is not None and len(rlist) > 0:
            if channel.exit_status_ready():
                if channel.recv_ready():  # so use recv instead...
                    output = channel.recv(1048576)
                    break
        elif elist is not None and len(elist) > 0:
            if channel.recv_stderr_ready():  # then check error
                error = channel.recv_stderr(1048576)
                break
        print("{0} - {1} = {2}".format(
            time.time(), start, time.time() - start))
        if time.time() - start > timeout:
            break
    except socket.timeout:
        print("SSH channel timeout exceeded.")
        break
    except Exception:
        traceback.print_exc()
        break
Here's some typical output:
[<paramiko.Channel 3 (open) window=515488 -> <paramiko.Transport at 0x888414cL (cipher aes128-ctr, 128 bits) (active; 1 open channel(s))>>], [], []
1352494558.42 - 1352494554.69 = 3.73274183273
The top line is [rlist, wlist, elist] from select; the bottom line shows time.time(), start, and their difference. I got this run to break by counting the iterations and breaking at the bottom of the try after looping 1000 times; timeout was set to 3 for the sample run. Which proves that we get through the try, but obviously none of the three mechanisms that should time out actually does.
Feel free to rip into the code if I've fundamentally misunderstood something. I'd like for this to be uber-Pythonic and am still learning.
Here's something that might help, though I'm still in the midst of testing. After struggling with timeouts of various types including a catch-all timeout for Python, and realizing that the real problem is that the server can't be trusted to terminate the process, I did this:
chan = ssh.get_transport().open_session()
cmd = "timeout {0} {1}\n".format(timeouttime, cmd)
chan.exec_command(cmd)
The server times out after timeouttime if cmd doesn't exit sooner, exactly as I'd wish, and the terminated command kills the channel. The only catch is that GNU coreutils must exist on the server. Failing that there are alternatives.
I'm having the same kind of issue. I think we can handle it with signalling. http://docs.python.org/2/library/signal.html
Here is a plain dumb example to show how it works.
import signal, time

def handler(signum, frame):
    pass

# Set the signal handler and a 2-second alarm
signal.signal(signal.SIGALRM, handler)
signal.alarm(2)

# This is where your operation that might hang goes
time.sleep(10)

# Disable the alarm
signal.alarm(0)
So here, the alarm is set to 2 seconds and time.sleep is called with 10 seconds. Of course, the alarm will be triggered before the sleep finishes. If you put some output after the time.sleep, you'll see that program execution resumes there.
If you want the control to continue somewhere else, wrap your hanging call in a try/except and have your handler function raise an exception.
Although I'm pretty sure it would work, I haven't tested it yet over paramiko calls.
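Applied to the paramiko case, the idea would look roughly like this; it is untested over paramiko (as noted above), SIGALRM only exists on Unix, having the handler raise a custom exception is an assumption, and ssh, cmd and timeout are assumed to be defined as in the question:

import signal

class CommandTimeout(Exception):
    pass

def _on_alarm(signum, frame):
    raise CommandTimeout()

signal.signal(signal.SIGALRM, _on_alarm)
signal.alarm(timeout)                # arm the alarm before the blocking call
try:
    stdin, stdout, stderr = ssh.exec_command(cmd)
    output = stdout.read()           # may block until the remote command finishes
except CommandTimeout:
    print("Command timed out after {0} seconds.".format(timeout))
finally:
    signal.alarm(0)                  # always disarm the alarm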
I had a lot of problems calling exec_command from the channel; instead, I call exec_command directly on the ssh connection and use the channel of the stdout. The code that works for me is myexec:
#!/usr/bin/env python
import paramiko
import select

def myexec(ssh, cmd, timeout):
    stdin, stdout, stderr = ssh.exec_command(cmd)
    channel = stdout.channel
    stdin.close()             # as I don't need stdin
    channel.shutdown_write()  # as I will not write to this channel

    stdout_chunks = []
    stdout_chunks.append(stdout.channel.recv(len(stdout.channel.in_buffer)))

    # chunked read to prevent stalls
    while not channel.closed or channel.recv_ready() or channel.recv_stderr_ready():
        # stop if channel was closed prematurely,
        # and there is no data in the buffers.
        got_chunk = False
        readq, _, _ = select.select([stdout.channel], [], [], timeout)
        for c in readq:
            if c.recv_ready():
                stdout_chunks.append(stdout.channel.recv(len(c.in_buffer)))
                got_chunk = True
            if c.recv_stderr_ready():
                # make sure to read stderr to prevent stall
                stderr.channel.recv_stderr(len(c.in_stderr_buffer))
                got_chunk = True
        if not got_chunk \
                and stdout.channel.exit_status_ready() \
                and not stderr.channel.recv_stderr_ready() \
                and not stdout.channel.recv_ready():
            # indicate that we're not going to read from this channel anymore
            stdout.channel.shutdown_read()
            # close the channel
            stdout.channel.close()
            break  # exit as remote side is finished and our buffers are empty

    # close all the pseudofiles
    stdout.close()
    stderr.close()
    return (''.join(stdout_chunks), stdout.channel.recv_exit_status())

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect('remotehost', username='remoteuser', password='remotepassword')
rtrval = myexec(ssh, 'remotecomand', 5*60)
ssh.close()
print rtrval
I use Debian 8 and Python 2.7.13, good luck.
