To run several scheduled Python scripts and capture any errors they raise, I'm using a controller module that opens a subprocess for each script with the following:
import multiprocessing
import os
import subprocess

def _process_script(nm_script, return_val):
try:
env = os.environ
p = subprocess.Popen(['python', nm_script], stdout = subprocess.PIPE,
shell=False, env=env, stderr=subprocess.PIPE)
p.wait()
(result, error) = p.communicate()
if p.returncode != 0:
cd_exec = -1
tx_exec = error.decode('utf-8')
else:
cd_exec = 1
tx_exec = None
except subprocess.CalledProcessError as e:
cd_exec = -1
tx_exec = e.output
return_val += [cd_exec, tx_exec]
def run_script(script):
return_val = multiprocessing.Manager().list()
#20 min timeout for the script
maxDelay = 1200
p = multiprocessing.Process(target=_process_script, args=(script.nm_script, return_val))
p.start()
p.join(maxDelay)
#Checks if the process is still alive
    if p.is_alive():
p.terminate()
p.join()
cd_exec = -1
tx_exec = 'Timeout'
else:
cd_exec = return_val[0]
tx_exec = return_val[1]
    return [cd_exec, tx_exec]
The code above has worked for all the scripts so far. There is, however, one Selenium data-scraping script that seems to hang after it finishes executing, and I can't find the problem. This is it:
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
def main():
path = r'\\path'
browser = webdriver.Chrome()
url = '...'
browser.get(url)
#...Several steps that do the necessary scraping and export data to the path
#I have added the following steps to kill the browser instance:
browser.get('about:blank')
browser.find_element_by_tag_name('body').send_keys(Keys.ESCAPE)
time.sleep(2)
browser.quit()
del browser
return
The process executes successfully: the data is exported as expected, and the browser instance that was opened is closed. However, the main script keeps displaying the DevTools message until the 20-minute timeout is reached:
DevTools listening on ws://...
I have several other Selenium data-scraping scripts that work in a similar manner to this one, without any trouble so far.
I'm completely lost on this one; what could be the issue?
I see that in the code above the author uses Popen() directly and then calls p.wait(), where p is the Popen object.
I use subprocess.run() in my code, and the Python library reference says that run() waits for the process to complete. Is p.wait() necessary?
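(For reference, a minimal sketch of the distinction, with some_script.py as a stand-in name: Popen() returns immediately, and it is communicate() or wait() that blocks until the child exits, so an explicit wait() before communicate() is redundant; subprocess.run() wraps the whole pattern in one call.)

import subprocess

# Popen returns immediately; communicate() waits AND drains the pipes.
p = subprocess.Popen(['python', 'some_script.py'],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
result, error = p.communicate()  # no separate p.wait() needed

# subprocess.run() (Python 3.5+) does the same in one call.
completed = subprocess.run(['python', 'some_script.py'],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(completed.returncode)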
I want code like this:
if True:
run('ABC.PY')
else:
    if ScriptRunning('ABC.PY'):
        stop('ABC.PY')
    run('ABC.PY')
Basically, I want to run a file, let's say abc.py, and then, based on some condition, stop it and run it again from another Python script. Is that possible?
I am using Windows.
You can use Python's Popen objects to run your scripts as child processes.
So run('ABC.PY') would be p = Popen(["python", "ABC.PY"])
if ScriptRunning('ABC.PY') would be if p.poll() is None
stop('ABC.PY') would be p.kill()
This is a very basic example of what you are trying to achieve.
Please check out the subprocess.Popen docs to fine-tune your logic for running the script:
import subprocess
import shlex
import time
def run(script):
scriptArgs = shlex.split(script)
commandArgs = ["python"]
commandArgs.extend(scriptArgs)
procHandle = subprocess.Popen(commandArgs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return procHandle
def isScriptRunning(procHandle):
return procHandle.poll() is None
def stopScript(procHandle):
procHandle.terminate()
time.sleep(5)
# Forcefully terminate the script
if isScriptRunning(procHandle):
procHandle.kill()
def getOutput(procHandle):
# stderr will be redirected to stdout due "stderr=subprocess.STDOUT" argument in Popen call
stdout, _ = procHandle.communicate()
returncode = procHandle.returncode
return returncode, stdout
def main():
procHandle = run("main.py --arg 123")
time.sleep(5)
    print isScriptRunning(procHandle)  # True while main.py is still running
stopScript(procHandle)
print getOutput(procHandle)
if __name__ == "__main__":
main()
One thing you should be aware of is stdout=subprocess.PIPE.
If your Python script produces a very large amount of output, the OS pipe buffer can fill up, causing the child script to block until .communicate() is called on the handle.
To avoid this, pass a file handle as stdout, like this:
fileHandle = open("main_output.txt", "w")
subprocess.Popen(..., stdout=fileHandle)
This way, the output of the Python process is dumped into the file. (You will have to modify the getOutput() function for this too.)
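A sketch of how getOutput() might be adapted for that (assuming the same main_output.txt used above):

def getOutput(procHandle, outputPath="main_output.txt"):
    # Wait for the process to exit, then read what it wrote to the file.
    procHandle.wait()
    with open(outputPath) as outputFile:
        stdout = outputFile.read()
    return procHandle.returncode, stdout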
import subprocess
process = None
def run_or_rerun(flag):
global process
if flag:
assert(process is None)
process = subprocess.Popen(['python', 'ABC.PY'])
        process.wait()  # block until the script finishes
else:
if process.poll() is None: # it is still running
process.terminate() # terminate process
process = subprocess.Popen(['python', 'ABC.PY']) # rerun
        process.wait()  # block until the script finishes
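Hypothetical usage of the helper above (a sketch):

run_or_rerun(True)   # first launch of ABC.PY; blocks until it exits
run_or_rerun(False)  # relaunch: terminate it if still running, then run again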
I'm trying to make a program that does something every hour, then runs itself again, and then kills itself.
The problem I'm having is that the program does not kill itself completely; I can see that the process is still there in System Monitor.
Over time I just get more and more python2 processes that take up RAM.
I am using Python 2.7.12 on a 64 bit machine running Arch Linux
This is the code I'm running
import os
import sys
import time

import pyautogui
from selenium import webdriver

def GoToWebsite(username, password):
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)
chromeBrowser = webdriver.Chrome('/home/daniel/Dropbox/Code/BrowserDrivers/chromedriver',
chrome_options=chrome_options)
chromeBrowser.get('http://www.website.com')
while True:
try:
picX, picY = pyautogui.locateCenterOnScreen(currentPythonDirectory + '/picture.png')
break
except:
pass
pyautogui.click(picX, picY)
time.sleep(3)
url = chromeBrowser.command_executor._url
session_id = chromeBrowser.session_id
return url, session_id
currentPythonDirectory = os.path.dirname(os.path.realpath(__file__))
websiteUrl, websiteSessionId = GoToWebsite("username", "password")
#Do Stuff
originalStartTime = time.time()
while True:
if (time.time() - originalStartTime) >= 3: # 3600:
chromeDriver = webdriver.Remote(command_executor=websiteUrl, desired_capabilities={})
chromeDriver.session_id = websiteSessionId
chromeDriver.quit()
try:
chromeDriver.close()
except:
pass
os.system("python2 " + currentPythonDirectory + "/PythonScript.py")
time.sleep(1)
sys.exit(1)
break
#Other Stuff
I had exactly the same issue when trying to make a better version of crontab. I've adapted my code so you can understand the approach. With this method you will not run into any max-recursion problems.
import os, re, subprocess

allActivePythonProcesses = os.popen('pgrep -lf python').read()
# Look for a line like "<pid> python myRepeatingFile.py"
processMatch = re.search(r'(\d+) python myRepeatingFile\.py', allActivePythonProcesses)
if processMatch:
    # Store the Process ID
    convPID = processMatch.group(1)
    print "Your Python File is running at PID: " + convPID
else:
    print "Process Controller: Your Python file is not running"
    try:
        print "...Calling your Python file"
        subprocess.check_call('python myRepeatingFile.py', shell=True)
    except subprocess.CalledProcessError as e:
        print "Process Call Error: " + str(e)
If you want it to execute 24/7, just put it into a while True loop; import the time module and sleep if you want to limit the polling rate.
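For example (a sketch; check_and_restart is a hypothetical function wrapping the pgrep logic above):

import time

while True:
    check_and_restart()  # hypothetical wrapper around the pgrep logic above
    time.sleep(10)       # limit how often the process list is polled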
As far as I am aware, a subprocess launched with os.system() blocks: Python waits for the command to complete before executing any further code. Adding a print statement after the os.system() call will show that the program never reaches sys.exit(1).
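If the goal is to hand off to a fresh copy of the script and then exit, one alternative is to launch the new copy without waiting on it, for example with subprocess.Popen (a sketch reusing currentPythonDirectory from the question):

import subprocess
import sys

# Launch the replacement copy without blocking on it, then exit this process.
subprocess.Popen(["python2", currentPythonDirectory + "/PythonScript.py"])
sys.exit(0)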
I run a program test.py.
Since it crashes frequently, I import subprocess to restart it when it stops.
Sometimes I find subprocess can't successfully restart it.
Hence, I force the program to restart every 60 minutes.
But I find that there are sometimes two test.py processes running simultaneously.
What's wrong with my code, and how do I fix it?
I use the Windows 7 OS.
Please check the following code; thanks in advance:
import subprocess
import time
from datetime import datetime
p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
minutes = 1
total_time = 0
while True:
now = datetime.now()
#periodly restart
total_time += 1
if total_time % 100 == 0:
try:
p.kill()
except Exception as e:
terminated = True
finally:
p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
#check and restart if it stops
try:
terminated = p.poll()
except Exception as e:
terminated = True
if terminated:
p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
time.sleep(minutes * 60)
While I don't agree at all with your design, the specific problem is here:
except Exception as e:
terminated = True
finally:
p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
In the case that an Exception was thrown, you're setting terminated to true, but then immediately restarting the subprocess. Then, later, you check:
if terminated:
p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
At this point, terminated is true, so it starts a new subprocess. However, it already had done that in the finally block.
Really, what you should do is simply not bother restarting it during the kill attempt:
try:
p.kill()
except Exception:
# We don't care, just means it was already dead
pass
finally:
# Either the process is dead, or we just killed it. Either way, need to restart
terminated = True
Then your if terminated clause will properly restart the process and you won't have a duplicate.
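Putting that together, one way to arrange the loop so the restart happens in exactly one place (a sketch, keeping the Popen call from the question):

import subprocess
import time

p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
terminated = False
total_time = 0
while True:
    total_time += 1
    # Periodic restart: only kill here; the restart happens in one place below.
    if total_time % 100 == 0:
        try:
            p.kill()
        except Exception:
            pass  # already dead
        finally:
            terminated = True
    # Restart if we just killed it, or if it stopped on its own.
    if terminated or p.poll() is not None:
        p = subprocess.Popen(['python.exe', r'D:\test.py'], shell=True)
        terminated = False
    time.sleep(60)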
I have a python system call that takes a while to finish.
os.system("call_that_takes_quite_some_time")
In the meantime I want to keep printing a message that says "waiting..." every now and then until the os.system call returns 0 or an error.
How do I do this? Is there something in python that I can "listen" to in a while loop?
os.system waits until your command's execution is complete.
With subprocess.Popen you can check the output or errors as they arrive. Popen gives you a handle, and you can check the return code with wait() to find out whether the command succeeded or failed. For example:
proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
while proc.poll() is None:
    print proc.stdout.readline()  # output from your command, or print your own message
commandResult = proc.wait()  # catch the return code
You can use threading
import os
import time
import threading
def waiter():
waiter.finished = False
while not waiter.finished:
print 'Waiting...'
time.sleep(1)
os_thread = threading.Thread(target=waiter)
os_thread.daemon = True
os_thread.start()
return_value = os.system('sleep 4.9')
return_value >>= 8 # The return code is specified in the second byte
waiter.finished = True
time.sleep(3)
print 'The return value is', return_value
This will print a "Waiting..." message every second, and it stops once waiter.finished is set to True (in this case there will be five "Waiting..." messages).
But os.system is not recommended; the documentation recommends using the subprocess module instead.
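The same idea needs no thread with subprocess, since poll() is non-blocking (a sketch; 'sleep 4.9' is just a stand-in command):

import subprocess
import time

proc = subprocess.Popen(['sleep', '4.9'])
while proc.poll() is None:  # still running
    print 'Waiting...'
    time.sleep(1)
print 'The return value is', proc.returncode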
I run a subprocess using:
p = subprocess.Popen("subprocess",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE)
This subprocess could either exit immediately with an error on stderr, or keep running. I want to detect either of these conditions - the latter by waiting for several seconds.
I tried this:
SECONDS_TO_WAIT = 10
select.select([],
[p.stdout, p.stderr],
[p.stdout, p.stderr],
SECONDS_TO_WAIT)
but it just returns:
([],[],[])
on either condition. What can I do?
Have you tried using the Popen.poll() method? You could just do this:
p = subprocess.Popen("subprocess",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE)
time.sleep(SECONDS_TO_WAIT)
retcode = p.poll()
if retcode is not None:
# process has terminated
This will cause you to always wait 10 seconds, but if the failure case is rare this would be amortized over all the success cases.
Edit:
How about:
t_nought = time.time()
seconds_passed = 0
while p.poll() is None and seconds_passed < 10:
    seconds_passed = time.time() - t_nought
if seconds_passed >= 10:
#TIMED OUT
This has the ugliness of being a busy wait, but I think it accomplishes what you want.
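A short sleep inside the loop keeps the busy wait from pegging a core (a sketch of the same check):

import time

t_nought = time.time()
while p.poll() is None and time.time() - t_nought < 10:
    time.sleep(0.1)  # yield the CPU between polls
if p.poll() is None:
    pass  # TIMED OUT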
Additionally looking at the select call documentation again I think you may want to change it as follows:
SECONDS_TO_WAIT = 10
select.select([p.stderr],
[],
[p.stdout, p.stderr],
SECONDS_TO_WAIT)
Since you would typically want to read from stderr, you want to know when it has something available to read (i.e., the failure case).
I hope this helps.
This is what I came up with. It works whether or not you need a timeout on the process, but with a semi-busy loop.
import os
import signal
import subprocess
import time

def runCmd(cmd, timeout=None):
    '''
    Will execute a command, read the output and return it back.

    @param cmd: command to execute
    @param timeout: process timeout in seconds
    @return: a tuple of three: first stdout, then stderr, then exit code
    @raise OSError: on missing command or if a timeout was reached
    '''
ph_out = None # process output
ph_err = None # stderr
ph_ret = None # return code
p = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# if timeout is not set wait for process to complete
if not timeout:
ph_ret = p.wait()
else:
fin_time = time.time() + timeout
        while p.poll() is None and fin_time > time.time():
time.sleep(1)
# if timeout reached, raise an exception
if fin_time < time.time():
# starting 2.6 subprocess has a kill() method which is preferable
# p.kill()
os.kill(p.pid, signal.SIGKILL)
raise OSError("Process timeout has been reached")
ph_ret = p.returncode
ph_out, ph_err = p.communicate()
return (ph_out, ph_err, ph_ret)
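For example (hypothetical commands; 'echo hello' and 'sleep 30' are stand-ins):

out, err, rc = runCmd('echo hello')
print out, err, rc

try:
    runCmd('sleep 30', timeout=5)
except OSError as e:
    print 'Timed out:', e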
Here is a nice example:
from threading import Timer
from subprocess import Popen, PIPE
proc = Popen("ping 127.0.0.1", shell=True)
t = Timer(60, proc.kill)
t.start()
proc.wait()
Using select and sleeping doesn't really make much sense. select (or any kernel polling mechanism) is inherently useful for asynchronous programming, but your example is synchronous. So either rewrite your code to use the normal blocking fashion or consider using Twisted:
from twisted.internet.utils import getProcessOutputAndValue
from twisted.internet import reactor
def stop(r):
reactor.stop()
def eb(reason):
reason.printTraceback()
def cb(result):
stdout, stderr, exitcode = result
# do something
getProcessOutputAndValue('/bin/someproc', []
).addCallback(cb).addErrback(eb).addBoth(stop)
reactor.run()
Incidentally, there is a safer way of doing this with Twisted by writing your own ProcessProtocol:
http://twistedmatrix.com/projects/core/documentation/howto/process.html
Python 3.3
import subprocess as sp
try:
sp.check_call(["/subprocess"], timeout=10,
stdin=sp.DEVNULL, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
except sp.TimeoutExpired:
    pass  # timeout (the subprocess is killed at this point)
except sp.CalledProcessError:
    pass  # subprocess failed before timeout
else:
    pass  # subprocess ended successfully before timeout
See TimeoutExpired docs.
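On Python 3.5+, subprocess.run() expresses the same thing a little more directly (a sketch; check=True turns a nonzero exit into CalledProcessError):

import subprocess as sp

try:
    sp.run(["/subprocess"], timeout=10, check=True,
           stdin=sp.DEVNULL, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
except sp.TimeoutExpired:
    pass  # timeout: run() kills the subprocess before raising
except sp.CalledProcessError:
    pass  # subprocess failed before the timeout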
If, as you said in the comments above, you're just tweaking the output each time and re-running the command, would something like the following work?
from threading import Timer
import subprocess
WAIT_TIME = 10.0
def check_cmd(cmd):
p = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
    def _check():
        if p.poll() is None:  # still running after the wait period
            print cmd+" did not quit within the given time period."
# check whether the given process has exited WAIT_TIME
# seconds from now
Timer(WAIT_TIME, _check).start()
check_cmd('echo')
check_cmd('python')
The code above, when run, outputs:
python did not quit within the given time period.
The only downside of the above code that I can think of is the potentially overlapping processes as you keep running check_cmd.
This is a paraphrase of Evan's answer, but it takes into account the following:
Explicitly canceling the Timer object: if the Timer interval is long and the process exits of its own accord, a leftover timer could keep your script hanging around :(
There is an intrinsic race in the Timer approach (the timer may attempt to kill the process just after the process has already died, and on Windows this raises an exception).
import os
from subprocess import Popen
from threading import Timer

timeout_in_sec = 10  # example value

DEVNULL = open(os.devnull, "wb")
process = Popen("c:/myExe.exe", stdout=DEVNULL) # no need for stdout
def kill_process():
""" Kill process helper"""
try:
process.kill()
except OSError:
pass # Swallow the error
timer = Timer(timeout_in_sec, kill_process)
timer.start()
process.wait()
timer.cancel()