Automatic process monitoring/management with Python

Right, so I have a python process which is running constantly, maybe even on Supervisor. What is the best way to achieve the following monitoring?
Send an alert and restart if the process has crashed. I'd like to automatically receive a signal every time the process crashes and auto restart it.
Send an alert and restart if the process has gone stale, i.e. hasn't crunched anything for say 1 minute.
Restart on demand
I'd like to achieve all of the above through Python. I know Supervisord will do most of it, but I want to see if it can be done through Python itself.

I think what you are looking for is Supervisor Events: http://supervisord.org/events.html
Also look at Superlance; it's a package of plugin utilities for monitoring and controlling processes that run under Supervisor: https://superlance.readthedocs.org/en/latest/
You can configure things like crash emails, crash SMS, memory consumption alerts, HTTP hooks, etc.
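If you specifically want the "through Python" part, a Supervisor event listener is itself just a Python process that reads events from stdin. A minimal sketch using supervisor's childutils helper (send_alert is a hypothetical placeholder for your mail/SMS code):

import sys
from supervisor import childutils

def main():
    while True:
        # blocks until supervisord sends the next event
        headers, payload = childutils.listener.wait(sys.stdin, sys.stdout)
        if headers['eventname'] == 'PROCESS_STATE_EXITED':
            # payload carries processname, groupname, the expected flag, etc.
            send_alert(payload)  # hypothetical alerting helper
        # acknowledge, so supervisord sends the next event
        childutils.listener.ok(sys.stdout)

if __name__ == '__main__':
    main()

Register it in supervisord.conf with an [eventlistener:x] section subscribed to PROCESS_STATE_EXITED; supervisord itself handles the restart if autorestart is set.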

Well, if you want a homegrown solution, this is what I could come up with.
Maintain the process state, both actual and expected, in Redis. You can monitor it however you want by making a web interface that checks the actual state and changes the expected state.
Run the python script from crontab to check the state and take appropriate action when required. Here it checks every 3 seconds and uses Amazon SES to alert admins via email.
DISCLAIMER: The code has not been run or tested. I just wrote it now, so prone to errors.
Open the crontab file:
$ crontab -e
Add this line at the end of it, to make run_monitor.sh run every minute:
# Runs this process every 1 minute.
*/1 * * * * bash ~/path/to/run_monitor.sh
run_monitor.sh runs the python script in a loop, once every 3 seconds.
This is done because crontab's minimum interval is 1 minute. We want to check the process every 3 seconds, 20 times (3 sec * 20 = 1 minute), so the script runs for a minute before crontab launches it again.
run_monitor.sh
for count in {1..20}
do
    cd '/path/to/check_status'
    /usr/local/bin/python check_status.py "myprocessname" "python startcommand.py"
    sleep 3  # check every 3 seconds
done
Here I have assumed three states (expected vs. actual):
state 0 = stop or stopped
state -1 = restart
state 1 = run or running
You can add more states as per your convenience; a stale process could also be a state.
I have used the process name to kill, start, or check processes; you can easily modify it to read specific PID files instead (a sketch of that variant follows).
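For example, a hypothetical PID-file variant of the liveness check could look like this (the path handling is illustrative):

import os

def pid_running(pidfile_path):
    # signal 0 doesn't touch the process; it only checks that the PID exists
    try:
        pid = int(open(pidfile_path).read().strip())
        os.kill(pid, 0)
        return True
    except (IOError, ValueError, OSError):
        return False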
check_status.py
import subprocess
import sys

import boto.ses
import redis

def send_mail(recipients, message_subject, message_body):
    """
    Uses AWS SES to send mail.
    """
    SENDER_MAIL = 'xxx@yyy.com'
    AWS_KEY = 'xxxxxxxxxxxxxxxxxxx'
    AWS_SECRET = 'xxxxxxxxxxxxxxxxxxx'
    AWS_REGION = 'xx-xxxx-x'
    mail_conn = boto.ses.connect_to_region(AWS_REGION,
                                           aws_access_key_id=AWS_KEY,
                                           aws_secret_access_key=AWS_SECRET)
    mail_conn.send_email(SENDER_MAIL, message_subject, message_body,
                         recipients, format='html')
    return True
class Shell(object):
    '''
    Convenient wrapper over subprocess.
    '''
    def __init__(self, command, raise_on_error=True):
        self.command = command
        self.raise_on_error = raise_on_error
        self.output = None
        self.error = None
        self.return_code = None

    def run(self):
        try:
            process = subprocess.Popen(self.command, shell=True,
                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            self.output, self.error = process.communicate()
            self.return_code = process.returncode
            if self.return_code and self.raise_on_error:
                print self.error
                raise Exception("Error while executing %s::%s" % (self.command, self.error))
        except subprocess.CalledProcessError:
            print self.error
            raise Exception("Error while executing %s::%s" % (self.command, self.error))
redis_client = redis.Redis('xxxredis_hostxxx')

def get_state(process_name, state_type):  # state_type will be 'expected' or 'actual'.
    state = redis_client.get('{process_name}_{state_type}_state'.format(
        process_name=process_name, state_type=state_type))
    return int(state) if state is not None else None

def set_state(process_name, state_type, state):  # state_type will be 'expected' or 'actual'.
    return redis_client.set('{process_name}_{state_type}_state'.format(
        process_name=process_name, state_type=state_type), state)

def get_stale_state(process_name):
    state = redis_client.get('{process_name}_stale_state'.format(process_name=process_name))  # value could be 0 or 1
    return int(state) if state is not None else None
def check_running_status(process_name):
    # grep -v grep keeps the grep process itself out of the count.
    command = "ps -ef | grep {process_name} | grep -v grep | wc -l".format(process_name=process_name)
    shell = Shell(command=command)
    shell.run()
    if shell.output.strip() == '0':
        return False
    return True

def start_process(start_command):  # pass start_command with a '&' so the process starts in the background.
    shell = Shell(command=start_command)
    shell.run()

def stop_process(process_name):
    # Double braces keep str.format from treating awk's {print $2} as a placeholder.
    command = "ps -ef | grep {process_name} | grep -v grep | awk '{{print $2}}'".format(process_name=process_name)
    shell = Shell(command=command, raise_on_error=False)
    shell.run()
    if not shell.output:
        return
    process_ids = shell.output.strip().split()
    for process_id in process_ids:
        command = 'kill {process_id}'.format(process_id=process_id)
        shell = Shell(command=command, raise_on_error=False)
        shell.run()
def check_process(process_name, start_command):
    expected_state = get_state(process_name, 'expected')
    if expected_state == 0:  # stop
        stop_process(process_name)
        set_state(process_name, 'actual', 0)
    elif expected_state == -1:  # restart
        stop_process(process_name)
        set_state(process_name, 'actual', 0)
        start_process(start_command)
        set_state(process_name, 'actual', 1)
        set_state(process_name, 'expected', 1)  # set expected back to 1 so we don't keep on restarting.
    elif expected_state == 1:
        running = check_running_status(process_name)
        if not running:
            set_state(process_name, 'actual', 0)
            send_mail(recipients=["abc@admin.com", "xyz@admin.com"],
                      message_subject="Alert",
                      message_body="Your process is down. Trying to restart.")
            start_process(start_command)
            running = check_running_status(process_name)
            if running:
                send_mail(recipients=["abc@admin.com", "xyz@admin.com"],
                          message_subject="Alert",
                          message_body="Your process was restarted.")
                set_state(process_name, 'actual', 1)
            else:
                send_mail(recipients=["abc@admin.com", "xyz@admin.com"],
                          message_subject="Alert",
                          message_body="Your process could not be restarted.")
if __name__ == '__main__':
    args = sys.argv[1:]
    process_name = args[0]
    start_command = args[1]
    check_process(process_name, start_command)
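With this in place, "restart on demand" is just a state flip. For example, from a Python shell or a minimal web handler (assuming the same key naming as above):

import redis

r = redis.Redis('xxxredis_hostxxx')
r.set('myprocessname_expected_state', -1)  # next cron pass stops and restarts it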

Related

How do I avoid creating multiple python process w/ os.system?

When the two consecutive if statements execute, my python program shuts down, but that's not what I want. I want it to loop again and not exit the script. The problem I found with simply looping is that a new python process is created every time the os.system(command) line runs. I've individually tried the following:
os.exit()
sys.exit()
def kill_process():
    pid = os.getpid()
    sig = signal.SIGKILL
    os.kill(pid, sig)
All of those options were individually paired with os.system("python3 script.py"), yet none of them did the trick; every scenario simply exits the script.
How do I make it so that when os.system(command) runs, the loop just continues without killing/exiting the script and without creating another python process every time?
Here's the function in question:
def bluetoothLoop():
    while True:
        time.sleep(5)
        BT_state = subprocess.run(['''system_profiler SPBluetoothDataType'''], shell=True,
                                  capture_output=True, encoding="utf", errors="ignore")
        BT_state = BT_state.stdout
        sound = "Blow"
        title = "TURN OFF BLUETOOTH"
        message = "-------------------------"
        if "State: On" in BT_state and not " Connected:" in BT_state:
            time.sleep(1)
            BT_state = subprocess.run(['''system_profiler SPBluetoothDataType'''], shell=True,
                                      capture_output=True, encoding="utf", errors="ignore")
            BT_state = BT_state.stdout
            if "State: On" in BT_state and not " Connected:" in BT_state:
                command = f'''
                osascript -e 'display notification "{message}" with title "{title}" sound name "{sound}"'
                '''
                os.system(command)
                os.exit()
        time.sleep(1)
        notify.restart()
Thanks a bunch, I've been struggling with this for a while now.
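One way around this, sketched under the assumption that the goal is just to show the notification and keep looping: run osascript directly through subprocess.run instead of os.system, and don't call any exit function at all. No extra Python interpreter is spawned, so there is nothing to kill.

import subprocess

def notify(title, message, sound):
    # invokes osascript directly; no shell, no new Python process
    script = 'display notification "{}" with title "{}" sound name "{}"'.format(message, title, sound)
    subprocess.run(['osascript', '-e', script])

Inside bluetoothLoop, replacing os.system(command) and os.exit() with a single notify(title, message, sound) call lets the while loop simply continue.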

Android adb sendevent does not execute events correctly

I am trying to record screen events and replay them later.
I wrote a small python script which listens for events, converts them from hexadecimal to decimal, waits for 5 seconds, and replays the recorded events with adb sendevent.
But for some reason sendevent never executes correctly: sometimes it touches the wrong coordinates, sometimes it touches for too long, and there are also problems with the delays between touches.
I can't understand why this is happening. What I expect is that it should just replay, since getevent captured all the necessary data(?)
import subprocess
import threading
import os
from time import sleep

eventsToSend = []

def eventSender():
    while True:
        if len(eventsToSend) > 200:
            print("starting to execute in 5 seconds...")
            sleep(5)
            for command in eventsToSend:
                #with open('output.txt', 'a') as f1:
                #    f1.write(command + os.linesep)
                subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            print("done")
            break

eventSenderStarter = threading.Thread(target=eventSender)
eventSenderStarter.start()

def runProcess(exe):
    p = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    while True:
        # returns None while subprocess is running
        retcode = p.poll()
        line = p.stdout.readline()
        yield line
        if retcode is not None or len(eventsToSend) > 200:
            print("Executing events...")
            break

print("Listening for events...")
for line in runProcess('adb shell -- getevent /dev/input/event1'.split()):
    myLine = line.decode().strip()
    splittedLine = myLine.split(" ")
    decimalString = ""
    for index, hexadecimal in enumerate(splittedLine):
        decimal = int(hexadecimal, 16)
        if index == 0:
            decimalString = decimalString + str(decimal)
        if index > 0:
            decimalString = decimalString + " " + str(decimal)
    eventsToSend.append("adb shell sendevent /dev/input/event1 " + decimalString)
Just connect your phone to the PC, run this script, and play with your screen; after 200 events it will start the replay (be careful, because it might press the wrong coordinates :P). In my case the device was
/dev/input/event1
so you might need to edit event1 for your testing.
Consider adding a small delay between the events you send, e.g. time.sleep(0.5). You may have to change the value of 0.5; try some values until it works.
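Concretely, the replay loop in eventSender above could be throttled like this (0.5 is only a starting value):

for command in eventsToSend:
    subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    sleep(0.5)  # give the device time to apply each event before sending the next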

Tracking application launched with a python file

I've encountered a situation where I thought it would be a good idea to create a launcher for an application which I tend to run several instances of. This is to ensure that the application and I get access to the desired environment variables, which can be provided and set for each instance.
import os
import subprocess

def launch():
    """
    Launches application.
    """
    # create environment
    os.environ['APPLICATION_ENVIRON'] = 'usr/path'
    # launch application
    application_path = 'path/to/application'
    app = subprocess.Popen([application_path])
    pid = app.pid
    app.wait()
    print 'done with process: {}'.format(pid)

if __name__ == '__main__':
    launch()
I want to be able to track the applications. Do I dump the pids in a file and remove them when the process closes? Do I launch a service that I communicate with?
Being fairly new to programming in general, I don't know if I'm missing a term in the lingo or just thinking about it wrong. I was reading up on daemons and services to track the applications and couldn't come up with a proper answer. Put simply, I'm a bit lost on how to approach this.
What you're doing already seems reasonable. I'd probably extend it to something like this:
import os
import subprocess

def launch_app():
    os.environ['APPLICATION_ENVIRON'] = 'usr/path'
    application_path = 'path/to/application'
    return subprocess.Popen([application_path])

def _purge_finished_apps(apps):
    still_running = set()
    for app in apps:
        return_code = app.poll()
        if return_code is not None:
            print " PID {} no longer running (return code {})".format(app.pid, return_code)
        else:
            still_running.add(app)
    return still_running

def ui():
    apps = set()
    while True:
        print
        print "1. To launch new instance"
        print "2. To view all instances"
        print "3. To exit, terminating all running instances"
        print "4. To exit, leaving instances running"
        opt = int(raw_input())
        apps = _purge_finished_apps(apps)
        if opt == 1:
            app = launch_app()
            apps.add(app)
            print " PID {} launched".format(app.pid)
        elif opt == 2:
            if not apps:
                print "There are no instances running"
            for app in apps:
                print " PID {} running".format(app.pid)
        elif opt == 3:
            for app in apps:
                print "Terminating PID {}".format(app.pid)
                app.terminate()
            for app in apps:
                app.wait()
                print "PID {} finished".format(app.pid)
            return
        elif opt == 4:
            return

if __name__ == "__main__":
    ui()
Here's a code sample to help illustrate how it might work for you.
Note that you can capture the stdout from the processes in real time in your host script; this might be useful if the program you're running uses the console.
(As a side note on the example: You probably would want to change the IP addresses: these are from my internal network. Be kind to any external sites you might want to use, please. Launching thousands of processes with the same target might be construed as a hostile gesture.)
(An additional side note on this example: It is conceivable that I will lose some of my time samples when evaluating the output pipe...if the subprocess writes it to the console piecemeal, it is conceivable that I might occasionally catch it exactly as it is partway done - meaning I might get half of the "time=xxms" statement, causing the RE to miss it. I've done a poor job of checking for this possibility (i.e. I couldn't be bothered for the example). This is one of the hazards of multiprocess/multithreaded programming that you'll need to be aware of if you do it much.)
# Subprocessor.py
#
# Launch a console application repeatedly and test its state.
#
import subprocess
import re

NUMBER_OF_PROCESSES_TO_OPEN = 3
DELAY_BETWEEN_CHECKS = 5

CMD = "ping"
ARGS = ([CMD, "-n", "8", "192.168.0.60"], [CMD, "-n", "12", "192.168.0.20"], [CMD, "-n", "4", "192.168.0.21"])

def go():
    processes = {}
    stopped = [False, False, False]
    samples = [0] * NUMBER_OF_PROCESSES_TO_OPEN
    times = [0.0] * NUMBER_OF_PROCESSES_TO_OPEN
    print "Opening processes..."
    for i in range(NUMBER_OF_PROCESSES_TO_OPEN):
        # The next line creates a subprocess; this is a non-blocking call so
        # the program will complete it more or less instantly.
        newprocess = subprocess.Popen(args=ARGS[i], stdout=subprocess.PIPE)
        processes[i] = newprocess
        print "  process {} open, pid == {}.".format(i, processes[i].pid)
    # Build a regular expression to work with the stdout.
    gettimere = re.compile("time=([0-9]*)ms")
    while len(processes) > 0:
        for i, p in processes.iteritems():
            # Popen.poll() asks the process if it is still running - it is
            # a non-blocking call that completes instantly.
            isrunning = (p.poll() is None)
            data = p.stdout.readline()  # Get the stdout from the process.
            matchobj = gettimere.search(data)
            if matchobj:
                for time in matchobj.groups():
                    samples[i] += 1
                    times[i] = (times[i] * (samples[i] - 1) + int(time)) / samples[i]
            # If the process was stopped before we read the last of the
            # data from its output pipe, flag it so we don't keep messing
            # with it.
            if not isrunning:
                stopped[i] = True
                print "Process {} stopped, pid == {}, average time == {}".format(i, processes[i].pid, times[i])
        # This code segment deletes the stopped processes from the dict
        # so we don't keep checking them (and know when to stop the main
        # program loop).
        for i in range(len(stopped)):
            if stopped[i] and processes.has_key(i):
                del processes[i]

if __name__ == '__main__':
    go()

Python send a command to Xterm

I have a python script that opens up files for me in emacs, and to do that it calls a process in xterm like so:
"""AutoEmacs Document"""
# imports
import sys
import os
import psutil
import subprocess
from argparse import ArgumentParser
# constants
xlaunch_config = "C:\\cygwin64\\home\\nalis\\Documents\\experiments\\emacs\\Autoemacs\\config.xlaunch"
script = "xterm -display :0 -e emacs-w32 --visit {0}"
# exception classes
# interface functions
# classes
# internal functions & classes
def xlaunch_check():
# checks if an instance of Xlaunch is running
xlaunch_state = []
for p in psutil.process_iter(): #list all running process
try:
if p.name() == 'xlaunch.exe':# once xlaunch is found make an object
xlaunch_state.append(p)
except psutil.Error: # if xlaunch is not found return false
return False
return xlaunch_state != [] #double checks that xlaunch is running
def xlaunch_run(run):
if run == False:
os.startfile(xlaunch_config)
return 0 #Launched
else:
return 1 #Already Running
def emacs_run(f):
subprocess.Popen(script.format(f))
return 0#Launched Sucessfully
def sysarg():
f = sys.argv[1]
il = f.split()
l = il[0].split('\\')
return l[(len(l) - 1)]
def main():
f = sysarg()
xlaunch_running = xlaunch_check()
xlaunch_run(xlaunch_running)
emacs_run(f)
return 0
if __name__ == '__main__':
status = main()
sys.exit(status)
and it works fairly well, with the occasional bug. But I want to make it a little more versatile by having Python send commands to the xterm console after it has launched, like "-e emacs-w32" and such, based on the input it receives. I've already tried something like this:
# A test to send Xterm commands
import subprocess
xterm = subprocess.Popen('xterm -display :0', shell=True)
xterm.communicate('-e emacs')
but that doesn't seem to do anything besides launch the terminal. I've done some research on the matter, but it has only left me confused. Some help would be very much appreciated.
communicate() writes to the process's standard input, not to its argument list, so xterm never sees the -e option; pass it as an argument when you launch the process instead. To open emacs in a terminal emulator, use this:
Linux:
Popen(['xterm', '-e', 'emacs'])
Windows:
Popen(['cmd', '/K', 'emacs'])
For cygwin use:
Popen(['mintty', '--hold', 'error', '--exec', 'emacs'])

How to kill a process which is create by popen and uses communicate()?

I have a program, P1, which I need to run about 24*20000 times with different inputs. The problem is that P1 hangs, and I have to force it to quit manually (kill it). My first solution was writing a python script to call P1, pass it the proper input, and receive the output using popen and communicate. But due to the nature of communicate, which waits for the output, I cannot kill the process as long as it is waiting for the response. I am on Windows.
I tried the multiprocessing module, but it only runs P1 and fails to send the input to it. I am suspicious about not using pipes in popen and tried a little, but I guess I can't receive the output from P1 that way.
Any ideas?
import subprocess

# This code runs XLE and passes the intended input to it automatically
def startExe(programPath, programArgStr):
    p = subprocess.Popen(programPath, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    p.stdin.write(programArgStr)
    p.communicate()[0]
    # Need to kill the process here if it takes longer than it should

def main(folder):
    ..
    #loop
    programArgStr = "create-parser" + path1 + ";cd " + path2 + "/s" + command(counter) + ";exit"
    startExe(path, programArgStr)
    ..
As you can see, if P1 can finish the given task successfully, it can exit by itself, using the exit command passed to it.
If you're not required to use Python, you might consider using Cygwin Bash along with the timeout(1) command to run a command with a timeout. However, since Cygwin's implementation of fork() is not very fast and you're creating an enormous number of processes, you'll likely have an enormous overhead of just creating processes (I don't know if the native Windows version of Python is any better in this regard).
Alternatively, if you have the source code to P1, why don't you just modify it so that it can perform multiple iterations of whatever it is that it does in one invocation? That way, you don't have to deal with creating and killing 480,000 processes, which will make a huge difference if the amount of work that each invocation does is small.
When you call popen, you can specify either a pipe or file descriptor to accept stdout from the process:
Popen(args, bufsize=0, executable=None, stdin=None, stdout=None, stderr=None, preexec_fn=None, close_fds=False, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0)
You can then monitor the file/pipe you pass to popen, and if nothing is written, kill the process.
More info on the popen args is in the python docs.
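A minimal sketch of that kill-on-timeout idea (assuming Python 2.6+, where Popen.kill() exists; on Windows it maps to TerminateProcess):

import subprocess
from threading import Timer

def run_with_timeout(cmd, input_str, timeout_secs):
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    timer = Timer(timeout_secs, p.kill)  # fires only if the process outlives the timeout
    timer.start()
    try:
        output = p.communicate(input_str)[0]
    finally:
        timer.cancel()  # process finished in time; disarm the timer
    return output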
Rather than using p.communicate, try just looping through the lines of output:
while True:
    line = p.stdout.readline()
    if not line:
        break
    print ">>> " + line.rstrip()
How about this approach?
from threading import Thread

def main(input):
    # your actual program, modified to take input as an argument
    pass

queue = ['Your inputs stored here. Even better to make it a generator']

class Runner(Thread):
    def __init__(self):
        Thread.__init__(self)
    def run(self):
        while len(queue) > 0:
            input = queue.pop()
            main(input)
        return True

# Use 24 threads
for thread in xrange(24):
    Runner().start()
# You may also join the threads at the end.
Of course this approach introduces some vulnerabilities, like two threads popping from the queue list at the same time, but I have never encountered that in real life. (A thread-safe variant using the standard library's Queue is sketched below.)
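If that race ever bites, the standard library's queue is thread-safe. A minimal sketch of the same pattern, assuming Python 2 (the module is named Queue there, queue in Python 3):

from Queue import Queue, Empty
from threading import Thread

q = Queue()
for item in ['input-1', 'input-2']:  # illustrative inputs
    q.put(item)

class Runner(Thread):
    def run(self):
        while True:
            try:
                input = q.get_nowait()  # atomic; no two threads get the same item
            except Empty:
                return  # queue drained, thread exits
            main(input)  # same main(input) as in the sketch above

for _ in xrange(24):
    Runner().start()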
I solved my problem by editing the current code and putting the killer code in a separate file.
To do that, I added a line to write the PID of the newly created process to a file:
# Should come before p.communicate
WriteStatus(str(p.pid) + "***" + str(gmtime().tm_hour) + "***" + str(gmtime().tm_min))
p.communicate()[0]
The process monitor runs separately and checks every 2 minutes to see whether the processes listed in the files are still active; if they are, it kills them and removes their IDs.
import os
import subprocess
import time
from time import gmtime

# mainPath (the directory holding the status files) is assumed to be defined elsewhere.

def KillProcess(pid):
    subprocess.Popen("TASKKILL /PID " + str(pid) + " /F /T", shell=True)
    subprocess.Popen("TASKKILL /im WerFault.exe /F /T", shell=True)
    print "kill"

def ReadStatus(filePath):
    print "Checking " + filePath
    try:
        status = open(mainPath + filePath, 'r').readline()
    except:
        print "file removed " + filePath
        return 0
    if len(status) > 0:
        info = status.split("***")
        now = [gmtime().tm_hour, gmtime().tm_min]
        print now
        # Time differences
        difHour = now[0] - int(info[1])
        if difHour == 0:  # in the same hour
            difMin = now[1] - int(info[2])
        else:
            difMin = 60 - int(info[2]) + now[1]
        if difMin > 2:
            try:
                open(mainPath + filePath, 'w').write("")
                KillProcess(info[0])
            except:
                pass
    return 1

def monitor():
    # Read all the files
    listFiles = os.listdir(mainPath)
    while len(listFiles) > 0:
        # Go and check the contents
        for file in listFiles:
            # Open the file and calculate whether the process should be killed or not
            pid = ReadStatus(file)
        # Update listFiles, since files are removed once the process
        # of each folder is done
        listFiles = os.listdir(mainPath)
        for i in range(0, 4):
            time.sleep(30)  # waits 30 sec
            subprocess.Popen("TASKKILL /im WerFault.exe /F /T", shell=True)
    # to indicate the job is done
    return 1
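Assuming the monitor lives in its own script, it only needs an entry point, e.g.:

if __name__ == '__main__':
    monitor()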
