Python daemon working with the same list of objects - python

I have a class named People. And this class has a list. I don't want to keep this list on a file or database, all in memory, so the way I thought would work is by creating a Daemon and keep the process open, here is my code:
daemon.py
# coding: utf-8
import os
import sys
import time
import atexit
import signal
from people import People
class Daemon(object):
    """
    A generic daemon class.

    Usage: subclass the Daemon class and override the run() method.
    """

    def __init__(self, pidfile, stdin='/dev/null',
                 stdout='/dev/null', stderr='/dev/null'):
        # Paths the standard streams are re-wired to after daemonizing.
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        # Path of the file recording the daemonized process's PID.
        self.pidfile = pidfile
        # NOTE(review): this People instance lives in whichever process
        # constructs the Daemon object; it is NOT shared with an
        # already-running daemon process.
        self.bc = People()

    def daemonize(self):
        """
        Do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        # Do first fork: detach from the launching process.
        self.fork()
        # Decouple from parent environment (cwd, session, umask).
        self.dettach_env()
        # Do second fork: the session leader exits so we can never
        # reacquire a controlling terminal.
        self.fork()
        # Flush standard file descriptors before re-wiring them.
        sys.stdout.flush()
        sys.stderr.flush()
        # Redirect stdio to the configured files.
        self.attach_stream('stdin', mode='r')
        self.attach_stream('stdout', mode='a+')
        self.attach_stream('stderr', mode='a+')
        # Record our PID so start/stop/status can find us.
        self.create_pidfile()

    def attach_stream(self, name, mode):
        """
        Replace the stream `name` ('stdin'/'stdout'/'stderr') with a file
        opened from the path configured in __init__.
        """
        stream = open(getattr(self, name), mode)
        os.dup2(stream.fileno(), getattr(sys, name).fileno())

    def dettach_env(self):
        """Detach from the parent environment: cwd, session and umask."""
        os.chdir("/")
        os.setsid()
        os.umask(0)

    def fork(self):
        """
        Fork once and exit in the parent, so only the child continues.
        """
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("Fork failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

    def create_pidfile(self):
        """Write our PID to the pidfile and arrange its removal on exit."""
        atexit.register(self.delpid)
        pid = str(os.getpid())
        # Use a context manager so the file is flushed and closed promptly
        # (the original left the handle dangling).
        with open(self.pidfile, 'w+') as f:
            f.write("%s\n" % pid)

    def delpid(self):
        """
        Removes the pidfile on process exit
        """
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        pid = self.get_pid()
        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)
        # Start the daemon
        self.daemonize()
        self.run()

    def get_pid(self):
        """
        Return the PID recorded in the pidfile, or None when the file is
        missing, unreadable, or does not contain a valid integer.
        """
        try:
            with open(self.pidfile, 'r') as pf:
                pid = int(pf.read().strip())
        except (IOError, OSError, TypeError, ValueError):
            # ValueError covers an empty or corrupt pidfile, which the
            # original code let crash the caller via int().
            pid = None
        return pid

    def stop(self, silent=False):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        pid = self.get_pid()
        if not pid:
            if not silent:
                message = "pidfile %s does not exist. Daemon not running?\n"
                sys.stderr.write(message % self.pidfile)
            return  # not an error in a restart
        # Try killing the daemon process: send SIGTERM until kill() fails
        # because the process no longer exists.
        import errno  # local import to avoid touching the module imports
        try:
            while True:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            # Compare errno instead of matching the locale-dependent
            # "No such process" message text.
            if err.errno == errno.ESRCH:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                sys.stdout.write(str(err))
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop(silent=True)
        self.start()

    def run(self):
        """
        You should override this method when you subclass Daemon. It will be
        called after the process has been daemonized by start() or restart().
        """
        raise NotImplementedError
And my main file:
# coding: utf-8
import argparse
import sys
import time
from people import People
import logging
from daemon import Daemon
class MyDaemon(Daemon):
    """Concrete daemon that just logs a heartbeat once per second."""

    def run(self):
        # Work loop of the daemonized process: emit a debug line forever.
        heartbeat = "I'm here..."
        while True:
            logging.debug(heartbeat)
            time.sleep(1)

    def get_people(self):
        """Return the People instance created in Daemon.__init__ (self.bc)."""
        return self.bc
def main():
    """
    The application entry point: parse the single OPERATION argument and
    drive the daemon accordingly.
    """
    parser = argparse.ArgumentParser(
        description='Daemon runner',
        epilog="That's all folks"
    )
    parser.add_argument(
        'operation',
        metavar='OPERATION',
        type=str,
        help='Operation with daemon. Accepts any of these values: start, stop, restart, status',
        choices=['start', 'stop', 'restart', 'status', 'printpeople', 'add1', 'add2', 'add3', 'add4']
    )
    args = parser.parse_args()
    operation = args.operation
    # Daemon
    logging.basicConfig(filename="foodaemon.log", level=logging.DEBUG)
    daemon = MyDaemon('/Users/marcosaguayo/dev/luracoin/python.pid')
    if operation == 'start':
        print("Starting daemon")
        # NOTE(review): daemonize() exits the launching process inside
        # fork(), so the lines below only run in the daemonized child.
        daemon.start()
        pid = daemon.get_pid()
        if not pid:
            # Fixed typo: was "Unable run daemon".
            print("Unable to run daemon")
        else:
            print("Daemon is running [PID=%d]" % pid)
    elif operation == 'stop':
        # Fixed typo: was "Stoping daemon".
        print("Stopping daemon")
        daemon.stop()
    elif operation == 'restart':
        print("Restarting daemon")
        daemon.restart()
    elif operation == 'status':
        print("Viewing daemon status")
        pid = daemon.get_pid()
        if not pid:
            print("Daemon isn't running ;)")
        else:
            print("Daemon is running [PID=%d]" % pid)
    elif operation == 'printpeople':
        # NOTE(review): this reads the People list of the *new* MyDaemon
        # instance in this process, not of an already-running daemon.
        print(daemon.get_people().get_list())
    elif operation.startswith('add'):
        # Collapse the four copy-pasted add1..add4 branches: the trailing
        # digit of the operation name is the payload.
        bc = daemon.get_people()
        bc.add_people({"people": operation[len('add'):]})
        print(bc.get_list())
    sys.exit(0)


if __name__ == '__main__':
    main()
people.py
class People:
    """In-memory container for people records; nothing is persisted."""

    def __init__(self):
        # Records live only for the lifetime of this process.
        self.people_list = []

    def get_list(self):
        """Return the underlying list of people records."""
        return self.people_list

    def add_people(self, people):
        """Append one people record to the list."""
        self.people_list.append(people)
I do the following:
$ python3 test.py start
*Starting daemon*
$ python3 test.py add1
*[{'people': '1'}]*
$ python3 test.py add2
*[{'people': '2'}]*
The python3 test.py add2 should return [{'people': '1'},{'people': '2'}]
What I think is happening is that each time I use the class, the list restarts. I've tried initializing the class in the __init__ of the daemon, but it doesn't work.
Anyone know how can I solve this?

I do not understand how this could work even in theory. Fixing it would require a complete rewrite.
You manage to launch your daemon, but then what? You never talk to it. It is there running and it would probably work as intended, but you do not use it for anything.
When you call your test.py with parameter add1, it creates a new daemon class (but it does not fork and start another background process as you do not call start() on it) with new data structures. This means an empty People list. Then you add to this list, print the result and exit. Your People list with one entry is gone when your process exits. People list in your daemon process is always empty as the daemon process just sits there, waiting in infinite loop and printing logging messages.
In contrast, your stop command does work, as it just sends a signal to a running process and kills it.
I can see no evidence anywhere that a new instance of MyDaemon class would somehow find if there is a daemon already running, and then communicate with it.
Fixing this is more than I have time to do. You need a communication mechanism. Sockets would do, or you could use ZMQ. Or pipes, but then you need two as you need to get a response back. You would use from your current code the parts where you start and stop your daemon.
When you instantiate your MyDaemon class, you would check whether there is a daemon running. If not, it would start it. And when a daemon starts, it starts listening to the communications channel. Instead of doing nothing in the while True loop, it would check if there are new requests asking it to actually do something.
If your daemon is already running and you just want to add to the list or query what is in there, you do not need an instance of MyDaemon class at all. You would instead write your request to the socket and then wait for response from your daemon. Writing an example of this is more than I have time to spend, but I hope this gives you the idea where it goes wrong and how to fix it.
Or then do not fix it by yourself and install a redis server instead. It would be an in-memory key/value store and it might be suitable for your purposes.

Related

multiprocessing.Manager() hangs Popen.communicate() on Python

The use of multiprocessing.Manager prevents clean termination of Python child process using subprocess.Process.Popen.terminate() and subprocess.Process.Popen.kill().
This seems to be because Manager creates a child process behind the scenes for communicating, but this process does not know how to clean itself up when the parent is terminated.
What is the easiest way to use multiprocessing.Manager so that it does not prevent a process shutdown by a signal?
A demonstration:
"""Multiprocess manager hang test."""
import multiprocessing
import subprocess
import sys
import time
def launch_and_read_process():
    """Spawn this script in 'run_unkillable' mode, terminate it, and read
    its captured output."""
    cmd = ["python", sys.argv[0], "run_unkillable"]
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Give time for the process to run and print()
    time.sleep(3)
    status = proc.poll()
    print("poll() is", status)
    print("Terminating")
    assert proc.returncode is None
    proc.terminate()
    print("Got exit code", proc.wait())
    # NOTE(review): with a Manager helper process still holding the pipes
    # open, this communicate() call is where the hang appears.
    stdout, _stderr = proc.communicate()
    print("Got output", stdout.decode("utf-8"))


def run_unkillable():
    """Demonstrate the hang: create a Manager (which forks a helper
    process), print a marker line, then sleep."""
    # Disable manager creation to make the code run correctly
    manager = multiprocessing.Manager()
    d = manager.dict()
    d["foo"] = "bar"
    print("This is an example output", flush=True)
    time.sleep(999)


def main():
    """Dispatch to the module-level function named by argv[1]."""
    mode = sys.argv[1]
    print("Doing subrouting", mode)
    globals().get(mode)()


if __name__ == "__main__":
    main()
Run as python test-script.py launch_and_read_process.
Good output (no multiprocessing.Manager):
Doing subrouting launch_and_read_process
poll() is None
Terminating
Got exit code -15
Got output Doing subrouting run_unkillable
This is an example output
Output when subprocess.Popen.communicate hangs because use of Manager:
Doing subrouting launch_and_read_process
poll() is None
Terminating
Got exit code -15
Like you pointed out, this happens because the manager spawns its own child processes. So when you do proc.communicate() the code hangs because that child process's stderr and stdout are still open. You can easily solve this on Unix by setting your own handlers for SIGTERM and SIGINT, but it becomes a little hairy on Windows since those two signals are pretty much useless. Also, keep in mind that signals are only delivered to the main thread. Depending on your OS and the signal, if the thread is busy (time.sleep(999)) then the whole timer may need to run out before the signal can be intercepted. Anyway, I have provided a solution for both Windows and Unix with a note at the end:
UNIX
This is pretty straightforward, you simply define your own handlers for the signals where you explicitly call manager.shutdown() to properly cleanup its child process:
def handler(manager, *args):
    """
    Signal handler with the manager bound via functools.partial (or a
    factory function): shut the manager down, then exit.
    """
    # Shutting the manager down reaps its helper process, so the parent's
    # stdout/stderr pipes actually close on termination.
    manager.shutdown()
    sys.exit()


def run_unkillable():
    """Variant that registers SIGINT/SIGTERM handlers which clean up the
    Manager before exiting."""
    # Disable manager creation to make the code run correctly
    manager = multiprocessing.Manager()
    # Register our handler,
    h = functools.partial(handler, manager)
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, h)
    d = manager.dict()
    d["foo"] = "bar"
    print("This is an example output", flush=True)
    time.sleep(999)
Windows
On Windows you will need to explicitly send the signal signal.CTRL_BREAK_EVENT rather than the plain proc.terminate() to ensure that the child process intercepts it (reference). Additionally, you'll also want to sleep in shorter durations in a loop instead of doing sleep(999) to make sure the signal interrupts the main thread rather than waiting for the whole duration of sleep (see this question for alternatives).
"""Multiprocess manager hang test."""
import functools
import multiprocessing
import subprocess
import sys
import time
import signal
def launch_and_read_process():
    """Windows variant: spawn 'run_unkillable' in its own process group,
    stop it with CTRL_BREAK_EVENT, and read its output."""
    proc = subprocess.Popen(
        ["python", sys.argv[0], "run_unkillable"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # So that our current process does not get SIGBREAK signal
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
    )
    # Give time for the process to run and print()
    time.sleep(5)
    status = proc.poll()
    print("poll() is", status)
    print("Terminating")
    assert proc.returncode is None
    # Send this specific signal instead of doing terminate()
    proc.send_signal(signal.CTRL_BREAK_EVENT)
    print("Got exit code", proc.wait())
    stdout, _stderr = proc.communicate()
    print("Got output", stdout.decode("utf-8"))


def handler(manager, *args):
    """
    Our handler, use functools.partial to fix arg manager (or you
    can create a factory function too)
    """
    manager.shutdown()
    sys.exit()


def run_unkillable():
    """Create a Manager, hook SIGBREAK to clean it up, and sleep in short
    slices so the signal can interrupt the main thread."""
    # Disable manager creation to make the code run correctly
    manager = multiprocessing.Manager()
    # Register our handler,
    signal.signal(signal.SIGBREAK, functools.partial(handler, manager))
    d = manager.dict()
    d["foo"] = "bar"
    print("This is an example output", flush=True)
    # Sleep in a loop otherwise the signal won't interrupt the main thread
    for _ in range(999):
        time.sleep(1)


def main():
    """Dispatch to the module-level function named by argv[1]."""
    mode = sys.argv[1]
    print("Doing subrouting", mode)
    globals().get(mode)()


if __name__ == "__main__":
    main()
Note: Keep in mind that there is a race condition in the above solution because we are registering the signal handler after the creation of a manager. Theoretically, one could kill the process right before the handler is registered and the proc.communicate() will then hang because the manager was not cleaned up. So you may want to supply a timeout parameter to .communicate with error handling to log these edge cases.

How to set daemon to kill other processes in its process group when it dies

I want to create a Manager daemon that spawns two subprocesses A and B. When the Manager daemon dies/is killed, it should kill A and B. Currently, I have it set so if I pass in "stop" to Manager, it'll send a SIGTERM to its Process Group, which kills everything.
However, I would like it so if I send a SIGTERM to Manager directly, it will also kill A and B as well. I've tried signal handlers, but this creates a loop where it sends SIGTERM to the PG, which sends it back to Manager, etc.
I've also tried making Manager a process group leader by calling os.setpgid(os.getpid(), os.getpid()) before spawning A and B but this doesn't seem to kill A and B properly.
In the example below, running python manager.py start would create Manager, A, and B. Then:
python manager.py stop would kill all 3 processes
kill -INT -$MANAGER_PGID would kill all 3
kill $MANAGER_PID would only kill Manager and not A or B
#!/usr/bin/env python2.7
import atexit
import datetime
import os
import sys
import time
import subprocess
from signal import *
class Daemon(object):
    """Self-daemonizing manager that becomes a process-group leader so it
    can be killed together with the children it spawns."""

    def __init__(self):
        # Where the daemonized process records its PID.
        self.pid_file = "/var/run/manager.pid"

    def del_pid(self):
        """Remove the pidfile (registered to run at exit)."""
        os.remove(self.pid_file)

    def daemonize(self):
        """Double-fork into the background, re-wire stdio, write the pidfile
        and become a process-group leader."""
        # First fork: the original parent returns control to the shell.
        if os.fork():
            sys.exit()
        # Detach from the parent environment.
        os.chdir("/")
        os.setsid()
        os.umask(0)
        # Second fork: drop session leadership so no TTY can be reacquired.
        if os.fork():
            sys.exit()
        # stdin reads from /dev/null.
        with open('/dev/null', 'r') as dev_null:
            os.dup2(dev_null.fileno(), sys.stdin.fileno())
        # stderr appends (unbuffered) to a log file.
        sys.stderr.flush()
        err = "/tmp/manager.err"
        with open(err, 'a+', 0) as stderr:
            os.dup2(stderr.fileno(), sys.stderr.fileno())
        # stdout appends (unbuffered) to a log file.
        sys.stdout.flush()
        out = "/tmp/manager.out"
        with open(out, 'a+', 0) as stdout:
            os.dup2(stdout.fileno(), sys.stdout.fileno())
        # Record the PID and arrange pidfile cleanup on exit.
        atexit.register(self.del_pid)
        pid = os.getpid()
        with open(self.pid_file, 'w+') as pid_file:
            pid_file.write('{0}'.format(pid))
        # Lead our own process group so killpg() on our PGID reaches every
        # child we spawn.
        os.setpgid(pid, pid)
        # for sig in (SIGABRT, SIGTERM, SIGINT):
        #     signal(sig, self.stop)

    def get_pid_by_file(self):
        """Return the PID stored in the pidfile as an int."""
        with open(self.pid_file, 'r') as pid_file:
            return int(pid_file.read().strip())

    def start(self):
        """Daemonize, then enter the work loop."""
        self.daemonize()
        self.run()

    def stop(self, signum=None, frame=None):
        """Kill the whole process group recorded via the pidfile."""
        pid = self.get_pid_by_file()
        os.killpg(os.getpgid(pid), SIGTERM)

    def run(self):
        """Spawn the two worker scripts, then idle forever."""
        subprocess.Popen("a.sh", shell=True)
        subprocess.Popen("a.sh", shell=True)
        while 1:
            time.sleep(5)
if __name__ == '__main__':
    # Tiny CLI: `start` daemonizes; `stop` kills the recorded process group.
    daemon = Daemon()
    command = sys.argv[1]
    if command == 'start':
        daemon.start()
    elif command == 'stop':
        daemon.stop()
Because I create and use a PID file to find processes to stop, I stopped the loop by placing a check on whether the PID file still exists or not.

Extending Daemon class by two subclasses does not work

this is the daemon class i am using
it acts as a base class from which I want to spawn 2 separate daemons via another controller file
class Daemon:
    """A generic daemon class.

    Usage: subclass the daemon class and override the run() method.
    """

    def __init__(self, pidfile, outfile='/tmp/daemon_out', errfile='/tmp/daemon_log'):
        # Pidfile path plus the files stdout/stderr get redirected into.
        self.pidfile = pidfile
        self.outfile = outfile
        self.errfile = errfile

    def daemonize(self):
        """Deamonize class. UNIX double fork mechanism."""
        # First fork: the launching parent exits immediately.
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #1 failed: {0}\n'.format(err))
            sys.exit(1)
        # decouple from parent environment
        os.chdir('/')
        os.setsid()
        os.umask(0)
        # Second fork: drop session leadership so no controlling TTY can
        # ever be reacquired.
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #2 failed: {0}\n'.format(err))
            sys.exit(1)
        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(os.devnull, 'r')
        so = open(self.outfile, 'a+')
        se = open(self.errfile, 'a+')
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
        # write pidfile
        atexit.register(self.delpid)
        with open(self.pidfile, 'w+') as f:
            f.write(str(os.getpid()) + '\n')

    def delpid(self):
        """Remove the pidfile before stopping the program.

        Uncomment the lines below to also delete the output & error files
        before stopping the program.
        """
        os.remove(self.pidfile)
        #os.remove(self.outfile)
        #os.remove(self.errfile)

    def _read_pid(self):
        """Return the PID from the pidfile, or None if it is unreadable."""
        try:
            with open(self.pidfile, 'r') as pf:
                return int(pf.read().strip())
        except IOError:
            return None

    def start(self):
        """Start the daemon."""
        # Check for a pidfile to see if the daemon already runs
        if self._read_pid():
            message = "pidfile {0} already exist. " + \
                    "Daemon already running?\n"
            sys.stderr.write(message.format(self.pidfile))
            sys.exit(1)
        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """Stop the daemon."""
        # Get the pid from the pidfile
        pid = self._read_pid()
        if not pid:
            message = "pidfile {0} does not exist. " + \
                    "Daemon not running?\n"
            sys.stderr.write(message.format(self.pidfile))
            return  # not an error in a restart
        # Try killing the daemon process: send SIGTERM until kill() raises
        # because the process no longer exists.
        try:
            while 1:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            e = str(err.args)
            if e.find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print(str(err.args))
                sys.exit(1)

    def restart(self):
        """Restart the daemon."""
        self.stop()
        self.start()

    def run(self):
        """Override this method when you subclass Daemon.

        It will be called after the process has been daemonized by
        start() or restart().
        """
here is the code i am using in a different file
in this file I am extending the daemon class with separate classes & overriding the run() method.
#! /usr/bin/python3.6
import sys, time, os, psutil, datetime
from daemon import Daemon
class net(Daemon):
    """Daemon that periodically reports the network-watcher PID."""

    def run(self):
        while True:
            print("net daemon : ", os.getpid())
            time.sleep(200)


class file(Daemon):
    """Daemon that periodically reports the file-watcher PID."""

    def run(self):
        while True:
            print("file daemon : ", os.getpid())
            time.sleep(200)


if __name__ == "__main__":
    net_daemon = net(pidfile='/tmp/net_pidFile',
                     outfile='/tmp/network_out.log',
                     errfile='/tmp/net_error.log')
    file_daemon = file(pidfile='/tmp/file_pidFile',
                       outfile='/tmp/filesys_out.log',
                       errfile='/tmp/file_error.log')
    if len(sys.argv) != 2:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
    command = sys.argv[1]
    if command == 'start':
        # NOTE(review): net_daemon.start() daemonizes and exits this parent
        # inside fork(), so file_daemon.start() is never reached here.
        net_daemon.start()
        file_daemon.start()
    elif command == 'stop':
        file_daemon.stop()
        net_daemon.stop()
    elif command == 'restart':
        file_daemon.restart()
        net_daemon.restart()
    else:
        print("Unknown command")
        sys.exit(2)
    sys.exit(0)
currently only the first class to run the start() method — the net daemon — actually works.
Now, how do I make the 2 classes spawn 2 separate daemons?
The real problem here is that you've chosen the wrong code for the task you want. You're asking "How do I use this power saw to hammer in this nail?" And in this case, it's not even a professionally-produced saw with an instruction manual, it's a home-made saw you found in someone's garage, built by a guy who probably knew what he was doing but you can't actually be sure because you don't know what he was doing.
The proximate problem that you're complaining about is in daemonize:
try:
pid = os.fork()
if pid > 0:
# exit first parent
sys.exit(0)
The first time you call this, the parent process exits. Which means the parent process never gets to launch the second daemon, or do anything else.
For a self-daemonizing program that can be managed by a separate program, this is exactly what you want. (Whether it gets all the details right, I don't know, but the basic idea is definitely right.)
For a managing program that spawns daemons, this is exactly what you don't want. And that's what you're trying to write. So this is the wrong tool for the job.
But the tasks aren't that much different. If you understand what you're doing (and crack open your copy of Unix Network Programming—nobody understands this stuff well enough to get it right off the top of their head), you can convert one into the other. Which might be a useful exercise, even if for any real application I'd just use one of the well-tested, well-documented, nicely-maintained libraries on PyPI.
What happens if you just replace the sys.exit(0) calls that happen in the parent process (but not the ones that happen in the intermediate child!) with return True? (Well, you probably want to also replace the sys.exit(1) in the parent with a return False or raise some kind of exception.) Then daemonize no longer daemonizes you, but instead spawns a daemon and reports back on whether it succeeded. Which is what you wanted, right?
No guarantees that it does everything else right (and I'd bet it doesn't), but it does solve the specific problem you were asking about.
If nothing obvious is going wrong after that, the next step would probably be to read through PEP 3143 (which does a pretty nice job translating all the details in Stevens' book into Python terms and making sure they're up to date for 21st century linux and BSD) and come up with a checklist of tests to run, and then run them to see what less obvious things you're still getting wrong.

How handle asynchronous keystroke with Python?

I am looking for the way of sending a keystroke to a Python script. In this case, I am trying that the script detects if a press whatever key, and not only the interrupt signals (ctrl + c , ctrl + d, ...).
I have checked the signal python module. But it seems like it's only prepared to handle interrupt signals, and not if I press "K" or "Space" for example. I have seen this in the official docs of the module:
import signal
import os
import time
def receive_signal(signum, stack):
    # Report which signal number was delivered (Python 2 print statement).
    print 'Received:', signum

# Route both user-defined signals to the same reporting handler.
signal.signal(signal.SIGUSR1, receive_signal)
signal.signal(signal.SIGUSR2, receive_signal)

# Advertise the PID so the user can target this process with `kill`.
print 'My PID is:', os.getpid()

# Idle forever; an incoming signal interrupts the sleep and the handler runs.
while True:
    print 'Waiting...'
    time.sleep(3)
And they say:
To send signals to the running program, I use the command line program kill. To produce the output below, I ran signal_signal.py in one window, then kill -USR1 $pid, kill -USR2 $pid, and kill -INT $pid in another.
I am quite sure that this module is not the solution. Do you know of a module, or anything else, that could help me send keystrokes to my Python script asynchronously?
Thanks a lot!!
I want the user has the possibility of skip a day, a month or a machine by pressing a key in whatever moment.
Ah. Now it makes sense.
And not very sure that this would be possible.
Anything's possible. It can just be quite complex for a truly asynchronous solution.
The only way I could think to do it, while avoiding a polling approach, was to fork(2) the process, have the parent process listen for keypresses, and send signals to the child process, which actually does the work.
Something like this...
#!/usr/bin/env python
import sys, os, time, termios, tty, signal
# Define some custom exceptions we can raise in signal handlers
class SkipYear(Exception):
    # Raised (via SIGUSR1) to abort processing of the current year.
    pass

class SkipMonth(Exception):
    # Raised (via SIGUSR2) to abort processing of the current month.
    pass

# Process one month
def process_month(year, month):
    # Fake up whatever the processing actually is
    print 'Processing %04d-%02d' % (year, month)
    time.sleep(1)

# Process one year
def process_year(year):
    # Iterate months 1-12
    for month in range(1, 13):
        try:
            process_month(year, month)
        except SkipMonth:
            print 'Skipping month %d' % month

# Do all processing
def process_all(args):
    # Help
    print 'Started processing - args = %r' % args
    try:
        # Iterate years 2010-2015
        for year in range(2010, 2016):
            try:
                process_year(year)
            except SkipYear:
                print 'Skipping year %d' % year
    # Handle SIGINT from parent process
    except KeyboardInterrupt:
        print 'Child caught SIGINT'
    # Return success
    print 'Child terminated normally'
    return 0

# Main entry point
def main(args):
    # Help
    print 'Press Y to skip current year, M to skip current month, or CTRL-C to abort'
    # Get file descriptor for stdin. This is almost always zero.
    stdin_fd = sys.stdin.fileno()
    # Fork here: the child does the work, the parent reads keypresses and
    # forwards them to the child as signals.
    pid = os.fork()
    # If we're the child
    if not pid:
        # Detach child from controlling TTY, so it can't be the foreground
        # process, and therefore can't get any signals from the TTY.
        os.setsid()
        # Define signal handler for SIGUSR1 and SIGUSR2
        def on_signal(signum, frame):
            if signum == signal.SIGUSR1:
                raise SkipYear
            elif signum == signal.SIGUSR2:
                raise SkipMonth
        # We want to catch SIGUSR1 and SIGUSR2
        signal.signal(signal.SIGUSR1, on_signal)
        signal.signal(signal.SIGUSR2, on_signal)
        # Now do the thing
        return process_all(args[1:])
    # If we get this far, we're the parent
    # Define a signal handler for when the child terminates
    def on_sigchld(signum, frame):
        assert signum == signal.SIGCHLD
        print 'Child terminated - terminating parent'
        sys.exit(0)
    # We want to catch SIGCHLD
    signal.signal(signal.SIGCHLD, on_sigchld)
    # Remember the original terminal attributes
    stdin_attrs = termios.tcgetattr(stdin_fd)
    # Change to cbreak mode, so we can detect single keypresses
    tty.setcbreak(stdin_fd)
    try:
        # Loop until we get a signal. Typically one of...
        #
        # a) SIGCHLD, when the child process terminates
        # b) SIGINT, when the user presses CTRL-C
        while 1:
            # Wait for a keypress
            char = os.read(stdin_fd, 1)
            # If it was 'Y', send SIGUSR1 to the child
            if char.lower() == 'y':
                os.kill(pid, signal.SIGUSR1)
            # If it was 'M', send SIGUSR2 to the child
            if char.lower() == 'm':
                os.kill(pid, signal.SIGUSR2)
    # Parent caught SIGINT - send SIGINT to child process
    except KeyboardInterrupt:
        print 'Forwarding SIGINT to child process'
        os.kill(pid, signal.SIGINT)
    # Catch system exit
    except SystemExit:
        print 'Caught SystemExit'
    # Ensure we reset terminal attributes to original settings
    finally:
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, stdin_attrs)
    # Return success
    print 'Parent terminated normally'
    return 0

# Stub
if __name__ == '__main__':
    sys.exit(main(sys.argv))
...should do the trick, although you'll be limited by the number of distinct signals you can send.

Python - os.kill(pid, SIGTERM) is causing my process to become a zombie

I have a Python script that starts a daemon process. I was able to do this by using the code found at: https://gist.github.com/marazmiki/3618191.
The code starts the daemon process exactly as expected. However, sometimes, and only sometimes, when the daemon process is stopped, the running job is zombied.
The stop function of the code is:
def stop(self):
    """
    Stop the daemon
    """
    # Get the pid from the pidfile
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        # Any failure to read/parse the pidfile is treated as "not running".
        pid = None
    if not pid:
        message = "pidfile %s does not exist. Daemon not running?\n"
        sys.stderr.write(message % self.pidfile)
        return # not an error in a restart
    # Try killing the daemon process
    try:
        # Keep sending SIGTERM until os.kill raises because the process is
        # gone; each pass waits 1 second before retrying.
        while 1:
            os.kill(pid, SIGTERM)
            time.sleep(1.0)
    except OSError, err:
        err = str(err)
        if err.find("No such process") > 0:
            # Process has exited: remove the now-stale pidfile.
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
        else:
            print str(err)
            sys.exit(1)
When this stop() method is run, the process (pid) appears to hang, and when I Control+C out, I see the script is KeyboardInterrupted on the line time.sleep(1.0), which leads me to believe that the line:
os.kill(pid, SIGTERM)
is the offending code.
Does anyone have any idea why this could be happening? Why would this os.kill() would force a process to become a zombie?
I am running this on Ubuntu linux (if it matters).
UPDATE: I'm including my start() method per #paulus's answer.
def start(self):
    """
    Start the daemon
    """
    pid = None
    # Check for a pidfile to see if the daemon already runs
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        # Missing/unreadable pidfile means no daemon is recorded as running.
        pid = None
    if pid:
        message = "pidfile %s already exist. Daemon already running?\n"
        sys.stderr.write(message % self.pidfile)
        sys.exit(1)
    # Start the daemon
    self.daemonize()
    self.run()
UPDATE 2: And here is the daemonize() method:
def daemonize(self):
    """
    do the UNIX double-fork magic, see Stevens' "Advanced
    Programming in the UNIX Environment" for details (ISBN 0201563177)
    http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
    """
    # First fork: the original parent exits, returning control to the shell.
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)
    # do second fork
    # (session leader exits so a controlling TTY can never be reacquired)
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # redirect standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()
    # NOTE(review): sys.stdout is rebound here *and* dup2'd below, which
    # leaves two handles open on the same log file — looks unintended.
    sys.stdout = file(self.stdout, 'a+', 0)
    si = file(self.stdin, 'r')
    so = file(self.stdout, 'a+')
    se = file(self.stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
    # write pidfile
    atexit.register(self.delpid)
    pid = str(os.getpid())
    file(self.pidfile, 'w+').write("%s\n" % pid)
You're looking in the wrong direction. The flawed code is not the one in the stop routine but it is in the start one (if you're using the code from gist). Double fork is a correct method, but the first fork should wait for the child process, not simply quit.
The correct sequence of commands (and the reasons to do the double fork) can be found here: http://lubutu.com/code/spawning-in-unix (see the "Double fork" section).
The "sometimes" you mention happens when the first parent dies before getting SIGCHLD, so the child is not handed over to init.
As far as I remember, init should periodically read exit codes from it's children besides signal handling, but the upstart version simply relies on the latter (therefore the problem, see the comment on the similar bug: https://bugs.launchpad.net/upstart/+bug/406397/comments/2).
So the solution is to rewrite the first fork to actually wait for the child.
Update:
Okay, you want some code. Here it goes: pastebin.com/W6LdjMEz I've updated the daemonize, fork and start methods.

Categories