def daemon_start(pid_file, log_file):
    """Daemonize the current process.

    Forks once; the child records its PID in *pid_file*, detaches with
    setsid(), and redirects stdout/stderr to *log_file*.  The parent
    blocks until the child signals it back (SIGTERM on success, SIGINT
    on failure) and exits with the matching status.
    """
    def handle_exit(signum, _):
        # SIGTERM from the child means startup succeeded; anything else
        # (SIGINT) means it failed.
        if signum == signal.SIGTERM:
            sys.exit(0)
        sys.exit(1)

    signal.signal(signal.SIGINT, handle_exit)
    signal.signal(signal.SIGTERM, handle_exit)

    # fork only once because we are sure parent will exit
    pid = os.fork()
    assert pid != -1

    if pid > 0:
        # parent waits for its child; the 5 s sleep is a startup timeout
        # in case the child never signals back
        time.sleep(5)
        sys.exit(0)

    # child signals its parent to exit
    ppid = os.getppid()
    pid = os.getpid()
    if write_pid_file(pid_file, pid) != 0:
        # could not record our PID: tell the parent to exit with an error
        os.kill(ppid, signal.SIGINT)
        sys.exit(1)

    os.setsid()
    # survive the loss of the controlling terminal / session leader
    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    print('started')
    # success: release the waiting parent
    os.kill(ppid, signal.SIGTERM)

    sys.stdin.close()
    try:
        # freopen() is a project helper that re-points an existing stream
        # at log_file -- TODO confirm its exact semantics
        freopen(log_file, 'a', sys.stdout)
        freopen(log_file, 'a', sys.stderr)
    except IOError as e:
        shell.print_exception(e)
        sys.exit(1)
This daemon does not use double fork. It says "fork only once because we are sure parent will exit". The parent calls sys.exit(0) to exit. However, the child calls os.kill(ppid, signal.SIGTERM) to make the parent exit.
What does it mean by doing this?
The phrase "double fork" is a standard technique to ensure a daemon is reparented to the init (pid 1) process so that the shell which launched it does not kill it. This is actually using that technique because the first fork is done by the process that launched the python program. When a program calls daemon_start it forks. The original (now parent) process exits a few seconds later or sooner when the child it forked signals it. That will cause the kernel to reparent the child process to pid 1. "Double fork" does not mean the daemon calls fork() twice.
Also, your subject line asks "why does this function kill parent twice?" But the code in question does no such thing. I have no idea how you got that idea.
Related
I want to create a Manager daemon that spawns two subprocesses A and B. When the Manager daemon dies/is killed, it should kill A and B. Currently, I have it set so if I pass in "stop" to Manager, it'll send a SIGTERM to its Process Group, which kills everything.
However, I would like it so if I send a SIGTERM to Manager directly, it will also kill A and B as well. I've tried signal handlers, but this creates a loop where it sends SIGTERM to the PG, which sends it back to Manager, etc.
I've also tried making Manager a process group leader by calling os.setpgid(os.getpid(), os.getpid()) before spawning A and B but this doesn't seem to kill A and B properly.
In the example below, running python manager.py start would create Manager, A, and B. Then:
python manager.py stop would kill all 3 processes
kill -INT -$MANAGER_PGID would kill all 3
kill $MANAGER_PID would only kill Manager and not A or B
#!/usr/bin/env python2.7
import atexit
import datetime
import os
import sys
import time
import subprocess
from signal import *
class Daemon(object):
    """Self-daemonizing manager that spawns two child shell scripts.

    ``start`` daemonizes and runs the workers; ``stop`` signals the
    whole process group recorded via the PID file.
    """

    def __init__(self):
        # Location of the PID file stop() uses to find the daemon.
        self.pid_file = "/var/run/manager.pid"

    def del_pid(self):
        # Remove the PID file (registered via atexit in daemonize()).
        os.remove(self.pid_file)

    def daemonize(self):
        """Detach from the terminal using the classic double fork."""
        if os.fork():
            sys.exit()
        # decouple from the parent's environment
        os.chdir("/")
        os.setsid()
        os.umask(0)
        if os.fork():
            sys.exit()
        # Redirect the standard streams.  NOTE(review): each file object
        # is closed when its `with` block exits, but the dup2'ed
        # descriptors remain valid afterwards.
        with open('/dev/null', 'r') as dev_null:
            os.dup2(dev_null.fileno(), sys.stdin.fileno())
        sys.stderr.flush()
        err = "/tmp/manager.err"
        with open(err, 'a+', 0) as stderr:  # buffering=0 -> unbuffered (Python 2)
            os.dup2(stderr.fileno(), sys.stderr.fileno())
        sys.stdout.flush()
        out = "/tmp/manager.out"
        with open(out, 'a+', 0) as stdout:
            os.dup2(stdout.fileno(), sys.stdout.fileno())
        atexit.register(self.del_pid)
        pid = os.getpid()
        with open(self.pid_file, 'w+') as pid_file:
            pid_file.write('{0}'.format(pid))
        # Become a process-group leader so killpg() reaches the children.
        os.setpgid(pid, pid)
        # for sig in (SIGABRT, SIGTERM, SIGINT):
        #     signal(sig, self.stop)

    def get_pid_by_file(self):
        """Return the daemon's PID as recorded in the PID file."""
        with open(self.pid_file, 'r') as pid_file:
            pid = int(pid_file.read().strip())
        return pid

    def start(self):
        """Daemonize, then enter the run loop."""
        self.daemonize()
        self.run()

    def stop(self, signum=None, frame=None):
        """Send SIGTERM to the daemon's entire process group."""
        pid = self.get_pid_by_file()
        pgid = os.getpgid(pid)
        os.killpg(pgid, SIGTERM)

    def run(self):
        # Spawn the two worker subprocesses, then idle forever.
        subprocess.Popen("a.sh", shell=True)
        subprocess.Popen("a.sh", shell=True)
        while 1:
            time.sleep(5)
if __name__ == '__main__':
    # Command-line entry point: `manager.py start` or `manager.py stop`.
    daemon = Daemon()
    # Guard against a missing argument: the original indexed sys.argv[1]
    # unconditionally and crashed with IndexError when run with no args.
    if len(sys.argv) != 2:
        sys.stderr.write("usage: %s start|stop\n" % sys.argv[0])
        sys.exit(2)
    if 'start' == sys.argv[1]:
        daemon.start()
    elif 'stop' == sys.argv[1]:
        daemon.stop()
Because I create and use a PID file to find processes to stop, I stopped the loop by placing a check on whether the PID file still exists or not.
This is the daemon class I am using.
It acts as a base class from which I want to spawn two separate daemons out of another controller file.
class Daemon:
    """A generic daemon class.
    Usage: subclass the daemon class and override the run() method."""

    def __init__(self, pidfile, outfile='/tmp/daemon_out', errfile='/tmp/daemon_log'):
        # pidfile: where the daemonized process records its PID
        # outfile/errfile: destinations for redirected stdout/stderr
        self.pidfile = pidfile
        self.outfile = outfile
        self.errfile = errfile

    def daemonize(self):
        """Deamonize class. UNIX double fork mechanism."""
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent -- NOTE(review): this is why the
                # calling process can only ever start one such daemon;
                # control never returns to the caller.
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #1 failed: {0}\n'.format(err))
            sys.exit(1)

        # decouple from parent environment
        os.chdir('/')
        os.setsid()
        os.umask(0)

        # do second fork so the session leader exits and the daemon can
        # never reacquire a controlling terminal
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #2 failed: {0}\n'.format(err))
            sys.exit(1)

        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(os.devnull, 'r')
        so = open(self.outfile, 'a+')
        se = open(self.errfile, 'a+')
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())

        # write pidfile
        atexit.register(self.delpid)
        pid = str(os.getpid())
        with open(self.pidfile, 'w+') as f:
            f.write(pid + '\n')

    #method for removing the pidfile before stopping the program
    #remove the commented part if you want to delete the output & error file before stopping the program
    def delpid(self):
        os.remove(self.pidfile)
        #os.remove(self.outfile)
        #os.remove(self.errfile)

    def start(self):
        """Start the daemon."""
        # Check for a pidfile to see if the daemon already runs
        try:
            with open(self.pidfile, 'r') as pf:
                pid = int(pf.read().strip())
        except IOError:
            pid = None
        if pid:
            message = "pidfile {0} already exist. " + \
                      "Daemon already running?\n"
            sys.stderr.write(message.format(self.pidfile))
            sys.exit(1)
        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        #Stop the daemon.
        # Get the pid from the pidfile
        try:
            with open(self.pidfile, 'r') as pf:
                pid = int(pf.read().strip())
        except IOError:
            pid = None
        if not pid:
            message = "pidfile {0} does not exist. " + \
                      "Daemon not running?\n"
            sys.stderr.write(message.format(self.pidfile))
            return # not an error in a restart
        # Try killing the daemon process: keep signalling until os.kill
        # raises because the pid no longer exists.
        try:
            while 1:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            e = str(err.args)
            # NOTE(review): matching the error-message text is
            # locale-fragile; comparing err.errno to errno.ESRCH would
            # be more robust.
            if e.find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print (str(err.args))
                sys.exit(1)

    def restart(self):
        """Restart the daemon."""
        self.stop()
        self.start()

    def run(self):
        """override this method when you subclass Daemon.
        It will be called after the process has been daemonized by
        start() or restart()."""
Here is the code I am using in a different file.
In this file I am extending the daemon class with separate subclasses and overriding the run() method.
#! /usr/bin/python3.6
import sys, time, os, psutil, datetime
from daemon import Daemon
class net(Daemon):
    """Network-monitoring daemon: periodically announces its PID."""

    def run(self):
        # Report our PID, then sleep; repeat forever.
        while True:
            print("net daemon : ", os.getpid())
            time.sleep(200)
class file(Daemon):
    """Filesystem-monitoring daemon: periodically announces its PID."""

    def run(self):
        # Report our PID, then sleep; repeat forever.
        while True:
            print("file daemon : ", os.getpid())
            time.sleep(200)
if __name__ == "__main__":
net_daemon = net(pidfile='/tmp/net_pidFile',outfile='/tmp/network_out.log',errfile='/tmp/net_error.log')
file_daemon = file(pidfile='/tmp/file_pidFile',outfile='/tmp/filesys_out.log',errfile='/tmp/file_error.log')
if len(sys.argv) == 2:
if 'start' == sys.argv[1]:
net_daemon.start()
file_daemon.start()
elif 'stop' == sys.argv[1]:
file_daemon.stop()
net_daemon.stop()
elif 'restart' == sys.argv[1]:
file_daemon.restart()
net_daemon.restart()
else:
print("Unknown command")
sys.exit(2)
sys.exit(0)
else:
print("usage: %s start|stop|restart" % sys.argv[0])
sys.exit(2)
Currently only the first class to call start() — the net daemon — actually runs. How do I make the two classes spawn two separate daemons?
The real problem here is that you've chosen the wrong code for the task you want. You're asking "How do I use this power saw to hammer in this nail?" And in this case, it's not even a professionally-produced saw with an instruction manual, it's a home-made saw you found in someone's garage, built by a guy who probably knew what he was doing but you can't actually be sure because you don't know what he was doing.
The proximate problem that you're complaining about is in daemonize:
try:
pid = os.fork()
if pid > 0:
# exit first parent
sys.exit(0)
The first time you call this, the parent process exits. Which means the parent process never gets to launch the second daemon, or do anything else.
For a self-daemonizing program that can be managed by a separate program, this is exactly what you want. (Whether it gets all the details right, I don't know, but the basic idea is definitely right.)
For a managing program that spawns daemons, this is exactly what you don't want. And that's what you're trying to write. So this is the wrong tool for the job.
But the tasks aren't that much different. If you understand what you're doing (and crack open your copy of Unix Network Programming—nobody understands this stuff well enough to get it right off the top of their head), you can convert one into the other. Which might be a useful exercise, even if for any real application I'd just use one of the well-tested, well-documented, nicely-maintained libraries on PyPI.
What happens if you just replace the sys.exit(0) calls that happen in the parent process (but not the ones that happen in the intermediate child!) with return True? (Well, you probably want to also replace the sys.exit(1) in the parent with a return False or raise some kind of exception.) Then daemonize no longer daemonizes you, but instead spawns a daemon and reports back on whether it succeeded. Which is what you wanted, right?
No guarantees that it does everything else right (and I'd bet it doesn't), but it does solve the specific problem you were asking about.
If nothing obvious is going wrong after that, the next step would probably be to read through PEP 3143 (which does a pretty nice job translating all the details in Stevens' book into Python terms and making sure they're up to date for 21st century linux and BSD) and come up with a checklist of tests to run, and then run them to see what less obvious things you're still getting wrong.
I am a novice in python trying to use multi-process with fork. What I wanted to do is to run a command on few hosts. I am able to do with the below code but I also want to stop execution if any of the child fails to run the command or the command itself fails.
def runCommand(host, comp):
    """Run the remote command on *host* over ssh; exit on failure."""
    status = os.system("ssh " + host + " 'somecommand'")
    if status != 0:
        print("somecommand failed on " + host + " for " + comp)
        sys.exit(-1)
def runMulti():
    """Fork one child per (comp, host) pair and wait for all of them.

    Relies on the module-level mapping ``conHosts`` -- presumably
    {comp: host}; TODO confirm against the caller.
    """
    children = []
    for comp, host in conHosts.iteritems():
        pid = os.fork()
        if pid:
            # parent: remember the child's PID so we can wait on it
            children.append(pid)
        else:
            # child: run the command, then exit immediately (os._exit
            # skips atexit handlers and buffered-IO flushing)
            sleep(5)
            runCommand(host, comp)
            os._exit(0)
    # reap every child so none are left as zombies
    for i, child in enumerate(children):
        os.waitpid(child, 0)
os.fork() returns 0 in the child process. So you can do:
if not os.fork():
# we now know we're the child process
execute_the_work()
if failed:
sys.exit()
sys.exit() is the pythonic way to exit a python program. Don't forget to import sys.
Since you seem to be a beginner, replace failed with the condition to judge if the task failed.
You can just check the return value of waitpid and see if the child process exited with a status different from 0:
had_error = any(os.waitpid(child, 0)[1] for child in children)
if had_error:
sys.exit(1)
Note: since you are checking the return value of os.fork the list children will be empty in the child processes and so any will always return False, i.e. only the master process will eventually call sys.exit.
I have achieved this by using ThreadPool.
# Fan the work out over one thread per host, with best-effort teardown.
pool = ThreadPool(len(hosts))
try:
    # NOTE(review): runMulti() is *called* here, so pool.map receives its
    # return value and maps over the string 'True'; presumably the intent
    # was pool.map(runMulti, hosts) -- confirm.
    pool.map(runMulti(), 'True')
    pool.close()
    pool.join()
except:
    # On any failure: leave a marker file, kill all ssh sessions, and
    # terminate this process with SIGKILL.
    os.system('touch /tmp/failed')
    commands.getoutput("killall -q ssh")
    os.kill(os.getpid(), 9)
I have created a temp file, when a thread in the pool exists with different status.Thank you all :)
I have a multiprocessing.Process subclass that ignores SIGINT:
# inside the run method
signal.signal(signal.SIGINT, signal.SIG_IGN)
I don't want this process to terminate when pressing CTRL + C, so I'm trying to simulate this terminal event in my unittests by sending a SIGINT signal to this process ID:
os.kill(PID, signal.SIGINT)
But even without ignoring this signal the process is not terminating, so this test is useless, I found out from other questions that on a CTRL + C event the terminal sends SIGINT to the process group ID, but I can't do this in my case because it will also terminate the unittest process.
So why the process doesn't terminate when it receives a SIGINT from os.kill ? and should I be doing this in another way ?
The child process should terminate on receipt of SIGINT, unless it is ignoring that signal or has its own handler installed. If you are not explicitly ignoring SIGINT in the child, then it is possible that SIGINT is being ignored in the parent, and therefore in the child, because the signal disposition is inherited.
However, I have not been able to replicate your issue, in fact, I find the opposite problem: the child process terminates regardless of its signal disposition.
If the signal is sent too soon, before the child process has ignored SIGINT (in its run() method), it will be terminated. Here is some code that demonstrates the problem:
import os, time, signal
from multiprocessing import Process
class P(Process):
    """multiprocessing.Process whose run() ignores SIGINT."""

    def run(self):
        # Install SIG_IGN for SIGINT in the child before delegating to
        # the target function.
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        return super(P, self).run()
def f():
    # Child target: sleep long enough for the parent to send its signal.
    print 'Child sleeping...'
    time.sleep(10)
    print 'Child done'
# Start the child, then immediately SIGINT it.  The signal may arrive
# before the child's run() installs SIG_IGN -- this race is exactly what
# the example demonstrates.
p = P(target=f)
p.start()
print 'Child started with PID', p.pid
print 'Killing child'
os.kill(p.pid, signal.SIGINT)
print 'Joining child'
p.join()
Output
Child started with PID 1515
Killing child
Joining child
Traceback (most recent call last):
File "p1.py", line 15, in
p.start()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/usr/lib64/python2.7/multiprocessing/forking.py", line 126, in __init__
code = process_obj._bootstrap()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 242, in _bootstrap
from . import util
KeyboardInterrupt
Adding a small delay with time.sleep(0.1) in the parent just before sending the SIGINT signal to the child will fix the problem. This will give the child enough time to execute the run() method in which SIGINT is ignored. Now the signal will be ignored by the child:
Child started with PID 1589
Killing child
Child sleeping...
Joining child
Child done
An alternative that requires no delays nor custom run() method is to set the parent to ignore SIGINT, start the child, then restore the parent's original SIGINT handler. Because the signal disposition is inherited, the child will ignore SIGINT from the moment it starts:
import os, time, signal
from multiprocessing import Process
def f():
    # Child target: sleep long enough for the parent to send its signal.
    print 'Child sleeping...'
    time.sleep(10)
    print 'Child done'
p = Process(target=f)
# Temporarily ignore SIGINT in the parent: the child inherits the signal
# disposition at start(), so it ignores SIGINT from its very first
# instruction -- no sleep/delay needed.
old_sigint = signal.signal(signal.SIGINT, signal.SIG_IGN)
p.start()
signal.signal(signal.SIGINT, old_sigint)  # restore parent's handler
print 'Child started with PID', p.pid
print 'Killing child'
os.kill(p.pid, signal.SIGINT)
print 'Joining child'
p.join()
Output
Child started with PID 1660
Killing child
Joining child
Child sleeping...
Child done
A simplified version of the issue is:
import os, time, signal
# Minimal reproduction: SIGINT sent immediately after fork() can be lost
# by the child (see discussion of CPython clearing pending signals).
childpid = os.fork()
if childpid == 0:
    # in the child
    time.sleep(5) # will be interrupted by KeyboardInterrupt
    print "stop child"
else:
    # in the parent; uncommenting the sleep avoids the race
    #time.sleep(1)
    os.kill(childpid, signal.SIGINT)
If the parent does sleep(1) before sending the signal, everything works as expected: the child (and only the child) receives a Python KeyboardInterrupt exception, which interrupts the sleep(5). However, if we comment out sleep(1) as in the example above, the kill() appears to be completely ignored: the child runs, sleeps 5 seconds, and finally prints "stop child". So a simple workaround is possible for your test suite: simply add a small sleep().
As far as I understand it, this occurs for the following (bad) reason: looking at the CPython source code, after the system call fork(), the child process explicitly clears the list of pending signals. But the following situation seems to occur often: the parent continues slightly ahead of the child, and sends the SIGINT signal. The child receives it, but at that point it is still only shortly after the system call fork(), and before the _clear_pending_signals(). As a result, the signal is lost.
This could be regarded as a CPython bug, if you feel like filing an issue on http://bugs.python.org . See PyOS_AfterFork() in signalmodule.c.
I have a Python script that starts a daemon process. I was able to do this by using the code found at: https://gist.github.com/marazmiki/3618191.
The code starts the daemon process exactly as expected. However, sometimes, and only sometimes, when the daemon process is stopped, the running job is zombied.
The stop function of the code is:
def stop(self):
    """
    Stop the daemon
    """
    # Get the pid from the pidfile
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        # NOTE(review): bare except hides real errors -- e.g. a corrupt
        # pidfile raises ValueError and is silently treated as
        # "daemon not running".
        pid = None
    if not pid:
        message = "pidfile %s does not exist. Daemon not running?\n"
        sys.stderr.write(message % self.pidfile)
        return # not an error in a restart
    # Try killing the daemon process: keep signalling until os.kill
    # raises OSError because the pid no longer exists.
    try:
        while 1:
            os.kill(pid, SIGTERM)
            time.sleep(1.0)
    except OSError, err:
        err = str(err)
        # NOTE(review): matching the message text is locale-fragile;
        # comparing err.errno to errno.ESRCH would be more robust.
        if err.find("No such process") > 0:
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
        else:
            print str(err)
            sys.exit(1)
When this stop() method is run, the process (pid) appears to hang, and when I Control+C out, I see the script is KeyboardInterrupted on the line time.sleep(1.0), which leads me to believe that the line:
os.kill(pid, SIGTERM)
is the offending code.
Does anyone have any idea why this could be happening? Why would this os.kill() would force a process to become a zombie?
I am running this on Ubuntu linux (if it matters).
UPDATE: I'm including my start() method per #paulus's answer.
def start(self):
    """
    Start the daemon
    """
    pid = None
    # Check for a pidfile to see if the daemon already runs
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        # NOTE(review): bare except -- any failure (missing file,
        # unreadable contents) is treated as "not running".
        pid = None
    if pid:
        message = "pidfile %s already exist. Daemon already running?\n"
        sys.stderr.write(message % self.pidfile)
        sys.exit(1)
    # Start the daemon
    self.daemonize()
    self.run()
UPDATE 2: And here is the daemonize() method:
def daemonize(self):
    """
    do the UNIX double-fork magic, see Stevens' "Advanced
    Programming in the UNIX Environment" for details (ISBN 0201563177)
    http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
    """
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent -- NOTE(review): it exits immediately
            # instead of waiting on the intermediate child, which is the
            # suspected cause of the occasional zombie (see answer below).
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)
    # do second fork so the daemon can never reacquire a controlling tty
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # redirect standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()
    # NOTE(review): rebinding sys.stdout here looks redundant -- the
    # dup2() below already redirects the underlying descriptor.
    sys.stdout = file(self.stdout, 'a+', 0)
    si = file(self.stdin, 'r')
    so = file(self.stdout, 'a+')
    se = file(self.stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
    # write pidfile and arrange for its removal at exit
    atexit.register(self.delpid)
    pid = str(os.getpid())
    file(self.pidfile, 'w+').write("%s\n" % pid)
You're looking in the wrong direction. The flawed code is not the one in the stop routine but it is in the start one (if you're using the code from gist). Double fork is a correct method, but the first fork should wait for the child process, not simply quit.
The correct sequence of commands (and the reasons to do the double fork) can be found here: http://lubutu.com/code/spawning-in-unix (see the "Double fork" section).
The "sometimes" you mention happens when the first parent dies before the SIGCHLD is handled, so the notification never reaches init.
As far as I remember, init should periodically read exit codes from its children besides handling signals, but the upstart version simply relies on the latter (hence the problem; see the comment on the similar bug: https://bugs.launchpad.net/upstart/+bug/406397/comments/2).
So the solution is to rewrite the first fork to actually wait for the child.
Update:
Okay, you want some code. Here it goes: pastebin.com/W6LdjMEz I've updated the daemonize, fork and start methods.