This is the daemon class I am using.
It acts as a base class from which I want to spawn 2 separate daemons via another controller file.
class Daemon:
    """A generic daemon class.

    Usage: subclass Daemon and override the run() method; start() will
    double-fork into the background and then invoke run().
    """

    def __init__(self, pidfile, outfile='/tmp/daemon_out', errfile='/tmp/daemon_log'):
        # pidfile: where the daemonized process records its PID.
        # outfile/errfile: files stdout/stderr are redirected to after forking.
        self.pidfile = pidfile
        self.outfile = outfile
        self.errfile = errfile

    def daemonize(self):
        """Daemonize via the UNIX double-fork mechanism.

        NOTE(review): both parent halves call sys.exit(), so start() never
        returns to the launching process — a controller cannot start two
        daemons sequentially with this class as-is.
        """
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #1 failed: {0}\n'.format(err))
            sys.exit(1)
        # decouple from parent environment
        os.chdir('/')
        os.setsid()
        os.umask(0)
        # second fork: guarantee the daemon can never reacquire a tty
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as err:
            sys.stderr.write('fork #2 failed: {0}\n'.format(err))
            sys.exit(1)
        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(os.devnull, 'r')
        so = open(self.outfile, 'a+')
        se = open(self.errfile, 'a+')
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
        # write pidfile and arrange for its removal at exit
        atexit.register(self.delpid)
        with open(self.pidfile, 'w+') as f:
            f.write(str(os.getpid()) + '\n')

    # method for removing the pidfile before stopping the program
    # uncomment the lines below to also delete the output & error files
    def delpid(self):
        os.remove(self.pidfile)
        #os.remove(self.outfile)
        #os.remove(self.errfile)

    def _read_pid(self):
        """Return the PID stored in the pidfile, or None if it is missing
        or unparsable (FIX: the original let a corrupt pidfile raise an
        uncaught ValueError from int())."""
        try:
            with open(self.pidfile, 'r') as pf:
                return int(pf.read().strip())
        except (IOError, OSError, ValueError):
            return None

    def start(self):
        """Start the daemon.  Does not return in the launching process."""
        # Check for a pidfile to see if the daemon already runs
        pid = self._read_pid()
        if pid:
            message = "pidfile {0} already exist. " + \
                      "Daemon already running?\n"
            sys.stderr.write(message.format(self.pidfile))
            sys.exit(1)
        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """Stop the daemon identified by the pidfile."""
        import errno  # local import: avoids touching the module's import block
        # Get the pid from the pidfile
        pid = self._read_pid()
        if not pid:
            message = "pidfile {0} does not exist. " + \
                      "Daemon not running?\n"
            sys.stderr.write(message.format(self.pidfile))
            return  # not an error in a restart
        # Try killing the daemon process: SIGTERM until it disappears
        try:
            while True:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            # FIX: compare errno instead of searching the locale-dependent
            # message text "No such process".
            if err.errno == errno.ESRCH:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print(str(err.args))
                sys.exit(1)

    def restart(self):
        """Restart the daemon."""
        self.stop()
        self.start()

    def run(self):
        """override this method when you subclass Daemon.
        It will be called after the process has been daemonized by
        start() or restart()."""
Here is the code I am using in a different file.
In this file I am extending the Daemon class with separate subclasses and overriding the run() method.
#! /usr/bin/python3.6
import sys, time, os, psutil, datetime
from daemon import Daemon
class net(Daemon):
    """Daemon that periodically reports its own PID (network monitor stub)."""

    def run(self):
        # Loop forever: announce the PID, then idle for a long interval.
        while True:
            print("net daemon : ", os.getpid())
            time.sleep(200)
class file(Daemon):
    """Daemon that periodically reports its own PID (filesystem monitor stub)."""

    def run(self):
        # Loop forever: announce the PID, then idle for a long interval.
        while True:
            print("file daemon : ", os.getpid())
            time.sleep(200)
if __name__ == "__main__":
    net_daemon = net(pidfile='/tmp/net_pidFile', outfile='/tmp/network_out.log', errfile='/tmp/net_error.log')
    file_daemon = file(pidfile='/tmp/file_pidFile', outfile='/tmp/filesys_out.log', errfile='/tmp/file_error.log')
    if len(sys.argv) == 2:
        if 'start' == sys.argv[1]:
            # BUG FIX: Daemon.start() daemonizes (double-fork) and never
            # returns in the calling process, so the second start() call
            # was unreachable and only the net daemon ever ran.  Fork a
            # throwaway child to launch the first daemon; the parent then
            # launches the second one.
            if os.fork() == 0:
                net_daemon.start()   # never returns
            file_daemon.start()
        elif 'stop' == sys.argv[1]:
            file_daemon.stop()
            net_daemon.stop()
        elif 'restart' == sys.argv[1]:
            # Same fix as 'start': restart() also never returns.
            if os.fork() == 0:
                file_daemon.restart()  # never returns
            net_daemon.restart()
        else:
            print("Unknown command")
            sys.exit(2)
        sys.exit(0)
    else:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)
Only the first class to call the start() method runs currently —
only the net daemon works. How do I make the 2 classes spawn 2 separate daemons?
The real problem here is that you've chosen the wrong code for the task you want. You're asking "How do I use this power saw to hammer in this nail?" And in this case, it's not even a professionally-produced saw with an instruction manual, it's a home-made saw you found in someone's garage, built by a guy who probably knew what he was doing but you can't actually be sure because you don't know what he was doing.
The proximate problem that you're complaining about is in daemonize:
try:
pid = os.fork()
if pid > 0:
# exit first parent
sys.exit(0)
The first time you call this, the parent process exits. Which means the parent process never gets to launch the second daemon, or do anything else.
For a self-daemonizing program that can be managed by a separate program, this is exactly what you want. (Whether it gets all the details right, I don't know, but the basic idea is definitely right.)
For a managing program that spawns daemons, this is exactly what you don't want. And that's what you're trying to write. So this is the wrong tool for the job.
But the tasks aren't that much different. If you understand what you're doing (and crack open your copy of Unix Network Programming—nobody understands this stuff well enough to get it right off the top of their head), you can convert one into the other. Which might be a useful exercise, even if for any real application I'd just use one of the well-tested, well-documented, nicely-maintained libraries on PyPI.
What happens if you just replace the sys.exit(0) calls that happen in the parent process (but not the ones that happen in the intermediate child!) with return True? (Well, you probably want to also replace the sys.exit(1) in the parent with a return False or raise some kind of exception.) Then daemonize no longer daemonizes you, but instead spawns a daemon and reports back on whether it succeeded. Which is what you wanted, right?
No guarantees that it does everything else right (and I'd bet it doesn't), but it does solve the specific problem you were asking about.
If nothing obvious is going wrong after that, the next step would probably be to read through PEP 3143 (which does a pretty nice job translating all the details in Stevens' book into Python terms and making sure they're up to date for 21st century linux and BSD) and come up with a checklist of tests to run, and then run them to see what less obvious things you're still getting wrong.
Related
def daemon_start(pid_file, log_file):
    """Daemonize with a single fork: the parent lingers (up to 5s) until the
    child signals it, then exits; the child becomes the daemon."""
    def handle_exit(signum, _):
        # Signal from the child: SIGTERM means "init succeeded" (exit 0),
        # anything else (e.g. SIGINT) means failure (exit 1).
        if signum == signal.SIGTERM:
            sys.exit(0)
        sys.exit(1)
    signal.signal(signal.SIGINT, handle_exit)
    signal.signal(signal.SIGTERM, handle_exit)
    # fork only once because we are sure parent will exit
    pid = os.fork()
    assert pid != -1
    if pid > 0:
        # parent waits for its child
        time.sleep(5)
        sys.exit(0)
    # child signals its parent to exit
    ppid = os.getppid()
    pid = os.getpid()
    if write_pid_file(pid_file, pid) != 0:
        # could not record our PID: tell the parent to report failure
        os.kill(ppid, signal.SIGINT)
        sys.exit(1)
    os.setsid()
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    print('started')
    # tell the parent that initialisation succeeded
    os.kill(ppid, signal.SIGTERM)
    sys.stdin.close()
    try:
        # redirect stdout/stderr into the log file
        freopen(log_file, 'a', sys.stdout)
        freopen(log_file, 'a', sys.stderr)
    except IOError as e:
        shell.print_exception(e)
        sys.exit(1)
This daemon does not use double fork. It says "fork only once because we are sure parent will exit". The parent calls sys.exit(0) to exit. However, the child also calls os.kill(ppid, signal.SIGTERM) to make the parent exit.
What does it mean by doing this?
The phrase "double fork" is a standard technique to ensure a daemon is reparented to the init (pid 1) process so that the shell which launched it does not kill it. This is actually using that technique because the first fork is done by the process that launched the python program. When a program calls daemon_start it forks. The original (now parent) process exits a few seconds later or sooner when the child it forked signals it. That will cause the kernel to reparent the child process to pid 1. "Double fork" does not mean the daemon calls fork() twice.
Also, your subject line asks "why does this function kill parent twice?" But the code in question does no such thing. I have no idea how you got that idea.
I have a class named People. And this class has a list. I don't want to keep this list on a file or database, all in memory, so the way I thought would work is by creating a Daemon and keep the process open, here is my code:
daemon.py
# coding: utf-8
import os
import sys
import time
import atexit
import signal
from people import People
class Daemon(object):
    """
    A generic daemon class.

    Usage: subclass the Daemon class and override the run() method
    """

    def __init__(self, pidfile, stdin='/dev/null',
                 stdout='/dev/null', stderr='/dev/null'):
        # Paths the standard streams are redirected to after daemonizing.
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.pidfile = pidfile
        # NOTE(review): this People instance lives only in the process
        # that created it; a separate CLI invocation gets a fresh, empty
        # list and cannot see the daemon's in-memory copy.
        self.bc = People()

    def daemonize(self):
        """
        do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        # Do first fork
        self.fork()
        # Decouple from parent environment
        self.dettach_env()
        # Do second fork
        self.fork()
        # Flush standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        # Re-point stdin/stdout/stderr at the configured files
        self.attach_stream('stdin', mode='r')
        self.attach_stream('stdout', mode='a+')
        self.attach_stream('stderr', mode='a+')
        # write pidfile
        self.create_pidfile()

    def attach_stream(self, name, mode):
        """
        Replaces the stream with new one
        """
        stream = open(getattr(self, name), mode)
        os.dup2(stream.fileno(), getattr(sys, name).fileno())

    def dettach_env(self):
        """Detach from the parent environment: cwd, session and umask."""
        os.chdir("/")
        os.setsid()
        os.umask(0)

    def fork(self):
        """
        Spawn the child process
        """
        try:
            pid = os.fork()
            if pid > 0:
                # parent half exits; only the child returns from fork()
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("Fork failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

    def create_pidfile(self):
        """Write our PID to the pidfile and schedule its removal at exit."""
        atexit.register(self.delpid)
        pid = str(os.getpid())
        # FIX: close the pidfile handle (the original leaked an open file)
        with open(self.pidfile, 'w+') as pf:
            pf.write("%s\n" % pid)

    def delpid(self):
        """
        Removes the pidfile on process exit
        """
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        pid = self.get_pid()
        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)
        # Start the daemon
        self.daemonize()
        self.run()

    def get_pid(self):
        """
        Returns the PID from pidfile
        """
        try:
            # FIX: context manager closes the file even if parsing fails
            with open(self.pidfile, 'r') as pf:
                pid = int(pf.read().strip())
        except (IOError, ValueError, TypeError):
            # FIX: a corrupt pidfile raises ValueError from int();
            # treat it the same as a missing pidfile
            pid = None
        return pid

    def stop(self, silent=False):
        """
        Stop the daemon
        """
        import errno  # local import keeps the module's import block unchanged
        # Get the pid from the pidfile
        pid = self.get_pid()
        if not pid:
            if not silent:
                message = "pidfile %s does not exist. Daemon not running?\n"
                sys.stderr.write(message % self.pidfile)
            return  # not an error in a restart
        # Try killing the daemon process: SIGTERM until it disappears
        try:
            while True:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            # FIX: compare errno instead of searching the locale-dependent
            # message text "No such process"
            if err.errno == errno.ESRCH:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                sys.stdout.write(str(err))
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop(silent=True)
        self.start()

    def run(self):
        """
        You should override this method when you subclass Daemon. It will be called after the process has been
        daemonized by start() or restart().
        """
        raise NotImplementedError
And my main file:
# coding: utf-8
import argparse
import sys
import time
from people import People
import logging
from daemon import Daemon
class MyDaemon(Daemon):
    """Concrete daemon: logs a heartbeat once per second."""

    def run(self):
        # Emit a debug heartbeat forever; the daemon never exits on its own.
        while True:
            logging.debug("I'm here...")
            time.sleep(1)

    def get_people(self):
        """Return the People instance created in Daemon.__init__."""
        return self.bc
def main():
    """
    The application entry point: parse one OPERATION argument and
    dispatch it against the daemon.
    """
    parser = argparse.ArgumentParser(
        description='Daemon runner',
        epilog="That's all folks"
    )
    parser.add_argument(
        'operation',
        metavar='OPERATION',
        type=str,
        help='Operation with daemon. Accepts any of these values: start, stop, restart, status',
        choices=['start', 'stop', 'restart', 'status', 'printpeople', 'add1', 'add2', 'add3', 'add4']
    )
    args = parser.parse_args()
    operation = args.operation
    # Daemon
    logging.basicConfig(filename="foodaemon.log", level=logging.DEBUG)
    daemon = MyDaemon('/Users/marcosaguayo/dev/luracoin/python.pid')
    if operation == 'start':
        print("Starting daemon")
        daemon.start()
        pid = daemon.get_pid()
        if not pid:
            print("Unable run daemon")
        else:
            print("Daemon is running [PID=%d]" % pid)
    elif operation == 'stop':
        # FIX: corrected the "Stoping" typo in the user-facing message
        print("Stopping daemon")
        daemon.stop()
    elif operation == 'restart':
        print("Restarting daemon")
        daemon.restart()
    elif operation == 'status':
        print("Viewing daemon status")
        pid = daemon.get_pid()
        if not pid:
            print("Daemon isn't running ;)")
        else:
            print("Daemon is running [PID=%d]" % pid)
    elif operation == 'printpeople':
        # NOTE(review): this reads a *fresh* People list created in this
        # process, not the daemon's in-memory list -- there is no IPC here.
        bc = daemon.get_people()
        print(bc.get_list())
    elif operation in ('add1', 'add2', 'add3', 'add4'):
        # FIX: the four copy-pasted addN branches collapsed into one;
        # the digit suffix of the operation is the value to store.
        bc = daemon.get_people()
        bc.add_people({"people": operation[3:]})
        print(bc.get_list())
    sys.exit(0)


if __name__ == '__main__':
    main()
people.py
class People:
    """Simple in-memory container for people records."""

    def __init__(self):
        # Backing store; exists only in this process's memory.
        self.people_list = []

    def get_list(self):
        """Return the underlying list of records."""
        return self.people_list

    def add_people(self, people):
        """Append one record to the list."""
        self.people_list.append(people)
I do the following:
$ python3 test.py start
*Starting daemon*
$ python3 test.py add1
*[{'people': '1'}]*
$ python3 test.py add2
*[{'people': '2'}]*
The python3 test.py add2 should return [{'people': '1'},{'people': '2'}]
What I think is happening is that each time I use the class, the list restarts. I've tried initializing the class on the __init__ of the daemon but doesn't work.
Anyone know how can I solve this?
I do not understand how this could work even in theory. Fixing it would require a complete rewrite.
You manage to launch your daemon, but then what? You never talk to it. It is there running and it would probably work as intended, but you do not use it for anything.
When you call your test.py with parameter add1, it creates a new daemon class (but it does not fork and start another background process as you do not call start() on it) with new data structures. This means an empty People list. Then you add to this list, print the result and exit. Your People list with one entry is gone when your process exits. People list in your daemon process is always empty as the daemon process just sits there, waiting in infinite loop and printing logging messages.
In contrast, your stop command does work, as it just sends a signal to a running process and kills it.
I can see no evidence anywhere that a new instance of MyDaemon class would somehow find if there is a daemon already running, and then communicate with it.
Fixing this is more than I have time to do. You need a communication mechanism. Sockets would do, or you could use ZMQ. Or pipes, but then you need two as you need to get a response back. You would use from your current code the parts where you start and stop your daemon.
When you instantiate your MyDaemon class, you would check whether there is a daemon running. If not, it would start it. And when a daemon starts, it starts listening to the communications channel. Instead of doing nothing in the while True loop, it would check if there are new requests asking it to actually do something.
If your daemon is already running and you just want to add to the list or query what is in there, you do not need an instance of MyDaemon class at all. You would instead write your request to the socket and then wait for response from your daemon. Writing an example of this is more than I have time to spend, but I hope this gives you the idea where it goes wrong and how to fix it.
Or then do not fix it by yourself and install a redis server instead. It would be an in-memory key/value store and it might be suitable for your purposes.
I am a novice in python trying to use multi-process with fork. What I wanted to do is to run a command on few hosts. I am able to do with the below code but I also want to stop execution if any of the child fails to run the command or the command itself fails.
def runCommand(host,comp):
    # Run 'somecommand' on the remote host via ssh; if the shell command
    # reports a non-zero status, abort this process with a failure code.
    # (Python 2 code, as quoted in the question.)
    if os.system("ssh "+host+" 'somecommand'") != 0:
        print "somecommand failed on "+host+" for "+comp
        sys.exit(-1)
def runMulti():
    # Fork one child per (comp, host) pair; each child runs the remote
    # command and exits, while the parent collects PIDs and waits.
    children = []
    for comp,host in conHosts.iteritems():
        pid = os.fork()
        if pid:
            # parent: remember the child's PID so we can wait on it
            children.append(pid)
        else:
            # child: stagger start-up, run the command, then exit without
            # running any parent cleanup (os._exit skips atexit handlers)
            sleep(5)
            runCommand(host,comp)
            os._exit(0)
    # NOTE(review): the exit statuses returned by waitpid are discarded,
    # so a failing child does not stop the remaining children -- confirm
    # whether failures should abort the whole run.
    for i, child in enumerate(children):
        os.waitpid(child, 0)
os.fork() returns 0 in the child process. So you can do:
if not os.fork():
# we now know we're the child process
execute_the_work()
if failed:
sys.exit()
sys.exit() is the pythonic way to exit a python program. Don't forget to import sys.
Since you seem to be a beginner, replace failed with the condition to judge if the task failed.
You can just check the return value of waitpid and see if the child process exited with a status different from 0:
# Reap the children and flag whether any exited with a non-zero status.
# NOTE(review): any() short-circuits at the first non-zero status, so the
# remaining children are not reaped by this line -- verify that is intended.
had_error = any(os.waitpid(child, 0)[1] for child in children)
if had_error:
    sys.exit(1)
Note: since you are checking the return value of os.fork the list children will be empty in the child processes and so any will always return False, i.e. only the master process will eventually call sys.exit.
I have achieved this by using ThreadPool.
# One worker thread per host.
pool = ThreadPool(len(hosts))
try:
    # NOTE(review): pool.map(runMulti(), 'True') CALLS runMulti()
    # immediately and then maps its return value over the string 'True'
    # character by character; presumably pool.map(runCommand, hosts) or
    # similar was intended -- verify.
    pool.map(runMulti(), 'True')
    pool.close()
    pool.join()
except:
    # On any failure: drop a marker file, kill every ssh child, then
    # hard-kill this whole process with SIGKILL (9).
    os.system('touch /tmp/failed')
    commands.getoutput("killall -q ssh")
    os.kill(os.getpid(),9)
I have created a temp file, when a thread in the pool exists with different status.Thank you all :)
I am looking for the way of sending a keystroke to a Python script. In this case, I am trying that the script detects if a press whatever key, and not only the interrupt signals (ctrl + c , ctrl + d, ...).
I have checked the signal python module. But it seems like it's only prepared to handle interrupt signals, and not if I press "K" or "Space" for example. I have seen this in the official docs of the module:
import signal
import os
import time
def receive_signal(signum, stack):
    # Generic handler: just report which signal number arrived.
    print 'Received:', signum

# Install the handler for both user-defined signals.
signal.signal(signal.SIGUSR1, receive_signal)
signal.signal(signal.SIGUSR2, receive_signal)

# Print the PID so signals can be sent from another shell with `kill`.
print 'My PID is:', os.getpid()

# Idle forever; the handler runs whenever a signal interrupts the sleep.
while True:
    print 'Waiting...'
    time.sleep(3)
And they say:
To send signals to the running program, I use the command line program kill. To produce the output below, I ran signal_signal.py in one window, then kill -USR1 $pid, kill -USR2 $pid, and kill -INT $pid in another.
I am quite sure that this module is not the solution. Do you know some module or something that could help me for sending keystroke to my python script asynchronously ?
Thanks a lot!!
I want the user has the possibility of skip a day, a month or a machine by pressing a key in whatever moment.
Ah. Now it makes sense.
And not very sure that this would be possible.
Anything's possible. It can just be quite complex for a truly asynchronous solution.
The only way I could think to do it, while avoiding a polling approach, was to fork(2) the process, have the parent process listen for keypresses, and send signals to the child process, which actually does the work.
Something like this...
#!/usr/bin/env python
import sys, os, time, termios, tty, signal
# Define some custom exceptions we can raise in signal handlers
class SkipYear(Exception):
    """Raised from a signal handler to abandon the current year."""
class SkipMonth(Exception):
    """Raised from a signal handler to abandon the current month."""
# Process one month
def process_month(year, month):
    # Placeholder for the real per-month work; the sleep simulates it
    # and gives the user a window to press a skip key.
    print 'Processing %04d-%02d' % (year, month)
    time.sleep(1)
# Process one year
def process_year(year):
    # Process months 1-12 of one year; a SkipMonth raised by the signal
    # handler aborts only the month that was being processed.
    for month in range(1, 13):
        try:
            process_month(year, month)
        except SkipMonth:
            print 'Skipping month %d' % month
# Do all processing
def process_all(args):
    # Top-level work loop run in the child process.
    print 'Started processing - args = %r' % args
    try:
        # Iterate years 2010-2015; SkipYear (raised by the SIGUSR1
        # handler) aborts only the year being processed.
        for year in range(2010, 2016):
            try:
                process_year(year)
            except SkipYear:
                print 'Skipping year %d' % year
    # Handle SIGINT forwarded from the parent process
    except KeyboardInterrupt:
        print 'Child caught SIGINT'
    # Return success
    print 'Child terminated normally'
    return 0
# Main entry point
def main(args):
    """Fork into a worker child and a keypress-listening parent: the parent
    owns the TTY, reads single keypresses in cbreak mode, and translates
    them into SIGUSR1/SIGUSR2 sent to the child."""
    # Help
    print 'Press Y to skip current year, M to skip current month, or CTRL-C to abort'
    # Get file descriptor for stdin. This is almost always zero.
    stdin_fd = sys.stdin.fileno()
    # Fork here
    pid = os.fork()
    # If we're the child
    if not pid:
        # Detach child from controlling TTY, so it can't be the foreground
        # process, and therefore can't get any signals from the TTY.
        os.setsid()
        # Define signal handler for SIGUSR1 and SIGUSR2: translate each
        # signal into the matching skip exception at the current point
        # of execution.
        def on_signal(signum, frame):
            if signum == signal.SIGUSR1:
                raise SkipYear
            elif signum == signal.SIGUSR2:
                raise SkipMonth
        # We want to catch SIGUSR1 and SIGUSR2
        signal.signal(signal.SIGUSR1, on_signal)
        signal.signal(signal.SIGUSR2, on_signal)
        # Now do the thing
        return process_all(args[1:])
    # If we get this far, we're the parent
    # Define a signal handler for when the child terminates
    def on_sigchld(signum, frame):
        assert signum == signal.SIGCHLD
        print 'Child terminated - terminating parent'
        sys.exit(0)
    # We want to catch SIGCHLD
    signal.signal(signal.SIGCHLD, on_sigchld)
    # Remember the original terminal attributes so we can restore them
    stdin_attrs = termios.tcgetattr(stdin_fd)
    # Change to cbreak mode, so we can detect single keypresses
    tty.setcbreak(stdin_fd)
    try:
        # Loop until we get a signal. Typically one of...
        #
        # a) SIGCHLD, when the child process terminates
        # b) SIGINT, when the user presses CTRL-C
        while 1:
            # Wait for a keypress
            char = os.read(stdin_fd, 1)
            # If it was 'Y', send SIGUSR1 to the child
            if char.lower() == 'y':
                os.kill(pid, signal.SIGUSR1)
            # If it was 'M', send SIGUSR2 to the child
            if char.lower() == 'm':
                os.kill(pid, signal.SIGUSR2)
    # Parent caught SIGINT - send SIGINT to child process
    except KeyboardInterrupt:
        print 'Forwarding SIGINT to child process'
        os.kill(pid, signal.SIGINT)
    # Catch system exit (raised by the SIGCHLD handler above)
    except SystemExit:
        print 'Caught SystemExit'
    # Ensure we reset terminal attributes to original settings
    finally:
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, stdin_attrs)
    # Return success
    print 'Parent terminated normally'
    return 0
# Stub
# Entry point: forward main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
...should do the trick, although you'll be limited by the number of distinct signals you can send.
I have a Python script that starts a daemon process. I was able to do this by using the code found at: https://gist.github.com/marazmiki/3618191.
The code starts the daemon process exactly as expected. However, sometimes — and only sometimes — when the daemon process is stopped, the running job becomes a zombie process.
The stop function of the code is:
def stop(self):
    """
    Stop the daemon

    (Python 2 code as quoted from the gist under discussion.)
    """
    # Get the pid from the pidfile; any failure (missing file, garbage
    # contents) is treated as "daemon not running".
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        pid = None
    if not pid:
        message = "pidfile %s does not exist. Daemon not running?\n"
        sys.stderr.write(message % self.pidfile)
        return # not an error in a restart
    # Try killing the daemon process: keep sending SIGTERM until kill()
    # raises OSError because the process no longer exists.
    try:
        while 1:
            os.kill(pid, SIGTERM)
            time.sleep(1.0)
    except OSError, err:
        # NOTE(review): matching the message text is locale-dependent;
        # err.errno == errno.ESRCH would be the robust check.
        err = str(err)
        if err.find("No such process") > 0:
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
        else:
            print str(err)
            sys.exit(1)
When this stop() method is run, the process (pid) appears to hang, and when I Control+C out, I see the script is KeyboardInterrupted on the line time.sleep(1.0), which leads me to believe that the line:
os.kill(pid, SIGTERM)
is the offending code.
Does anyone have any idea why this could be happening? Why would this os.kill() would force a process to become a zombie?
I am running this on Ubuntu linux (if it matters).
UPDATE: I'm including my start() method per #paulus's answer.
def start(self):
    """
    Start the daemon

    (Python 2 code as quoted from the gist under discussion.)
    """
    pid = None
    # Check for a pidfile to see if the daemon already runs; any failure
    # reading/parsing it is treated as "not running".
    try:
        pf = file(self.pidfile, 'r')
        pid = int(pf.read().strip())
        pf.close()
    except:
        pid = None
    if pid:
        message = "pidfile %s already exist. Daemon already running?\n"
        sys.stderr.write(message % self.pidfile)
        sys.exit(1)
    # Start the daemon; daemonize() never returns in the calling process.
    self.daemonize()
    self.run()
UPDATE 2: And here is the daemonize() method:
def daemonize(self):
    """
    do the UNIX double-fork magic, see Stevens' "Advanced
    Programming in the UNIX Environment" for details (ISBN 0201563177)
    http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
    """
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)
    # do second fork
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            sys.exit(0)
    except OSError, e:
        sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)
    # redirect standard file descriptors
    sys.stdout.flush()
    sys.stderr.flush()
    # NOTE(review): sys.stdout is rebound here and then immediately
    # dup2'd over below -- the double redirection looks redundant; verify.
    sys.stdout = file(self.stdout, 'a+', 0)
    si = file(self.stdin, 'r')
    so = file(self.stdout, 'a+')
    se = file(self.stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
    # write pidfile and arrange for its removal at exit
    atexit.register(self.delpid)
    pid = str(os.getpid())
    file(self.pidfile, 'w+').write("%s\n" % pid)
You're looking in the wrong direction. The flawed code is not the one in the stop routine but it is in the start one (if you're using the code from gist). Double fork is a correct method, but the first fork should wait for the child process, not simply quit.
The correct sequence of commands (and the reasons to do the double fork) can be found here: http://lubutu.com/code/spawning-in-unix (see the "Double fork" section).
The "sometimes" you mention happens when the first parent dies before receiving SIGCHLD, so the child is not properly reparented to init.
As far as I remember, init should periodically read exit codes from it's children besides signal handling, but the upstart version simply relies on the latter (therefore the problem, see the comment on the similar bug: https://bugs.launchpad.net/upstart/+bug/406397/comments/2).
So the solution is to rewrite the first fork to actually wait for the child.
Update:
Okay, you want some code. Here it goes: pastebin.com/W6LdjMEz I've updated the daemonize, fork and start methods.