How to properly close a file with signal handler - python

I'm using the following code to monitor file access from a running job.
When the job is stopped my code receive a SIGINT.
As this job is very intensive, there's buffered IO and I can't unbuffered those writes, and I want a precise log.
So I tried to catch SIGINT and flush the file before shutting down my script I end up with :
RuntimeError: reentrant call inside <_io.BufferedWriter name=
As I understand from several articles I read, it's impossible to consistently use write/print/flush command as they are not thread safe in a signal handler.
My question is how can I ensure that my file is written properly before shutting down the script ?
Here's a simpler version of my script:
import signal
import sys
import os
import time
from time import strftime
import inotify.adapters
separator = ';'
jump = '\n'
logfile_pointer = open("path/to/log/file", 'w')
#Try to close nicely everything
def signal_handler(signal, frame):
logfile_pointer.flush()
logfile_pointer.close()
sys.exit(0)
#Register signal handler
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)
eventHandler = inotify.adapters.InotifyTrees(["/folder/one","/folder/two"])
for event in eventHandler.event_gen():
if event is not None:
(_, type_names, path, filename) = event
try:
timestamp = '%.2f'%(time.time())
filepath=path +'/'+ filename
logfile_pointer.write ("{}{}{}{}{}{}{}{}".format(timestamp, separator, filepath , separator , type_names[0] ,separator, os.path.getsize(filepath) , jump )
except os.error as e:
pass

The typical approach here is to have the signal handler set a flag, and return without exiting. The main loop checks the flag and when it’s set, cleans up and exits.
In this particular instance this means you need to have the event producer yield regularly; with PyInotify you can do this by setting a short timeout. This would end up looking like
[...]
exit_requested = False
def signal_handler(signal, frame):
# Perhaps check which signal was received...
exit_requested = True
[...]
for event in eventHandler.event_gen(timeout_s = 1):
if exit_requested:
# Clean up and exit
if event:
...
When event_gen returns None because it timed out, inotify events which occur before the next call to event_gen will be queued and not lost: inotify events are consumed when they are read from the inotify file descriptor, and the event handler here keeps this open.

I had several issue to solve one being the way to stop my script from running as Python have some strange thread conception, here's my solution :
define a thread that will be the inotify watcher:
import os
import sys
import time
import signal
import argparse
import inotify.adapters
from time import strftime
from threading import Thread
from argparse import RawTextHelpFormatter
class EventMonitor(Thread):
separator = ';'
jump = '\n'
def __init__(self, folders, logfile):
Thread.__init__(self)
check_message=''
self.eventHandler = None
self.stop = False
self.logfile = open(logfile,'w',buffering=bufferSize)
self.line_count = 0
self.alive=True
self.eventHandler = inotify.adapters.InotifyTrees(folders)
def run(self):
while not self.stop:
for event in self.eventHandler.event_gen( timeout_s = 3 ):
try:
if event is not None:
(_, type_names, path, filename) = event
timestamp = '%.2f'%(time.time())
filepath=path +'/'+ filename
self.logfile.write ("{}{}{}{}{}{}{}{}".format(timestamp, self.separator, filepath , self.separator , type_names[0] ,self.separator, os.path.getsize(filepath) , self.jump ))
except os.error as e:
pass
for event in self.eventHandler.event_gen( timeout_s = 1 ):
try:
if event is not None:
(_, type_names, path, filename) = event
timestamp = '%.2f'%(time.time())
filepath=path +'/'+ filename
self.logfile.write ("{}{}{}{}{}{}{}{}".format(timestamp, self.separator, filepath , self.separator , type_names[0] ,self.separator, os.path.getsize(filepath) , self.jump ))
except os.error as e:
pass
self.logfile.flush()
self.logfile.close()
self.alive=False
def stopped(self):
if not self.stop:
self.stop = True
else:
print("Event Monitoring is already disabled")
def isAlive(self):
return self.alive
Then in my main script :
import os
import sys
import time
import signal
import argparse
import traceback
from time import strftime
from CPUMonitor import CPUMonitor
from EventMonitor import EventMonitor
from argparse import RawTextHelpFormatter
#define argument
parser = argparse.ArgumentParser(description='attache spies on multiple folders in argument and generate a csv log file containing a list of event on files.File is formatted like this: \ntimestamp;fullpath;event;size\n123456897.25;/path/file;IN_OPEN;0\n/123456899.25;path/file;IN_CLOSE;1234\n.....\nFor more info about inotify events => `man inotify`',formatter_class=RawTextHelpFormatter)
parser.add_argument("-l", "--log-folder",type=str, help="Destination folder for the logs. If no value /tmp is used", default='/tmp')
parser.add_argument("-e", "--event", help="enable file event watch ",action="store_true")
parser.add_argument( 'folders', metavar='folderpath', type=str ,help='a list of folder path to spy on if -e is not set this will be ignore.', nargs = '*', default=[os.getcwd()])
args = parser.parse_args()
#Try to close nicely everything
def signal_handler(signal, frame):
if CPU_thread is not None:
CPU_thread.stopped()
if Event_thread is not None:
Event_thread.stopped()
print('Kill signal receive.{}CPU and Event monitoring stopped.{}'.format(jump,jump))
sys.exit(0)
#Register signal handler
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)
try:
#define variable
separator = ';'
jump = '\n'
logDest = ''
go = True
Event_logfile = None
Event_logfile_debug = None
Event_thread = None
jobname = ''
check_message=''
if not os.path.isdir(args.log_folder):
go=False
check_message = check_message + "/!\ Log folder {} is not a directory. Monitoring won't start{}".format(args.log_folder,jump)
elif not os.access(args.log_folder, os.W_OK | os.X_OK) :
go=False
check_message = check_message + "/!\ Log folder {} is not writable. Monitoring won't start{}".format(args.log_folder,jump)
else:
check_message = check_message + "Log folder is a proper directory and can be RW. {}".format(jump)
if not go :
print(check_message)
sys.exit(-2)
if go :
event_logfile = args.log_folder + '/Event_'+os.environ['JOB_ID'] + '_' + strftime("%Y-%m-%d_%H:%M:%S") + '-log.txt'
print('Event logfile: {}{}'.format(event_logfile,jump) )
print( 'Start monitoring of the event on: {} {}'.format( args.folders, jump ))
Event_thread = EventMonitor(args.folders, event_logfile)
Event_thread.start()
else:
print(("Error detected, monitoring hasn't started{}".format(jump)))
sys.exit(-4)
while Event_thread is not None and Event_thread.isAlive() :
time.sleep(5)
if Event_thread is not None:
Event_thread.join()
except Exception as error:
traceback.print_exc()
print(str(error))
sys.exit(-5)
In the thread as long as the thread is not stopped it will look for event and write them inside the file.
When stopped() is called the loop will time out after 3 seconds without event then I start the event loop a last time with a shorter timeout of 1 seconds, once all events are treated, the thread stops and isAlive() return False.
In the main program when SIGINT or SIGHUP is received it ask the thread to stop, and the python script only stops once the thread stops properly.
This code work both in Python 2.7.15 and 3.6.7 and above; however, keep in mind that this is a simplified version of my code and it might not work as is and might need some adjustment.
PS: thanks to Stephen answer which helps me a lot.

Related

Python: Unable to kill process with keyboard interrupt?

I have a decorator written as such:
import threading
from time import sleep
from functools import wraps
import sys
import os
def repeat_periodically(f):
""" Repeat wrapped function every second """
#wraps(f)
def wrap(self, *args, **kwargs):
def wrap_helper(*args, **kwargs):
try:
threading.Timer(1.0, wrap_helper).start()
f(self)
except KeyboardInterrupt:
try:
sys.exit(1)
except:
os._exit(1)
wrap_helper()
return wrap
I'm not sure if it continues to open a new thread every single time it calls itself, but regardless, I'm unable to kill the process when I hit CTRL + C. I've also added the same try-except block in the function that I've decorated:
#repeat_periodically
def get_stats(self):
try:
# log some state information
except KeyboardInterrupt:
try:
sys.exit(1)
except:
os._exit(1)
My program just continues to run and all I see in the terminal is
^C <the stuff that I am logging>
<the stuff that I am logging>
<the stuff that I am logging>
In other words, it just keeps logging, even though I'm trying to kill it with CTRL + C.
Update:
I should mention that the above process is spun up from another thread:
tasks = [
{'target': f, 'args': (arg1)},
{'target': g},
]
for task in tasks:
t = threading.Thread(**task)
t.start()
Specifically it is the second task that spins up the Timer. However, if I set t.daemon = True, the process just runs once and exits. The first task uses watchdog. I've essentially used the example code from the watchdog documentation:
def watch_for_event_file(Event):
path = sys.argv[1] if len(sys.argv) > 1 else '.'
event_handler = LoggingCreateHandler(Event)
observer = Observer()
observer.schedule(event_handler, path)
observer.start()
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
observer.join()
(Sorry for all the updates)
From the Thread documentation:
The entire Python program exits when no alive non-daemon threads are left.
So making your Timer threads as daemon threads should solve your problem. So replace:
threading.Timer(1.0, wrap_helper).start()
with:
t = threading.Timer(1.0, wrap_helper)
t.daemon = True
t.start()

python inotify to monitor for In_closted_write and in_moved_to events

I am monitoring a directory for new files to be moved to or created.
Upon detecting the new file I call a another python script to process the file.
#!/usr/bin/python
import os
import signal
import sys
import logging
import inotify.adapters
import subprocess
_DEFAULT_LOG_FORMAT = ''
_LOGGER = logging.getLogger(__name__)
def _configure_logging():
_LOGGER.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
formatter = logging.Formatter(_DEFAULT_LOG_FORMAT)
ch.setFormatter(formatter)
_LOGGER.addHandler(ch)
def exit_gracefully(signum, frame):
signal.signal(signal.SIGINT, original_sigint)
sys.exit(1)
signal.signal(signal.SIGINT, exit_gracefully)
def main():
i = inotify.adapters.Inotify()
i.add_watch(b'/home/sort/tmp')
try:
for event in i.event_gen():
if event is not None:
if 'IN_MOVED_TO' in event[1] or 'IN_CLOSE_WRITE' in event[1]:
(header, type_names, watch_path, filename) = event
_LOGGER.info("%s" #"WD=(%d) MASK=(%d) COOKIE=(%d) LEN=(%d) MASK->NAMES=%s "
#"WATCH-PATH=[%s]"
"FILENAME=%s" + "/" + "%s",
type_names,#header.wd, header.mask, header.cookie, header.len, type_names,
watch_path.decode('utf-8'), filename.decode('utf-8'))
fnp = str(event[2] + "/" + event[3])
print fnp
proc = subprocess.Popen([orgpath, fnp], stderr=subprocess.STDOUT, bufsize=1)
#proc.communicate()
finally:
i.remove_watch(b'/home/sort/tmp')
if __name__ == '__main__':
_configure_logging()
orgdir = os.path.dirname(os.path.realpath(sys.argv[0]))
orgpath = os.path.join(orgdir, "organize.py")
original_sigint = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, exit_gracefully)
print("Watching /home/sort/tmp for new files")
main()
The end goal is to only process one file at a time as I call to an API to scrape for metadata. To many calls to the API in a short period of time could result in the API key to be banned or temporarily blocked.
Right now when I copy more than a single file into the monitoring directory the script gets called on each file at the same time.
Try putting a for loop to run the python file..
for files in directory:
...code that runs the python file
if it is still running too fast, you can put a timer on to throttle the API calls
import time
for files in directory:
...code that runs the python file
time.sleep(5)

Using sigalarm logic in Windows

I am making file conversation between two formats. If it takes too long, I want to skip that file and continue. Here is the script that I am using:
import time
from threading import Thread
from os.path import basename
import os, glob
# Custom exception class
class TimeoutException(Exception):
pass
# Custom alarm handler
class Alarm(Thread):
def __init__(self, timeout):
Thread.__init__(self)
self.timeout = timeout
self.setDaemon(True)
def run(self):
time.sleep(self.timeout)
raise TimeoutException
for folder, subfolders, filenames in os.walk(directory):
if any([filename.endswith('.scad') for filename in filenames]):
if filename.endswith('.scad'):
os.chdir(folder)
of = filename.replace('.scad', '.stl') # name of the outfile .stl
# This try/except loop ensures that
# you'll catch TimeoutException when it's sent.
try:
alarm = Alarm(5) # Create the timer. Once 5 seconds are over, a alarm will go off.
alarm.start() # Start the timer
os.system("\"convert.exe\" -o {} {}".format(of, filename))
except TimeOutException:
print("Timed out for file name: {}".format("filename"))
else:
del alarm # Reset the alarm
How could I make the script work in Windows? I referred sigalarm in Linux, but couldn't make it work in windows

Terminate external program run through asyncio with specific signal

I need to terminate external programs which run from an asyncio Python script with a specific signal, say SIGTERM. My problem is that programs always receives SIGINT even if I send them SIGTERM signal.
Here is a test case, source code for a fakeprg used in the test below can be found here.
import asyncio
import traceback
import os
import os.path
import sys
import time
import signal
import shlex
from functools import partial
class ExtProgramRunner:
run = True
processes = []
def __init__(self):
pass
def start(self, loop):
self.current_loop = loop
self.current_loop.add_signal_handler(signal.SIGINT, lambda: asyncio.async(self.stop('SIGINT')))
self.current_loop.add_signal_handler(signal.SIGTERM, lambda: asyncio.async(self.stop('SIGTERM')))
asyncio.async(self.cancel_monitor())
asyncio.Task(self.run_external_programs())
#asyncio.coroutine
def stop(self, sig):
print("Got {} signal".format(sig))
self.run = False
for process in self.processes:
print("sending SIGTERM signal to the process with pid {}".format(process.pid))
process.send_signal(signal.SIGTERM)
print("Canceling all tasks")
for task in asyncio.Task.all_tasks():
task.cancel()
#asyncio.coroutine
def cancel_monitor(self):
while True:
try:
yield from asyncio.sleep(0.05)
except asyncio.CancelledError:
break
print("Stopping loop")
self.current_loop.stop()
#asyncio.coroutine
def run_external_programs(self):
os.makedirs("/tmp/files0", exist_ok=True)
os.makedirs("/tmp/files1", exist_ok=True)
# schedule tasks for execution
asyncio.Task(self.run_cmd_forever("/tmp/fakeprg /tmp/files0 1000"))
asyncio.Task(self.run_cmd_forever("/tmp/fakeprg /tmp/files1 5000"))
#asyncio.coroutine
def run_cmd_forever(self, cmd):
args = shlex.split(cmd)
while self.run:
process = yield from asyncio.create_subprocess_exec(*args)
self.processes.append(process)
exit_code = yield from process.wait()
for idx, p in enumerate(self.processes):
if process.pid == p.pid:
self.processes.pop(idx)
print("External program '{}' exited with exit code {}, relauching".format(cmd, exit_code))
def main():
loop = asyncio.get_event_loop()
try:
daemon = ExtProgramRunner()
loop.call_soon(daemon.start, loop)
# start main event loop
loop.run_forever()
except KeyboardInterrupt:
pass
except asyncio.CancelledError as exc:
print("asyncio.CancelledError")
except Exception as exc:
print(exc, file=sys.stderr)
print("====", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
finally:
print("Stopping daemon...")
loop.close()
if __name__ == '__main__':
main()
The reason for this is: When you start your python program (parent) and it starts it's processes /tmp/fakeprg (children) they get all different processes with its pid but they all run in the same foreground process group. Your shell is bound to this group, so when you hit Ctrl-C (SIGINT), Ctrl-Y (SIGTSTP) or Ctrl-\ (SIGQUIT) they are sent to all processes in the foreground process group.
In your code this happens before the parent can even send the signal to its children through send_signal, so this line sends a signal to an already dead process (and should fail, so IMO that's an issue with asyncio).
To solve that, you can explicitly put your child process into a separate process group, like this:
asyncio.create_subprocess_exec(*args, preexec_fn=os.setpgrp)

How can I end an infinite loop with socket operations inside after finishing current iteration?

I have an infinite loop in which there are operations that are mandatory to be completely executed before exiting the loop. Namely, I am using the socket library for connecting to an external device and I need to wait the read instructions to be finished before interrupting the loop.
I have tried using a signal handler (like in this question) for raising a flag when a Keyboard interrupt is detected.
Current code:
import videosensor
import signal
def signal_handler(signal, frame):
"""Raises a flag when a keyboard interrupt is raised."""
global interrupted
interrupted = True
if __name__ == '__main__':
camera = videosensor.VideoSensor(filename)
interrupted = False
signal.signal(signal.SIGINT, signal_handler)
while not interrupted:
location = camera.get_register()
#...
#More irrelevant stuff is executed.
#...
time.sleep(0.01)
#This code has to be executed after exiting while loop
camera_shutdown(camera)
In the previous code, videosensor.VideoSensor is a class containing socket operations for getting data from an external device. The get_register() method used in the main routine is the following:
def get_register(self):
"""Read the content of the specified register.
"""
#Do some stuff
value = socket.recv(2048)
return value
The problem:
I wanted the while loop to be continually executed until the user pressed a key or used the Keyboard Interrupt, but after the current iteration was finished. Instead, using the previous solution does not work as desired, as it interrupts the ongoing instruction, and if it is reading the socket, an error is raised:
/home/.../client.pyc
in read_register(self, regkey)
164 reg = self._REGISTERS[regkey]
165 self.send('r,{}\n'.format(reg))
--> 166 value = socket.recv(2048)
167 #Convert the string input into a valid value e.g. list or int
168 formatted_result = ast.literal_eval(value)
error: [Errno 4] Interrupted system
EDIT: It seems, from an answer below, that there is no way of using the Keyboard Interrupt and avoid the socket read function to be aborted. Despite there are solutions for catching the error, they don't avoid the read cancellation.
I am interested, though, in finding a way of getting a user input e.g. specific key press, that raises the flag, which will be checked at the end of the loop, without interrupting the main routine execution until this check.
EDIT2: The used OS is the Linux distribution Ubuntu 14.04
After quick SO search I found this solution for your issue
Basically, there's nothing you can do: when you send a SIGINT to your process, the socket will return a SIGINT as well. The best you can do, then, is to actively ignore the issue, by catching the socket EINTR error and going on with your loop:
import errno
try:
# do something
value = conn.recv(2048)
except socket.error as (code, msg):
if code != errno.EINTR:
raise
An alternative solution to avoid issues with C-c breaking reads, is to use parallel execution, to read your socket in a routine, and handle user input on the other:
import asyncio
async def camera_task(has_ended, filename):
camera = videosensor.VideoSensor(filename)
try:
while not has_ended.is_set():
location = camera.get_register()
#...
#More irrelevant stuff is executed.
#...
await asyncio.sleep(0.01)
finally:
#This code has to be executed after exiting while loop
camera_shutdown(camera)
async def input_task(shall_end):
while True:
i = input("Press 'q' to stop the script…")
if i == 'q':
shall_end.set()
def main():
filename = …
#
end_event = asyncio.Event()
asyncio.Task(camera_task(end_event, filename))
asyncio.Task(input_task(end_event))
asyncio.get_event_loop().run_forever()
or with threading
import threading, time
def camera_task(has_ended, filename):
camera = videosensor.VideoSensor(filename)
try:
while not has_ended.is_set():
location = camera.get_register()
#...
#More irrelevant stuff is executed.
#...
time.sleep(0.01)
finally:
#This code has to be executed after exiting while loop
camera_shutdown(camera)
def input_task(shall_end):
while True:
i = input("Press 'q' to stop the script…")
if i == 'q':
shall_end.set()
def main():
filename = …
#
end_event = threading.Event()
threads = [
threading.Thread(target=camera_task, args=(end_event, filename)),
threading.Thread(target=input_task, args=(end_event,))
]
# start threads
for thread in threads:
thread.start()
# wait for them to end
for thread in threads:
thread.join()
or with multiprocessing:
import multiprocessing, time
def camera_task(has_ended, filename):
camera = videosensor.VideoSensor(filename)
try:
while not has_ended.is_set():
location = camera.get_register()
#...
#More irrelevant stuff is executed.
#...
time.sleep(0.01)
finally:
#This code has to be executed after exiting while loop
camera_shutdown(camera)
def input_task(shall_end):
while True:
i = input("Press 'q' to stop the script…")
if i == 'q':
shall_end.set()
def main():
filename = …
#
end_event = multiprocessing.Event()
processes = [
multiprocessing.Process(target=camera_task, args=(end_event, filename)),
multiprocessing.Process(target=input_task, args=(end_event,))
]
# start processes
for process in processes:
process.start()
# wait for them to end
for process in processes:
process.join()
disclaimer: those codes are untested, and there might be some typos or little errors, but I believe the overall logic should be 👌
You created your custom signal handler but did not overide the default keyboard interrupt behaviour. Add signal.signal(signal.SIGINT, signal_handler) to your code to accomplish this:
import videosensor
import signal
# Custom signal handler
def signal_handler(signal, frame):
"""Raises a flag when a keyboard interrupt is raised."""
global interrupted
interrupted = True
# Necessary to override default keyboard interrupt
signal.signal(signal.SIGINT, signal_handler)
if __name__ == '__main__':
# Main programme
If I understand correctly, you do not want socket.recv() to be interrupted, but you do want to use signals to let the user indicate that the I/O loop should be terminated once the current I/O operation has completed.
With the assumption that you are using Python 2 on a Unix system, you can solve your problem by calling signal.siginterrupt(signal.SIGINT, False) before entering the loop. This will cause system calls to be restarted when a signal occurs rather than interrupting it and raising an exception.
In your case this means that the socket.recv() operation will be restarted after your signal handler is called and therefore get_register() will not return until a message is received on the socket. If that is what you want your code will be:
interrupted = False
old_handler = signal.signal(signal.SIGINT, signal_handler) # install signal handler
signal.siginterrupt(signal.SIGINT, False) # do not interrupt system calls
while not interrupted:
location = camera.get_register()
if location == '':
# remote connection closed
break
#...
#More irrelevant stuff is executed.
#...
time.sleep(0.01)
That's one way to do it, but it does require that your code is running on a Unix platform.
Another way, which might work on other platforms, is to handle the exception, ignore further SIGINT signals (in case the user hits interrupt again), and then perform a final socket.recv() before returning from the get_register() function:
import errno
def get_register(s):
"""Read the content of the specified register.
"""
#Do some stuff
try:
old_handler = None
return s.recv(2048)
except socket.error as exc:
if exc.errno == errno.EINTR:
old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) # ignore this signal
return s.recv(2048) # system call was interrupted, restart it
else:
raise
finally:
if old_handler is not None:
signal.signal(signal.SIGINT, old_handler) # restore handler
Signal handling can get tricky and there might be race conditions in the above that I am not aware of. Try to use siginterrupt() if possible.

Categories