I want to exit a crawler script that uses threading, but it doesn't react to Ctrl+C. I tried try/except blocks for KeyboardInterrupt both in the main thread and in the newly created threads, but the exception is never raised. I also tried installing a signal handler in the main thread, but since the main thread isn't active while the workers run, the signal is never detected.
import threading
import signal
class Crawler(threading.Thread):
    """Worker thread from the question: tries (and fails) to catch Ctrl+C.

    NOTE(review): KeyboardInterrupt from Ctrl+C is only raised in the main
    thread, so the except clauses in run()/crawl() in a worker thread never
    fire -- this is the behavior the question is asking about.
    """
    def __init__(self, num):
        threading.Thread.__init__(self)
        # Other stuff
    def run(self, ):
        try:
            self.crawl()
        except KeyboardInterrupt:
            # Never reached in a worker thread (see class note).
            self.exit()
    def crawl(self):
        # assumes self.links_list is populated elsewhere -- TODO confirm
        for url in self.links_list:
            try:
                # Getting the website and all links on it and adding it to links_list
                pass
            except (KeyboardInterrupt, SystemExit):
                self.exit()
            except AssertionError:
                self.exit()
    def exit(self):
        # Saving progress
        raise SystemExit()
def handler(signum, frame):
    # NOTE(review): this calls the site builtin exit(), which raises
    # SystemExit in the *main* thread only; worker threads keep running.
    exit()
if __name__ == "__main__":
    # Install a SIGINT handler, then start eight workers. The main thread
    # exits immediately after starting them, so it is no longer active
    # when Ctrl+C arrives -- the crux of the reported problem.
    signal.signal(signal.SIGINT, handler)
    for i in range(8):
        crawler = Crawler(i)
        crawler.start()
Instead of trying to do stuff with signals, use a threading.Event() to signal all of the other threads to stop:
import threading
import time
# Module-wide shutdown flag shared by every crawler thread.
stop_signal = threading.Event()


class Crawler(threading.Thread):
    """Worker thread that walks its link list until told to stop."""

    def __init__(self, num):
        super().__init__()
        # Other stuff

    def run(self):
        try:
            for link in self.links_list:
                # Poll the shared event between URLs and bail out early.
                if stop_signal.is_set():
                    break
                # Getting the website and all links on it and adding it to links_list
        finally:
            # Persist whatever was done, whether we finished or were stopped.
            self.save_progress()

    def save_progress(self):
        pass
def main():
    """Start the workers, idle until Ctrl+C, then signal them and wait."""
    workers = [Crawler(idx) for idx in range(8)]
    for worker in workers:
        worker.start()
    # Do nothing for a whole long while in the main loop
    # (you could exit the loop when all jobs are done, though)
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Tell every worker to wind down gracefully.
        stop_signal.set()
    for worker in workers:
        worker.join()


if __name__ == "__main__":
    main()
Related
I have a long-running thread which calls a method. I want to implement a timeout for a method which takes more than a certain time and exit the method but not thread.
Note: my objective is this: I have a thread that continuously polls a real-time data stream and processes each item with some method (which might take an unbounded amount of time for certain data points). I want to time out the method, but not the thread, so that I can keep polling the real-time data.
Below is the sample code I'm trying to fix. It works when we start a single thread and the exception is raised, but with more than one thread the signal approach doesn't work.
from threading import Thread
import time
import signal
def random_func(a):
    """Block for *a* seconds, then report success."""
    duration = a
    time.sleep(duration)
    return True
class TimeoutException(Exception):
    """Raised when a watched call exceeds its time budget."""


def timeout_handler(signum, frame):
    """SIGALRM handler: turn the alarm into a TimeoutException."""
    raise TimeoutException


# Install the handler once at import time.
signal.signal(signal.SIGALRM, timeout_handler)
class CheckThreadSignal(Thread):
    """Worker that calls random_func and tries to time it out via SIGALRM.

    NOTE(review): SIGALRM is delivered to the signal handler in the *main*
    thread, so the TimeoutException it raises is never caught by the
    except clause inside this worker's run() -- the alarm approach cannot
    reliably interrupt code running in other threads.
    """

    def __init__(self, thread_id):
        # Identifier used in log output and to select the slow branch.
        self.thread_id = thread_id
        super().__init__()

    def run(self):
        c = 0  # iteration counter
        while True:
            # Arm a 5-second watchdog before each call.
            signal.alarm(5)
            try:
                if self.thread_id == 2 and c == 10:
                    random_func(10)  # deliberately exceeds the alarm
                else:
                    random_func(0.1)
            except TimeoutException as e:
                # Fix: bind the exception as `e` -- the original clause did
                # not, so printing `e` raised NameError.
                print("signal: {}".format(self.thread_id), e)
                continue
            else:
                signal.alarm(0)  # disarm the watchdog on success
                print("running: {}, count: {}".format(self.thread_id, c))
                time.sleep(0.1)
                c += 1
# WARNING: both workers loop forever; this script never exits on its own.
CheckThreadSignal(0).start()
CheckThreadSignal(2).start()
I have a subclass of multiprocessing.Process and I want to terminate it correctly.
class MyProcess(Process):
    """Worker process from the question: drains a queue, then commits a DB session."""
    def __init__(self, target, input_queue, output_queue, database):
        super().__init__()
        self.input_queue = input_queue
        self.output_queue = output_queue
        self.target = target
        self.database = database
        # NOTE(review): the session is created here, i.e. in the *parent*
        # process before fork/spawn -- presumably it should be created
        # inside run(); verify against SQLAlchemy's multiprocessing advice.
        self.db_session = database.create_session()
        # ...
    def run(self):
        # Ignore Ctrl+C in the worker; the parent decides when to stop.
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        while True:
            try:
                # doing some stuff here
                # NOTE(review): placeholder -- the try body needs at least
                # one real statement for this to be valid Python.
            except Empty:
                break
            except Exception as err:
                logger.error(str(err))
        try:
            self.db_session.commit()
        except:
            # NOTE(review): `err` is not bound here (Python deletes the
            # except-clause name when its block ends), so this line would
            # raise NameError -- confirm intent.
            logger.error(str(err))
I want to close self.db_session (which is an SQLAlchemy Session) when the process is terminated. But as the Python documentation says "exit handlers and finally clauses, etc., will not be executed". How can I correctly terminate a process and close the things it uses?
I found a nice solution, by using multiprocessing.Event(). I added an Event object to the constructor of my MyProcess class, and the process loop looks like that now:
def run(self):
    """Process loop: drain work until stop_event is set, then commit the DB session."""
    while True:
        try:
            if self.stop_event.is_set():
                break  # breaks the loop if a stop event is set
            # doing some stuff here
            pass  # placeholder: the try body must contain a statement
        except Empty:
            break
        except Exception as err:
            # Fix: bind `err` -- the original `except Exception:` left it
            # unbound, so the log call raised NameError.
            logger.error(str(err))
    # Loop finished (stop requested or queue drained): flush pending DB work.
    try:
        self.db_session.commit()
    except Exception as err:
        # Fix: narrowed from a bare `except:` that also referenced an
        # unbound `err`; now logs the actual commit failure.
        logger.error(str(err))
Instead of calling terminate() when I need to terminate my processes, I just call stop_event.set(), which will break the loop and close everything gracefully.
The following minimal program reproduces the problem.
import asyncio
import signal
class A:
    """Owns a private asyncio event loop that can be started and stopped."""

    def __init__(self):
        # A dedicated loop, independent of the thread's default loop.
        self._event_loop = asyncio.new_event_loop()

    def stop(self):
        print('stopping event loop')
        self._event_loop.stop()

    def run(self):
        print('starting event loop')
        self._event_loop.run_forever()
        print('event loop has stopped')
if __name__ == '__main__':
    a = A()
    def handle_term(*args):
        a.stop()
    # NOTE(review): a handler installed with signal.signal only runs when
    # the interpreter's main bytecode loop gets control; this is the setup
    # whose failure the question is demonstrating.
    signal.signal(signal.SIGTERM, handle_term)
    a.run()
If you run the program and send a SIGTERM to the process, the print statement in line 16 (stopping event loop) is called but the programm does not terminate and the print statement in line 13 (event loop has stopped) is never called. So it seems that the event loop is never stopped and self._event_loop.run_forever() blocks indefinitely.
Why is this?
Note: A modified version of the program, where a.stop() is not called by a signal handler but by a separate thread with a delay, works as expected. How can it make a difference how a.stop() is called?
Instead of signal.signal() use loop.add_signal_handler():
import asyncio
import signal
import os
class A:
    """Event-loop wrapper whose SIGTERM handler stops the loop cleanly."""

    def __init__(self):
        self.loop = asyncio.new_event_loop()
        # Registering via the loop (rather than signal.signal) makes the
        # callback run inside the loop, so stop() takes effect promptly.
        self.loop.add_signal_handler(signal.SIGTERM, self.stop)

    def run(self, close=True):
        print('starting loop')
        try:
            self.loop.run_forever()
            print('loop stopped')
        finally:
            # Optionally release the loop's resources on the way out.
            if close:
                self.loop.close()

    def stop(self):
        print('stopping')
        self.loop.stop()
if __name__ == '__main__':
    # Show the command needed to terminate this process from another shell.
    print("to stop run:\nkill -TERM {}".format(os.getpid()))
    a = A()
    a.run()
I need to terminate external programs which run from an asyncio Python script with a specific signal, say SIGTERM. My problem is that the programs always receive SIGINT even if I send them the SIGTERM signal.
Here is a test case, source code for a fakeprg used in the test below can be found here.
import asyncio
import traceback
import os
import os.path
import sys
import time
import signal
import shlex
from functools import partial
class ExtProgramRunner:
    """Supervises external child processes under an asyncio loop.

    NOTE(review): this is legacy pre-3.5 generator-coroutine code.
    `asyncio.async(...)` was deprecated for `asyncio.ensure_future` and is
    a SyntaxError on Python 3.7+ (`async` became a keyword). The
    `#asyncio.coroutine` lines below are presumably garbled
    `@asyncio.coroutine` decorators lost in formatting -- verify.
    """
    run = True        # class-level flag: keep relaunching children while True
    processes = []    # class-level list of live child process handles
    def __init__(self):
        pass
    def start(self, loop):
        # Remember the loop and route SIGINT/SIGTERM into self.stop().
        self.current_loop = loop
        self.current_loop.add_signal_handler(signal.SIGINT, lambda: asyncio.async(self.stop('SIGINT')))
        self.current_loop.add_signal_handler(signal.SIGTERM, lambda: asyncio.async(self.stop('SIGTERM')))
        asyncio.async(self.cancel_monitor())
        asyncio.Task(self.run_external_programs())
    #asyncio.coroutine  (presumably a lost @asyncio.coroutine decorator)
    def stop(self, sig):
        # Forward SIGTERM to every child, then cancel all pending tasks.
        print("Got {} signal".format(sig))
        self.run = False
        for process in self.processes:
            print("sending SIGTERM signal to the process with pid {}".format(process.pid))
            process.send_signal(signal.SIGTERM)
        print("Canceling all tasks")
        for task in asyncio.Task.all_tasks():
            task.cancel()
    #asyncio.coroutine  (presumably a lost @asyncio.coroutine decorator)
    def cancel_monitor(self):
        # Sleep in small slices; when this task itself is cancelled,
        # stop the loop so the program can shut down.
        while True:
            try:
                yield from asyncio.sleep(0.05)
            except asyncio.CancelledError:
                break
        print("Stopping loop")
        self.current_loop.stop()
    #asyncio.coroutine  (presumably a lost @asyncio.coroutine decorator)
    def run_external_programs(self):
        os.makedirs("/tmp/files0", exist_ok=True)
        os.makedirs("/tmp/files1", exist_ok=True)
        # schedule tasks for execution
        asyncio.Task(self.run_cmd_forever("/tmp/fakeprg /tmp/files0 1000"))
        asyncio.Task(self.run_cmd_forever("/tmp/fakeprg /tmp/files1 5000"))
    #asyncio.coroutine  (presumably a lost @asyncio.coroutine decorator)
    def run_cmd_forever(self, cmd):
        # Spawn cmd, wait for it to exit, and relaunch while self.run holds.
        args = shlex.split(cmd)
        while self.run:
            process = yield from asyncio.create_subprocess_exec(*args)
            self.processes.append(process)
            exit_code = yield from process.wait()
            # NOTE(review): pops from the list being enumerated; works for a
            # single match but is fragile -- consider a list comprehension.
            for idx, p in enumerate(self.processes):
                if process.pid == p.pid:
                    self.processes.pop(idx)
            print("External program '{}' exited with exit code {}, relauching".format(cmd, exit_code))
def main():
    """Run ExtProgramRunner on the default loop until interrupted."""
    loop = asyncio.get_event_loop()
    try:
        runner = ExtProgramRunner()
        # Kick off the runner once the loop is active.
        loop.call_soon(runner.start, loop)
        # start main event loop
        loop.run_forever()
    except KeyboardInterrupt:
        pass
    except asyncio.CancelledError as err:
        print("asyncio.CancelledError")
    except Exception as err:
        print(err, file=sys.stderr)
        print("====", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
    finally:
        print("Stopping daemon...")
        loop.close()


if __name__ == '__main__':
    main()
The reason for this is: when you start your Python program (the parent) and it starts its /tmp/fakeprg processes (the children), each child is a separate process with its own pid, but they all run in the same foreground process group. Your shell is bound to this group, so when you hit Ctrl-C (SIGINT), Ctrl-Y (SIGTSTP) or Ctrl-\ (SIGQUIT), the signal is sent to every process in the foreground process group.
In your code this happens before the parent can even send the signal to its children through send_signal, so this line sends a signal to an already dead process (and should fail, so IMO that's an issue with asyncio).
To solve that, you can explicitly put your child process into a separate process group, like this:
asyncio.create_subprocess_exec(*args, preexec_fn=os.setpgrp)
I am playing with threading test code derived from here (or here), but I can't figure out what I'm missing in my test code. I'm running the thread from a separate file, if that makes any difference.
The thread starts ok, but when trying to stop it gives "'Thread' object has no attribute 'stopit'" error.
Main test code is this:
#!/usr/bin/env python
import os
import sys
import threading
import time
import test_thread_external as TestThreadExternal
# ---
class test(object):
    """Driver that starts the external thread and then tries to stop it."""

    def __init__(self):
        pass

    def main(self):
        print ("starting thread")
        worker = threading.Thread(target = TestThreadExternal.stoppable, args = ())
        worker.start()
        time.sleep(3)
        print ("stopping thread")
        self.error_flag = False
        try:
            # threading.Thread has no stopit(); this raises AttributeError.
            worker.stopit()
        except Exception as exc:
            self.error_flag = True
            self.err = str(exc)
        if self.error_flag:
            print ("exit with error %s" % (self.err))
        else:
            time.sleep(3)
            print ("exit supposedly with thread stopped") # can be checked with is_alive()
# ---
if __name__ == '__main__':
    try:
        app = test()
        app.main()
    except KeyboardInterrupt:
        print ("exit via Ctrl+C")
    finally:
        # Force interpreter exit even if non-daemon threads are still alive.
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
and the thread itself is this one, stored in file 'test_thread_external.py':
#!/usr/bin/env python
import threading
import time
# ---
class stoppable(threading.Thread):
    """Thread subclass from the question -- this is the buggy version."""
    # NOTE(review): the while-loop below runs inside __init__, so it
    # executes (and blocks) at construction time in the *calling* thread.
    # The loop belongs in run(); kept as-is, it is the code under discussion.
    def __init__(self):
        super(stoppable, self).__init__()
        self._stopper = threading.Event()
        while True:
            print ("thread running")
            if self.stopped():
                break
            time.sleep(1)
        print ("thread on the way to its exit")
    def stopit(self):
        # Ask the loop to finish.
        self._stopper.set()
    def stopped(self):
        # True once stopit() has been called.
        return self._stopper.is_set()
What do I do wrong ?
Small later edit: changed _stop with _stopper in order to avoid "'Event' object is not callable" error in case using join() (not shown here) -- as explained in this answer to a former question.
When you create a thread, target is the function to run in the thread.
You're creating a threading.Thread and passing stoppable (which is a subclass of Thread) as target.
From your description, it seems to me you actually need to create an instance of type stoppable (without passing target). That type should only do initializations in its __init__, and the code which should run in the thread should be in its run method:
class stoppable(threading.Thread):
    """Thread whose run() loop exits once stopit() is called.

    Uses the name _stopper (not _stop): assigning an Event to self._stop
    clobbers threading.Thread's internal _stop and breaks join() -- the
    same collision the question's later edit worked around.
    """

    def __init__(self):
        super(stoppable, self).__init__()
        self._stopper = threading.Event()

    def run(self):
        while True:
            print ("thread running")
            if self.stopped():
                break
            time.sleep(1)
        print ("thread on the way to its exit")

    def stopit(self):
        # Request that the run() loop finish.
        self._stopper.set()

    def stopped(self):
        # True once a stop has been requested.
        return self._stopper.is_set()
...
And:
# Instantiate the Thread subclass directly instead of wrapping it in
# another Thread via target=.
test_th = TestThreadExternal.stoppable()